Browse Source

use html5 parser and update php-mf2

pull/72/head
Aaron Parecki 6 years ago
parent
commit
25b6f85c14
No known key found for this signature in database GPG Key ID: 276C2817346D6056
5 changed files with 109 additions and 11 deletions
  1. +3
    -2
      composer.json
  2. +76
    -8
      composer.lock
  3. +1
    -1
      controllers/Parse.php
  4. +12
    -0
      tests/ParseTest.php
  5. +17
    -0
      tests/data/source.example.com/html5-tags

+ 3
- 2
composer.json View File

@ -5,7 +5,7 @@
"homepage": "https://github.com/aaronpk/XRay", "homepage": "https://github.com/aaronpk/XRay",
"description": "X-Ray returns structured data from any URL", "description": "X-Ray returns structured data from any URL",
"require": { "require": {
"mf2/mf2": ">=0.4.0",
"mf2/mf2": "0.4.4-alpha",
"ezyang/htmlpurifier": "4.10.*", "ezyang/htmlpurifier": "4.10.*",
"indieweb/link-rel-parser": "0.1.*", "indieweb/link-rel-parser": "0.1.*",
"dg/twitter-php": "3.6.*", "dg/twitter-php": "3.6.*",
@ -13,7 +13,8 @@
"p3k/http": ">=0.1.7", "p3k/http": ">=0.1.7",
"cebe/markdown": "1.1.*", "cebe/markdown": "1.1.*",
"p3k/picofeed": ">=0.1.38", "p3k/picofeed": ">=0.1.38",
"facebook/graph-sdk": "^5.5"
"facebook/graph-sdk": "^5.5",
"masterminds/html5": "^2.3"
}, },
"autoload": { "autoload": {
"psr-4": { "psr-4": {

+ 76
- 8
composer.lock View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "60f545028d44eb8e15c499fbe7ecf4d0",
"content-hash": "2c97abbbb6d2a94d0b72828c1edc41a3",
"packages": [ "packages": [
{ {
"name": "cebe/markdown", "name": "cebe/markdown",
@ -259,18 +259,83 @@
], ],
"time": "2017-01-11T17:14:49+00:00" "time": "2017-01-11T17:14:49+00:00"
}, },
{
"name": "masterminds/html5",
"version": "2.3.0",
"source": {
"type": "git",
"url": "https://github.com/Masterminds/html5-php.git",
"reference": "2c37c6c520b995b761674de3be8455a381679067"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/Masterminds/html5-php/zipball/2c37c6c520b995b761674de3be8455a381679067",
"reference": "2c37c6c520b995b761674de3be8455a381679067",
"shasum": ""
},
"require": {
"ext-libxml": "*",
"php": ">=5.3.0"
},
"require-dev": {
"phpunit/phpunit": "4.*",
"sami/sami": "~2.0",
"satooshi/php-coveralls": "1.0.*"
},
"type": "library",
"extra": {
"branch-alias": {
"dev-master": "2.2-dev"
}
},
"autoload": {
"psr-4": {
"Masterminds\\": "src"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Matt Butcher",
"email": "technosophos@gmail.com"
},
{
"name": "Asmir Mustafic",
"email": "goetas@gmail.com"
},
{
"name": "Matt Farina",
"email": "matt@mattfarina.com"
}
],
"description": "An HTML5 parser and serializer.",
"homepage": "http://masterminds.github.io/html5-php",
"keywords": [
"HTML5",
"dom",
"html",
"parser",
"querypath",
"serializer",
"xml"
],
"time": "2017-09-04T12:26:28+00:00"
},
{ {
"name": "mf2/mf2", "name": "mf2/mf2",
"version": "v0.4.1",
"version": "v0.4.4-alpha",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/indieweb/php-mf2.git", "url": "https://github.com/indieweb/php-mf2.git",
"reference": "e044330550a16d1a512248a5b9f663736538da44"
"reference": "e8da04f93d548d26287a8980eca4216639cbc61d"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/indieweb/php-mf2/zipball/e044330550a16d1a512248a5b9f663736538da44",
"reference": "e044330550a16d1a512248a5b9f663736538da44",
"url": "https://api.github.com/repos/indieweb/php-mf2/zipball/e8da04f93d548d26287a8980eca4216639cbc61d",
"reference": "e8da04f93d548d26287a8980eca4216639cbc61d",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -282,7 +347,8 @@
"phpunit/phpunit": "4.8.*" "phpunit/phpunit": "4.8.*"
}, },
"suggest": { "suggest": {
"barnabywalters/mf-cleaner": "To more easily handle the canonical data php-mf2 gives you"
"barnabywalters/mf-cleaner": "To more easily handle the canonical data php-mf2 gives you",
"masterminds/html5": "Alternative HTML parser for PHP, for better HTML5 support."
}, },
"bin": [ "bin": [
"bin/fetch-mf2", "bin/fetch-mf2",
@ -312,7 +378,7 @@
"parser", "parser",
"semantic" "semantic"
], ],
"time": "2018-03-15T16:51:15+00:00"
"time": "2018-03-29T20:48:28+00:00"
}, },
{ {
"name": "p3k/http", "name": "p3k/http",
@ -2089,7 +2155,9 @@
], ],
"aliases": [], "aliases": [],
"minimum-stability": "stable", "minimum-stability": "stable",
"stability-flags": [],
"stability-flags": {
"mf2/mf2": 15
},
"prefer-stable": false, "prefer-stable": false,
"prefer-lowest": false, "prefer-lowest": false,
"platform": [], "platform": [],

+ 1
- 1
controllers/Parse.php View File

@ -10,7 +10,7 @@ class Parse {
public $mc; public $mc;
private $_cacheTime = 120; private $_cacheTime = 120;
private $_pretty = false; private $_pretty = false;
private static $_version = '1.4.21';
private static $_version = '1.4.24';
public static function useragent() { public static function useragent() {
return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 XRay/'.self::$_version.' ('.\Config::$base.')'; return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 XRay/'.self::$_version.' ('.\Config::$base.')';

+ 12
- 0
tests/ParseTest.php View File

@ -684,4 +684,16 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(1, count($data['data']['like-of'])); $this->assertEquals(1, count($data['data']['like-of']));
} }
/**
 * Parses the "html5-tags" test URL and checks that the h-entry's name and
 * the plain-text content are both extracted.
 *
 * NOTE(review): presumably backed by the fixture at
 * tests/data/source.example.com/html5-tags, which uses an HTML5 <article>
 * element for the e-content property — confirm against the test HTTP stub.
 */
public function testHTML5Markup() {
  $result = $this->parse(['url' => 'http://source.example.com/html5-tags']);
  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());

  // Decode to associative arrays so the properties can be read by key.
  $parsed = json_decode($json, true);
  $entry = $parsed['data'];

  $this->assertEquals('Hello World', $entry['name']);
  $this->assertEquals('The content of the blog post', $entry['content']['text']);
}
} }

+ 17
- 0
tests/data/source.example.com/html5-tags View File

@ -0,0 +1,17 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="h-entry">
<p class="p-name">Hello World
<article class="e-content">The content of the blog post</article>
</div>
</body>
</html>

Loading…
Cancel
Save