From 25b6f85c140b628738a4c464a9c7e2e8f3422eb6 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 29 Mar 2018 14:01:10 -0700 Subject: [PATCH] use html5 parser and update php-mf2 --- composer.json | 5 +- composer.lock | 84 +++++++++++++++++++++--- controllers/Parse.php | 2 +- tests/ParseTest.php | 12 ++++ tests/data/source.example.com/html5-tags | 17 +++++ 5 files changed, 109 insertions(+), 11 deletions(-) create mode 100644 tests/data/source.example.com/html5-tags diff --git a/composer.json b/composer.json index 380061b..1d7ecc5 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "homepage": "https://github.com/aaronpk/XRay", "description": "X-Ray returns structured data from any URL", "require": { - "mf2/mf2": ">=0.4.0", + "mf2/mf2": "0.4.4-alpha", "ezyang/htmlpurifier": "4.10.*", "indieweb/link-rel-parser": "0.1.*", "dg/twitter-php": "3.6.*", @@ -13,7 +13,8 @@ "p3k/http": ">=0.1.7", "cebe/markdown": "1.1.*", "p3k/picofeed": ">=0.1.38", - "facebook/graph-sdk": "^5.5" + "facebook/graph-sdk": "^5.5", + "masterminds/html5": "^2.3" }, "autoload": { "psr-4": { diff --git a/composer.lock b/composer.lock index 04fd565..256b192 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "60f545028d44eb8e15c499fbe7ecf4d0", + "content-hash": "2c97abbbb6d2a94d0b72828c1edc41a3", "packages": [ { "name": "cebe/markdown", @@ -259,18 +259,83 @@ ], "time": "2017-01-11T17:14:49+00:00" }, + { + "name": "masterminds/html5", + "version": "2.3.0", + "source": { + "type": "git", + "url": "https://github.com/Masterminds/html5-php.git", + "reference": "2c37c6c520b995b761674de3be8455a381679067" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/Masterminds/html5-php/zipball/2c37c6c520b995b761674de3be8455a381679067", + "reference": "2c37c6c520b995b761674de3be8455a381679067", + "shasum": "" + }, + "require": { + "ext-libxml": "*", + "php": ">=5.3.0" + }, + "require-dev": { + "phpunit/phpunit": "4.*", + "sami/sami": "~2.0", + "satooshi/php-coveralls": "1.0.*" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "2.2-dev" + } + }, + "autoload": { + "psr-4": { + "Masterminds\\": "src" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Matt Butcher", + "email": "technosophos@gmail.com" + }, + { + "name": "Asmir Mustafic", + "email": "goetas@gmail.com" + }, + { + "name": "Matt Farina", + "email": "matt@mattfarina.com" + } + ], + "description": "An HTML5 parser and serializer.", + "homepage": "http://masterminds.github.io/html5-php", + "keywords": [ + "HTML5", + "dom", + "html", + "parser", + "querypath", + "serializer", + "xml" + ], + "time": "2017-09-04T12:26:28+00:00" + }, { "name": "mf2/mf2", - "version": "v0.4.1", + "version": "v0.4.4-alpha", "source": { "type": "git", "url": "https://github.com/indieweb/php-mf2.git", - "reference": "e044330550a16d1a512248a5b9f663736538da44" + "reference": "e8da04f93d548d26287a8980eca4216639cbc61d" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/e044330550a16d1a512248a5b9f663736538da44", - "reference": "e044330550a16d1a512248a5b9f663736538da44", + "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/e8da04f93d548d26287a8980eca4216639cbc61d", + "reference": "e8da04f93d548d26287a8980eca4216639cbc61d", "shasum": "" }, "require": { @@ -282,7 +347,8 @@ "phpunit/phpunit": "4.8.*" }, "suggest": { - "barnabywalters/mf-cleaner": "To more easily handle the canonical data php-mf2 gives you" + "barnabywalters/mf-cleaner": "To more easily handle the canonical data php-mf2 gives you", + "masterminds/html5": "Alternative HTML parser for PHP, for better HTML5 support." }, "bin": [ "bin/fetch-mf2", @@ -312,7 +378,7 @@ "parser", "semantic" ], - "time": "2018-03-15T16:51:15+00:00" + "time": "2018-03-29T20:48:28+00:00" }, { "name": "p3k/http", @@ -2089,7 +2155,9 @@ ], "aliases": [], "minimum-stability": "stable", - "stability-flags": [], + "stability-flags": { + "mf2/mf2": 15 + }, "prefer-stable": false, "prefer-lowest": false, "platform": [], diff --git a/controllers/Parse.php b/controllers/Parse.php index 5274fed..dcd9e91 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -10,7 +10,7 @@ class Parse { public $mc; private $_cacheTime = 120; private $_pretty = false; - private static $_version = '1.4.21'; + private static $_version = '1.4.24'; public static function useragent() { return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 XRay/'.self::$_version.' ('.\Config::$base.')'; diff --git a/tests/ParseTest.php b/tests/ParseTest.php index d2b6b0f..65906c9 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -684,4 +684,16 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals(1, count($data['data']['like-of'])); } + public function testHTML5Markup() { + $url = 'http://source.example.com/html5-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('Hello World', $data['data']['name']); + $this->assertEquals('The content of the blog post', $data['data']['content']['text']); + } + } diff --git a/tests/data/source.example.com/html5-tags b/tests/data/source.example.com/html5-tags new file mode 100644 index 0000000..8028696 --- /dev/null +++ b/tests/data/source.example.com/html5-tags @@ -0,0 +1,17 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +
+

Hello World +

The content of the blog post
+
+ +