From 3eb78339d6b011478473d2bedc5dba4c06615fa3 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 8 Mar 2018 06:06:52 -0800 Subject: [PATCH 1/7] update to experimental php-mf2 parser --- composer.json | 2 +- composer.lock | 18 ++++++++++-------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/composer.json b/composer.json index b74aae7..74cfc86 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "homepage": "https://github.com/aaronpk/XRay", "description": "X-Ray returns structured data from any URL", "require": { - "mf2/mf2": "^0.3.2", + "mf2/mf2": "dev-master#5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", "ezyang/htmlpurifier": "4.10.*", "indieweb/link-rel-parser": "0.1.*", "dg/twitter-php": "3.6.*", diff --git a/composer.lock b/composer.lock index c4cc131..f275304 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "0a104ee89f03db919f34d3a9d387380c", + "content-hash": "b24013f210dafc33089cefc589f43a11", "packages": [ { "name": "cebe/markdown", @@ -261,16 +261,16 @@ }, { "name": "mf2/mf2", - "version": "v0.3.2", + "version": "dev-master", "source": { "type": "git", "url": "https://github.com/indieweb/php-mf2.git", - "reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4" + "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4", - "reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4", + "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", + "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", "shasum": "" }, "require": { @@ -296,7 +296,7 @@ }, "notification-url": "https://packagist.org/downloads/", "license": [ - "CC0" + "CC0-1.0" ], "authors": [ { @@ -312,7 +312,7 @@ "parser", "semantic" ], - "time": "2017-05-27T15:27:47+00:00" + "time": "2018-03-08T13:43:49+00:00" }, { "name": "p3k/http", @@ -2089,7 +2089,9 @@ ], "aliases": [], "minimum-stability": "stable", - "stability-flags": [], + "stability-flags": { + "mf2/mf2": 20 + }, "prefer-stable": false, "prefer-lowest": false, "platform": [], From 8026279cba5b75e94282f8950fb590bfb0e5959c Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 8 Mar 2018 06:31:17 -0800 Subject: [PATCH 2/7] fix tests for new mf2 parser main difference is the deprecated rel handling --- lib/XRay/Feeds.php | 42 +++++++++++----------- tests/ParseTest.php | 8 ++--- tests/data/source.example.com/h-entry-rsvp | 2 +- 3 files changed, 27 insertions(+), 25 deletions(-) diff --git a/lib/XRay/Feeds.php b/lib/XRay/Feeds.php index d7fad22..5d5aa54 100644 --- a/lib/XRay/Feeds.php +++ b/lib/XRay/Feeds.php @@ -76,26 +76,28 @@ class Feeds { // Some other document was returned, parse the HTML and look for rel alternates and Microformats $mf2 = \mf2\Parse($body, $result['url']); - if(isset($mf2['alternates'])) { - foreach($mf2['alternates'] as $alt) { - if(isset($alt['type'])) { - if(strpos($alt['type'], 'application/json') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'jsonfeed' - ]; - } - if(strpos($alt['type'], 'application/atom+xml') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'atom' - ]; - } - if(strpos($alt['type'], 'application/rss+xml') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'rss' - ]; + if(isset($mf2['rel-urls'])) { + foreach($mf2['rel-urls'] as $rel=>$info) { + if(isset($info['rels']) && in_array('alternate', $info['rels'])) { + if(isset($info['type'])) { + if(strpos($info['type'], 'application/json') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'jsonfeed' + ]; + } + if(strpos($info['type'], 'application/atom+xml') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'atom' + ]; + } + if(strpos($info['type'], 'application/rss+xml') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'rss' + ]; + } } } } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 2b0666f..24bda5c 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -215,7 +215,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $body = $response->getContent(); $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body, true); - $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('entry', $data['data']['type']); $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]); $this->assertArrayHasKey('http://example.com/100', $data['data']['refs']); $this->assertEquals('Example Post', $data['data']['refs']['http://example.com/100']['name']); @@ -289,7 +289,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body, true); $this->assertEquals('entry', $data['data']['type']); - $this->assertEquals('I\'ll be there!', $data['data']['name']); + $this->assertEquals('I\'ll be there!', $data['data']['content']['text']); $this->assertEquals('yes', $data['data']['rsvp']); } @@ -537,7 +537,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('45.518716', $venue['latitude']); $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); - $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); + $this->assertEquals('2017-06-07T17:14:40-0700', $data['data']['published']); $this->assertArrayNotHasKey('name', $data['data']); } @@ -554,7 +554,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $venue['url']); $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); - $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); + $this->assertEquals('2017-06-07T17:14:40-0700', $data['data']['published']); $this->assertArrayNotHasKey('name', $data['data']); } diff --git a/tests/data/source.example.com/h-entry-rsvp b/tests/data/source.example.com/h-entry-rsvp index edbe224..425d1d7 100644 --- a/tests/data/source.example.com/h-entry-rsvp +++ b/tests/data/source.example.com/h-entry-rsvp @@ -10,7 +10,7 @@ Connection: keep-alive
- I'll be there! + I'll be there!
From 391b515a5db6c51a1ce248c2dc8474b248d2f120 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 8 Mar 2018 06:36:32 -0800 Subject: [PATCH 3/7] use 0.4.0-alpha release of php-mf2 on aaronpk fork --- composer.json | 11 +++++++++-- composer.lock | 11 +++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/composer.json b/composer.json index 74cfc86..e4500e3 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "homepage": "https://github.com/aaronpk/XRay", "description": "X-Ray returns structured data from any URL", "require": { - "mf2/mf2": "dev-master#5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", + "mf2/mf2": "0.4.0-alpha", "ezyang/htmlpurifier": "4.10.*", "indieweb/link-rel-parser": "0.1.*", "dg/twitter-php": "3.6.*", @@ -38,5 +38,12 @@ "controllers/Feeds.php", "controllers/Certbot.php" ] - } + }, + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/aaronpk/php-mf2.git", + "no-api": true + } + ] } diff --git a/composer.lock b/composer.lock index f275304..1ccd6f5 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "b24013f210dafc33089cefc589f43a11", + "content-hash": "ebd195e5bf9ff15a29795d27641cce90", "packages": [ { "name": "cebe/markdown", @@ -261,15 +261,15 @@ }, { "name": "mf2/mf2", - "version": "dev-master", + "version": "v0.4.0-alpha", "source": { "type": "git", - "url": "https://github.com/indieweb/php-mf2.git", + "url": "https://github.com/aaronpk/php-mf2.git", "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", + "url": "https://api.github.com/repos/aaronpk/php-mf2/zipball/5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", "shasum": "" }, @@ -294,7 +294,6 @@ "Mf2/Parser.php" ] }, - "notification-url": "https://packagist.org/downloads/", "license": [ "CC0-1.0" ], @@ -2090,7 +2089,7 @@ "aliases": [], "minimum-stability": "stable", "stability-flags": { - "mf2/mf2": 20 + "mf2/mf2": 15 }, "prefer-stable": false, "prefer-lowest": false, From 4959ec15f2bc32d2e6890365e0660a546ee829aa Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Sun, 11 Mar 2018 10:29:27 -0700 Subject: [PATCH 4/7] remove duplicate url values --- lib/XRay/Formats/Mf2.php | 11 ++++++++- tests/ParseTest.php | 24 +++++++++++++++++++ .../duplicate-in-reply-to-urls | 19 +++++++++++++++ .../source.example.com/duplicate-like-of-urls | 19 +++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/data/source.example.com/duplicate-in-reply-to-urls create mode 100644 tests/data/source.example.com/duplicate-like-of-urls diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index e0d3e4c..8694e58 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -323,16 +323,20 @@ class Mf2 extends Format { } private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) { + $keys = []; + foreach($properties as $p) { if(array_key_exists($p, $item['properties'])) { foreach($item['properties'][$p] as $v) { if(is_string($v) && self::isURL($v)) { if(!array_key_exists($p, $data)) $data[$p] = []; $data[$p][] = $v; + $keys[] = $p; } elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) { if(!array_key_exists($p, $data)) $data[$p] = []; $data[$p][] = $u; + $keys[] = $p; // parse the object and put the result in the "refs" object $ref = self::parse(['items'=>[$v]], $u, $http); if($ref) { @@ -340,7 +344,12 @@ class Mf2 extends Format { } } } - } + } + } + + // Remove duplicate values + foreach(array_unique($keys) as $key) { + $data[$key] = array_unique($data[$key]); } } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 24bda5c..a479ad3 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -660,4 +660,28 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertObjectNotHasAttribute('photo', $data->data); } + public function testDuplicateReplyURLValues() { + $url = 'http://source.example.com/duplicate-in-reply-to-urls'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]); + $this->assertEquals(1, count($data['data']['in-reply-to'])); + } + + public function testDuplicateLikeOfURLValues() { + $url = 'http://source.example.com/duplicate-like-of-urls'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('http://example.com/100', $data['data']['like-of'][0]); + $this->assertEquals(1, count($data['data']['like-of'])); + } + } diff --git a/tests/data/source.example.com/duplicate-in-reply-to-urls b/tests/data/source.example.com/duplicate-in-reply-to-urls new file mode 100644 index 0000000..74353a1 --- /dev/null +++ b/tests/data/source.example.com/duplicate-in-reply-to-urls @@ -0,0 +1,19 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + in reply to +
+ this post +
+

This page has duplicate in-reply-to values.

+ permalink + + diff --git a/tests/data/source.example.com/duplicate-like-of-urls b/tests/data/source.example.com/duplicate-like-of-urls new file mode 100644 index 0000000..527530a --- /dev/null +++ b/tests/data/source.example.com/duplicate-like-of-urls @@ -0,0 +1,19 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + liked + a post +
+ this post +
+ permalink + + From 85c3a179346419e5ced5260fbe7946e76312c51f Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Sun, 11 Mar 2018 10:29:32 -0700 Subject: [PATCH 5/7] whitespace cleanup --- lib/XRay/Formats/Mf2.php | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index 8694e58..82c0875 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -65,7 +65,7 @@ class Mf2 extends Format { } } - // Check the list of items on the page to see if one matches the URL of the page, + // Check the list of items on the page to see if one matches the URL of the page, // and treat as a permalink for that object if so. foreach($mf2['items'] as $item) { if(array_key_exists('url', $item['properties'])) { @@ -403,7 +403,7 @@ class Mf2 extends Format { $data['name'] = $name; } } - } + } } private static function parseAsHEntry($mf2, $item, $http, $url) { @@ -721,7 +721,7 @@ class Mf2 extends Format { and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url'])) and array_key_exists('uid', $i['properties']) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['uid'])) - ) { + ) { return self::parseAsHCard($i, $http, $url, $authorPage)['data']; } @@ -749,7 +749,7 @@ class Mf2 extends Format { } } - // Also check the "author" property + // Also check the "author" property // (for finding the author of an h-feed's children when the author is the p-author property of the h-feed) if(isset($i['properties']['author'])) { foreach($i['properties']['author'] as $ic) { @@ -775,16 +775,16 @@ class Mf2 extends Format { } private static function hasNumericKeys(array $arr) { - foreach($arr as $key=>$val) - if (is_numeric($key)) + foreach($arr as $key=>$val) + if (is_numeric($key)) return true; return false; } private static function isMicroformat($mf) { - return is_array($mf) - and !self::hasNumericKeys($mf) - and !empty($mf['type']) + return is_array($mf) + and !self::hasNumericKeys($mf) + and !empty($mf['type']) and isset($mf['properties']); } From 8b199e5385a612efcd00a825ce03bd9c9460ea05 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Tue, 13 Mar 2018 16:41:43 -0700 Subject: [PATCH 6/7] switch back to main mf2 parser --- composer.json | 11 ++--------- composer.lock | 19 +++++++++---------- 2 files changed, 11 insertions(+), 19 deletions(-) diff --git a/composer.json b/composer.json index e4500e3..380061b 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "homepage": "https://github.com/aaronpk/XRay", "description": "X-Ray returns structured data from any URL", "require": { - "mf2/mf2": "0.4.0-alpha", + "mf2/mf2": ">=0.4.0", "ezyang/htmlpurifier": "4.10.*", "indieweb/link-rel-parser": "0.1.*", "dg/twitter-php": "3.6.*", @@ -38,12 +38,5 @@ "controllers/Feeds.php", "controllers/Certbot.php" ] - }, - "repositories": [ - { - "type": "vcs", - "url": "https://github.com/aaronpk/php-mf2.git", - "no-api": true - } - ] + } } diff --git a/composer.lock b/composer.lock index 1ccd6f5..873b62a 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "ebd195e5bf9ff15a29795d27641cce90", + "content-hash": "60f545028d44eb8e15c499fbe7ecf4d0", "packages": [ { "name": "cebe/markdown", @@ -261,16 +261,16 @@ }, { "name": "mf2/mf2", - "version": "v0.4.0-alpha", + "version": "v0.4.0", "source": { "type": "git", - "url": "https://github.com/aaronpk/php-mf2.git", - "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb" + "url": "https://github.com/indieweb/php-mf2.git", + "reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/aaronpk/php-mf2/zipball/5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", - "reference": "5cbfdd4a1075b5113f2a1f4a1d6b8d7c86b93acb", + "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/42ef6eb9777bffe654a70cbbc1dbd777a61c1445", + "reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445", "shasum": "" }, "require": { @@ -294,6 +294,7 @@ "Mf2/Parser.php" ] }, + "notification-url": "https://packagist.org/downloads/", "license": [ "CC0-1.0" ], @@ -311,7 +312,7 @@ "parser", "semantic" ], - "time": "2018-03-08T13:43:49+00:00" + "time": "2018-03-13T23:33:15+00:00" }, { "name": "p3k/http", @@ -2088,9 +2089,7 @@ ], "aliases": [], "minimum-stability": "stable", - "stability-flags": { - "mf2/mf2": 15 - }, + "stability-flags": [], "prefer-stable": false, "prefer-lowest": false, "platform": [], From dc0557ddb9b217926644d009da037e7c591194f1 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Tue, 13 Mar 2018 16:49:21 -0700 Subject: [PATCH 7/7] update tests for fixed mf2 parsing --- tests/ParseTest.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ParseTest.php b/tests/ParseTest.php index a479ad3..d2b6b0f 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -454,7 +454,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $data = json_decode($body, true); $this->assertEquals('review', $data['data']['type']); - $this->assertEquals('Not great', $data['data']['summary']); + $this->assertEquals('Not great', $data['data']['name']); $this->assertEquals('3', $data['data']['rating']); $this->assertEquals('5', $data['data']['best']); $this->assertEquals('This is the full text of the review', $data['data']['content']['text']); @@ -537,7 +537,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('45.518716', $venue['latitude']); $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); - $this->assertEquals('2017-06-07T17:14:40-0700', $data['data']['published']); + $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); $this->assertArrayNotHasKey('name', $data['data']); } @@ -554,7 +554,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $venue['url']); $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); - $this->assertEquals('2017-06-07T17:14:40-0700', $data['data']['published']); + $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); $this->assertArrayNotHasKey('name', $data['data']); }