diff --git a/composer.json b/composer.json index b74aae7..380061b 100644 --- a/composer.json +++ b/composer.json @@ -5,7 +5,7 @@ "homepage": "https://github.com/aaronpk/XRay", "description": "X-Ray returns structured data from any URL", "require": { - "mf2/mf2": "^0.3.2", + "mf2/mf2": ">=0.4.0", "ezyang/htmlpurifier": "4.10.*", "indieweb/link-rel-parser": "0.1.*", "dg/twitter-php": "3.6.*", diff --git a/composer.lock b/composer.lock index c4cc131..873b62a 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "0a104ee89f03db919f34d3a9d387380c", + "content-hash": "60f545028d44eb8e15c499fbe7ecf4d0", "packages": [ { "name": "cebe/markdown", @@ -261,16 +261,16 @@ }, { "name": "mf2/mf2", - "version": "v0.3.2", + "version": "v0.4.0", "source": { "type": "git", "url": "https://github.com/indieweb/php-mf2.git", - "reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4" + "reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4", - "reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4", + "url": "https://api.github.com/repos/indieweb/php-mf2/zipball/42ef6eb9777bffe654a70cbbc1dbd777a61c1445", + "reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445", "shasum": "" }, "require": { @@ -296,7 +296,7 @@ }, "notification-url": "https://packagist.org/downloads/", "license": [ - "CC0" + "CC0-1.0" ], "authors": [ { @@ -312,7 +312,7 @@ "parser", "semantic" ], - "time": "2017-05-27T15:27:47+00:00" + "time": "2018-03-13T23:33:15+00:00" }, { "name": "p3k/http", diff --git a/lib/XRay/Feeds.php b/lib/XRay/Feeds.php index d7fad22..5d5aa54 100644 --- a/lib/XRay/Feeds.php +++ b/lib/XRay/Feeds.php @@ -76,26 +76,28 @@ class Feeds { // Some other document was returned, parse the HTML and look for rel alternates and Microformats $mf2 = \mf2\Parse($body, $result['url']); - if(isset($mf2['alternates'])) { - foreach($mf2['alternates'] as $alt) { - if(isset($alt['type'])) { - if(strpos($alt['type'], 'application/json') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'jsonfeed' - ]; - } - if(strpos($alt['type'], 'application/atom+xml') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'atom' - ]; - } - if(strpos($alt['type'], 'application/rss+xml') !== false) { - $feeds[] = [ - 'url' => $alt['url'], - 'type' => 'rss' - ]; + if(isset($mf2['rel-urls'])) { + foreach($mf2['rel-urls'] as $rel=>$info) { + if(isset($info['rels']) && in_array('alternate', $info['rels'])) { + if(isset($info['type'])) { + if(strpos($info['type'], 'application/json') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'jsonfeed' + ]; + } + if(strpos($info['type'], 'application/atom+xml') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'atom' + ]; + } + if(strpos($info['type'], 'application/rss+xml') !== false) { + $feeds[] = [ + 'url' => $rel, + 'type' => 'rss' + ]; + } } } } diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index e0d3e4c..82c0875 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -65,7 +65,7 @@ class Mf2 extends Format { } } - // Check the list of items on the page to see if one matches the URL of the page, + // Check the list of items on the page to see if one matches the URL of the page, // and treat as a permalink for that object if so. foreach($mf2['items'] as $item) { if(array_key_exists('url', $item['properties'])) { @@ -323,16 +323,20 @@ class Mf2 extends Format { } private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) { + $keys = []; + foreach($properties as $p) { if(array_key_exists($p, $item['properties'])) { foreach($item['properties'][$p] as $v) { if(is_string($v) && self::isURL($v)) { if(!array_key_exists($p, $data)) $data[$p] = []; $data[$p][] = $v; + $keys[] = $p; } elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) { if(!array_key_exists($p, $data)) $data[$p] = []; $data[$p][] = $u; + $keys[] = $p; // parse the object and put the result in the "refs" object $ref = self::parse(['items'=>[$v]], $u, $http); if($ref) { @@ -340,7 +344,12 @@ class Mf2 extends Format { } } } - } + } + } + + // Remove duplicate values + foreach(array_unique($keys) as $key) { + $data[$key] = array_unique($data[$key]); } } @@ -394,7 +403,7 @@ class Mf2 extends Format { $data['name'] = $name; } } - } + } } private static function parseAsHEntry($mf2, $item, $http, $url) { @@ -712,7 +721,7 @@ class Mf2 extends Format { and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url'])) and array_key_exists('uid', $i['properties']) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['uid'])) - ) { + ) { return self::parseAsHCard($i, $http, $url, $authorPage)['data']; } @@ -740,7 +749,7 @@ class Mf2 extends Format { } } - // Also check the "author" property + // Also check the "author" property // (for finding the author of an h-feed's children when the author is the p-author property of the h-feed) if(isset($i['properties']['author'])) { foreach($i['properties']['author'] as $ic) { @@ -766,16 +775,16 @@ class Mf2 extends Format { } private static function hasNumericKeys(array $arr) { - foreach($arr as $key=>$val) - if (is_numeric($key)) + foreach($arr as $key=>$val) + if (is_numeric($key)) return true; return false; } private static function isMicroformat($mf) { - return is_array($mf) - and !self::hasNumericKeys($mf) - and !empty($mf['type']) + return is_array($mf) + and !self::hasNumericKeys($mf) + and !empty($mf['type']) and isset($mf['properties']); } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 2b0666f..d2b6b0f 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -215,7 +215,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $body = $response->getContent(); $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body, true); - $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('entry', $data['data']['type']); $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]); $this->assertArrayHasKey('http://example.com/100', $data['data']['refs']); $this->assertEquals('Example Post', $data['data']['refs']['http://example.com/100']['name']); @@ -289,7 +289,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body, true); $this->assertEquals('entry', $data['data']['type']); - $this->assertEquals('I\'ll be there!', $data['data']['name']); + $this->assertEquals('I\'ll be there!', $data['data']['content']['text']); $this->assertEquals('yes', $data['data']['rsvp']); } @@ -454,7 +454,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $data = json_decode($body, true); $this->assertEquals('review', $data['data']['type']); - $this->assertEquals('Not great', $data['data']['summary']); + $this->assertEquals('Not great', $data['data']['name']); $this->assertEquals('3', $data['data']['rating']); $this->assertEquals('5', $data['data']['best']); $this->assertEquals('This is the full text of the review', $data['data']['content']['text']); @@ -660,4 +660,28 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertObjectNotHasAttribute('photo', $data->data); } + public function testDuplicateReplyURLValues() { + $url = 'http://source.example.com/duplicate-in-reply-to-urls'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]); + $this->assertEquals(1, count($data['data']['in-reply-to'])); + } + + public function testDuplicateLikeOfURLValues() { + $url = 'http://source.example.com/duplicate-like-of-urls'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('http://example.com/100', $data['data']['like-of'][0]); + $this->assertEquals(1, count($data['data']['like-of'])); + } + } diff --git a/tests/data/source.example.com/duplicate-in-reply-to-urls b/tests/data/source.example.com/duplicate-in-reply-to-urls new file mode 100644 index 0000000..74353a1 --- /dev/null +++ b/tests/data/source.example.com/duplicate-in-reply-to-urls @@ -0,0 +1,19 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + in reply to +
+ this post +
+

This page has duplicate in-reply-to values.

+ permalink + + diff --git a/tests/data/source.example.com/duplicate-like-of-urls b/tests/data/source.example.com/duplicate-like-of-urls new file mode 100644 index 0000000..527530a --- /dev/null +++ b/tests/data/source.example.com/duplicate-like-of-urls @@ -0,0 +1,19 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + liked + a post +
+ this post +
+ permalink + + diff --git a/tests/data/source.example.com/h-entry-rsvp b/tests/data/source.example.com/h-entry-rsvp index edbe224..425d1d7 100644 --- a/tests/data/source.example.com/h-entry-rsvp +++ b/tests/data/source.example.com/h-entry-rsvp @@ -10,7 +10,7 @@ Connection: keep-alive
- I'll be there! + I'll be there!