From 162d2f5ef8348b2e91f8a2d2225b7131e7047979 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Wed, 2 Mar 2016 16:15:44 -0800 Subject: [PATCH] add tests for feeds, catch case when a permalink has other h-entrys --- lib/Formats/Mf2.php | 83 +++++++++++------- tests/FeedTest.php | 86 +++++++++++++++++++ tests/ParseTest.php | 11 +++ .../h-card-with-child-h-entrys | 2 +- .../feed.example.com/h-card-with-child-h-feed | 2 +- .../short-list-of-hentrys-with-h-card | 1 + .../multiple-h-entry-on-permalink | 21 +++++ .../data/source.example.com/person-tag-is-url | 1 + tests/data/source.example.com/reply-is-url | 1 + 9 files changed, 177 insertions(+), 31 deletions(-) create mode 100644 tests/FeedTest.php create mode 100644 tests/data/source.example.com/multiple-h-entry-on-permalink diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index f115b60..84e453a 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -6,56 +6,81 @@ use HTMLPurifier, HTMLPurifier_Config; class Mf2 { public static function parse($mf2, $url, $http) { + if(count($mf2['items']) == 0) + return false; - // TODO: Check if the list of items is a bunch of h-entrys and return as a feed + // Check if the list of items is a bunch of h-entrys and return as a feed + // Unless this page's URL matches one of the entries, then treat it as a permalink + $hentrys = 0; + $lastSeenEntry = false; + foreach($mf2['items'] as $item) { + if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { + if(array_key_exists('url', $item['properties'])) { + $urls = $item['properties']['url']; + $urls = array_map('\normalize_url', $urls); + if(in_array($url, $urls)) { + return self::parseAsHEntry($mf2, $item, $http, $url); + } + $lastSeenEntry = $item; + } + $hentrys++; + } + } + // If there was more than one h-entry on the page, treat the whole page as a feed + if($hentrys > 1) { + return self::parseAsHFeed($mf2, $http); + } + + // If the first item is an h-feed, parse as a feed + $first = $mf2['items'][0]; + if(in_array('h-feed', $first['type'])) { + return self::parseAsHFeed($mf2, $http); + } - if($item = $mf2['items'][0]) { - // If the first item is a feed, the page is a feed - if(in_array('h-feed', $item['type'])) { - return self::parseAsHFeed($mf2, $http); + // Check each top-level h-card, and if there is one that matches this URL, the page is an h-card + foreach($mf2['items'] as $item) { + if(in_array('h-card', $item['type']) + and array_key_exists('url', $item['properties']) + ) { + $urls = $item['properties']['url']; + $urls = array_map('\normalize_url', $urls); + if(in_array($url, $urls)) { + // TODO: check for children h-entrys (like tantek.com), or sibling h-entries (like aaronparecki.com) + // and return the result as a feed instead + return self::parseAsHCard($item, $http, $url); + } } + } - // Check each top-level h-card, and if there is one that matches this URL, the page is an h-card - foreach($mf2['items'] as $i) { - if(in_array('h-card', $i['type']) - and array_key_exists('url', $i['properties']) - ) { - $urls = $i['properties']['url']; - $urls = array_map('\normalize_url', $urls); - if(in_array($url, $urls)) { - // TODO: check for children h-entrys (like tantek.com), or sibling h-entries (like aaronparecki.com) - // and return the result as a feed instead - return self::parseAsHCard($i, $http, $url); - } + // If there was only one h-entry, but the URL for it is not the same as this page, then treat as a feed + if($hentrys == 1) { + if($lastSeenEntry) { + $urls = $lastSeenEntry['properties']['url']; + $urls = array_map('\normalize_url', $urls); + if(count($urls) && !in_array($url, $urls)) { + return self::parseAsHFeed($mf2, $http); } } } + // Fallback case, but hopefully we have found something before this point foreach($mf2['items'] as $item) { // Otherwise check for an h-entry if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { - return self::parseAsHEntry($mf2, $http); + return self::parseAsHEntry($mf2, $item, $http); } } return false; } - private static function parseAsHEntry($mf2, $http) { + private static function parseAsHEntry($mf2, $item, $http) { $data = [ 'type' => 'entry' ]; $refs = []; - // Find the first h-entry - foreach($mf2['items'] as $i) { - if(in_array('h-entry', $i['type']) || in_array('h-cite', $i['type'])) { - $item = $i; - continue; - } - } - // Single plaintext values $properties = ['url','published','summary','rsvp']; foreach($properties as $p) { @@ -166,12 +191,12 @@ class Mf2 { 'url' => null, 'photo' => null ], - 'items' => [], 'todo' => 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1' ]; return [ - 'data' => $data + 'data' => $data, + 'entries' => [] ]; } diff --git a/tests/FeedTest.php b/tests/FeedTest.php new file mode 100644 index 0000000..3f52a35 --- /dev/null +++ b/tests/FeedTest.php @@ -0,0 +1,86 @@ +client = new Parse(); + $this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/'); + } + + private function parse($params) { + $request = new Request($params); + $response = new Response(); + return $this->client->parse($request, $response); + } + + public function testListOfHEntrys() { + $url = 'http://feed.example.com/list-of-hentrys'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('feed', $data->data->type); + } + + public function testListOfHEntrysWithHCard() { + $url = 'http://feed.example.com/list-of-hentrys-with-h-card'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('feed', $data->data->type); + } + + public function testShortListOfHEntrysWithHCard() { + $url = 'http://feed.example.com/short-list-of-hentrys-with-h-card'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('feed', $data->data->type); + } + + public function testTopLevelHFeed() { + $url = 'http://feed.example.com/top-level-h-feed'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('feed', $data->data->type); + } + + public function testHCardWithChildHEntrys() { + $url = 'http://feed.example.com/h-card-with-child-h-entrys'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('card', $data->data->type); + } + + public function testHCardWithChildHFeed() { + $url = 'http://feed.example.com/h-card-with-child-h-feed'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('card', $data->data->type); + } + +} \ No newline at end of file diff --git a/tests/ParseTest.php b/tests/ParseTest.php index f9cc794..a97eaea 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -202,4 +202,15 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('yes', $data['data']['rsvp']); } + public function testMultipleHEntryOnPermalink() { + $url = 'http://source.example.com/multiple-h-entry-on-permalink'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('Primary Post', $data['data']['name']); + } + } diff --git a/tests/data/feed.example.com/h-card-with-child-h-entrys b/tests/data/feed.example.com/h-card-with-child-h-entrys index 94977c3..7e7b523 100644 --- a/tests/data/feed.example.com/h-card-with-child-h-entrys +++ b/tests/data/feed.example.com/h-card-with-child-h-entrys @@ -11,7 +11,7 @@ Connection: keep-alive
- Author Name + Author Name