From d2b0109d3707fec57eb6c7a71739cdaa54ae8ec5 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Tue, 21 Apr 2020 15:07:49 -0700 Subject: [PATCH] fix for missing author property * looks through the full mf2 tree for step 7 of authorship https://github.com/indieweb/authorship/issues/2 * if no author h-card is found, falls back to returning the author URL without other data instead of missing author closes #95 --- lib/XRay/Formats/Mf2.php | 45 ++++++++++++++++--- lib/XRay/Formats/Mf2Feed.php | 14 +++--- tests/FeedTest.php | 28 ++++++++++++ tests/HelpersTest.php | 29 ++++++++++++ tests/data/author.example.com/h-feed-author | 34 ++++++++++++++ .../data/author.example.com/h-feed-author-bad | 34 ++++++++++++++ .../h-feed-author-is-bad-feed | 29 ++++++++++++ .../feed.example.com/h-feed-author-is-feed | 29 ++++++++++++ 8 files changed, 230 insertions(+), 12 deletions(-) create mode 100644 tests/data/author.example.com/h-feed-author create mode 100644 tests/data/author.example.com/h-feed-author-bad create mode 100644 tests/data/feed.example.com/h-feed-author-is-bad-feed create mode 100644 tests/data/feed.example.com/h-feed-author-is-feed diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index 7774e0b..6879c80 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -736,6 +736,12 @@ class Mf2 extends Format { 'photo' => null ]; + // Start by setting the URL of the author to the author URL if one is present in the item. + // It will be upgraded to a full h-card if additional data can be found. + if(isset($item['properties']['author'][0]) && self::isURL($item['properties']['author'][0])) { + $author['url'] = $item['properties']['author'][0]; + } + // Author Discovery // http://indiewebcamp.com/authorship @@ -779,9 +785,11 @@ class Mf2 extends Format { $authorPageContents = self::getURL($authorPage, $http); if($authorPageContents) { - foreach($authorPageContents['items'] as $i) { - if(self::isHCard($i)) { + $allHCards = self::findAllMicroformatsByType($authorPageContents, 'h-card'); + $numHCards = count($allHCards); + foreach($allHCards as $i) { + if(self::isHCard($i)) { // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit." if(array_key_exists('url', $i['properties']) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url'])) @@ -820,7 +828,6 @@ class Mf2 extends Format { if(isset($i['properties']['author'])) { foreach($i['properties']['author'] as $ic) { if(self::isHCard($ic)) { - if(array_key_exists('url', $ic['properties']) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($ic['properties']['url'])) ) { @@ -840,7 +847,7 @@ class Mf2 extends Format { if(isset($mf2['items'][0]['type'][0]) && in_array('h-feed', $mf2['items'][0]['type'])) { if(isset($mf2['items'][0]['properties']['author'][0])) { $potentialAuthor = $mf2['items'][0]['properties']['author'][0]; - if(is_array($potentialAuthor['type']) && in_array('h-card', $potentialAuthor['type'])) { + if(self::isHCard($potentialAuthor)) { return self::parseAsHCard($potentialAuthor, $http, $url)['data']; } } @@ -886,7 +893,7 @@ class Mf2 extends Format { } private static function isURL($string) { - return preg_match('/^https?:\/\/.+\..+$/', $string); + return is_string($string) && preg_match('/^https?:\/\/.+\..+$/', $string); } // Given an array of microformats properties and a key name, return the plaintext value @@ -942,4 +949,32 @@ class Mf2 extends Format { } return \mf2\Parse($result['body'], $url); } + + public static function findAllMicroformatsByType($mf2, $type='h-card') { + $objects = []; + + foreach($mf2['items'] as $item) { + if(in_array($type, $item['type'])) { + $objects[] = $item; + } else { + if(isset($item['properties']) && is_array($item['properties'])) { + foreach($item['properties'] as $property=>$values) { + foreach($values as $value) { + if(is_array($value) && isset($value['type']) && is_array($value['type'])) { + if(in_array($type, $value['type'])) { + $objects[] = $value; + } + } + } + } + } + if(isset($item['children']) && is_array($item['children'])) { + $items = $item['children']; + $objects = array_merge($objects, self::findAllMicroformatsByType(['items'=>$items], $type)); + } + } + } + + return $objects; + } } diff --git a/lib/XRay/Formats/Mf2Feed.php b/lib/XRay/Formats/Mf2Feed.php index acf012b..7abae4f 100644 --- a/lib/XRay/Formats/Mf2Feed.php +++ b/lib/XRay/Formats/Mf2Feed.php @@ -41,25 +41,25 @@ trait Mf2Feed { foreach($feed['children'] as $item) { $parsed = false; if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { - $parsed = self::parseAsHEntry($mf2, $item, false, $url); + $parsed = self::parseAsHEntry($mf2, $item, $http, $url); } elseif(in_array('h-event', $item['type'])) { - $parsed = self::parseAsHEvent($mf2, $item, false, $url); + $parsed = self::parseAsHEvent($mf2, $item, $http, $url); } elseif(in_array('h-review', $item['type'])) { - $parsed = self::parseAsHReview($mf2, $item, false, $url); + $parsed = self::parseAsHReview($mf2, $item, $http, $url); } elseif(in_array('h-recipe', $item['type'])) { - $parsed = self::parseAsHRecipe($mf2, $item, false, $url); + $parsed = self::parseAsHRecipe($mf2, $item, $http, $url); } elseif(in_array('h-product', $item['type'])) { - $parsed = self::parseAsHProduct($mf2, $item, false, $url); + $parsed = self::parseAsHProduct($mf2, $item, $http, $url); } elseif(in_array('h-item', $item['type'])) { - $parsed = self::parseAsHItem($mf2, $item, false, $url); + $parsed = self::parseAsHItem($mf2, $item, $http, $url); } elseif(in_array('h-card', $item['type'])) { - $parsed = self::parseAsHCard($item, false, $url); + $parsed = self::parseAsHCard($item, $http, $url); } if($parsed) { $data['items'][] = $parsed['data']; diff --git a/tests/FeedTest.php b/tests/FeedTest.php index d5ddadd..1a69e42 100644 --- a/tests/FeedTest.php +++ b/tests/FeedTest.php @@ -475,4 +475,32 @@ class FeedTest extends PHPUnit_Framework_TestCase { $this->assertEquals('feed', $data->type); } + + public function testAuthorFeedOnHomePage() { + $url = 'http://feed.example.com/h-feed-author-is-feed'; + $response = $this->parse(['url' => $url, 'expect' => 'feed']); + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $parsed = json_decode($body, true); + $data = $parsed['data']; + + $this->assertEquals('feed', $data['type']); + $this->assertEquals('http://author.example.com/h-feed-author', $data['items'][0]['author']['url']); + $this->assertEquals('Author', $data['items'][0]['author']['name']); + $this->assertEquals('http://author.example.com/h-feed-author', $data['items'][1]['author']['url']); + $this->assertEquals('Author', $data['items'][1]['author']['name']); + } + + public function testAuthorFeedOnHomePageInvalid() { + $url = 'http://feed.example.com/h-feed-author-is-bad-feed'; + $response = $this->parse(['url' => $url, 'expect' => 'feed']); + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $parsed = json_decode($body, true); + $data = $parsed['data']; + + $this->assertEquals('feed', $data['type']); + $this->assertEquals('http://author.example.com/h-feed-author-bad', $data['items'][0]['author']['url']); + $this->assertEquals('http://author.example.com/h-feed-author-bad', $data['items'][1]['author']['url']); + } } diff --git a/tests/HelpersTest.php b/tests/HelpersTest.php index 1b724af..0073aae 100644 --- a/tests/HelpersTest.php +++ b/tests/HelpersTest.php @@ -23,6 +23,35 @@ class HelpersTest extends PHPUnit_Framework_TestCase { $url1 = 'https://example.com/'; $url2 = 'https://example.com'; $result = p3k\XRay\urls_are_equal($url1, $url2); + $this->assertEquals(true, $result); + } + + public function testFindMicroformatsByType() { + $html = << +
+ Author +
+
+
+ Author +
+
+
+ Author +
+ +
+ Author +
+EOF; + + $mf2 = \Mf2\parse($html); + $hcards = \p3k\XRay\Formats\Mf2::findAllMicroformatsByType($mf2, 'h-card'); + $this->assertEquals('/1', $hcards[0]['properties']['url'][0]); + $this->assertEquals('/2', $hcards[1]['properties']['url'][0]); + $this->assertEquals('/3', $hcards[2]['properties']['url'][0]); + $this->assertEquals('/4', $hcards[3]['properties']['url'][0]); } } diff --git a/tests/data/author.example.com/h-feed-author b/tests/data/author.example.com/h-feed-author new file mode 100644 index 0000000..0cf2d9a --- /dev/null +++ b/tests/data/author.example.com/h-feed-author @@ -0,0 +1,34 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 + + + + + + Example + + + +
+ + + +
+

Hello World

+
+ +
+

Hello World

+
+ +
+ + + diff --git a/tests/data/author.example.com/h-feed-author-bad b/tests/data/author.example.com/h-feed-author-bad new file mode 100644 index 0000000..008d5d4 --- /dev/null +++ b/tests/data/author.example.com/h-feed-author-bad @@ -0,0 +1,34 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 + + + + + + Example + + + +
+ + + +
+

Hello World

+
+ +
+

Hello World

+
+ +
+ + + diff --git a/tests/data/feed.example.com/h-feed-author-is-bad-feed b/tests/data/feed.example.com/h-feed-author-is-bad-feed new file mode 100644 index 0000000..a670b4c --- /dev/null +++ b/tests/data/feed.example.com/h-feed-author-is-bad-feed @@ -0,0 +1,29 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 + + + + + + Example + + + +
+ +
+

Hello World

+ +
+ +
+

Hello World

+ +
+ +
+ + + diff --git a/tests/data/feed.example.com/h-feed-author-is-feed b/tests/data/feed.example.com/h-feed-author-is-feed new file mode 100644 index 0000000..15c7447 --- /dev/null +++ b/tests/data/feed.example.com/h-feed-author-is-feed @@ -0,0 +1,29 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 + + + + + + Example + + + +
+ +
+

Hello World

+ +
+ +
+

Hello World

+ +
+ +
+ + +