Browse Source

fix for missing author property

* looks through the full mf2 tree for step 7 of authorship https://github.com/indieweb/authorship/issues/2
* if no author h-card is found, falls back to returning the author URL without other data instead of missing author

closes #95
master
Aaron Parecki 1 year ago
parent
commit
d2b0109d37
No known key found for this signature in database GPG Key ID: 276C2817346D6056
8 changed files with 230 additions and 12 deletions
  1. +40
    -5
      lib/XRay/Formats/Mf2.php
  2. +7
    -7
      lib/XRay/Formats/Mf2Feed.php
  3. +28
    -0
      tests/FeedTest.php
  4. +29
    -0
      tests/HelpersTest.php
  5. +34
    -0
      tests/data/author.example.com/h-feed-author
  6. +34
    -0
      tests/data/author.example.com/h-feed-author-bad
  7. +29
    -0
      tests/data/feed.example.com/h-feed-author-is-bad-feed
  8. +29
    -0
      tests/data/feed.example.com/h-feed-author-is-feed

+ 40
- 5
lib/XRay/Formats/Mf2.php View File

@ -736,6 +736,12 @@ class Mf2 extends Format {
'photo' => null
];
// Start by setting the URL of the author to the author URL if one is present in the item.
// It will be upgraded to a full h-card if additional data can be found.
if(isset($item['properties']['author'][0]) && self::isURL($item['properties']['author'][0])) {
$author['url'] = $item['properties']['author'][0];
}
// Author Discovery
// http://indiewebcamp.com/authorship
@ -779,9 +785,11 @@ class Mf2 extends Format {
$authorPageContents = self::getURL($authorPage, $http);
if($authorPageContents) {
foreach($authorPageContents['items'] as $i) {
if(self::isHCard($i)) {
$allHCards = self::findAllMicroformatsByType($authorPageContents, 'h-card');
$numHCards = count($allHCards);
foreach($allHCards as $i) {
if(self::isHCard($i)) {
// 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
if(array_key_exists('url', $i['properties'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url']))
@ -820,7 +828,6 @@ class Mf2 extends Format {
if(isset($i['properties']['author'])) {
foreach($i['properties']['author'] as $ic) {
if(self::isHCard($ic)) {
if(array_key_exists('url', $ic['properties'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($ic['properties']['url']))
) {
@ -840,7 +847,7 @@ class Mf2 extends Format {
if(isset($mf2['items'][0]['type'][0]) && in_array('h-feed', $mf2['items'][0]['type'])) {
if(isset($mf2['items'][0]['properties']['author'][0])) {
$potentialAuthor = $mf2['items'][0]['properties']['author'][0];
if(is_array($potentialAuthor['type']) && in_array('h-card', $potentialAuthor['type'])) {
if(self::isHCard($potentialAuthor)) {
return self::parseAsHCard($potentialAuthor, $http, $url)['data'];
}
}
@ -886,7 +893,7 @@ class Mf2 extends Format {
}
// Reports whether $string looks like an http(s) URL: it must begin with "http://" or
// "https://", followed by at least one character, a literal ".", and at least one more character.
// NOTE(review): this listing appears to be a rendered diff hunk that shows the old and new
// lines together without +/- markers. The first return is presumably the removed line and
// the second its replacement, which adds an is_string() guard so non-string values are
// rejected before reaching preg_match() — confirm against the actual file.
private static function isURL($string) {
return preg_match('/^https?:\/\/.+\..+$/', $string);
return is_string($string) && preg_match('/^https?:\/\/.+\..+$/', $string);
}
// Given an array of microformats properties and a key name, return the plaintext value
@ -942,4 +949,32 @@ class Mf2 extends Format {
}
return \mf2\Parse($result['body'], $url);
}
/**
 * Collect every microformats object of the given type from a parsed mf2 structure.
 *
 * Items in $mf2['items'] are visited in order. An item whose 'type' list contains $type
 * is collected as-is and is not searched any further. For any other item, the values of
 * each of its properties are checked (one level deep) for embedded objects of that type,
 * and then its 'children' are searched recursively with the same rules.
 *
 * Malformed input is tolerated: a missing or non-array 'items' list yields an empty
 * result, and items without a 'type' list or with non-array property values are
 * treated as non-matching instead of raising warnings or type errors.
 *
 * @param array  $mf2  Parsed mf2 data containing an 'items' list.
 * @param string $type Microformats type to look for, e.g. 'h-card'.
 * @return array List of matching mf2 objects in the order they were encountered.
 */
public static function findAllMicroformatsByType($mf2, $type='h-card') {
  $objects = [];

  // Nothing to walk if the parse result is empty or not shaped like an mf2 document.
  if(!is_array($mf2) || !isset($mf2['items']) || !is_array($mf2['items'])) {
    return $objects;
  }

  foreach($mf2['items'] as $item) {
    if(!is_array($item)) {
      continue;
    }

    // A missing 'type' would make in_array() fail on a null haystack, so default to an empty list.
    $itemTypes = (isset($item['type']) && is_array($item['type'])) ? $item['type'] : [];

    // Type names are strings, so a strict comparison avoids accidental loose matches.
    if(in_array($type, $itemTypes, true)) {
      // A matching item is taken whole; its own properties/children are not searched.
      $objects[] = $item;
      continue;
    }

    // Look for embedded objects of the requested type directly inside property values.
    if(isset($item['properties']) && is_array($item['properties'])) {
      foreach($item['properties'] as $values) {
        if(!is_array($values)) {
          continue;
        }
        foreach($values as $value) {
          if(is_array($value)
            && isset($value['type'])
            && is_array($value['type'])
            && in_array($type, $value['type'], true)
          ) {
            $objects[] = $value;
          }
        }
      }
    }

    // Children are themselves mf2 items, so search them with the same rules.
    if(isset($item['children']) && is_array($item['children'])) {
      $objects = array_merge(
        $objects,
        self::findAllMicroformatsByType(['items' => $item['children']], $type)
      );
    }
  }

  return $objects;
}
}

+ 7
- 7
lib/XRay/Formats/Mf2Feed.php View File

@ -41,25 +41,25 @@ trait Mf2Feed {
foreach($feed['children'] as $item) {
$parsed = false;
if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
$parsed = self::parseAsHEntry($mf2, $item, false, $url);
$parsed = self::parseAsHEntry($mf2, $item, $http, $url);
}
elseif(in_array('h-event', $item['type'])) {
$parsed = self::parseAsHEvent($mf2, $item, false, $url);
$parsed = self::parseAsHEvent($mf2, $item, $http, $url);
}
elseif(in_array('h-review', $item['type'])) {
$parsed = self::parseAsHReview($mf2, $item, false, $url);
$parsed = self::parseAsHReview($mf2, $item, $http, $url);
}
elseif(in_array('h-recipe', $item['type'])) {
$parsed = self::parseAsHRecipe($mf2, $item, false, $url);
$parsed = self::parseAsHRecipe($mf2, $item, $http, $url);
}
elseif(in_array('h-product', $item['type'])) {
$parsed = self::parseAsHProduct($mf2, $item, false, $url);
$parsed = self::parseAsHProduct($mf2, $item, $http, $url);
}
elseif(in_array('h-item', $item['type'])) {
$parsed = self::parseAsHItem($mf2, $item, false, $url);
$parsed = self::parseAsHItem($mf2, $item, $http, $url);
}
elseif(in_array('h-card', $item['type'])) {
$parsed = self::parseAsHCard($item, false, $url);
$parsed = self::parseAsHCard($item, $http, $url);
}
if($parsed) {
$data['items'][] = $parsed['data'];

+ 28
- 0
tests/FeedTest.php View File

@ -475,4 +475,32 @@ class FeedTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('feed', $data->type);
}
/**
 * Feed entries that reference their author only by URL should come back with that
 * URL and the name "Author" for both items of the parsed feed.
 * (Presumably the name is resolved from the h-card on the author page fixture — the
 * parse() helper that serves the fixtures is not visible here.)
 */
public function testAuthorFeedOnHomePage() {
  $expectedAuthorUrl = 'http://author.example.com/h-feed-author';

  $response = $this->parse([
    'url' => 'http://feed.example.com/h-feed-author-is-feed',
    'expect' => 'feed',
  ]);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $decoded = json_decode($json, true);
  $data = $decoded['data'];
  $this->assertEquals('feed', $data['type']);

  // Both entries in the feed must carry the same resolved author.
  foreach([0, 1] as $index) {
    $this->assertEquals($expectedAuthorUrl, $data['items'][$index]['author']['url']);
    $this->assertEquals('Author', $data['items'][$index]['author']['name']);
  }
}
/**
 * Feed entries that reference an author URL should still report that URL as the
 * author's url for both items; only the url is asserted here.
 * (Presumably the author page fixture's h-card, which lacks u-uid, does not qualify
 * as the author — confirm against the authorship logic.)
 */
public function testAuthorFeedOnHomePageInvalid() {
  $expectedAuthorUrl = 'http://author.example.com/h-feed-author-bad';

  $response = $this->parse([
    'url' => 'http://feed.example.com/h-feed-author-is-bad-feed',
    'expect' => 'feed',
  ]);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $decoded = json_decode($json, true);
  $data = $decoded['data'];
  $this->assertEquals('feed', $data['type']);

  // The author URL must survive on every entry even without a usable h-card.
  foreach([0, 1] as $index) {
    $this->assertEquals($expectedAuthorUrl, $data['items'][$index]['author']['url']);
  }
}
}

+ 29
- 0
tests/HelpersTest.php View File

@ -23,6 +23,35 @@ class HelpersTest extends PHPUnit_Framework_TestCase {
$url1 = 'https://example.com/';
$url2 = 'https://example.com';
$result = p3k\XRay\urls_are_equal($url1, $url2);
$this->assertEquals(true, $result);
}
/**
 * findAllMicroformatsByType() should return h-cards found as a property of the feed,
 * as a property of a nested entry, as a child of the feed, and at the top level,
 * in that order.
 */
public function testFindMicroformatsByType() {
  $markup = <<<EOF
<div class="h-feed">
<div class="u-author h-card">
<a href="/1" class="u-url p-name">Author</a>
</div>
<div class="h-entry">
<div class="u-author h-card">
<a href="/2" class="u-url p-name">Author</a>
</div>
</div>
<div class="h-card">
<a href="/3" class="u-url p-name">Author</a>
</div>
</div>
<div class="h-card">
<a href="/4" class="u-url p-name">Author</a>
</div>
EOF;

  $parsed = \Mf2\parse($markup);
  $cards = \p3k\XRay\Formats\Mf2::findAllMicroformatsByType($parsed, 'h-card');

  // Position in this list is the expected position in the result.
  $expectedUrls = ['/1', '/2', '/3', '/4'];
  foreach($expectedUrls as $position => $expectedUrl) {
    $this->assertEquals($expectedUrl, $cards[$position]['properties']['url'][0]);
  }
}
}

+ 34
- 0
tests/data/author.example.com/h-feed-author View File

@ -0,0 +1,34 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Example</title>
</head>
<body>
<div class="h-feed">
<div class="u-author h-card">
<a href="/h-feed-author" class="u-url u-uid">
<img src="/photo.jpg" class="u-photo">
<span class="p-name">Author</span>
</a>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
</div>
</div>
</body>
</html>

+ 34
- 0
tests/data/author.example.com/h-feed-author-bad View File

@ -0,0 +1,34 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Example</title>
</head>
<body>
<div class="h-feed">
<div class="u-author h-card">
<a href="/h-feed-author-bad" class="u-url"> <!-- missing u-uid -->
<img src="/photo.jpg" class="u-photo">
<span class="p-name">Author</span>
</a>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
</div>
</div>
</body>
</html>

+ 29
- 0
tests/data/feed.example.com/h-feed-author-is-bad-feed View File

@ -0,0 +1,29 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Example</title>
</head>
<body>
<div class="h-feed">
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
<a href="http://author.example.com/h-feed-author-bad" class="u-author"></a>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
<a href="http://author.example.com/h-feed-author-bad" class="u-author"></a>
</div>
</div>
</body>
</html>

+ 29
- 0
tests/data/feed.example.com/h-feed-author-is-feed View File

@ -0,0 +1,29 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Example</title>
</head>
<body>
<div class="h-feed">
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
<a href="http://author.example.com/h-feed-author" class="u-author"></a>
</div>
<div class="h-entry">
<p class="p-name e-content">Hello World</p>
<a href="http://author.example.com/h-feed-author" class="u-author"></a>
</div>
</div>
</body>
</html>

Loading…
Cancel
Save