Browse Source

recognize pattern of h-entry + h-card

* a single h-entry and an h-card, where the h-entry has no URL, will result in a permalink page with that h-entry
* multiple h-entries followed by an h-card are treated as a feed
pull/83/head
Aaron Parecki 5 years ago
parent
commit
18dc92966b
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 69 additions and 9 deletions
  1. +25
    -7
      lib/XRay/Formats/Mf2.php
  2. +42
    -0
      tests/FeedTest.php
  3. +1
    -1
      tests/data/source.example.com/rel-alternate-mf2-json
  4. +1
    -1
      tests/data/source.example.com/rel-alternate-mf2-json.json

+ 25
- 7
lib/XRay/Formats/Mf2.php View File

@ -22,8 +22,26 @@ class Mf2 extends Format {
return self::parseAsHFeed($mf2, $http, $url); return self::parseAsHFeed($mf2, $http, $url);
} }
// If there is only one item on the page, just use that
if(count($mf2['items']) == 1) {
// Remove h-breadcrumb since we never use it and it causes problems determining
// whether a page is a feed or permalink
$mf2['items'] = array_values(array_filter($mf2['items'], function($item){
return !in_array('h-breadcrumb', $item['type']);
}));
$items = $mf2['items'];
// If there is more than one item on the page, it may be a feed.
// Remove an h-card if there is one that doesn't match the page URL, then try again.
// (Don't modify the actual tree, but compare on the modified tree)
if(count($items) > 1) {
$tmpmf2 = array_filter($items, function($item) use($url){
return !(in_array('h-card', $item['type']) && isset($item['properties']['url'][0]) && $item['properties']['url'][0] != $url);
});
$items = array_values($tmpmf2);
}
// If there is only one item left on the page, it's a permalink, and just use that
if(count($items) == 1) {
$item = $mf2['items'][0]; $item = $mf2['items'][0];
if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
#Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page"); #Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page");
@ -130,18 +148,18 @@ class Mf2 extends Format {
} }
} }
// If there was more than one h-entry on the page, treat the whole page as a feed
if(count($mf2['items']) > 1) {
if(count(array_filter($mf2['items'], function($item){
// At this point, if there are any h-entrys left on the page, it's probably a feed.
if(count($items) > 0) {
if(count(array_filter($items, function($item){
return in_array('h-entry', $item['type']); return in_array('h-entry', $item['type']);
})) > 1) {
})) > 0) {
#Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page"); #Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page");
return self::parseAsHFeed($mf2, $http, $url); return self::parseAsHFeed($mf2, $http, $url);
} }
} }
// If the first item is an h-feed, parse as a feed // If the first item is an h-feed, parse as a feed
$first = $mf2['items'][0];
$first = $items[0];
if(in_array('h-feed', $first['type'])) { if(in_array('h-feed', $first['type'])) {
#Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed"); #Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
return self::parseAsHFeed($mf2, $http, $url); return self::parseAsHFeed($mf2, $http, $url);

+ 42
- 0
tests/FeedTest.php View File

@ -61,6 +61,48 @@ class FeedTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('Author Name', $data->items[3]->author->name); $this->assertEquals('Author Name', $data->items[3]->author->name);
} }
/**
 * Parses the "list-of-hentrys-with-h-card" fixture without an 'expect'
 * parameter and verifies the result is a feed of four named items, each
 * carrying the author 'Author Name'.
 */
public function testListOfHEntrysWithHCardNoExpect() {
  $url = 'http://feed.example.com/list-of-hentrys-with-h-card';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $result = json_decode($body);
  // json_decode() returns null on invalid JSON; fail here with a readable
  // message instead of a property access on null further down.
  $this->assertNotNull($result, 'Response body is not valid JSON');
  $this->assertEquals('mf2+html', $result->{'source-format'});
  $data = $result->data;

  $this->assertEquals('feed', $data->type);
  $this->assertCount(4, $data->items);
  $this->assertEquals('article', $data->items[0]->{'post-type'});

  $expectedNames = ['One', 'Two', 'Three', 'Four'];
  foreach($expectedNames as $index => $expectedName) {
    $this->assertEquals($expectedName, $data->items[$index]->name, "Unexpected name for item $index");
  }

  // Check that the author h-card was matched up with each h-entry
  foreach($data->items as $index => $item) {
    $this->assertEquals('Author Name', $item->author->name, "Unexpected author for item $index");
  }
}
/**
 * Parses the "short-list-of-hentrys-with-h-card" fixture without an 'expect'
 * parameter and verifies the result is a single entry (not a feed) whose URL
 * is http://feed.example.com/1 and whose author is named 'Author'.
 */
public function testShortListOfHEntrysWithHCardNoExpect() {
  $url = 'http://feed.example.com/short-list-of-hentrys-with-h-card';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $result = json_decode($body);
  // json_decode() returns null on invalid JSON; fail here with a readable
  // message instead of a property access on null further down.
  $this->assertNotNull($result, 'Response body is not valid JSON');
  $this->assertEquals('mf2+html', $result->{'source-format'});
  $data = $result->data;

  // In this case, this looks like a page permalink
  $this->assertEquals('entry', $data->type);
  // This test should find the h-entry rather than the h-card, because the h-card does not contain the page URL
  $this->assertEquals('http://feed.example.com/1', $data->url);
  $this->assertEquals('Author', $data->author->name);
}
public function testShortListOfHEntrysWithHCard() { public function testShortListOfHEntrysWithHCard() {
$url = 'http://feed.example.com/short-list-of-hentrys-with-h-card'; $url = 'http://feed.example.com/short-list-of-hentrys-with-h-card';
$response = $this->parse(['url' => $url, 'expect' => 'feed']); $response = $this->parse(['url' => $url, 'expect' => 'feed']);

+ 1
- 1
tests/data/source.example.com/rel-alternate-mf2-json View File

@ -256,7 +256,7 @@ Connection: keep-alive
#<a href="/tag/indieauth" class="p-category">indieauth</a> #<a href="/tag/indieauth" class="p-category">indieauth</a>
</div> </div>
<div class="metaline pad"> <div class="metaline pad">
<a href="https://aaronparecki.com/2018/07/12/10/indieauth" class="u-url">
<a href="http://source.example.com/rel-alternate-mf2-json" class="u-url">
<time class="dt-published" datetime="2018-07-12T13:02:04-07:00"> <time class="dt-published" datetime="2018-07-12T13:02:04-07:00">
Thu, Jul 12, 2018 1:02pm -07:00 Thu, Jul 12, 2018 1:02pm -07:00
</time> </time>

+ 1
- 1
tests/data/source.example.com/rel-alternate-mf2-json.json View File

@ -18,7 +18,7 @@ Connection: keep-alive
"indieauth" "indieauth"
], ],
"url": [ "url": [
"https://aaronparecki.com/2018/07/12/10/indieauth"
"http://source.example.com/rel-alternate-mf2-json"
], ],
"syndication": [ "syndication": [
"https://twitter.com/aaronpk/status/1017500609631567872" "https://twitter.com/aaronpk/status/1017500609631567872"

Loading…
Cancel
Save