Browse Source

Merge pull request #64 from aaronpk/mf2-dev

Update to php-mf2 0.4.0
pull/72/head v1.4.22
Aaron Parecki 6 years ago
committed by GitHub
parent
commit
adfc8fa5bd
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 115 additions and 42 deletions
  1. +1
    -1
      composer.json
  2. +7
    -7
      composer.lock
  3. +22
    -20
      lib/XRay/Feeds.php
  4. +19
    -10
      lib/XRay/Formats/Mf2.php
  5. +27
    -3
      tests/ParseTest.php
  6. +19
    -0
      tests/data/source.example.com/duplicate-in-reply-to-urls
  7. +19
    -0
      tests/data/source.example.com/duplicate-like-of-urls
  8. +1
    -1
      tests/data/source.example.com/h-entry-rsvp

+ 1
- 1
composer.json View File

@ -5,7 +5,7 @@
"homepage": "https://github.com/aaronpk/XRay", "homepage": "https://github.com/aaronpk/XRay",
"description": "X-Ray returns structured data from any URL", "description": "X-Ray returns structured data from any URL",
"require": { "require": {
"mf2/mf2": "^0.3.2",
"mf2/mf2": ">=0.4.0",
"ezyang/htmlpurifier": "4.10.*", "ezyang/htmlpurifier": "4.10.*",
"indieweb/link-rel-parser": "0.1.*", "indieweb/link-rel-parser": "0.1.*",
"dg/twitter-php": "3.6.*", "dg/twitter-php": "3.6.*",

+ 7
- 7
composer.lock View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "0a104ee89f03db919f34d3a9d387380c",
"content-hash": "60f545028d44eb8e15c499fbe7ecf4d0",
"packages": [ "packages": [
{ {
"name": "cebe/markdown", "name": "cebe/markdown",
@ -261,16 +261,16 @@
}, },
{ {
"name": "mf2/mf2", "name": "mf2/mf2",
"version": "v0.3.2",
"version": "v0.4.0",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/indieweb/php-mf2.git", "url": "https://github.com/indieweb/php-mf2.git",
"reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4"
"reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/indieweb/php-mf2/zipball/dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4",
"reference": "dc0d90d4ee30864bcf37cd3a8fc8db94f9134cc4",
"url": "https://api.github.com/repos/indieweb/php-mf2/zipball/42ef6eb9777bffe654a70cbbc1dbd777a61c1445",
"reference": "42ef6eb9777bffe654a70cbbc1dbd777a61c1445",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -296,7 +296,7 @@
}, },
"notification-url": "https://packagist.org/downloads/", "notification-url": "https://packagist.org/downloads/",
"license": [ "license": [
"CC0"
"CC0-1.0"
], ],
"authors": [ "authors": [
{ {
@ -312,7 +312,7 @@
"parser", "parser",
"semantic" "semantic"
], ],
"time": "2017-05-27T15:27:47+00:00"
"time": "2018-03-13T23:33:15+00:00"
}, },
{ {
"name": "p3k/http", "name": "p3k/http",

+ 22
- 20
lib/XRay/Feeds.php View File

@ -76,26 +76,28 @@ class Feeds {
// Some other document was returned, parse the HTML and look for rel alternates and Microformats // Some other document was returned, parse the HTML and look for rel alternates and Microformats
$mf2 = \mf2\Parse($body, $result['url']); $mf2 = \mf2\Parse($body, $result['url']);
if(isset($mf2['alternates'])) {
foreach($mf2['alternates'] as $alt) {
if(isset($alt['type'])) {
if(strpos($alt['type'], 'application/json') !== false) {
$feeds[] = [
'url' => $alt['url'],
'type' => 'jsonfeed'
];
}
if(strpos($alt['type'], 'application/atom+xml') !== false) {
$feeds[] = [
'url' => $alt['url'],
'type' => 'atom'
];
}
if(strpos($alt['type'], 'application/rss+xml') !== false) {
$feeds[] = [
'url' => $alt['url'],
'type' => 'rss'
];
if(isset($mf2['rel-urls'])) {
foreach($mf2['rel-urls'] as $rel=>$info) {
if(isset($info['rels']) && in_array('alternate', $info['rels'])) {
if(isset($info['type'])) {
if(strpos($info['type'], 'application/json') !== false) {
$feeds[] = [
'url' => $rel,
'type' => 'jsonfeed'
];
}
if(strpos($info['type'], 'application/atom+xml') !== false) {
$feeds[] = [
'url' => $rel,
'type' => 'atom'
];
}
if(strpos($info['type'], 'application/rss+xml') !== false) {
$feeds[] = [
'url' => $rel,
'type' => 'rss'
];
}
} }
} }
} }

+ 19
- 10
lib/XRay/Formats/Mf2.php View File

@ -65,7 +65,7 @@ class Mf2 extends Format {
} }
} }
// Check the list of items on the page to see if one matches the URL of the page,
// Check the list of items on the page to see if one matches the URL of the page,
// and treat as a permalink for that object if so. // and treat as a permalink for that object if so.
foreach($mf2['items'] as $item) { foreach($mf2['items'] as $item) {
if(array_key_exists('url', $item['properties'])) { if(array_key_exists('url', $item['properties'])) {
@ -323,16 +323,20 @@ class Mf2 extends Format {
} }
private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) { private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
$keys = [];
foreach($properties as $p) { foreach($properties as $p) {
if(array_key_exists($p, $item['properties'])) { if(array_key_exists($p, $item['properties'])) {
foreach($item['properties'][$p] as $v) { foreach($item['properties'][$p] as $v) {
if(is_string($v) && self::isURL($v)) { if(is_string($v) && self::isURL($v)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $v; $data[$p][] = $v;
$keys[] = $p;
} }
elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) { elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $u; $data[$p][] = $u;
$keys[] = $p;
// parse the object and put the result in the "refs" object // parse the object and put the result in the "refs" object
$ref = self::parse(['items'=>[$v]], $u, $http); $ref = self::parse(['items'=>[$v]], $u, $http);
if($ref) { if($ref) {
@ -340,7 +344,12 @@ class Mf2 extends Format {
} }
} }
} }
}
}
}
// Remove duplicate values
foreach(array_unique($keys) as $key) {
$data[$key] = array_unique($data[$key]);
} }
} }
@ -394,7 +403,7 @@ class Mf2 extends Format {
$data['name'] = $name; $data['name'] = $name;
} }
} }
}
}
} }
private static function parseAsHEntry($mf2, $item, $http, $url) { private static function parseAsHEntry($mf2, $item, $http, $url) {
@ -712,7 +721,7 @@ class Mf2 extends Format {
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url'])) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url']))
and array_key_exists('uid', $i['properties']) and array_key_exists('uid', $i['properties'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['uid'])) and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['uid']))
) {
) {
return self::parseAsHCard($i, $http, $url, $authorPage)['data']; return self::parseAsHCard($i, $http, $url, $authorPage)['data'];
} }
@ -740,7 +749,7 @@ class Mf2 extends Format {
} }
} }
// Also check the "author" property
// Also check the "author" property
// (for finding the author of an h-feed's children when the author is the p-author property of the h-feed) // (for finding the author of an h-feed's children when the author is the p-author property of the h-feed)
if(isset($i['properties']['author'])) { if(isset($i['properties']['author'])) {
foreach($i['properties']['author'] as $ic) { foreach($i['properties']['author'] as $ic) {
@ -766,16 +775,16 @@ class Mf2 extends Format {
} }
// Returns true as soon as any key of $arr passes is_numeric(), false when none does
// (including when $arr is empty).
private static function hasNumericKeys(array $arr) {
  foreach(array_keys($arr) as $candidate) {
    if(is_numeric($candidate)) {
      return true;
    }
  }

  return false;
}
// Reports whether $mf has the shape this class treats as a microformats object:
// an array with no numeric keys, a non-empty "type" entry and a "properties" entry that is set.
private static function isMicroformat($mf) {
  // Anything that is not an array, or that has numeric keys, is rejected up front.
  if(!is_array($mf) || self::hasNumericKeys($mf)) {
    return false;
  }

  return !empty($mf['type']) && isset($mf['properties']);
}

+ 27
- 3
tests/ParseTest.php View File

@ -215,7 +215,7 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$body = $response->getContent(); $body = $response->getContent();
$this->assertEquals(200, $response->getStatusCode()); $this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals('entry', $data['data']['type']);
$this->assertEquals('entry', $data['data']['type']);
$this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]); $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]);
$this->assertArrayHasKey('http://example.com/100', $data['data']['refs']); $this->assertArrayHasKey('http://example.com/100', $data['data']['refs']);
$this->assertEquals('Example Post', $data['data']['refs']['http://example.com/100']['name']); $this->assertEquals('Example Post', $data['data']['refs']['http://example.com/100']['name']);
@ -289,7 +289,7 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(200, $response->getStatusCode()); $this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals('entry', $data['data']['type']); $this->assertEquals('entry', $data['data']['type']);
$this->assertEquals('I\'ll be there!', $data['data']['name']);
$this->assertEquals('I\'ll be there!', $data['data']['content']['text']);
$this->assertEquals('yes', $data['data']['rsvp']); $this->assertEquals('yes', $data['data']['rsvp']);
} }
@ -454,7 +454,7 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals('review', $data['data']['type']); $this->assertEquals('review', $data['data']['type']);
$this->assertEquals('Not great', $data['data']['summary']);
$this->assertEquals('Not great', $data['data']['name']);
$this->assertEquals('3', $data['data']['rating']); $this->assertEquals('3', $data['data']['rating']);
$this->assertEquals('5', $data['data']['best']); $this->assertEquals('5', $data['data']['best']);
$this->assertEquals('This is the full text of the review', $data['data']['content']['text']); $this->assertEquals('This is the full text of the review', $data['data']['content']['text']);
@ -660,4 +660,28 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertObjectNotHasAttribute('photo', $data->data); $this->assertObjectNotHasAttribute('photo', $data->data);
} }
// The fixture links to http://example.com/100 twice: once as a plain u-in-reply-to
// anchor and once as the u-url of a nested "u-in-reply-to h-cite". The parsed result
// must list that URL only once.
public function testDuplicateReplyURLValues() {
  $url = 'http://source.example.com/duplicate-in-reply-to-urls';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $data = json_decode($body, true);

  // Check the property exists and has exactly one value before indexing into it, so a
  // regression shows up as a failed assertion instead of an undefined-index error.
  $this->assertArrayHasKey('in-reply-to', $data['data']);
  $this->assertCount(1, $data['data']['in-reply-to']);
  $this->assertEquals('http://example.com/100', $data['data']['in-reply-to'][0]);
}
// The fixture links to http://example.com/100 twice: once as a plain u-like-of anchor
// and once as the u-url of a nested "u-like-of h-cite". The parsed result must list
// that URL only once.
public function testDuplicateLikeOfURLValues() {
  $url = 'http://source.example.com/duplicate-like-of-urls';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $data = json_decode($body, true);

  // Check the property exists and has exactly one value before indexing into it, so a
  // regression shows up as a failed assertion instead of an undefined-index error.
  $this->assertArrayHasKey('like-of', $data['data']);
  $this->assertCount(1, $data['data']['like-of']);
  $this->assertEquals('http://example.com/100', $data['data']['like-of'][0]);
}
} }

+ 19
- 0
tests/data/source.example.com/duplicate-in-reply-to-urls View File

@ -0,0 +1,19 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<a href="http://example.com/100" class="u-in-reply-to">in reply to</a>
<div class="u-in-reply-to h-cite">
<a href="http://example.com/100" class="u-url">this post</a>
</div>
<p class="e-content">This page has duplicate in-reply-to values.</p>
<a href="/duplicate-in-reply-to-urls" class="u-url">permalink</a>
</body>
</html>

+ 19
- 0
tests/data/source.example.com/duplicate-like-of-urls View File

@ -0,0 +1,19 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
liked
<a href="http://example.com/100" class="u-like-of">a post</a>
<div class="u-like-of h-cite">
<a href="http://example.com/100" class="u-url">this post</a>
</div>
<a href="/duplicate-like-of-urls" class="u-url">permalink</a>
</body>
</html>

+ 1
- 1
tests/data/source.example.com/h-entry-rsvp View File

@ -10,7 +10,7 @@ Connection: keep-alive
</head> </head>
<body> <body>
<div class="h-entry"> <div class="h-entry">
<data class="p-rsvp" value="yes">I'll be there!</data>
<data class="p-rsvp" value="yes"><span class="p-content">I'll be there!</span></data>
</div> </div>
</body> </body>
</html> </html>

Loading…
Cancel
Save