Browse Source

normalize URLs when comparing

Treats `https://example.com` and `https://example.com/` as equivalent when comparing URLs. Closes #33
pull/49/head
Aaron Parecki 7 years ago
parent
commit
85b8a35212
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 29 additions and 14 deletions
  1. +8
    -9
      lib/XRay/Formats/Mf2.php
  2. +10
    -0
      lib/helpers.php
  3. +6
    -0
      tests/HelpersTest.php
  4. +5
    -5
      tests/data/feed.example.com/h-feed-with-child-author

+ 8
- 9
lib/XRay/Formats/Mf2.php View File

@ -98,9 +98,8 @@ class Mf2 extends Format {
if(isset($mf2['rels']['author'])) { if(isset($mf2['rels']['author'])) {
foreach($mf2['items'] as $card) { foreach($mf2['items'] as $card) {
if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) { if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) {
$urls = $card['properties']['url'];
$urls = array_map('\p3k\XRay\normalize_url', $urls);
if(count(array_intersect($urls, $mf2['rels']['author'])) > 0) {
$urls = \p3k\XRay\normalize_urls($card['properties']['url']);
if(count(array_intersect($urls, \p3k\XRay\normalize_urls($mf2['rels']['author']))) > 0) {
// There is an author h-card on this page // There is an author h-card on this page
// Now look for the first h-* object other than an h-card and use that as the object // Now look for the first h-* object other than an h-card and use that as the object
foreach($mf2['items'] as $item) { foreach($mf2['items'] as $item) {
@ -557,7 +556,7 @@ class Mf2 extends Format {
foreach($item['properties']['url'] as $url) { foreach($item['properties']['url'] as $url) {
if(self::isURL($url)) { if(self::isURL($url)) {
$url = \p3k\XRay\normalize_url($url); $url = \p3k\XRay\normalize_url($url);
if($url == $authorURL) {
if($url == \p3k\XRay\normalize_url($authorURL)) {
$data['url'] = $url; $data['url'] = $url;
$found = true; $found = true;
} }
@ -644,9 +643,9 @@ class Mf2 extends Format {
// 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit." // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
if(array_key_exists('url', $i['properties']) if(array_key_exists('url', $i['properties'])
and in_array($authorPage, $i['properties']['url'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url']))
and array_key_exists('uid', $i['properties']) and array_key_exists('uid', $i['properties'])
and in_array($authorPage, $i['properties']['uid'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['uid']))
) { ) {
return self::parseAsHCard($i, $http, $authorPage)['data']; return self::parseAsHCard($i, $http, $authorPage)['data'];
} }
@ -655,7 +654,7 @@ class Mf2 extends Format {
$relMeLinks = (isset($authorPageContents['rels']) && isset($authorPageContents['rels']['me'])) ? $authorPageContents['rels']['me'] : []; $relMeLinks = (isset($authorPageContents['rels']) && isset($authorPageContents['rels']['me'])) ? $authorPageContents['rels']['me'] : [];
if(count($relMeLinks) > 0 if(count($relMeLinks) > 0
and array_key_exists('url', $i['properties']) and array_key_exists('url', $i['properties'])
and count(array_intersect($i['properties']['url'], $relMeLinks)) > 0
and count(array_intersect(\p3k\XRay\normalize_urls($i['properties']['url']), \p3k\XRay\normalize_urls($relMeLinks))) > 0
) { ) {
return self::parseAsHCard($i, $http, $authorPage)['data']; return self::parseAsHCard($i, $http, $authorPage)['data'];
} }
@ -669,7 +668,7 @@ class Mf2 extends Format {
if(self::isHCard($i)) { if(self::isHCard($i)) {
if(array_key_exists('url', $i['properties']) if(array_key_exists('url', $i['properties'])
and in_array($authorPage, $i['properties']['url'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($i['properties']['url']))
) { ) {
return self::parseAsHCard($i, $http)['data']; return self::parseAsHCard($i, $http)['data'];
} }
@ -682,7 +681,7 @@ class Mf2 extends Format {
if(self::isHCard($ic)) { if(self::isHCard($ic)) {
if(array_key_exists('url', $ic['properties']) if(array_key_exists('url', $ic['properties'])
and in_array($authorPage, $ic['properties']['url'])
and in_array(\p3k\XRay\normalize_url($authorPage), \p3k\XRay\normalize_urls($ic['properties']['url']))
) { ) {
return self::parseAsHCard($ic, $http)['data']; return self::parseAsHCard($ic, $http)['data'];
} }

+ 10
- 0
lib/helpers.php View File

@ -15,6 +15,16 @@ function normalize_url($url) {
return build_url($parts); return build_url($parts);
} }
/**
 * Normalize every URL in a list.
 *
 * Each element is passed through normalize_url(); array keys are preserved
 * because the mapping is done with array_map() over a single array.
 *
 * @param array $urls URLs to normalize
 * @return array the normalized URLs, in the same order and with the same keys
 */
function normalize_urls($urls) {
  $normalizeOne = function($url) {
    return \p3k\XRay\normalize_url($url);
  };
  return array_map($normalizeOne, $urls);
}
/**
 * Check whether two URLs are equivalent after normalization.
 *
 * Both arguments are run through normalize_url() and the results are then
 * compared, so inputs that differ only in ways normalize_url() smooths over
 * (for example `https://example.com` vs `https://example.com/`) are
 * reported as equal.
 *
 * @param string $url1 first URL
 * @param string $url2 second URL
 * @return bool true when both URLs normalize to the same string
 */
function urls_are_equal($url1, $url2) {
  // Strict comparison on purpose: with `==` PHP compares numeric-looking
  // strings numerically (e.g. '1e3' == '1000'), which could make two
  // different inputs look equal.
  return normalize_url($url1) === normalize_url($url2);
}
function build_url($parsed_url) { function build_url($parsed_url) {
$scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : ''; $scheme = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
$host = isset($parsed_url['host']) ? $parsed_url['host'] : ''; $host = isset($parsed_url['host']) ? $parsed_url['host'] : '';

+ 6
- 0
tests/HelpersTest.php View File

@ -19,4 +19,10 @@ class HelpersTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('https://example.com/', $result); $this->assertEquals('https://example.com/', $result);
} }
/**
 * URLs that differ only by a trailing slash on the root path must be
 * treated as equal by urls_are_equal().
 */
public function testURLEquality() {
  $url1 = 'https://example.com/';
  $url2 = 'https://example.com';
  $result = p3k\XRay\urls_are_equal($url1, $url2);
  // Without an assertion the test can never fail and PHPUnit flags it as risky.
  $this->assertTrue($result);
}
} }

+ 5
- 5
tests/data/feed.example.com/h-feed-with-child-author View File

@ -14,23 +14,23 @@ Connection: keep-alive
<ul> <ul>
<li class="h-entry"> <li class="h-entry">
<a href="/1" class="u-url p-name">One</a> <a href="/1" class="u-url p-name">One</a>
<a href="/author" class="u-author"></a>
<a href="http://author.example.com/" class="u-author"></a>
</li> </li>
<li class="h-entry"> <li class="h-entry">
<a href="/2" class="u-url p-name">Two</a> <a href="/2" class="u-url p-name">Two</a>
<a href="/author" class="u-author"></a>
<a href="http://author.example.com/" class="u-author"></a>
</li> </li>
<li class="h-entry"> <li class="h-entry">
<a href="/3" class="u-url p-name">Three</a> <a href="/3" class="u-url p-name">Three</a>
<a href="/author" class="u-author"></a>
<a href="http://author.example.com/" class="u-author"></a>
</li> </li>
<li class="h-entry"> <li class="h-entry">
<a href="/4" class="u-url p-name">Four</a> <a href="/4" class="u-url p-name">Four</a>
<a href="/author" class="u-author"></a>
<a href="http://author.example.com/" class="u-author"></a>
</li> </li>
</ul> </ul>
<a href="/author" class="u-author h-card">Author Name</a>
<a href="http://author.example.com" class="u-author h-card">Author Name</a>
</div> </div>
</body> </body>

Loading…
Cancel
Save