Browse Source

remove duplicate url values

pull/64/head
Aaron Parecki 6 years ago
parent
commit
4959ec15f2
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 72 additions and 1 deletions
  1. +10
    -1
      lib/XRay/Formats/Mf2.php
  2. +24
    -0
      tests/ParseTest.php
  3. +19
    -0
      tests/data/source.example.com/duplicate-in-reply-to-urls
  4. +19
    -0
      tests/data/source.example.com/duplicate-like-of-urls

+ 10
- 1
lib/XRay/Formats/Mf2.php View File

@ -323,16 +323,20 @@ class Mf2 extends Format {
} }
private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) { private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
$keys = [];
foreach($properties as $p) { foreach($properties as $p) {
if(array_key_exists($p, $item['properties'])) { if(array_key_exists($p, $item['properties'])) {
foreach($item['properties'][$p] as $v) { foreach($item['properties'][$p] as $v) {
if(is_string($v) && self::isURL($v)) { if(is_string($v) && self::isURL($v)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $v; $data[$p][] = $v;
$keys[] = $p;
} }
elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) { elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url')) && self::isURL($u)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $u; $data[$p][] = $u;
$keys[] = $p;
// parse the object and put the result in the "refs" object // parse the object and put the result in the "refs" object
$ref = self::parse(['items'=>[$v]], $u, $http); $ref = self::parse(['items'=>[$v]], $u, $http);
if($ref) { if($ref) {
@ -340,7 +344,12 @@ class Mf2 extends Format {
} }
} }
} }
}
}
}
// Remove duplicate values
foreach(array_unique($keys) as $key) {
$data[$key] = array_unique($data[$key]);
} }
} }

+ 24
- 0
tests/ParseTest.php View File

@ -660,4 +660,28 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertObjectNotHasAttribute('photo', $data->data); $this->assertObjectNotHasAttribute('photo', $data->data);
} }
/**
 * The fixture page lists http://example.com/100 as an in-reply-to target
 * twice: once as a plain u-in-reply-to link and once as the u-url of a
 * nested h-cite. The parsed result must contain that URL exactly once.
 */
public function testDuplicateReplyURLValues() {
  $response = $this->parse(['url' => 'http://source.example.com/duplicate-in-reply-to-urls']);
  $json = $response->getContent();

  $this->assertEquals(200, $response->getStatusCode());

  // Decode to associative arrays so the hyphenated key can be indexed directly.
  $parsed = json_decode($json, true);

  $this->assertEquals('http://example.com/100', $parsed['data']['in-reply-to'][0]);
  $this->assertEquals(1, count($parsed['data']['in-reply-to']));
}
/**
 * The fixture page lists http://example.com/100 as a like-of target twice:
 * once as a plain u-like-of link and once as the u-url of a nested h-cite.
 * The parsed result must contain that URL exactly once.
 */
public function testDuplicateLikeOfURLValues() {
  $response = $this->parse(['url' => 'http://source.example.com/duplicate-like-of-urls']);
  $json = $response->getContent();

  $this->assertEquals(200, $response->getStatusCode());

  // Decode to associative arrays so the hyphenated key can be indexed directly.
  $parsed = json_decode($json, true);

  $this->assertEquals('http://example.com/100', $parsed['data']['like-of'][0]);
  $this->assertEquals(1, count($parsed['data']['like-of']));
}
} }

+ 19
- 0
tests/data/source.example.com/duplicate-in-reply-to-urls View File

@ -0,0 +1,19 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<a href="http://example.com/100" class="u-in-reply-to">in reply to</a>
<div class="u-in-reply-to h-cite">
<a href="http://example.com/100" class="u-url">this post</a>
</div>
<p class="e-content">This page has duplicate in-reply-to values.</p>
<a href="/duplicate-in-reply-to-urls" class="u-url">permalink</a>
</body>
</html>

+ 19
- 0
tests/data/source.example.com/duplicate-like-of-urls View File

@ -0,0 +1,19 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
liked
<a href="http://example.com/100" class="u-like-of">a post</a>
<div class="u-like-of h-cite">
<a href="http://example.com/100" class="u-url">this post</a>
</div>
<a href="/duplicate-like-of-urls" class="u-url">permalink</a>
</body>
</html>

Loading…
Cancel
Save