Browse Source

deduplicate categories, and strip leading hashtags

pull/39/head
Aaron Parecki 7 years ago
parent
commit
5f5392a7b8
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 72 additions and 3 deletions
  1. +13
    -3
      lib/Formats/Mf2.php
  2. +22
    -0
      tests/ParseTest.php
  3. +18
    -0
      tests/data/source.example.com/h-entry-duplicate-categories
  4. +19
    -0
      tests/data/source.example.com/h-entry-strip-hashtag-from-categories

+ 13
- 3
lib/Formats/Mf2.php View File

@ -195,18 +195,21 @@ class Mf2 {
foreach($item['properties'][$p] as $v) { foreach($item['properties'][$p] as $v) {
if(is_string($v)) { if(is_string($v)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $v;
if(!in_array($v, $data[$p]))
$data[$p][] = $v;
} elseif(self::isMicroformat($v)) { } elseif(self::isMicroformat($v)) {
if(($u=self::getPlaintext($v, 'url')) && self::isURL($u)) { if(($u=self::getPlaintext($v, 'url')) && self::isURL($u)) {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $u;
if(!in_array($u, $data[$p]))
$data[$p][] = $u;
$ref = self::parse(['items'=>[$v]], $u, $http); $ref = self::parse(['items'=>[$v]], $u, $http);
if($ref) { if($ref) {
$refs[$u] = $ref['data']; $refs[$u] = $ref['data'];
} }
} else { } else {
if(!array_key_exists($p, $data)) $data[$p] = []; if(!array_key_exists($p, $data)) $data[$p] = [];
$data[$p][] = $v['value'];
if(!in_array($v['value'], $data[$p]))
$data[$p][] = $v['value'];
} }
} }
} }
@ -285,6 +288,13 @@ class Mf2 {
self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data); self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data);
// These properties are always returned as arrays and may contain plaintext content // These properties are always returned as arrays and may contain plaintext content
// First strip leading hashtags from category values if present
if(array_key_exists('category', $item['properties'])) {
foreach($item['properties']['category'] as $i=>$c) {
if(is_string($c))
$item['properties']['category'][$i] = ltrim($c, '#');
}
}
self::collectArrayValues(['category','invitee'], $item, $data, $refs, $http); self::collectArrayValues(['category','invitee'], $item, $data, $refs, $http);
// These properties are always returned as arrays and always URLs // These properties are always returned as arrays and always URLs

+ 22
- 0
tests/ParseTest.php View File

@ -153,6 +153,28 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertObjectNotHasAttribute('content', $data->data); $this->assertObjectNotHasAttribute('content', $data->data);
} }
/**
 * Parses the h-entry-duplicate-categories fixture, in which the same
 * "indieweb" p-category appears twice, and checks that the parsed
 * result lists that category only once.
 */
public function testEntryWithDuplicateCategories() {
  $response = $this->parse([
    'url' => 'http://source.example.com/h-entry-duplicate-categories',
  ]);
  $json = $response->getContent();

  // The request itself must succeed before the payload is inspected.
  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $this->assertEquals(['indieweb'], $parsed->data->category);
}
/**
 * Parses the h-entry-strip-hashtag-from-categories fixture, which marks up
 * "indieweb" once without a hashtag and once as "#indieweb", plus "#xray".
 *
 * After leading "#" characters are stripped and duplicates removed, the
 * result must contain exactly the two values "indieweb" and "xray".
 * Order is deliberately not asserted.
 */
public function testEntryStripHashtagWithDuplicateCategories() {
  $url = 'http://source.example.com/h-entry-strip-hashtag-from-categories';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $data = json_decode($body);

  $categories = $data->data->category;
  $this->assertContains('indieweb', $categories);
  $this->assertContains('xray', $categories);
  // assertCount reports the actual element count on failure, unlike
  // comparing a bare count() result with assertEquals.
  $this->assertCount(2, $categories);
}
public function testNoHEntryMarkup() { public function testNoHEntryMarkup() {
$url = 'http://source.example.com/no-h-entry'; $url = 'http://source.example.com/no-h-entry';
$response = $this->parse(['url' => $url]); $response = $this->parse(['url' => $url]);

+ 18
- 0
tests/data/source.example.com/h-entry-duplicate-categories View File

@ -0,0 +1,18 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">This page is an h-entry and has some duplicate categories #<span class="p-category">indieweb</span>.</p>
<ul>
<li>#<span class="p-category">indieweb</span></li>
</ul>
</body>
</html>

+ 19
- 0
tests/data/source.example.com/h-entry-strip-hashtag-from-categories View File

@ -0,0 +1,19 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">This page is an h-entry and has some duplicate categories #<span class="p-category">indieweb</span>.</p>
<ul>
<li class="p-category">#indieweb</li>
<li class="p-category">#xray</li>
</ul>
</body>
</html>

Loading…
Cancel
Save