Browse Source

normalize relative URLs in JSONFeed items

closes #77
pull/83/head
Aaron Parecki 2 years ago
parent
commit
9163341af2
No known key found for this signature in database GPG Key ID: 276C2817346D6056
7 changed files with 106 additions and 14 deletions
  1. +7
    -1
      lib/XRay/Formats/Format.php
  2. +7
    -5
      lib/XRay/Formats/JSONFeed.php
  3. +26
    -6
      tests/FeedTest.php
  4. +22
    -0
      tests/SanitizeTest.php
  5. +15
    -2
      tests/data/feed.example.com/jsonfeed
  6. +14
    -0
      tests/data/sanitize.example/photo-in-content-relative
  7. +15
    -0
      tests/data/sanitize.example/photo-relative

+ 7
- 1
lib/XRay/Formats/Format.php View File

@ -34,7 +34,7 @@ abstract class Format implements iFormat {
return [$doc, $xpath];
}
protected static function sanitizeHTML($html, $allowImg=true) {
protected static function sanitizeHTML($html, $allowImg=true, $baseURL=false) {
$allowed = [
'a',
'abbr',
@ -68,6 +68,12 @@ abstract class Format implements iFormat {
$config = HTMLPurifier_Config::createDefault();
$config->set('Cache.DefinitionImpl', null);
$config->set('HTML.AllowedElements', $allowed);
if($baseURL) {
$config->set('URI.MakeAbsolute', true);
$config->set('URI.Base', $baseURL);
}
$def = $config->getHTMLDefinition(true);
$def->addElement(
'time',

+ 7
- 5
lib/XRay/Formats/JSONFeed.php View File

@ -23,14 +23,14 @@ class JSONFeed extends Format {
$result['data']['type'] = 'feed';
foreach($feed['items'] as $item) {
$result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed);
$result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed, $url);
}
}
return $result;
}
private static function _hEntryFromFeedItem($item, $feed) {
private static function _hEntryFromFeedItem($item, $feed, $feedurl) {
$entry = [
'type' => 'entry',
'author' => [
@ -64,14 +64,16 @@ class JSONFeed extends Format {
$entry['name'] = trim($item['title']);
}
$baseURL = isset($entry['url']) ? $entry['url'] : $feedurl;
if(isset($item['content_html']) && isset($item['content_text'])) {
$entry['content'] = [
'html' => self::sanitizeHTML($item['content_html']),
'html' => self::sanitizeHTML($item['content_html'], true, $baseURL),
'text' => trim($item['content_text'])
];
} elseif(isset($item['content_html'])) {
$entry['content'] = [
'html' => self::sanitizeHTML($item['content_html']),
'html' => self::sanitizeHTML($item['content_html'], true, $baseURL),
'text' => self::stripHTML($item['content_html'])
];
} elseif(isset($item['content_text'])) {
@ -93,7 +95,7 @@ class JSONFeed extends Format {
}
if(isset($item['image'])) {
$entry['photo'] = $item['image'];
$entry['photo'] = \Mf2\resolveUrl($baseURL, $item['image']);
}
if(isset($item['tags'])) {

+ 26
- 6
tests/FeedTest.php View File

@ -199,7 +199,7 @@ class FeedTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('feed+json', $result->{'source-format'});
$data = $result->data;
$this->assertEquals(10, count($data->items));
$this->assertEquals(11, count($data->items));
for($i=0; $i<8; $i++) {
$this->assertEquals('entry', $data->items[$i]->type);
$this->assertEquals('manton', $data->items[$i]->author->name);
@ -213,15 +213,35 @@ class FeedTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('note', $data->items[0]->{'post-type'});
$this->assertEquals('article', $data->items[4]->{'post-type'});
$this->assertEquals('<p>Lots of good feedback on <a href="http://help.micro.blog/2017/wordpress-import/">the WordPress import</a>. Made a couple improvements this morning. Overall, pretty good.</p>', $data->items[9]->content->html);
$this->assertEquals('Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.', $data->items[9]->content->text);
$this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->url);
$this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->uid);
$this->assertEquals('2017-11-07T15:04:01+00:00', $data->items[9]->published);
$this->assertEquals('<p>Coming up on a year since I wrote about how <a href="http://www.manton.org/2016/11/todays-social-networks-are-broken.html">today’s social networks are broken</a>. Still what I believe.</p>', $data->items[7]->content->html);
$this->assertEquals('Coming up on a year since I wrote about how today’s social networks are broken. Still what I believe.', $data->items[7]->content->text);
$this->assertEquals('http://www.manton.org/2017/11/5979.html', $data->items[7]->url);
$this->assertEquals('http://www.manton.org/2017/11/5979.html', $data->items[7]->uid);
$this->assertEquals('2017-11-07T21:00:42+00:00', $data->items[7]->published);
$this->assertEquals('feed', $data->type);
}
/**
 * Relative image references in a JSON Feed must come back as absolute URLs.
 *
 * Covers three cases from the feed.example.com/jsonfeed fixture: the item-level
 * image on an item that has its own url, the item-level image on an item
 * without a url (resolved against the feed URL instead), and an <img> inside
 * the item's content_html.
 */
public function testJSONFeedRelativeImages() {
  $response = $this->parse([
    'url' => 'http://feed.example.com/jsonfeed',
    'expect' => 'feed',
  ]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $this->assertEquals('feed+json', $parsed->{'source-format'});

  $items = $parsed->data->items;
  $withUrl = $items[9];
  $withoutUrl = $items[10];

  // Item has a url: its relative image resolves against that url
  $this->assertEquals('http://www.manton.org/2017/11/image.jpg', $withUrl->photo);

  // Item has no url: the feed URL is used as the base instead
  $this->assertEquals('http://feed.example.com/image.jpg', $withoutUrl->photo);

  // The <img src> embedded in the content HTML is made absolute as well
  $this->assertContains('http://www.manton.org/2017/11/img.jpg', $withUrl->content->html);
}
public function testAtomFeed() {
$url = 'http://feed.example.com/atom';
$response = $this->parse(['url' => $url, 'expect' => 'feed']);

+ 22
- 0
tests/SanitizeTest.php View File

@ -186,6 +186,28 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
}
*/
/**
 * An <img> with a relative src inside e-content should appear in the
 * returned content HTML with an absolute URL based on the page URL.
 */
public function testRelativePhotoInContent() {
  $response = $this->parse([
    'url' => 'http://sanitize.example/photo-in-content-relative',
  ]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $contentHtml = $parsed->data->content->html;

  // Fixture references "photo1.jpg"; expect it resolved against the page URL
  $this->assertContains('http://sanitize.example/photo1.jpg', $contentHtml);
}
/**
 * A u-photo image with a relative src should be reported in the photo
 * property as an absolute URL based on the page URL.
 */
public function testRelativePhotoProperty() {
  $response = $this->parse([
    'url' => 'http://sanitize.example/photo-relative',
  ]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $photos = $parsed->data->photo;

  // Fixture references "photo.jpg"; the first photo must be the absolute form
  $this->assertEquals('http://sanitize.example/photo.jpg', $photos[0]);
}
public function testPhotoInContentEmptyAltAttribute() {
// https://github.com/aaronpk/XRay/issues/52

+ 15
- 2
tests/data/feed.example.com/jsonfeed View File

@ -119,7 +119,20 @@ Content-Type: application/json; charset=UTF-8
"id": "http://www.manton.org/2017/11/5975.html",
"url": "http://www.manton.org/2017/11/5975.html",
"title": "",
"content_html": "<p>Lots of good feedback on <a href=\"http://help.micro.blog/2017/wordpress-import/\">the WordPress import</a>. Made a couple improvements this morning. Overall, pretty good.</p>\n",
"content_html": "<p><img src=\"img.jpg\"></p>\n",
"image": "image.jpg",
"banner_image": "banner_image.jpg",
"date_published": "2017-11-07T15:04:01+00:00",
"date_modified": "2017-11-07T15:04:01+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5975.html",
"title": "",
"content_html": "<p><img src=\"img.jpg\"></p>\n",
"image": "image.jpg",
"date_published": "2017-11-07T15:04:01+00:00",
"date_modified": "2017-11-07T15:04:01+00:00",
"author": {
@ -127,4 +140,4 @@ Content-Type: application/json; charset=UTF-8
}
}
]
}
}

+ 14
- 0
tests/data/sanitize.example/photo-in-content-relative View File

@ -0,0 +1,14 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">Test of <b>relative URL resolution</b> with two <img src="photo1.jpg"> images <img src="photo2.jpg"> inside the content</p>
</body>
</html>

+ 15
- 0
tests/data/sanitize.example/photo-relative View File

@ -0,0 +1,15 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">Test of <b>relative URL resolution</b> with a photo property</p>
<img class="u-photo" src="photo.jpg">
</body>
</html>

Loading…
Cancel
Save