From 9163341af2a5e303b7fe0e36de2c9066707354f6 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 9 Nov 2018 07:57:05 -0800 Subject: [PATCH] normalize relative URLs in JSONFeed items closes #77 --- lib/XRay/Formats/Format.php | 8 ++++- lib/XRay/Formats/JSONFeed.php | 12 ++++--- tests/FeedTest.php | 32 +++++++++++++++---- tests/SanitizeTest.php | 22 +++++++++++++ tests/data/feed.example.com/jsonfeed | 17 ++++++++-- .../photo-in-content-relative | 14 ++++++++ tests/data/sanitize.example/photo-relative | 15 +++++++++ 7 files changed, 106 insertions(+), 14 deletions(-) create mode 100644 tests/data/sanitize.example/photo-in-content-relative create mode 100644 tests/data/sanitize.example/photo-relative diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php index 2b9bcf8..41ae28a 100644 --- a/lib/XRay/Formats/Format.php +++ b/lib/XRay/Formats/Format.php @@ -34,7 +34,7 @@ abstract class Format implements iFormat { return [$doc, $xpath]; } - protected static function sanitizeHTML($html, $allowImg=true) { + protected static function sanitizeHTML($html, $allowImg=true, $baseURL=false) { $allowed = [ 'a', 'abbr', @@ -68,6 +68,12 @@ abstract class Format implements iFormat { $config = HTMLPurifier_Config::createDefault(); $config->set('Cache.DefinitionImpl', null); $config->set('HTML.AllowedElements', $allowed); + + if($baseURL) { + $config->set('URI.MakeAbsolute', true); + $config->set('URI.Base', $baseURL); + } + $def = $config->getHTMLDefinition(true); $def->addElement( 'time', diff --git a/lib/XRay/Formats/JSONFeed.php b/lib/XRay/Formats/JSONFeed.php index e2db1e6..e3110d6 100644 --- a/lib/XRay/Formats/JSONFeed.php +++ b/lib/XRay/Formats/JSONFeed.php @@ -23,14 +23,14 @@ class JSONFeed extends Format { $result['data']['type'] = 'feed'; foreach($feed['items'] as $item) { - $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed); + $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed, $url); } } return $result; } - private 
static function _hEntryFromFeedItem($item, $feed) { + private static function _hEntryFromFeedItem($item, $feed, $feedurl) { $entry = [ 'type' => 'entry', 'author' => [ @@ -64,14 +64,16 @@ class JSONFeed extends Format { $entry['name'] = trim($item['title']); } + $baseURL = isset($entry['url']) ? $entry['url'] : $feedurl; + if(isset($item['content_html']) && isset($item['content_text'])) { $entry['content'] = [ - 'html' => self::sanitizeHTML($item['content_html']), + 'html' => self::sanitizeHTML($item['content_html'], true, $baseURL), 'text' => trim($item['content_text']) ]; } elseif(isset($item['content_html'])) { $entry['content'] = [ - 'html' => self::sanitizeHTML($item['content_html']), + 'html' => self::sanitizeHTML($item['content_html'], true, $baseURL), 'text' => self::stripHTML($item['content_html']) ]; } elseif(isset($item['content_text'])) { @@ -93,7 +95,7 @@ class JSONFeed extends Format { } if(isset($item['image'])) { - $entry['photo'] = $item['image']; + $entry['photo'] = \Mf2\resolveUrl($baseURL, $item['image']); } if(isset($item['tags'])) { diff --git a/tests/FeedTest.php b/tests/FeedTest.php index 6a115c5..b206857 100644 --- a/tests/FeedTest.php +++ b/tests/FeedTest.php @@ -199,7 +199,7 @@ class FeedTest extends PHPUnit_Framework_TestCase { $this->assertEquals('feed+json', $result->{'source-format'}); $data = $result->data; - $this->assertEquals(10, count($data->items)); + $this->assertEquals(11, count($data->items)); for($i=0; $i<8; $i++) { $this->assertEquals('entry', $data->items[$i]->type); $this->assertEquals('manton', $data->items[$i]->author->name); @@ -213,15 +213,35 @@ class FeedTest extends PHPUnit_Framework_TestCase { $this->assertEquals('note', $data->items[0]->{'post-type'}); $this->assertEquals('article', $data->items[4]->{'post-type'}); - $this->assertEquals('

Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.

', $data->items[9]->content->html); - $this->assertEquals('Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.', $data->items[9]->content->text); - $this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->url); - $this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->uid); - $this->assertEquals('2017-11-07T15:04:01+00:00', $data->items[9]->published); + $this->assertEquals('

Coming up on a year since I wrote about how today’s social networks are broken. Still what I believe.

', $data->items[7]->content->html); + $this->assertEquals('Coming up on a year since I wrote about how today’s social networks are broken. Still what I believe.', $data->items[7]->content->text); + $this->assertEquals('http://www.manton.org/2017/11/5979.html', $data->items[7]->url); + $this->assertEquals('http://www.manton.org/2017/11/5979.html', $data->items[7]->uid); + $this->assertEquals('2017-11-07T21:00:42+00:00', $data->items[7]->published); $this->assertEquals('feed', $data->type); } + public function testJSONFeedRelativeImages() { + $url = 'http://feed.example.com/jsonfeed'; + $response = $this->parse(['url' => $url, 'expect' => 'feed']); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $result = json_decode($body); + $this->assertEquals('feed+json', $result->{'source-format'}); + $data = $result->data; + + // Relative image on an item that has a url + $this->assertEquals('http://www.manton.org/2017/11/image.jpg', $data->items[9]->photo); + + // Relative image on an item that has no URL, fall back to feed URL + $this->assertEquals('http://feed.example.com/image.jpg', $data->items[10]->photo); + + // Relative image inside the content html + $this->assertContains('http://www.manton.org/2017/11/img.jpg', $data->items[9]->content->html); + } + public function testAtomFeed() { $url = 'http://feed.example.com/atom'; $response = $this->parse(['url' => $url, 'expect' => 'feed']); diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php index b6e8cdc..f549d16 100644 --- a/tests/SanitizeTest.php +++ b/tests/SanitizeTest.php @@ -186,6 +186,28 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { } */ + public function testRelativePhotoInContent() { + $url = 'http://sanitize.example/photo-in-content-relative'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + 
$this->assertContains('http://sanitize.example/photo1.jpg', $data->data->content->html); + } + + public function testRelativePhotoProperty() { + $url = 'http://sanitize.example/photo-relative'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('http://sanitize.example/photo.jpg', $data->data->photo[0]); + } + public function testPhotoInContentEmptyAltAttribute() { // https://github.com/aaronpk/XRay/issues/52 diff --git a/tests/data/feed.example.com/jsonfeed b/tests/data/feed.example.com/jsonfeed index c4be43c..5478894 100644 --- a/tests/data/feed.example.com/jsonfeed +++ b/tests/data/feed.example.com/jsonfeed @@ -119,7 +119,20 @@ Content-Type: application/json; charset=UTF-8 "id": "http://www.manton.org/2017/11/5975.html", "url": "http://www.manton.org/2017/11/5975.html", "title": "", - "content_html": "

Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.

\n", + "content_html": "

\n", + "image": "image.jpg", + "banner_image": "banner_image.jpg", + "date_published": "2017-11-07T15:04:01+00:00", + "date_modified": "2017-11-07T15:04:01+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5975.html", + "title": "", + "content_html": "

\n", + "image": "image.jpg", "date_published": "2017-11-07T15:04:01+00:00", "date_modified": "2017-11-07T15:04:01+00:00", "author": { @@ -127,4 +140,4 @@ Content-Type: application/json; charset=UTF-8 } } ] -} \ No newline at end of file +} diff --git a/tests/data/sanitize.example/photo-in-content-relative b/tests/data/sanitize.example/photo-in-content-relative new file mode 100644 index 0000000..f02a0f5 --- /dev/null +++ b/tests/data/sanitize.example/photo-in-content-relative @@ -0,0 +1,14 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +

Test of relative URL resolution with two images inside the content

+ + diff --git a/tests/data/sanitize.example/photo-relative b/tests/data/sanitize.example/photo-relative new file mode 100644 index 0000000..7affc4f --- /dev/null +++ b/tests/data/sanitize.example/photo-relative @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +

Test of relative URL resolution with a photo property

+ + +