diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php index 135c59d..fc481ea 100644 --- a/lib/XRay/Formats/Format.php +++ b/lib/XRay/Formats/Format.php @@ -78,7 +78,7 @@ abstract class Format implements iFormat { $purifier = new HTMLPurifier($config); $sanitized = $purifier->purify($html); $sanitized = str_replace(" ","\r",$sanitized); - return $sanitized; + return trim($sanitized); } protected static function stripHTML($html) { diff --git a/lib/XRay/Formats/JSONFeed.php b/lib/XRay/Formats/JSONFeed.php new file mode 100644 index 0000000..9117996 --- /dev/null +++ b/lib/XRay/Formats/JSONFeed.php @@ -0,0 +1,106 @@ + [ + 'type' => 'unknown', + ], + 'url' => $url, + ]; + + $feed = json_decode($json, true); + + if($feed) { + $result['data']['type'] = 'feed'; + + foreach($feed['items'] as $item) { + $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed); + } + } + + return $result; + } + + private static function _hEntryFromFeedItem($item, $feed) { + $entry = [ + 'type' => 'entry', + 'author' => [ + 'name' => null, + 'url' => null, + 'photo' => null + ] + ]; + + if(isset($item['author']['name'])) { + $entry['author']['name'] = $item['author']['name']; + } + if(isset($item['author']['url'])) { + $entry['author']['url'] = $item['author']['url']; + } elseif(isset($feed['home_page_url'])) { + $entry['author']['url'] = $feed['home_page_url']; + } + if(isset($item['author']['avatar'])) { + $entry['author']['photo'] = $item['author']['avatar']; + } + + if(isset($item['url'])) { + $entry['url'] = $item['url']; + } + + if(isset($item['id'])) { + $entry['uid'] = $item['id']; + } + + if(isset($item['title']) && trim($item['title'])) { + $entry['name'] = trim($item['title']); + } + + if(isset($item['content_html']) && isset($item['content_text'])) { + $entry['content'] = [ + 'html' => self::sanitizeHTML($item['content_html']), + 'text' => trim($item['content_text']) + ]; + } elseif(isset($item['content_html'])) { + $entry['content'] = [ + 'html' => self::sanitizeHTML($item['content_html']), + 'text' => self::stripHTML($item['content_html']) + ]; + } elseif(isset($item['content_text'])) { + $entry['content'] = [ + 'text' => trim($item['content_text']) + ]; + } + + if(isset($item['summary'])) { + $entry['summary'] = $item['summary']; + } + + if(isset($item['date_published'])) { + $entry['published'] = $item['date_published']; + } + + if(isset($item['date_modified'])) { + $entry['updated'] = $item['date_modified']; + } + + if(isset($item['image'])) { + $entry['photo'] = $item['image']; + } + + if(isset($item['tags'])) { + $entry['category'] = $item['tags']; + } + + return $entry; + } +} diff --git a/lib/XRay/Formats/XML.php b/lib/XRay/Formats/XML.php index 27a57a6..fbcb9c1 100644 --- a/lib/XRay/Formats/XML.php +++ b/lib/XRay/Formats/XML.php @@ -49,6 +49,11 @@ class XML extends Format { ] ]; + if(is_array($guid=$item->getTag('guid')) && count($guid)) + $entry['uid'] = $guid[0]; + elseif(is_array($guid=$item->getTag('id')) && count($guid)) + $entry['uid'] = $guid[0]; + if($item->getUrl()) $entry['url'] = $item->getUrl(); diff --git a/lib/XRay/Parser.php b/lib/XRay/Parser.php index 5ca3454..2d8f919 100644 --- a/lib/XRay/Parser.php +++ b/lib/XRay/Parser.php @@ -42,6 +42,10 @@ class Parser { return Formats\XML::parse($body, $url); } + if(substr($body, 0, 1) == '{' && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1')) { + return Formats\JSONFeed::parse($body, $url); + } + // No special parsers matched, parse for Microformats now return Formats\HTML::parse($this->http, $body, $url, $opts); } diff --git a/tests/FeedTest.php b/tests/FeedTest.php index edd763b..148f18c 100644 --- a/tests/FeedTest.php +++ b/tests/FeedTest.php @@ -84,6 +84,35 @@ class FeedTest extends PHPUnit_Framework_TestCase { $this->assertEquals('feed', $data->type); } + public function testJSONFeed() { + $url = 'http://feed.example.com/jsonfeed'; + $response = $this->parse(['url' => $url, 'expect' => 'feed']); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body)->data; + + $this->assertEquals(10, count($data->items)); + for($i=0; $i<8; $i++) { + $this->assertEquals('entry', $data->items[$i]->type); + $this->assertEquals('manton', $data->items[$i]->author->name); + $this->assertEquals('http://www.manton.org', $data->items[$i]->author->url); + $this->assertNotEmpty($data->items[$i]->url); + $this->assertNotEmpty($data->items[$i]->uid); + $this->assertNotEmpty($data->items[$i]->published); + $this->assertNotEmpty($data->items[$i]->content->html); + $this->assertNotEmpty($data->items[$i]->content->text); + } + + $this->assertEquals('
Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.
', $data->items[9]->content->html); + $this->assertEquals('Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.', $data->items[9]->content->text); + $this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->url); + $this->assertEquals('http://www.manton.org/2017/11/5975.html', $data->items[9]->uid); + $this->assertEquals('2017-11-07T15:04:01+00:00', $data->items[9]->published); + + $this->assertEquals('feed', $data->type); + } + public function testAtomFeed() { $url = 'http://feed.example.com/atom'; $response = $this->parse(['url' => $url, 'expect' => 'feed']); diff --git a/tests/data/feed.example.com/jsonfeed b/tests/data/feed.example.com/jsonfeed new file mode 100644 index 0000000..c4be43c --- /dev/null +++ b/tests/data/feed.example.com/jsonfeed @@ -0,0 +1,130 @@ +HTTP/1.1 200 OK +Date: Sat, 11 Nov 2017 15:35:23 GMT +Server: Apache +Vary: Cookie +ETag: "ccd12f270264ab61d4b31870eef8d73e" +Link:I’ve updated Micro.blog’s Twitter cross-posting to support 280 characters. The apps still color the character counter blue until 140, and red after 280, just in case you want to stick to shorter posts.
\n", + "date_published": "2017-11-10T16:34:21+00:00", + "date_modified": "2017-11-10T16:34:21+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5991.html", + "url": "http://www.manton.org/2017/11/5991.html", + "title": "", + "content_html": "The 7-day “Photo Challenge” pin is now live on Micro.blog. Thanks again to Doug Lane for kicking things off with prompts to inspire everyone to take more photos.
\n", + "date_published": "2017-11-10T15:31:12+00:00", + "date_modified": "2017-11-10T15:31:12+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5989.html", + "url": "http://www.manton.org/2017/11/5989.html", + "title": "", + "content_html": "Love this 7-day photo challenge for Micro.blog started by Doug Lane.
\n", + "date_published": "2017-11-09T17:07:06+00:00", + "date_modified": "2017-11-09T17:07:06+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5987.html", + "url": "http://www.manton.org/2017/11/5987.html", + "title": "", + "content_html": "Brent Simmons is back to podcasting with a new show from The Omni Group. Looks great!
\n", + "date_published": "2017-11-08T19:49:44+00:00", + "date_modified": "2017-11-08T19:49:44+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/one-election-day-down-one-more-to-go.html", + "url": "http://www.manton.org/2017/11/one-election-day-down-one-more-to-go.html", + "title": "One election day down, one more to go", + "content_html": "Josh Marshall of Talking Points Memo on last night’s victory for Democrats:
\n\n\n When a President is locked below 40% approval and often closer to 35% approval, his party will face a brutal and unforgiving electorate. This was a fact a decade ago and it\u2019s a fact today. We\u2019ve just been stunned into an unwarranted uncertainty by the fact of Trump\u2019s victory one year ago today.\n
November 2018 feels like a long time from now, but it will get here.
\n", + "date_published": "2017-11-08T18:00:58+00:00", + "date_modified": "2017-11-08T18:02:12+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5983.html", + "url": "http://www.manton.org/2017/11/5983.html", + "title": "", + "content_html": "I should’ve added an “I voted” pin to Micro.blog for posting voting sticker photos. We’ll do that for 2018.
\n", + "date_published": "2017-11-08T04:38:44+00:00", + "date_modified": "2017-11-08T04:38:44+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5981.html", + "url": "http://www.manton.org/2017/11/5981.html", + "title": "", + "content_html": "Redesigned the Micro.blog splash page (when you’re not signed in). The most concise expression of the mission statement so far.
\n", + "date_published": "2017-11-07T21:58:25+00:00", + "date_modified": "2017-11-07T21:58:25+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5979.html", + "url": "http://www.manton.org/2017/11/5979.html", + "title": "", + "content_html": "Coming up on a year since I wrote about how today’s social networks are broken. Still what I believe.
\n", + "date_published": "2017-11-07T21:00:42+00:00", + "date_modified": "2017-11-07T21:00:42+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5977.html", + "url": "http://www.manton.org/2017/11/5977.html", + "title": "", + "content_html": "Election day in the United States. There’s probably something on the ballot where you live. Doesn’t have to be big to still matter. Vote!
\n", + "date_published": "2017-11-07T16:42:00+00:00", + "date_modified": "2017-11-07T16:42:00+00:00", + "author": { + "name": "manton" + } + }, + { + "id": "http://www.manton.org/2017/11/5975.html", + "url": "http://www.manton.org/2017/11/5975.html", + "title": "", + "content_html": "Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.
\n", + "date_published": "2017-11-07T15:04:01+00:00", + "date_modified": "2017-11-07T15:04:01+00:00", + "author": { + "name": "manton" + } + } + ] +} \ No newline at end of file