Browse Source

add basic support for JSONFeed

pull/49/head
Aaron Parecki 7 years ago
parent
commit
7b16371418
No known key found for this signature in database GPG Key ID: 276C2817346D6056
6 changed files with 275 additions and 1 deletions
  1. +1
    -1
      lib/XRay/Formats/Format.php
  2. +106
    -0
      lib/XRay/Formats/JSONFeed.php
  3. +5
    -0
      lib/XRay/Formats/XML.php
  4. +4
    -0
      lib/XRay/Parser.php
  5. +29
    -0
      tests/FeedTest.php
  6. +130
    -0
      tests/data/feed.example.com/jsonfeed

+ 1
- 1
lib/XRay/Formats/Format.php View File

@ -78,7 +78,7 @@ abstract class Format implements iFormat {
$purifier = new HTMLPurifier($config);
$sanitized = $purifier->purify($html);
$sanitized = str_replace("
","\r",$sanitized);
return $sanitized;
return trim($sanitized);
}
protected static function stripHTML($html) {

+ 106
- 0
lib/XRay/Formats/JSONFeed.php View File

@ -0,0 +1,106 @@
<?php
namespace p3k\XRay\Formats;
use HTMLPurifier, HTMLPurifier_Config;
use DOMDocument, DOMXPath;
use p3k\XRay\Formats;
/**
 * Parser for JSON Feed (https://jsonfeed.org/version/1) documents.
 *
 * Turns the decoded feed into an array of the form
 * ['data' => ['type' => 'feed', 'items' => [...entries...]], 'url' => $url],
 * where each entry is built by _hEntryFromFeedItem().
 */
class JSONFeed extends Format {

  // Both matchers unconditionally return true.
  public static function matches_host($url) { return true; }
  public static function matches($url) { return true; }

  /**
   * Parse a JSON Feed document.
   *
   * @param string $json Raw JSON text of the feed
   * @param string $url  URL the feed was retrieved from; echoed back in the result
   * @return array ['data' => [...], 'url' => $url]. 'data'['type'] is 'feed' when the
   *               JSON decoded to a non-empty array, otherwise it stays 'unknown'.
   *               'data'['items'] is only present when at least one item was converted.
   */
  public static function parse($json, $url) {
    $result = [
      'data' => [
        'type' => 'unknown',
      ],
      'url' => $url,
    ];

    $feed = json_decode($json, true);

    // json_decode() yields null for invalid JSON and a scalar for bare JSON values;
    // neither (nor an empty document) can be treated as a feed.
    if(!is_array($feed) || !$feed) {
      return $result;
    }

    $result['data']['type'] = 'feed';

    // "items" may be missing or malformed in feeds found in the wild; only iterate a real array
    if(isset($feed['items']) && is_array($feed['items'])) {
      foreach($feed['items'] as $item) {
        // Each item must be a JSON object; skip anything else rather than emit an empty entry
        if(!is_array($item)) {
          continue;
        }
        $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed);
      }
    }

    return $result;
  }

  /**
   * Build a single entry array from one JSON Feed item.
   *
   * @param array $item Decoded item object from the feed's "items" array
   * @param array $feed The whole decoded feed, used for feed-level fallbacks
   * @return array Entry with 'type' => 'entry', an 'author' array (name/url/photo,
   *               null when unknown) and whichever optional properties the item provides
   */
  private static function _hEntryFromFeedItem($item, $feed) {
    $entry = [
      'type' => 'entry',
      'author' => [
        'name' => null,
        'url' => null,
        'photo' => null
      ]
    ];

    // Per the JSON Feed spec, an item that does not specify an author
    // takes the top-level feed author.
    $author = [];
    if(isset($item['author']) && is_array($item['author'])) {
      $author = $item['author'];
    } elseif(isset($feed['author']) && is_array($feed['author'])) {
      $author = $feed['author'];
    }

    if(isset($author['name'])) {
      $entry['author']['name'] = $author['name'];
    }

    // Without an explicit author URL, fall back to the feed's home page
    if(isset($author['url'])) {
      $entry['author']['url'] = $author['url'];
    } elseif(isset($feed['home_page_url'])) {
      $entry['author']['url'] = $feed['home_page_url'];
    }

    if(isset($author['avatar'])) {
      $entry['author']['photo'] = $author['avatar'];
    }

    if(isset($item['url'])) {
      $entry['url'] = $item['url'];
    }

    if(isset($item['id'])) {
      $entry['uid'] = $item['id'];
    }

    // Feeds commonly send an empty string for untitled posts; omit 'name' in that case
    if(isset($item['title']) && trim($item['title'])) {
      $entry['name'] = trim($item['title']);
    }

    if(isset($item['content_html']) && isset($item['content_text'])) {
      // Both representations supplied: sanitize the HTML, use the text as given
      $entry['content'] = [
        'html' => self::sanitizeHTML($item['content_html']),
        'text' => trim($item['content_text'])
      ];
    } elseif(isset($item['content_html'])) {
      // HTML only: derive the plaintext value from the HTML
      $entry['content'] = [
        'html' => self::sanitizeHTML($item['content_html']),
        'text' => self::stripHTML($item['content_html'])
      ];
    } elseif(isset($item['content_text'])) {
      $entry['content'] = [
        'text' => trim($item['content_text'])
      ];
    }

    if(isset($item['summary'])) {
      $entry['summary'] = $item['summary'];
    }

    if(isset($item['date_published'])) {
      $entry['published'] = $item['date_published'];
    }

    if(isset($item['date_modified'])) {
      $entry['updated'] = $item['date_modified'];
    }

    if(isset($item['image'])) {
      $entry['photo'] = $item['image'];
    }

    if(isset($item['tags'])) {
      $entry['category'] = $item['tags'];
    }

    return $entry;
  }

}

+ 5
- 0
lib/XRay/Formats/XML.php View File

@ -49,6 +49,11 @@ class XML extends Format {
]
];
if(is_array($guid=$item->getTag('guid')) && count($guid))
$entry['uid'] = $guid[0];
elseif(is_array($guid=$item->getTag('id')) && count($guid))
$entry['uid'] = $guid[0];
if($item->getUrl())
$entry['url'] = $item->getUrl();

+ 4
- 0
lib/XRay/Parser.php View File

@ -42,6 +42,10 @@ class Parser {
return Formats\XML::parse($body, $url);
}
if(substr($body, 0, 1) == '{' && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1')) {
return Formats\JSONFeed::parse($body, $url);
}
// No special parsers matched, parse for Microformats now
return Formats\HTML::parse($this->http, $body, $url, $opts);
}

+ 29
- 0
tests/FeedTest.php View File

@ -84,6 +84,35 @@ class FeedTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('feed', $data->type);
}
/**
 * Parses the JSON Feed fixture at feed.example.com/jsonfeed and checks that it
 * is reported as a feed of 10 entries, each with the expected basic properties.
 */
public function testJSONFeed() {
  $url = 'http://feed.example.com/jsonfeed';
  $response = $this->parse(['url' => $url, 'expect' => 'feed']);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $data = json_decode($body)->data;

  $this->assertEquals('feed', $data->type);
  $this->assertCount(10, $data->items);

  // Validate every item in the feed, not just a fixed-size prefix of it
  foreach($data->items as $item) {
    $this->assertEquals('entry', $item->type);
    $this->assertEquals('manton', $item->author->name);
    // No author URL in the fixture, so this comes from the feed's home_page_url
    $this->assertEquals('http://www.manton.org', $item->author->url);
    $this->assertNotEmpty($item->url);
    $this->assertNotEmpty($item->uid);
    $this->assertNotEmpty($item->published);
    $this->assertNotEmpty($item->content->html);
    $this->assertNotEmpty($item->content->text);
  }

  // Spot-check exact values on the last item of the fixture
  $last = $data->items[9];
  $this->assertEquals('<p>Lots of good feedback on <a href="http://help.micro.blog/2017/wordpress-import/">the WordPress import</a>. Made a couple improvements this morning. Overall, pretty good.</p>', $last->content->html);
  $this->assertEquals('Lots of good feedback on the WordPress import. Made a couple improvements this morning. Overall, pretty good.', $last->content->text);
  $this->assertEquals('http://www.manton.org/2017/11/5975.html', $last->url);
  $this->assertEquals('http://www.manton.org/2017/11/5975.html', $last->uid);
  $this->assertEquals('2017-11-07T15:04:01+00:00', $last->published);
}
public function testAtomFeed() {
$url = 'http://feed.example.com/atom';
$response = $this->parse(['url' => $url, 'expect' => 'feed']);

+ 130
- 0
tests/data/feed.example.com/jsonfeed View File

@ -0,0 +1,130 @@
HTTP/1.1 200 OK
Date: Sat, 11 Nov 2017 15:35:23 GMT
Server: Apache
Vary: Cookie
ETag: "ccd12f270264ab61d4b31870eef8d73e"
Link: <http://www.manton.org/wp-json/>; rel="https://api.w.org/"
Last-Modified: Fri, 10 Nov 2017 16:34:21 GMT
MS-Author-Via: DAV
Content-Type: application/json; charset=UTF-8
{
"version": "https://jsonfeed.org/version/1",
"user_comment": "This feed allows you to read the posts from this site in any feed reader that supports the JSON Feed format. To add this feed to your reader, copy the following URL -- http://www.manton.org/feed/json -- and add it your reader.",
"home_page_url": "http://www.manton.org",
"feed_url": "http://www.manton.org/feed/json",
"title": "Manton Reece",
"description": "",
"items": [
{
"id": "http://www.manton.org/2017/11/5993.html",
"url": "http://www.manton.org/2017/11/5993.html",
"title": "",
"content_html": "<p>I&#8217;ve updated Micro.blog&#8217;s Twitter cross-posting to support 280 characters. The apps still color the character counter blue until 140, and red after 280, just in case you want to stick to shorter posts.</p>\n",
"date_published": "2017-11-10T16:34:21+00:00",
"date_modified": "2017-11-10T16:34:21+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5991.html",
"url": "http://www.manton.org/2017/11/5991.html",
"title": "",
"content_html": "<p>The 7-day &#8220;Photo Challenge&#8221; pin is now live on Micro.blog. Thanks again to Doug Lane for <a href=\"http://micro.douglane.com/2017/11/09/microblog-photo-challenge.html\">kicking things off</a> with prompts to inspire everyone to take more photos.</p>\n",
"date_published": "2017-11-10T15:31:12+00:00",
"date_modified": "2017-11-10T15:31:12+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5989.html",
"url": "http://www.manton.org/2017/11/5989.html",
"title": "",
"content_html": "<p><a href=\"http://micro.douglane.com/2017/11/09/microblog-photo-challenge.html\">Love this 7-day photo challenge</a> for Micro.blog started by Doug Lane.</p>\n",
"date_published": "2017-11-09T17:07:06+00:00",
"date_modified": "2017-11-09T17:07:06+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5987.html",
"url": "http://www.manton.org/2017/11/5987.html",
"title": "",
"content_html": "<p>Brent Simmons is back to podcasting with <a href=\"https://theomnishow.omnigroup.com/\">a new show from The Omni Group</a>. Looks great!</p>\n",
"date_published": "2017-11-08T19:49:44+00:00",
"date_modified": "2017-11-08T19:49:44+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/one-election-day-down-one-more-to-go.html",
"url": "http://www.manton.org/2017/11/one-election-day-down-one-more-to-go.html",
"title": "One election day down, one more to go",
"content_html": "<p>Josh Marshall of Talking Points Memo <a href=\"http://talkingpointsmemo.com/edblog/get-real-thoughts-on-last-nights-anti-trump-wave\">on last night&#8217;s victory for Democrats</a>:</p>\n<blockquote><p>\n When a President is locked below 40% approval and often closer to 35% approval, his party will face a brutal and unforgiving electorate. This was a fact a decade ago and it\u2019s a fact today. We\u2019ve just been stunned into an unwarranted uncertainty by the fact of Trump\u2019s victory one year ago today.\n</p></blockquote>\n<p>November 2018 feels like a long time from now, but it will get here.</p>\n",
"date_published": "2017-11-08T18:00:58+00:00",
"date_modified": "2017-11-08T18:02:12+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5983.html",
"url": "http://www.manton.org/2017/11/5983.html",
"title": "",
"content_html": "<p>I should&#8217;ve added an &#8220;I voted&#8221; pin to Micro.blog for posting voting sticker photos. We&#8217;ll do that for 2018.</p>\n",
"date_published": "2017-11-08T04:38:44+00:00",
"date_modified": "2017-11-08T04:38:44+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5981.html",
"url": "http://www.manton.org/2017/11/5981.html",
"title": "",
"content_html": "<p>Redesigned the Micro.blog splash page (when you&#8217;re not signed in). The most concise expression of the mission statement so far.</p>\n",
"date_published": "2017-11-07T21:58:25+00:00",
"date_modified": "2017-11-07T21:58:25+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5979.html",
"url": "http://www.manton.org/2017/11/5979.html",
"title": "",
"content_html": "<p>Coming up on a year since I wrote about how <a href=\"http://www.manton.org/2016/11/todays-social-networks-are-broken.html\">today&#8217;s social networks are broken</a>. Still what I believe.</p>\n",
"date_published": "2017-11-07T21:00:42+00:00",
"date_modified": "2017-11-07T21:00:42+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5977.html",
"url": "http://www.manton.org/2017/11/5977.html",
"title": "",
"content_html": "<p>Election day in the United States. There&#8217;s probably something on the ballot where you live. Doesn&#8217;t have to be big to still matter. Vote!</p>\n",
"date_published": "2017-11-07T16:42:00+00:00",
"date_modified": "2017-11-07T16:42:00+00:00",
"author": {
"name": "manton"
}
},
{
"id": "http://www.manton.org/2017/11/5975.html",
"url": "http://www.manton.org/2017/11/5975.html",
"title": "",
"content_html": "<p>Lots of good feedback on <a href=\"http://help.micro.blog/2017/wordpress-import/\">the WordPress import</a>. Made a couple improvements this morning. Overall, pretty good.</p>\n",
"date_published": "2017-11-07T15:04:01+00:00",
"date_modified": "2017-11-07T15:04:01+00:00",
"author": {
"name": "manton"
}
}
]
}

Loading…
Cancel
Save