From d0de523746f33063ecd42251eb3c30241400ba45 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 8 Jun 2017 16:33:51 -0700 Subject: [PATCH 1/5] add hackernews support closes #40 --- lib/XRay/Fetcher.php | 5 ++ lib/XRay/Formats/Format.php | 49 +++++++++++ lib/XRay/Formats/Hackernews.php | 84 +++++++++++++++++++ lib/XRay/Formats/Mf2.php | 57 ++----------- lib/XRay/Parser.php | 4 + tests/HackernewsTest.php | 42 ++++++++++ .../v0_item_14516538.json | 11 +++ 7 files changed, 204 insertions(+), 48 deletions(-) create mode 100644 lib/XRay/Formats/Hackernews.php create mode 100644 tests/HackernewsTest.php create mode 100644 tests/data/hacker-news.firebaseio.com/v0_item_14516538.json diff --git a/lib/XRay/Fetcher.php b/lib/XRay/Fetcher.php index 8139cf8..608baea 100644 --- a/lib/XRay/Fetcher.php +++ b/lib/XRay/Fetcher.php @@ -48,6 +48,11 @@ class Fetcher { return $this->_fetch_github($url, $opts); } + // Check if this is a Hackernews URL and use the API + if(Formats\Hackernews::matches($url)) { + return Formats\Hackernews::fetch($this->http, $url, $opts); + } + // All other URLs are fetched normally // Special-case appspot.com URLs to not follow redirects. diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php index 47e9625..0c94101 100644 --- a/lib/XRay/Formats/Format.php +++ b/lib/XRay/Formats/Format.php @@ -2,6 +2,7 @@ namespace p3k\XRay\Formats; use DOMDocument, DOMXPath; +use HTMLPurifier, HTMLPurifier_Config; interface iFormat { @@ -33,4 +34,52 @@ abstract class Format implements iFormat { return [$doc, $xpath]; } + protected static function sanitizeHTML($html) { + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); + $config->set('HTML.AllowedElements', [ + 'a', + 'abbr', + 'b', + 'code', + 'del', + 'em', + 'i', + 'img', + 'q', + 'strike', + 'strong', + 'time', + 'blockquote', + 'pre', + 'p', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'ul', + 'li', + 'ol' + ]); + $def = $config->getHTMLDefinition(true); + $def->addElement( + 'time', + 'Inline', + 'Inline', + 'Common', + [ + 'datetime' => 'Text' + ] + ); + // Override the allowed classes to only support Microformats2 classes + $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2()); + $purifier = new HTMLPurifier($config); + $sanitized = $purifier->purify($html); + $sanitized = str_replace(" ","\r",$sanitized); + return $sanitized; + } + + } diff --git a/lib/XRay/Formats/Hackernews.php b/lib/XRay/Formats/Hackernews.php new file mode 100644 index 0000000..780683d --- /dev/null +++ b/lib/XRay/Formats/Hackernews.php @@ -0,0 +1,84 @@ +get('https://hacker-news.firebaseio.com/v0/item/'.$match[1].'.json'); + if($response['code'] != 200) { + return [ + 'error' => 'hackernews_error', + 'error_description' => $response['body'], + 'code' => $response['code'], + ]; + } + + return [ + 'url' => $url, + 'body' => $response['body'], + 'code' => $response['code'], + ]; + } + + public static function parse($json, $url) { + $data = @json_decode($json, true); + + if(!$data) + return self::_unknown(); + + $match = self::matches($url); + + $date = DateTime::createFromFormat('U', $data['time']); + + // Start building the h-entry + $entry = array( + 'type' => 'entry', + 'url' => $url, + 'author' => [ + 'type' => 'card', + 'name' => $data['by'], + 'photo' => null, + 'url' => 'https://news.ycombinator.com/user?id='.$data['by'] + ], + 'published' => $date->format('c') + ); + + if(isset($data['title'])) { + $entry['name'] = $data['title']; + } + + if(isset($data['text'])) { + $htmlContent = trim(self::sanitizeHTML($data['text'])); + $textContent = str_replace('

', "

\n

", $htmlContent); + $textContent = strip_tags($textContent); + $entry['content'] = [ + 'html' => $htmlContent, + 'text' => $textContent + ]; + } + + return [ + 'data' => $entry, + 'original' => $json + ]; + } + +} diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index c25d9a3..4df9349 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -3,7 +3,15 @@ namespace p3k\XRay\Formats; use HTMLPurifier, HTMLPurifier_Config; -class Mf2 { +class Mf2 extends Format { + + public static function matches_host($url) { + return true; + } + + public static function matches($url) { + return true; + } public static function parse($mf2, $url, $http) { if(count($mf2['items']) == 0) @@ -655,53 +663,6 @@ class Mf2 { return $author; } - private static function sanitizeHTML($html) { - $config = HTMLPurifier_Config::createDefault(); - $config->set('Cache.DefinitionImpl', null); - $config->set('HTML.AllowedElements', [ - 'a', - 'abbr', - 'b', - 'code', - 'del', - 'em', - 'i', - 'img', - 'q', - 'strike', - 'strong', - 'time', - 'blockquote', - 'pre', - 'p', - 'h1', - 'h2', - 'h3', - 'h4', - 'h5', - 'h6', - 'ul', - 'li', - 'ol' - ]); - $def = $config->getHTMLDefinition(true); - $def->addElement( - 'time', - 'Inline', - 'Inline', - 'Common', - [ - 'datetime' => 'Text' - ] - ); - // Override the allowed classes to only support Microformats2 classes - $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2()); - $purifier = new HTMLPurifier($config); - $sanitized = $purifier->purify($html); - $sanitized = str_replace(" ","\r",$sanitized); - return $sanitized; - } - private static function hasNumericKeys(array $arr) { foreach($arr as $key=>$val) if (is_numeric($key)) diff --git a/lib/XRay/Parser.php b/lib/XRay/Parser.php index 639aba7..bb0101a 100644 --- a/lib/XRay/Parser.php +++ b/lib/XRay/Parser.php @@ -34,6 +34,10 @@ class Parser { return Formats\XKCD::parse($body, $url); } + if(Formats\Hackernews::matches($url)) { + return Formats\Hackernews::parse($body, $url); + } + // No special parsers matched, parse for Microformats now return Formats\HTML::parse($this->http, $body, $url, $opts); } diff --git a/tests/HackernewsTest.php b/tests/HackernewsTest.php new file mode 100644 index 0000000..71cd9d4 --- /dev/null +++ b/tests/HackernewsTest.php @@ -0,0 +1,42 @@ +client = new Parse(); + $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/'); + $this->client->mc = null; + } + + private function parse($params) { + $request = new Request($params); + $response = new Response(); + return $this->client->parse($request, $response); + } + + public function testSubmission() { + $url = 'https://news.ycombinator.com/item?id=14516538'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('2017-06-08T19:32:12+00:00', $data['data']['published']); + $this->assertEquals('vkb', $data['data']['author']['name']); + $this->assertEquals('https://news.ycombinator.com/user?id=vkb', $data['data']['author']['url']); + $this->assertEquals('What are we doing about Facebook, Google, and the closed internet?', $data['data']['name']); + $this->assertEquals('There have been many, many posts about how toxic advertising and Facebook are (I\'ve written many myself[1][2][3]) for our internet ecosystem today.

What projects or companies are you working on to combat filter bubbles, walled gardens, emotional manipulation, and the like, and how can the HN community help you in your goals?

[1]http://veekaybee.github.io/facebook-is-collecting-this/ +[2]http://veekaybee.github.io/content-is-dead/ +[3] http://veekaybee.github.io/who-is-doing-this-to-my-internet/

', $data['data']['content']['html']); + } + + + +} + diff --git a/tests/data/hacker-news.firebaseio.com/v0_item_14516538.json b/tests/data/hacker-news.firebaseio.com/v0_item_14516538.json new file mode 100644 index 0000000..47b64cc --- /dev/null +++ b/tests/data/hacker-news.firebaseio.com/v0_item_14516538.json @@ -0,0 +1,11 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Thu, 08 Jun 2017 21:28:24 GMT +Content-Type: application/json; charset=utf-8 +Content-Length: 949 +Connection: keep-alive +Access-Control-Allow-Origin: * +Cache-Control: no-cache +Strict-Transport-Security: max-age=31556926; includeSubDomains; preload + +{"by":"vkb","descendants":51,"id":14516538,"kids":[14516923,14517320,14517322,14517224,14516999,14516850,14517290,14516926,14516808,14517088,14517137,14516981,14516706,14517080,14517055,14516805,14516785,14516890,14517104,14516723,14516853,14517094],"score":84,"text":"There have been many, many posts about how toxic advertising and Facebook are (I've written many myself[1][2][3]) for our internet ecosystem today.

What projects or companies are you working on to combat filter bubbles, walled gardens, emotional manipulation, and the like, and how can the HN community help you in your goals?

[1]http://veekaybee.github.io/facebook-is-collecting-this/\n[2]http://veekaybee.github.io/content-is-dead/\n[3] http://veekaybee.github.io/who-is-doing-this-to-my-internet/","time":1496950332,"title":"What are we doing about Facebook, Google, and the closed internet?","type":"story"} \ No newline at end of file From 4fab3e9e0aa6d84859e636f385cafff27386d9d1 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 9 Jun 2017 07:33:01 -0700 Subject: [PATCH 2/5] add test for HN comment --- lib/XRay/Formats/Hackernews.php | 2 +- tests/HackernewsTest.php | 27 +++++++++++++++++++ .../v0_item_14516923.json | 11 ++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 tests/data/hacker-news.firebaseio.com/v0_item_14516923.json diff --git a/lib/XRay/Formats/Hackernews.php b/lib/XRay/Formats/Hackernews.php index 780683d..a359aff 100644 --- a/lib/XRay/Formats/Hackernews.php +++ b/lib/XRay/Formats/Hackernews.php @@ -67,7 +67,7 @@ class Hackernews extends Format { if(isset($data['text'])) { $htmlContent = trim(self::sanitizeHTML($data['text'])); - $textContent = str_replace('

', "

\n

", $htmlContent); + $textContent = str_replace('

', "\n

", $htmlContent); $textContent = strip_tags($textContent); $entry['content'] = [ 'html' => $htmlContent, diff --git a/tests/HackernewsTest.php b/tests/HackernewsTest.php index 71cd9d4..e414f92 100644 --- a/tests/HackernewsTest.php +++ b/tests/HackernewsTest.php @@ -34,8 +34,35 @@ class HackernewsTest extends PHPUnit_Framework_TestCase { $this->assertEquals('There have been many, many posts about how toxic advertising and Facebook are (I\'ve written many myself[1][2][3]) for our internet ecosystem today.

What projects or companies are you working on to combat filter bubbles, walled gardens, emotional manipulation, and the like, and how can the HN community help you in your goals?

[1]http://veekaybee.github.io/facebook-is-collecting-this/ [2]http://veekaybee.github.io/content-is-dead/ [3] http://veekaybee.github.io/who-is-doing-this-to-my-internet/

', $data['data']['content']['html']); + $this->assertEquals('There have been many, many posts about how toxic advertising and Facebook are (I\'ve written many myself[1][2][3]) for our internet ecosystem today. +What projects or companies are you working on to combat filter bubbles, walled gardens, emotional manipulation, and the like, and how can the HN community help you in your goals? +[1]http://veekaybee.github.io/facebook-is-collecting-this/ +[2]http://veekaybee.github.io/content-is-dead/ +[3] http://veekaybee.github.io/who-is-doing-this-to-my-internet/', $data['data']['content']['text']); } + public function testComment() { + $url = 'https://news.ycombinator.com/item?id=14516923'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('2017-06-08T20:23:20+00:00', $data['data']['published']); + $this->assertEquals('aaronpk', $data['data']['author']['name']); + $this->assertEquals('https://news.ycombinator.com/user?id=aaronpk', $data['data']['author']['url']); + $this->assertArrayNotHasKey('name', $data['data']); + $this->assertEquals('I am a member of the W3C Social Web Working Group (https://www.w3.org/wiki/Socialwg), and have been organizing IndieWebCamp (https://indieweb.org/) conferences in this space for the last 7 years. We\'ve been making a lot of progress:

* https://www.w3.org/TR/webmention/ - cross-site commenting

* https://www.w3.org/TR/micropub/ - API for apps to create posts on various servers

* https://www.w3.org/TR/websub/ - realtime subscriptions to feeds

* More: https://indieweb.org/specs

We focus on making sure there are a plurality of implementations and approaches rather than trying to build a single software solution to solve everything.

Try commenting on my copy of this post on my website by sending me a webmention! https://aaronparecki.com/2017/06/08/9/indieweb

', $data['data']['content']['html']); + $this->assertEquals('I am a member of the W3C Social Web Working Group (https://www.w3.org/wiki/Socialwg), and have been organizing IndieWebCamp (https://indieweb.org/) conferences in this space for the last 7 years. We\'ve been making a lot of progress: +* https://www.w3.org/TR/webmention/ - cross-site commenting +* https://www.w3.org/TR/micropub/ - API for apps to create posts on various servers +* https://www.w3.org/TR/websub/ - realtime subscriptions to feeds +* More: https://indieweb.org/specs +We focus on making sure there are a plurality of implementations and approaches rather than trying to build a single software solution to solve everything. +Try commenting on my copy of this post on my website by sending me a webmention! https://aaronparecki.com/2017/06/08/9/indieweb', $data['data']['content']['text']); + } } diff --git a/tests/data/hacker-news.firebaseio.com/v0_item_14516923.json b/tests/data/hacker-news.firebaseio.com/v0_item_14516923.json new file mode 100644 index 0000000..21c8119 --- /dev/null +++ b/tests/data/hacker-news.firebaseio.com/v0_item_14516923.json @@ -0,0 +1,11 @@ +HTTP/1.1 200 OK +Server: nginx +Date: Fri, 09 Jun 2017 14:30:19 GMT +Content-Type: application/json; charset=utf-8 +Content-Length: 1701 +Connection: keep-alive +Access-Control-Allow-Origin: * +Cache-Control: no-cache +Strict-Transport-Security: max-age=31556926; includeSubDomains; preload + +{"by":"aaronpk","id":14516923,"kids":[14517124,14517655,14516983,14518902,14518663],"parent":14516538,"text":"I am a member of the W3C Social Web Working Group (https://www.w3.org/wiki/Socialwg), and have been organizing IndieWebCamp (https://indieweb.org/) conferences in this space for the last 7 years. We've been making a lot of progress:

* https://www.w3.org/TR/webmention/ - cross-site commenting

* https://www.w3.org/TR/micropub/ - API for apps to create posts on various servers

* https://www.w3.org/TR/websub/ - realtime subscriptions to feeds

* More: https://indieweb.org/specs

We focus on making sure there are a plurality of implementations and approaches rather than trying to build a single software solution to solve everything.

Try commenting on my copy of this post on my website by sending me a webmention! https://aaronparecki.com/2017/06/08/9/indieweb","time":1496953400,"type":"comment"} \ No newline at end of file From 0ccf34b6567c5cfc9f06f306ca3697bca6489327 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 9 Jun 2017 07:45:59 -0700 Subject: [PATCH 3/5] use consistent pattern for fetching from silos --- lib/XRay/Fetcher.php | 14 +------------- lib/XRay/Formats/Twitter.php | 11 +++++++++-- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/lib/XRay/Fetcher.php b/lib/XRay/Fetcher.php index 608baea..3e37789 100644 --- a/lib/XRay/Fetcher.php +++ b/lib/XRay/Fetcher.php @@ -145,19 +145,7 @@ class Fetcher { ]; } - $tweet = Formats\Twitter::fetch($url, $creds); - if(!$tweet) { - return [ - 'error' => 'twitter_error', - 'error_description' => $e->getMessage() - ]; - } - - return [ - 'url' => $url, - 'body' => $tweet, - 'code' => 200, - ]; + return Formats\Twitter::fetch($url, $creds); } private function _fetch_github($url, $opts) { diff --git a/lib/XRay/Formats/Twitter.php b/lib/XRay/Formats/Twitter.php index 7462dd5..db2fd9c 100644 --- a/lib/XRay/Formats/Twitter.php +++ b/lib/XRay/Formats/Twitter.php @@ -33,10 +33,17 @@ class Twitter extends Format { try { $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']); } catch(\TwitterException $e) { - return false; + return [ + 'error' => 'twitter_error', + 'error_description' => $e->getMessage() + ]; } - return $tweet; + return [ + 'url' => $url, + 'body' => $tweet, + 'code' => 200, + ]; } public static function parse($json, $url) { From d50231142a4a1f9315359c7c9ed1c6224a5ffa5f Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 9 Jun 2017 08:53:53 -0700 Subject: [PATCH 4/5] adds support for parsing checkins checkin data is returned embedded like author data rather than in the `refs` object closes #35 --- lib/XRay/Formats/Mf2.php | 36 ++++++++++++++++++++--- tests/ParseTest.php | 36 +++++++++++++++++++++++ tests/data/source.example.com/checkin | 28 ++++++++++++++++++ tests/data/source.example.com/checkin-url | 24 +++++++++++++++ 4 files changed, 120 insertions(+), 4 deletions(-) create mode 100644 tests/data/source.example.com/checkin create mode 100644 tests/data/source.example.com/checkin-url diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index 4df9349..b749c1c 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -235,6 +235,31 @@ class Mf2 extends Format { } } + private static function parseEmbeddedHCard($property, $item, &$http) { + if(array_key_exists($property, $item['properties'])) { + $mf2 = $item['properties'][$property][0]; + if(is_string($mf2) && self::isURL($mf2)) { + $hcard = [ + 'type' => 'card', + 'url' => $mf2 + ]; + return $hcard; + } if(self::isMicroformat($mf2) && in_array('h-card', $mf2['type'])) { + $hcard = [ + 'type' => 'card', + ]; + $properties = ['name','latitude','longitude','locality','region','country','url']; + foreach($properties as $p) { + if($v=self::getPlaintext($mf2, $p)) { + $hcard[$p] = $v; + } + } + return $hcard; + } + } + return false; + } + private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) { foreach($properties as $p) { if(array_key_exists($p, $item['properties'])) { @@ -303,7 +328,7 @@ class Mf2 extends Format { $refs = []; // Single plaintext and URL values - self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data); + self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data, $http); // These properties are always returned as arrays and may contain plaintext content // First strip leading hashtags from category values if present @@ -324,6 +349,9 @@ class Mf2 extends Format { if($author = self::findAuthor($mf2, $item, $http)) $data['author'] = $author; + if($checkin = self::parseEmbeddedHCard('checkin', $item, $http)) + $data['checkin'] = $checkin; + $response = [ 'data' => $data ]; @@ -341,7 +369,7 @@ class Mf2 extends Format { ]; $refs = []; - self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data); + self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data, $http); // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content" $description = self::parseHTMLValue('description', $item); @@ -405,7 +433,7 @@ class Mf2 extends Format { 'type' => 'product' ]; - self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data); + self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data, $http); $description = self::parseHTMLValue('description', $item); if($description) { @@ -454,7 +482,7 @@ class Mf2 extends Format { $refs = []; // Single plaintext and URL values - self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data); + self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data, $http); // These properties are always returned as arrays and may contain plaintext content self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http); diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 8853c6f..6d5b0a4 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -499,6 +499,42 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertFalse($data['info']['found_fragment']); } + public function testCheckin() { + $url = 'http://source.example.com/checkin'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $venue = $data['data']['checkin']; + $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $venue['url']); + $this->assertEquals('DreamHost', $venue['name']); + $this->assertEquals('45.518716', $venue['latitude']); + $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); + $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); + $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); + $this->assertArrayNotHasKey('name', $data['data']); + } + + public function testCheckinURLOnly() { + $url = 'http://source.example.com/checkin-url'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $venue = $data['data']['checkin']; + $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $venue['url']); + $this->assertEquals('Homebrew Website Club!', $data['data']['content']['text']); + $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $data['data']['photo'][0]); + $this->assertEquals('2017-06-07T17:14:40-07:00', $data['data']['published']); + $this->assertArrayNotHasKey('name', $data['data']); + } + public function testXKCD() { $url = 'http://xkcd.com/1810/'; $response = $this->parse(['url' => $url]); diff --git a/tests/data/source.example.com/checkin b/tests/data/source.example.com/checkin new file mode 100644 index 0000000..53150d9 --- /dev/null +++ b/tests/data/source.example.com/checkin @@ -0,0 +1,28 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +

+ at DreamHost +
+ 45.518716 + -122.679614 +
+
+

Homebrew Website Club!

+ + + + + + + diff --git a/tests/data/source.example.com/checkin-url b/tests/data/source.example.com/checkin-url new file mode 100644 index 0000000..9621917 --- /dev/null +++ b/tests/data/source.example.com/checkin-url @@ -0,0 +1,24 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + + at DreamHost + +

Homebrew Website Club!

+ + + + + + + From a16b845af3fda4b731c579f156f693ae2f735dc6 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 9 Jun 2017 09:01:52 -0700 Subject: [PATCH 5/5] adds in-reply-to property for HN comments --- lib/XRay/Formats/Hackernews.php | 4 ++++ tests/HackernewsTest.php | 1 + 2 files changed, 5 insertions(+) diff --git a/lib/XRay/Formats/Hackernews.php b/lib/XRay/Formats/Hackernews.php index a359aff..ac2aa71 100644 --- a/lib/XRay/Formats/Hackernews.php +++ b/lib/XRay/Formats/Hackernews.php @@ -75,6 +75,10 @@ class Hackernews extends Format { ]; } + if(isset($data['parent'])) { + $entry['in-reply-to'] = ['https://news.ycombinator.com/item?id='.$data['parent']]; + } + return [ 'data' => $entry, 'original' => $json diff --git a/tests/HackernewsTest.php b/tests/HackernewsTest.php index e414f92..2fbceb5 100644 --- a/tests/HackernewsTest.php +++ b/tests/HackernewsTest.php @@ -53,6 +53,7 @@ What projects or companies are you working on to combat filter bubbles, walled g $this->assertEquals('2017-06-08T20:23:20+00:00', $data['data']['published']); $this->assertEquals('aaronpk', $data['data']['author']['name']); $this->assertEquals('https://news.ycombinator.com/user?id=aaronpk', $data['data']['author']['url']); + $this->assertEquals('https://news.ycombinator.com/item?id=14516538', $data['data']['in-reply-to'][0]); $this->assertArrayNotHasKey('name', $data['data']); $this->assertEquals('I am a member of the W3C Social Web Working Group (https://www.w3.org/wiki/Socialwg), and have been organizing IndieWebCamp (https://indieweb.org/) conferences in this space for the last 7 years. We\'ve been making a lot of progress:

* https://www.w3.org/TR/webmention/ - cross-site commenting

* https://www.w3.org/TR/micropub/ - API for apps to create posts on various servers

* https://www.w3.org/TR/websub/ - realtime subscriptions to feeds

* More: https://indieweb.org/specs

We focus on making sure there are a plurality of implementations and approaches rather than trying to build a single software solution to solve everything.

Try commenting on my copy of this post on my website by sending me a webmention! https://aaronparecki.com/2017/06/08/9/indieweb

', $data['data']['content']['html']); $this->assertEquals('I am a member of the W3C Social Web Working Group (https://www.w3.org/wiki/Socialwg), and have been organizing IndieWebCamp (https://indieweb.org/) conferences in this space for the last 7 years. We\'ve been making a lot of progress: