From a9b1001e6232d566fd0687ab773212559062f7a1 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Wed, 29 Nov 2017 16:46:26 -0800 Subject: [PATCH] switch to fork of picofeed with authorUrl support * adds test of instagram-atom feed with individual authors per item * dedupes atom/rss title if it's a prefix of the content --- composer.json | 11 +- composer.lock | 19 +- lib/XRay/Formats/XML.php | 26 +- tests/FeedTest.php | 19 + tests/data/feed.example.com/instagram-atom | 1067 ++++++++++++++++++++ 5 files changed, 1127 insertions(+), 15 deletions(-) create mode 100644 tests/data/feed.example.com/instagram-atom diff --git a/composer.json b/composer.json index 1b981ac..690d07b 100644 --- a/composer.json +++ b/composer.json @@ -12,7 +12,7 @@ "p3k/timezone": "*", "p3k/http": "0.1.*", "cebe/markdown": "1.1.*", - "miniflux/picofeed": "^0.1.37", + "miniflux/picofeed": "dev-master", "facebook/graph-sdk": "^5.5" }, "autoload": { @@ -38,5 +38,12 @@ "controllers/Feeds.php", "controllers/Certbot.php" ] - } + }, + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/aaronpk/picoFeed.git", + "no-api": true + } + ] } diff --git a/composer.lock b/composer.lock index 19d658b..0fdcb80 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "content-hash": "a1330f39bf5204a5cc6bdd1222915639", + "content-hash": "c61ccebf0105e57a0a60f431a95fcaed", "packages": [ { "name": "cebe/markdown", @@ -316,16 +316,16 @@ }, { "name": "miniflux/picofeed", - "version": "v0.1.37", + "version": "dev-master", "source": { "type": "git", - "url": "https://github.com/miniflux/picoFeed.git", - "reference": "402b7f07629577e7929625e78bc88d3d5831a22d" + "url": "https://github.com/aaronpk/picoFeed.git", + "reference": "989c0bcf2eac016a4104abce1aadff791fc287ab" }, "dist": { "type": "zip", - "url": 
"https://api.github.com/repos/miniflux/picoFeed/zipball/402b7f07629577e7929625e78bc88d3d5831a22d", - "reference": "402b7f07629577e7929625e78bc88d3d5831a22d", + "url": "https://api.github.com/repos/aaronpk/picoFeed/zipball/989c0bcf2eac016a4104abce1aadff791fc287ab", + "reference": "989c0bcf2eac016a4104abce1aadff791fc287ab", "shasum": "" }, "require": { @@ -354,7 +354,6 @@ "PicoFeed": "lib/" } }, - "notification-url": "https://packagist.org/downloads/", "license": [ "MIT" ], @@ -365,7 +364,7 @@ ], "description": "Modern library to handle RSS/Atom feeds", "homepage": "https://github.com/miniflux/picoFeed", - "time": "2017-11-02T03:20:36+00:00" + "time": "2017-11-30T00:16:58+00:00" }, { "name": "p3k/http", @@ -2090,7 +2089,9 @@ ], "aliases": [], "minimum-stability": "stable", - "stability-flags": [], + "stability-flags": { + "miniflux/picofeed": 20 + }, "prefer-stable": false, "prefer-lowest": false, "platform": [], diff --git a/lib/XRay/Formats/XML.php b/lib/XRay/Formats/XML.php index a7fdbf0..91bca70 100644 --- a/lib/XRay/Formats/XML.php +++ b/lib/XRay/Formats/XML.php @@ -60,20 +60,38 @@ class XML extends Format { if($item->getPublishedDate()) $entry['published'] = $item->getPublishedDate()->format('c'); - if($item->getTitle() && $item->getTitle() != $item->getUrl()) - $entry['name'] = $item->getTitle(); - if($item->getContent()) $entry['content'] = [ 'html' => self::sanitizeHTML($item->getContent()), 'text' => self::stripHTML($item->getContent()) ]; + if($item->getTitle() && $item->getTitle() != $item->getUrl()) { + $title = $item->getTitle(); + $entry['name'] = $title; + + // Check if the title is a prefix of the content and drop if so + if(isset($entry['content'])) { + if(substr($title, -3) == '...' 
|| substr($title, -strlen('…')) == '…') { + if(substr($title, -3) == '...') { + $trimmedTitle = substr($title, 0, -3); + } else { + $trimmedTitle = substr($title, 0, -strlen('…')); /* substr() counts bytes and '…' is multi-byte in UTF-8, so trim by its byte length rather than 1 */ + } + if(substr($entry['content']['text'], 0, strlen($trimmedTitle)) == $trimmedTitle) { + unset($entry['name']); + } + } + } + } + if($item->getAuthor()) { $entry['author']['name'] = $item->getAuthor(); } - if($feed->siteUrl) { + if($item->getAuthorUrl()) { + $entry['author']['url'] = $item->getAuthorUrl(); + } else if($feed->siteUrl) { $entry['author']['url'] = $feed->siteUrl; } diff --git a/tests/FeedTest.php b/tests/FeedTest.php index 0fa66c8..07d44fa 100644 --- a/tests/FeedTest.php +++ b/tests/FeedTest.php @@ -295,4 +295,23 @@ class FeedTest extends PHPUnit_Framework_TestCase { $this->assertEquals('feed', $data->type); } + public function testInstagramAtomFeed() { + $url = 'http://feed.example.com/instagram-atom'; + $response = $this->parse(['url' => $url, 'expect' => 'feed']); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body)->data; + + $this->assertEquals(12, count($data->items)); + + $this->assertEquals('Marshall Kirkpatrick', $data->items[11]->author->name); + $this->assertEquals('https://www.instagram.com/marshallk/', $data->items[11]->author->url); + $this->assertEquals('https://www.instagram.com/p/BcFjw9SHYql/', $data->items[11]->url); + $this->assertEquals('2017-11-29T17:04:00+00:00', $data->items[11]->published); + // Should remove the "name" since it's a prefix of the content + $this->assertObjectNotHasAttribute('name', $data->items[11]); + $this->assertEquals('Sometimes my job requires me to listen to 55 minutes of an hour long phone call while I go for a long walk on a sunny morning and wait for my turn to give an update. 
Pretty nice!', $data->items[11]->content->text); + } + } \ No newline at end of file diff --git a/tests/data/feed.example.com/instagram-atom b/tests/data/feed.example.com/instagram-atom new file mode 100644 index 0000000..369787c --- /dev/null +++ b/tests/data/feed.example.com/instagram-atom @@ -0,0 +1,1067 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: application/atom+xml; charset=utf-8 +Connection: keep-alive + + + +granary +https://instagram-atom.appspot.com/ +instagram-atom feed for Aaron Parecki + +📡 w7apk.com 🔒 oauth.net 🎥 backpedal.tv 🎙 streampdx.com + +https://scontent-dft4-2.cdninstagram.com/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg +2017-11-29T21:50:03 + + http://activitystrea.ms/schema/1.0/person + https://aaronparecki.com/ + Aaron Parecki + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/sweetmadicakes/ + sweetmadicakes + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcGEgBjBuhm/ + Every single one of her worksheets is like this... 😂😂😂 she couldn't be more... + + +
+ + + + + + +Every single one of her worksheets is like this... 😂😂😂 she couldn't be more like me if she tried. +

+ + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T21:50:03 + 2017-11-29T21:50:03 + + + + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/jessman5/ + jessman5 + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcGGtYNHvRu/ + Did some late evening whiteboard lettering stuff again today. Still learning but improving :)... + + +
+ + + + + + +Did some late evening whiteboard lettering stuff again today. Still learning but improving :)
+.
+.
+#lettering #type #typography #chill #itsonlychaos #chaos #art #artwork #denkwerk #denkwerk_cgn #jj_forum #allshots_ +

+ + + +

+

+ + denkwerk + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T22:09:21 + 2017-11-29T22:09:21 + + + + + denkwerk + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/theta444/ + Esther + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcEY0hgnZtK/ + Latergram: Gabi's first Hot Pot 😊 + + +
+ + + + + + +Latergram: Gabi's first Hot Pot 😊 +

+ + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T06:09:08 + 2017-11-29T06:09:08 + + + + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/silverthegreat/ + Silver Fox + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcDzePPDWHF/ + Sawed off ball joint is a future me problem. The rain let up just... + + +
+ + + + + + +Sawed off ball joint is a future me problem. The rain let up just long enough to get my transmission off. Soon I'll be bringing my new engine over!
+#hondacivic #engineswap #fuckingballjoint #girlmechanic +

+ + + +

+

+ + + +

+

+ + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T00:42:47 + 2017-11-29T00:42:47 + + + + + + + + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/lily.a.cool/ + Lily Cool + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/Bb3HILgBqGzDfibZ5UIPRDymOa578GbyvKGGsU0/ + Demonstration deforestation? + + +
+ + + + + + +Demonstration deforestation? +

+ + + +

+

+ + Magness Memorial Tree Farm + + + +

+ +
+
+ + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-24T02:24:24 + 2017-11-24T02:24:24 + + + + + Magness Memorial Tree Farm + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/stephrouth/ + Steph Routh + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/Bb9_TFIDou8/ + Our morning scavenger hunt list to the park. #gooutside #aunttime + + +
+ + + + + + +Our morning scavenger hunt list to the park. #gooutside #aunttime +

+ + + +

+

+ + Lents, Portland + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-26T18:30:40 + 2017-11-26T18:30:40 + + + + + Lents, Portland + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/jennidy/ + jennidy + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/Bb5yqFhlq0G/ + This Good Good Boy was waiting outside of @baristapdx today. I caught this pic... + + +
+ + + + + + +This Good Good Boy was waiting outside of @baristapdx today. I caught this pic of him after getting a cuppa and then checking out the @littleboxespdx sale at @shopredsail
+#dogfriends #dogsofinstgram +

+ + + +

+

+ + Barista + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-25T03:23:15 + 2017-11-25T03:23:15 + + + + + Barista + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/amandajudkins/ + Amanda Judkins + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/Bb-gO85lT8P/ + Maritime adventures #vancouverbc #sundayfunday + + +
+ + + + + + +Maritime adventures #vancouverbc #sundayfunday +

+ + + +

+

+ + Vancouver Maritime Museum (official) + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-26T23:18:28 + 2017-11-26T23:18:28 + + + + + Vancouver Maritime Museum (official) + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/mikeestee/ + Mike Estee + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcDy0gzgZR1/ + new machine! :0 #bantamtools + + +
+ + + + + + +new machine! :0 #bantamtools +

+ + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T00:37:05 + 2017-11-29T00:37:05 + + + + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/krees/ + Kim Rees + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcAPViLBCGh/ + What does the 🦊 say? This one looked like he was begging for a... + + +
+ + + + + + +What does the 🦊 say? This one looked like he was begging for a cup of coffee +

+ + + +

+

+ + Great Falls, Virginia + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-27T15:29:18 + 2017-11-27T15:29:18 + + + + + Great Falls, Virginia + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/reidab/ + Reid Beels + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/Bb8fW4agNWm/ + The many fungi of Tryon Creek 🌲🍄🌲 + + +
+ + + + + + +The many fungi of Tryon Creek 🌲🍄🌲 +

+ + + +

+

+ + + +

+

+ + + +

+

+ + + +

+

+ + + +

+

+ + + +

+

+ + + +

+

+ + + +

+

+ + Tryon Creek State Natural Area + + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-26T04:32:19 + 2017-11-26T04:32:19 + + + + + Tryon Creek State Natural Area + + + + + + + + + + + + + + + + + + + + + +
+ + + + + + http://activitystrea.ms/schema/1.0/person + https://www.instagram.com/marshallk/ + Marshall Kirkpatrick + + + + + http://activitystrea.ms/schema/1.0/photo + + + + https://www.instagram.com/p/BcFjw9SHYql/ + Sometimes my job requires me to listen to 55 minutes of an hour long... + + +
+ + + + + + +Sometimes my job requires me to listen to 55 minutes of an hour long phone call while I go for a long walk on a sunny morning and wait for my turn to give an update. Pretty nice! +

+ + + +

+ +
+
+ + + + + + + + + + + + + + + + + + + + http://activitystrea.ms/schema/1.0/post + + 2017-11-29T17:04:00 + 2017-11-29T17:04:00 + + + + + + + + + + +
+ +
\ No newline at end of file