| @ -0,0 +1,58 @@ | |||||
| <?php | |||||
| use Symfony\Component\HttpFoundation\Request; | |||||
| use Symfony\Component\HttpFoundation\Response; | |||||
/**
 * HTTP controller for the feed-discovery endpoint.
 *
 * Reads `url`, `timeout`, `max_redirects` and `pretty` from the request,
 * delegates the discovery to p3k\XRay::feeds() and writes the outcome to the
 * response as JSON.
 */
class Feeds {

  /**
   * HTTP client handed to XRay. Public so callers (e.g. the test suite) can
   * replace it with a stub before calling find().
   */
  public $http;

  /** Whether the JSON body is pretty-printed; enabled by the `pretty` request parameter. */
  private $_pretty = false;

  public function __construct() {
    $this->http = new p3k\HTTP();
  }

  /**
   * Fill in the response with a status code, optional headers and a JSON body.
   *
   * @param Response $response response object to populate
   * @param int      $code     HTTP status code
   * @param array    $params   data encoded as the JSON body
   * @param array    $headers  extra headers as name => value
   * @return Response the same response object that was passed in
   */
  private function respond(Response $response, $code, $params, $headers=[]) {
    $response->setStatusCode($code);
    foreach($headers as $name => $value) {
      $response->headers->set($name, $value);
    }
    // Set last so the body is always labelled as JSON, whatever $headers contained
    $response->headers->set('Content-Type', 'application/json');

    // json_encode options are bit flags, so combine them with bitwise OR
    $opts = JSON_UNESCAPED_SLASHES;
    if($this->_pretty) $opts |= JSON_PRETTY_PRINT;

    $response->setContent(json_encode($params, $opts)."\n");
    return $response;
  }

  /**
   * Discover the feeds available at the requested URL.
   *
   * Responds with 400 and a `missing_url` error when no usable `url` parameter
   * is given, with 400 when XRay reports an error, and with 200 otherwise.
   *
   * @param Request  $request  incoming request carrying the query parameters
   * @param Response $response response object to populate
   * @return Response
   */
  public function find(Request $request, Response $response) {
    $opts = [];

    // Request parameters are untrusted: an array or non-numeric value here
    // would make the division below fail, so anything that is not a positive
    // number is ignored and the HTTP client's default timeout applies.
    $timeout = $request->get('timeout');
    if(is_numeric($timeout) && $timeout > 0) {
      // We might make 2 HTTP requests, so each request gets half the desired timeout
      $opts['timeout'] = $timeout / 2;
    }

    if($request->get('max_redirects')) {
      $opts['max_redirects'] = (int)$request->get('max_redirects');
    }

    if($request->get('pretty')) {
      $this->_pretty = true;
    }

    $url = $request->get('url');

    // A non-string value (e.g. url[]=...) is as unusable as a missing one
    if(!$url || !is_string($url)) {
      return $this->respond($response, 400, [
        'error' => 'missing_url',
        'error_description' => 'Provide a URL to fetch'
      ]);
    }

    $xray = new p3k\XRay();
    $xray->http = $this->http;
    $res = $xray->feeds($url, $opts);

    return $this->respond($response, !empty($res['error']) ? 400 : 200, $res);
  }

}
| @ -0,0 +1,113 @@ | |||||
| <?php | |||||
| namespace p3k\XRay; | |||||
| use p3k\XRay\Formats; | |||||
/**
 * Finds the feeds advertised by a URL.
 *
 * The URL is fetched once. If the response itself is an Atom, RSS or JSON
 * feed it is reported directly; otherwise the body is parsed as HTML and both
 * its rel=alternate links and any Microformats feed in the page are reported.
 */
class Feeds {

  /** HTTP client; must provide set_timeout(), set_max_redirects() and get() as called below. */
  private $http;

  public function __construct($http) {
    $this->http = $http;
  }

  /**
   * Discover the feeds available at $url.
   *
   * @param string $url  URL to inspect; only http and https are accepted
   * @param array  $opts optional `timeout` and `max_redirects` applied to the
   *                     HTTP client; the whole array is also forwarded to the
   *                     HTML parser
   * @return array either ['error' => ..., 'error_description' => ...] for an
   *               invalid URL, or ['url' => final URL, 'code' => HTTP status,
   *               'feeds' => list of ['url' => ..., 'type' => ...]] ordered
   *               microformats, jsonfeed, atom, rss
   */
  public function find($url, $opts=[]) {
    if(isset($opts['timeout']))
      $this->http->set_timeout($opts['timeout']);
    if(isset($opts['max_redirects']))
      $this->http->set_max_redirects($opts['max_redirects']);

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if(!in_array($scheme, ['http','https'], true)) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'Only http and https URLs are supported'
      ];
    }

    $host = parse_url($url, PHP_URL_HOST);
    if(!$host) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'The URL provided was not valid'
      ];
    }

    $url = normalize_url($url);

    $result = $this->http->get($url);
    $body = $result['body'];

    $feeds = [];

    // First check the content type of the response
    $contentType = isset($result['headers']['Content-Type']) ? $result['headers']['Content-Type'] : '';

    // When the header is reported as a list of values, use the last one
    if(is_array($contentType))
      $contentType = $contentType[count($contentType)-1];

    if(strpos($contentType, 'application/atom+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'atom'
      ];
    } elseif(strpos($contentType, 'application/rss+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'rss'
      ];
    } elseif(strpos($contentType, 'application/json') !== false
      && substr($body, 0, 1) == '{'
      && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1') !== false) {
      // Generic JSON only counts as a feed when the JSON Feed version URL
      // shows up near the start of the document
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'jsonfeed'
      ];
    } else {
      // Some other document was returned, parse the HTML and look for rel alternates and Microformats

      $mf2 = \mf2\Parse($body, $result['url']);
      if(isset($mf2['alternates'])) {
        // Checked in this order for every alternate; the final sort decides the output order
        $alternateTypes = [
          'application/json' => 'jsonfeed',
          'application/atom+xml' => 'atom',
          'application/rss+xml' => 'rss',
        ];

        foreach($mf2['alternates'] as $alt) {
          // rel=alternate links are not required to carry a type (e.g.
          // language alternates); those cannot be classified, so skip them
          if(empty($alt['url']) || empty($alt['type']) || !is_string($alt['type']))
            continue;

          foreach($alternateTypes as $mime => $type) {
            if(strpos($alt['type'], $mime) !== false) {
              $feeds[] = [
                'url' => $alt['url'],
                'type' => $type
              ];
            }
          }
        }
      }

      $parsed = Formats\HTML::parse($this->http, $body, $result['url'], array_merge($opts, ['expect'=>'feed']));
      if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
        $feeds[] = [
          'url' => $result['url'],
          'type' => 'microformats'
        ];
      }
    }

    // Sort feeds by priority
    $rank = ['microformats'=>0,'jsonfeed'=>1,'atom'=>2,'rss'=>3];
    usort($feeds, function($a, $b) use($rank) {
      // usort needs a negative/zero/positive integer, not a boolean
      return $rank[$a['type']] - $rank[$b['type']];
    });

    return [
      'url' => $result['url'],
      'code' => $result['code'],
      'feeds' => $feeds
    ];
  }

}
| @ -0,0 +1,156 @@ | |||||
| <?php | |||||
| use Symfony\Component\HttpFoundation\Request; | |||||
| use Symfony\Component\HttpFoundation\Response; | |||||
/**
 * Exercises the Feeds controller against recorded HTTP responses.
 *
 * Every request is served by p3k\HTTP\Test from the data/ directory next to
 * this file, so no network access is involved.
 */
class FindFeedsTest extends PHPUnit_Framework_TestCase {

  /** Controller under test, rebuilt before each test by setUp(). */
  private $client;

  public function setUp() {
    $this->client = new Feeds();
    $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
  }

  /**
   * Run the controller with the given query parameters.
   *
   * @param array $params query parameters for the request
   * @return Response
   */
  private function parse($params) {
    $request = new Request($params);
    $response = new Response();
    return $this->client->find($request, $response);
  }

  /**
   * Look up feeds for $url, require a 200 response and return the decoded
   * `feeds` list from the JSON body.
   */
  private function findFeeds($url) {
    $response = $this->parse(['url' => $url]);

    $this->assertEquals(200, $response->getStatusCode());
    return json_decode($response->getContent())->feeds;
  }

  /** Assert that one discovered feed has the expected URL and type. */
  private function assertFeed($expectedUrl, $expectedType, $feed) {
    $this->assertEquals($expectedUrl, $feed->url);
    $this->assertEquals($expectedType, $feed->type);
  }

  // h-feed with no alternates
  public function testMf2HFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-child-author');

    $this->assertCount(1, $feeds);
    $this->assertFeed('http://feed.example.com/h-feed-with-child-author', 'microformats', $feeds[0]);
  }

  // h-feed that links to Atom alternate
  public function testMf2WithAtomAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-atom-alternate');

    $this->assertCount(2, $feeds);
    // Should rank Microformats above Atom
    $this->assertFeed('http://feed.example.com/h-feed-with-atom-alternate', 'microformats', $feeds[0]);
    $this->assertFeed('http://feed.example.com/atom', 'atom', $feeds[1]);
  }

  // h-feed that links to RSS alternate
  public function testMf2WithRSSAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-rss-alternate');

    $this->assertCount(2, $feeds);
    // Should rank Microformats above RSS
    $this->assertFeed('http://feed.example.com/h-feed-with-rss-alternate', 'microformats', $feeds[0]);
    $this->assertFeed('http://feed.example.com/podcast.xml', 'rss', $feeds[1]);
  }

  // No mf2 but links to Atom alternate
  public function testNoMf2() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-atom-alternate');

    $this->assertCount(1, $feeds);
    $this->assertFeed('http://feed.example.com/atom', 'atom', $feeds[0]);
  }

  // No mf2 but links to both JSONFeed and Atom alternates
  public function testNoMf2WithJSONAndAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-json-and-atom');

    $this->assertCount(2, $feeds);
    // Should rank JSONFeed above Atom
    $this->assertFeed('http://feed.example.com/jsonfeed', 'jsonfeed', $feeds[0]);
    $this->assertFeed('http://feed.example.com/atom', 'atom', $feeds[1]);
  }

  // input URL is an Atom feed
  public function testInputIsAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/atom');

    $this->assertCount(1, $feeds);
    $this->assertFeed('http://feed.example.com/atom', 'atom', $feeds[0]);
  }

  // input URL redirects to an Atom feed
  public function testInputIsRedirectToAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/redirect-to-atom');

    $this->assertCount(1, $feeds);
    // The reported URL is the redirect target, not the URL that was requested
    $this->assertFeed('http://feed.example.com/atom', 'atom', $feeds[0]);
  }

  // input URL is an RSS feed
  public function testInputIsRSS() {
    $feeds = $this->findFeeds('http://feed.example.com/rss');

    $this->assertCount(1, $feeds);
    $this->assertFeed('http://feed.example.com/rss', 'rss', $feeds[0]);
  }

  // input URL is a JSON feed
  public function testInputIsJSONFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/jsonfeed');

    $this->assertCount(1, $feeds);
    $this->assertFeed('http://feed.example.com/jsonfeed', 'jsonfeed', $feeds[0]);
  }

}
| @ -0,0 +1,36 @@ | |||||
| HTTP/1.1 200 OK | |||||
| Server: Apache | |||||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
| Content-Type: text/html; charset=utf-8 | |||||
| Connection: keep-alive | |||||
| <html> | |||||
| <head> | |||||
| <title>Test</title> | |||||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
| </head> | |||||
| <body> | |||||
| <a href="/author" class="h-card">Author Name</a> | |||||
| <ul> | |||||
| <li class="h-entry"> | |||||
| <a href="/1" class="u-url p-name">One</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/2" class="u-url p-name">Two</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/3" class="u-url p-name">Three</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/4" class="u-url p-name">Four</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| </ul> | |||||
| </body> | |||||
| </html> | |||||
| @ -0,0 +1,36 @@ | |||||
| HTTP/1.1 200 OK | |||||
| Server: Apache | |||||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
| Content-Type: text/html; charset=utf-8 | |||||
| Connection: keep-alive | |||||
| <html> | |||||
| <head> | |||||
| <title>Test</title> | |||||
| <link rel="alternate" type="application/rss+xml" href="/podcast.xml"> | |||||
| </head> | |||||
| <body> | |||||
| <a href="/author" class="h-card">Author Name</a> | |||||
| <ul> | |||||
| <li class="h-entry"> | |||||
| <a href="/1" class="u-url p-name">One</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/2" class="u-url p-name">Two</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/3" class="u-url p-name">Three</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| <li class="h-entry"> | |||||
| <a href="/4" class="u-url p-name">Four</a> | |||||
| <a href="/author" class="u-author"></a> | |||||
| </li> | |||||
| </ul> | |||||
| </body> | |||||
| </html> | |||||
| @ -0,0 +1,36 @@ | |||||
| HTTP/1.1 200 OK | |||||
| Server: Apache | |||||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
| Content-Type: text/html; charset=utf-8 | |||||
| Connection: keep-alive | |||||
| <html> | |||||
| <head> | |||||
| <title>Test</title> | |||||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
| </head> | |||||
| <body> | |||||
| <h1><a href="/author">Author Name</a></h1> | |||||
| <ul> | |||||
| <li> | |||||
| <a href="/1">One</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/2">Two</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/3">Three</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/4">Four</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| </ul> | |||||
| </body> | |||||
| </html> | |||||
| @ -0,0 +1,37 @@ | |||||
| HTTP/1.1 200 OK | |||||
| Server: Apache | |||||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
| Content-Type: text/html; charset=utf-8 | |||||
| Connection: keep-alive | |||||
| <html> | |||||
| <head> | |||||
| <title>Test</title> | |||||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
| <link rel="alternate" type="application/json" href="/jsonfeed"> | |||||
| </head> | |||||
| <body> | |||||
| <h1><a href="/author">Author Name</a></h1> | |||||
| <ul> | |||||
| <li> | |||||
| <a href="/1">One</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/2">Two</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/3">Three</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| <li> | |||||
| <a href="/4">Four</a> | |||||
| <a href="/author"></a> | |||||
| </li> | |||||
| </ul> | |||||
| </body> | |||||
| </html> | |||||
| @ -0,0 +1,15 @@ | |||||
| HTTP/1.1 301 Moved Permanently | |||||
| Server: Apache | |||||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
| Content-Type: text/html; charset=utf-8 | |||||
| Connection: keep-alive | |||||
| Location: http://feed.example.com/atom | |||||
| <html> | |||||
| <head> | |||||
| <title>Moved</title> | |||||
| </head> | |||||
| <body> | |||||
| This page has moved | |||||
| </body> | |||||
| </html> | |||||