| @ -0,0 +1,58 @@ | |||
| <?php | |||
| use Symfony\Component\HttpFoundation\Request; | |||
| use Symfony\Component\HttpFoundation\Response; | |||
/**
 * HTTP controller for the feed-discovery endpoint.
 *
 * Reads its options from the request parameters, asks XRay to look for feeds
 * at the requested URL, and returns the result as a JSON response.
 */
class Feeds {

  /**
   * HTTP client passed on to XRay. Public so that a different client can be
   * substituted after construction.
   */
  public $http;

  /** Whether respond() pretty-prints the JSON body for the current request. */
  private $_pretty = false;

  public function __construct() {
    $this->http = new p3k\HTTP();
  }

  /**
   * Fill in the response with a status code, headers and a JSON-encoded body.
   *
   * @param Response $response Response object to populate
   * @param int      $code     HTTP status code
   * @param mixed    $params   Value to JSON-encode as the body
   * @param array    $headers  Extra headers as name => value
   * @return Response The same response object that was passed in
   */
  private function respond(Response $response, $code, $params, $headers=[]) {
    $response->setStatusCode($code);
    foreach($headers as $name=>$value) {
      $response->headers->set($name, $value);
    }
    // Set after the caller's headers so the body is always labelled as JSON
    $response->headers->set('Content-Type', 'application/json');

    // json_encode options are a bitmask, so combine them with a bitwise OR
    $jsonFlags = JSON_UNESCAPED_SLASHES;
    if($this->_pretty) $jsonFlags |= JSON_PRETTY_PRINT;

    $response->setContent(json_encode($params, $jsonFlags)."\n");
    return $response;
  }

  /**
   * Look for feeds at the URL given in the "url" request parameter.
   *
   * Recognised request parameters:
   *  - url:           URL to inspect (required)
   *  - timeout:       total time budget; must be a positive number
   *  - max_redirects: cast to an integer before being passed on
   *  - pretty:        when truthy, the JSON body is pretty-printed
   *
   * @param Request  $request
   * @param Response $response
   * @return Response 400 when the URL is missing or XRay reports an error, otherwise 200
   */
  public function find(Request $request, Response $response) {
    $opts = [];

    // The value comes straight from the query string, so only do arithmetic on it
    // when it really is a positive number.
    $timeout = $request->get('timeout');
    if(is_numeric($timeout) && $timeout > 0) {
      // We might make 2 HTTP requests, so each request gets half the desired timeout
      $opts['timeout'] = $timeout / 2;
    }

    if($request->get('max_redirects')) {
      $opts['max_redirects'] = (int)$request->get('max_redirects');
    }

    // Assign on every call so one request's setting cannot leak into the next one
    // handled by the same instance.
    $this->_pretty = (bool)$request->get('pretty');

    $url = $request->get('url');

    if(!$url) {
      return $this->respond($response, 400, [
        'error' => 'missing_url',
        'error_description' => 'Provide a URL to fetch'
      ]);
    }

    $xray = new p3k\XRay();
    $xray->http = $this->http;
    $res = $xray->feeds($url, $opts);

    $status = empty($res['error']) ? 200 : 400;
    return $this->respond($response, $status, $res);
  }

}
| @ -0,0 +1,113 @@ | |||
| <?php | |||
| namespace p3k\XRay; | |||
| use p3k\XRay\Formats; | |||
/**
 * Discovers the feeds available at a URL.
 *
 * The URL is fetched once. If the response itself is an Atom, RSS or JSON Feed
 * document, that is the only feed reported. Otherwise the body is parsed as
 * HTML, looking for rel=alternate links and for a Microformats feed.
 */
class Feeds {

  private $http;

  /**
   * @param object $http HTTP client; find() calls get(), set_timeout() and set_max_redirects() on it
   */
  public function __construct($http) {
    $this->http = $http;
  }

  /**
   * Find the feeds at $url.
   *
   * @param string $url  Absolute http or https URL
   * @param array  $opts Optional 'timeout' and 'max_redirects', applied to the HTTP client;
   *                     the whole array is also passed on to the HTML parser
   * @return array On success: 'url' (as reported by the HTTP client), 'code' and 'feeds',
   *               a list of ['url' => ..., 'type' => ...] ordered microformats, jsonfeed,
   *               atom, rss. For an unusable URL: 'error' and 'error_description'.
   */
  public function find($url, $opts=[]) {
    if(isset($opts['timeout']))
      $this->http->set_timeout($opts['timeout']);
    if(isset($opts['max_redirects']))
      $this->http->set_max_redirects($opts['max_redirects']);

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if(!in_array($scheme, ['http','https'], true)) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'Only http and https URLs are supported'
      ];
    }

    $host = parse_url($url, PHP_URL_HOST);
    if(!$host) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'The URL provided was not valid'
      ];
    }

    $url = normalize_url($url);

    // NOTE(review): a failed fetch is not detected here - confirm how get() reports errors
    $result = $this->http->get($url);
    $body = $result['body'];

    $feeds = [];

    // First check the content type of the response
    $contentType = isset($result['headers']['Content-Type']) ? $result['headers']['Content-Type'] : '';

    // When the header holds several values, use the last one
    if(is_array($contentType))
      $contentType = $contentType ? end($contentType) : '';

    if(strpos($contentType, 'application/atom+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'atom'
      ];
    } elseif(strpos($contentType, 'application/rss+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'rss'
      ];
    } elseif(strpos($contentType, 'application/json') !== false
      && substr($body, 0, 1) == '{'
      && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1') !== false) {
      // A JSON response only counts as a feed if the JSON Feed version URL
      // shows up within the first 100 bytes of the body
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'jsonfeed'
      ];
    } else {
      // Some other document was returned, parse the HTML and look for rel alternates and Microformats

      $mf2 = \mf2\Parse($body, $result['url']);
      if(isset($mf2['alternates'])) {
        // Media type substring => feed type, in the order the results are appended
        $alternateTypes = [
          'application/json' => 'jsonfeed',
          'application/atom+xml' => 'atom',
          'application/rss+xml' => 'rss',
        ];

        foreach($mf2['alternates'] as $alt) {
          // The type attribute is optional on rel=alternate links; without a
          // type or a URL there is nothing to report
          if(empty($alt['type']) || empty($alt['url']))
            continue;

          foreach($alternateTypes as $mediaType=>$feedType) {
            if(strpos($alt['type'], $mediaType) !== false) {
              $feeds[] = [
                'url' => $alt['url'],
                'type' => $feedType
              ];
            }
          }
        }
      }

      $parsed = Formats\HTML::parse($this->http, $body, $result['url'], array_merge($opts, ['expect'=>'feed']));
      if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
        $feeds[] = [
          'url' => $result['url'],
          'type' => 'microformats'
        ];
      }
    }

    // Sort feeds by priority. The comparator has to return a negative, zero or
    // positive integer; a boolean can never signal "less than".
    $rank = ['microformats'=>0,'jsonfeed'=>1,'atom'=>2,'rss'=>3];
    usort($feeds, function($a, $b) use($rank) {
      return $rank[$a['type']] - $rank[$b['type']];
    });

    return [
      'url' => $result['url'],
      'code' => $result['code'],
      'feeds' => $feeds
    ];
  }

}
| @ -0,0 +1,156 @@ | |||
| <?php | |||
| use Symfony\Component\HttpFoundation\Request; | |||
| use Symfony\Component\HttpFoundation\Response; | |||
/**
 * Tests for the feed-discovery controller.
 *
 * Every test requests a URL under http://feed.example.com/, which the stub
 * HTTP client resolves from the data directory next to this file, and checks
 * the list of feeds in the JSON response.
 */
class FindFeedsTest extends PHPUnit_Framework_TestCase {

  /** Controller under test, rebuilt before every test. */
  private $client;

  public function setUp() {
    $this->client = new Feeds();
    $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
    $this->client->mc = null;
  }

  /**
   * Run the controller with the given request parameters.
   *
   * @param array $params Query parameters for the request
   * @return Response
   */
  private function parse($params) {
    $request = new Request($params);
    $response = new Response();
    return $this->client->find($request, $response);
  }

  /**
   * Request $url and assert a 200 response listing exactly the expected feeds, in order.
   *
   * @param string $url      URL to look for feeds at
   * @param array  $expected List of [feed URL, feed type] pairs in the expected order
   */
  private function assertFeeds($url, $expected) {
    $response = $this->parse(['url' => $url]);

    $this->assertEquals(200, $response->getStatusCode());

    $feeds = json_decode($response->getContent())->feeds;
    $this->assertCount(count($expected), $feeds);

    foreach($expected as $i=>$feed) {
      $this->assertEquals($feed[0], $feeds[$i]->url);
      $this->assertEquals($feed[1], $feeds[$i]->type);
    }
  }

  // h-feed with no alternates
  public function testMf2HFeed() {
    $this->assertFeeds('http://feed.example.com/h-feed-with-child-author', [
      ['http://feed.example.com/h-feed-with-child-author', 'microformats'],
    ]);
  }

  // h-feed that links to Atom alternate
  public function testMf2WithAtomAlternate() {
    // Should rank Microformats above Atom
    $this->assertFeeds('http://feed.example.com/h-feed-with-atom-alternate', [
      ['http://feed.example.com/h-feed-with-atom-alternate', 'microformats'],
      ['http://feed.example.com/atom', 'atom'],
    ]);
  }

  // h-feed that links to RSS alternate
  public function testMf2WithRSSAlternate() {
    // Should rank Microformats above RSS
    $this->assertFeeds('http://feed.example.com/h-feed-with-rss-alternate', [
      ['http://feed.example.com/h-feed-with-rss-alternate', 'microformats'],
      ['http://feed.example.com/podcast.xml', 'rss'],
    ]);
  }

  // No mf2 but links to Atom alternate
  public function testNoMf2() {
    $this->assertFeeds('http://feed.example.com/html-with-atom-alternate', [
      ['http://feed.example.com/atom', 'atom'],
    ]);
  }

  // No mf2 but links to both JSONFeed and Atom alternates
  public function testNoMf2WithJSONAndAtom() {
    // Should rank JSONFeed above Atom
    $this->assertFeeds('http://feed.example.com/html-with-json-and-atom', [
      ['http://feed.example.com/jsonfeed', 'jsonfeed'],
      ['http://feed.example.com/atom', 'atom'],
    ]);
  }

  // input URL is an Atom feed
  public function testInputIsAtom() {
    $this->assertFeeds('http://feed.example.com/atom', [
      ['http://feed.example.com/atom', 'atom'],
    ]);
  }

  // input URL redirects to an Atom feed
  public function testInputIsRedirectToAtom() {
    $this->assertFeeds('http://feed.example.com/redirect-to-atom', [
      ['http://feed.example.com/atom', 'atom'],
    ]);
  }

  // input URL is an RSS feed
  public function testInputIsRSS() {
    $this->assertFeeds('http://feed.example.com/rss', [
      ['http://feed.example.com/rss', 'rss'],
    ]);
  }

  // input URL is a JSON feed
  public function testInputIsJSONFeed() {
    $this->assertFeeds('http://feed.example.com/jsonfeed', [
      ['http://feed.example.com/jsonfeed', 'jsonfeed'],
    ]);
  }

}
| @ -0,0 +1,36 @@ | |||
| HTTP/1.1 200 OK | |||
| Server: Apache | |||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
| Content-Type: text/html; charset=utf-8 | |||
| Connection: keep-alive | |||
| <html> | |||
| <head> | |||
| <title>Test</title> | |||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||
| </head> | |||
| <body> | |||
| <a href="/author" class="h-card">Author Name</a> | |||
| <ul> | |||
| <li class="h-entry"> | |||
| <a href="/1" class="u-url p-name">One</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/2" class="u-url p-name">Two</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/3" class="u-url p-name">Three</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/4" class="u-url p-name">Four</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| </ul> | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,36 @@ | |||
| HTTP/1.1 200 OK | |||
| Server: Apache | |||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
| Content-Type: text/html; charset=utf-8 | |||
| Connection: keep-alive | |||
| <html> | |||
| <head> | |||
| <title>Test</title> | |||
| <link rel="alternate" type="application/rss+xml" href="/podcast.xml"> | |||
| </head> | |||
| <body> | |||
| <a href="/author" class="h-card">Author Name</a> | |||
| <ul> | |||
| <li class="h-entry"> | |||
| <a href="/1" class="u-url p-name">One</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/2" class="u-url p-name">Two</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/3" class="u-url p-name">Three</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| <li class="h-entry"> | |||
| <a href="/4" class="u-url p-name">Four</a> | |||
| <a href="/author" class="u-author"></a> | |||
| </li> | |||
| </ul> | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,36 @@ | |||
| HTTP/1.1 200 OK | |||
| Server: Apache | |||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
| Content-Type: text/html; charset=utf-8 | |||
| Connection: keep-alive | |||
| <html> | |||
| <head> | |||
| <title>Test</title> | |||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||
| </head> | |||
| <body> | |||
| <h1><a href="/author">Author Name</a></h1> | |||
| <ul> | |||
| <li> | |||
| <a href="/1">One</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/2">Two</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/3">Three</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/4">Four</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| </ul> | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,37 @@ | |||
| HTTP/1.1 200 OK | |||
| Server: Apache | |||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
| Content-Type: text/html; charset=utf-8 | |||
| Connection: keep-alive | |||
| <html> | |||
| <head> | |||
| <title>Test</title> | |||
| <link rel="alternate" type="application/atom+xml" href="/atom"> | |||
| <link rel="alternate" type="application/json" href="/jsonfeed"> | |||
| </head> | |||
| <body> | |||
| <h1><a href="/author">Author Name</a></h1> | |||
| <ul> | |||
| <li> | |||
| <a href="/1">One</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/2">Two</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/3">Three</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| <li> | |||
| <a href="/4">Four</a> | |||
| <a href="/author"></a> | |||
| </li> | |||
| </ul> | |||
| </body> | |||
| </html> | |||
| @ -0,0 +1,15 @@ | |||
| HTTP/1.1 301 Moved Permanently | |||
| Server: Apache | |||
| Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
| Content-Type: text/html; charset=utf-8 | |||
| Connection: keep-alive | |||
| Location: http://feed.example.com/atom | |||
| <html> | |||
| <head> | |||
| <title>Moved</title> | |||
| </head> | |||
| <body> | |||
| This page has moved | |||
| </body> | |||
| </html> | |||