@ -0,0 +1,58 @@ | |||||
<?php | |||||
use Symfony\Component\HttpFoundation\Request; | |||||
use Symfony\Component\HttpFoundation\Response; | |||||
/**
 * HTTP controller for the feed discovery endpoint.
 *
 * Reads the `url`, `timeout`, `max_redirects` and `pretty` request parameters,
 * delegates the discovery to p3k\XRay::feeds() and writes the result back as a
 * JSON response.
 */
class Feeds {

  /** HTTP client handed to XRay; public so callers can replace it. */
  public $http;

  /** Whether the JSON response body should be pretty-printed. */
  private $_pretty = false;

  public function __construct() {
    $this->http = new p3k\HTTP();
  }

  /**
   * Fills in the response with a status code, headers and a JSON body.
   *
   * @param Response $response response object to populate
   * @param int      $code     HTTP status code
   * @param mixed    $params   value that is JSON-encoded as the body
   * @param array    $headers  extra headers (name => value); Content-Type is
   *                           always overwritten with application/json
   * @return Response the same response object
   */
  private function respond(Response $response, $code, $params, $headers=[]) {
    $response->setStatusCode($code);
    foreach($headers as $k=>$v) {
      $response->headers->set($k, $v);
    }
    $response->headers->set('Content-Type', 'application/json');

    // The JSON_* options are bit flags, so combine them with a bitwise OR
    $opts = JSON_UNESCAPED_SLASHES;
    if($this->_pretty) $opts |= JSON_PRETTY_PRINT;

    $response->setContent(json_encode($params, $opts)."\n");
    return $response;
  }

  /**
   * Handles a feed discovery request.
   *
   * Responds with 400 when no usable `url` parameter is given or when the
   * XRay result contains a non-empty `error`, and with 200 otherwise.
   *
   * @param Request  $request  incoming request
   * @param Response $response response object to populate
   * @return Response
   */
  public function find(Request $request, Response $response) {
    $opts = [];

    // Only accept positive numeric timeouts. Anything else (text, arrays)
    // would otherwise break the division below.
    $timeout = $request->get('timeout');
    if(is_numeric($timeout) && $timeout > 0) {
      // We might make 2 HTTP requests, so each request gets half the desired timeout
      $opts['timeout'] = $timeout / 2;
    }

    // Ignore non-numeric values instead of silently casting them to 0
    $maxRedirects = $request->get('max_redirects');
    if(is_numeric($maxRedirects) && (int)$maxRedirects) {
      $opts['max_redirects'] = (int)$maxRedirects;
    }

    if($request->get('pretty')) {
      $this->_pretty = true;
    }

    // A repeated/array parameter (url[]=...) is not a usable URL either
    $url = $request->get('url');
    if(!$url || !is_string($url)) {
      return $this->respond($response, 400, [
        'error' => 'missing_url',
        'error_description' => 'Provide a URL to fetch'
      ]);
    }

    $xray = new p3k\XRay();
    $xray->http = $this->http;
    $res = $xray->feeds($url, $opts);

    return $this->respond($response, !empty($res['error']) ? 400 : 200, $res);
  }
}
@ -0,0 +1,113 @@ | |||||
<?php | |||||
namespace p3k\XRay; | |||||
use p3k\XRay\Formats; | |||||
/**
 * Discovers the feeds that are available at a URL.
 *
 * The URL is fetched once. If the response itself is an Atom, RSS or JSON Feed
 * document it is reported directly; otherwise the body is parsed as HTML and
 * both its rel=alternate links and its Microformats are inspected.
 */
class Feeds {

  /** HTTP client used to fetch the URL (must provide get(), set_timeout(), set_max_redirects()). */
  private $http;

  public function __construct($http) {
    $this->http = $http;
  }

  /**
   * Finds the feeds advertised by (or served at) the given URL.
   *
   * @param string $url  http or https URL to inspect
   * @param array  $opts optional `timeout` and `max_redirects` settings that
   *                     are applied to the HTTP client; the options are also
   *                     passed on to the HTML parser
   * @return array either ['error' => ..., 'error_description' => ...] for an
   *               unusable URL, or ['url' => ..., 'code' => ..., 'feeds' => [...]]
   *               where each feed is ['url' => ..., 'type' => ...] and feeds
   *               are ordered microformats, jsonfeed, atom, rss
   */
  public function find($url, $opts=[]) {
    if(isset($opts['timeout']))
      $this->http->set_timeout($opts['timeout']);
    if(isset($opts['max_redirects']))
      $this->http->set_max_redirects($opts['max_redirects']);

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if(!in_array($scheme, ['http','https'], true)) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'Only http and https URLs are supported'
      ];
    }

    $host = parse_url($url, PHP_URL_HOST);
    if(!$host) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'The URL provided was not valid'
      ];
    }

    $url = normalize_url($url);

    // NOTE(review): the result is used without checking for a transport
    // error - confirm what the HTTP client returns when the request fails.
    $result = $this->http->get($url);
    $body = $result['body'];

    $feeds = [];

    // First check the content type of the response
    $contentType = isset($result['headers']['Content-Type']) ? $result['headers']['Content-Type'] : '';

    // A repeated header arrives as a list; the last value is the one used
    if(is_array($contentType))
      $contentType = $contentType[count($contentType)-1];

    if(strpos($contentType, 'application/atom+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'atom'
      ];
    } elseif(strpos($contentType, 'application/rss+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'rss'
      ];
    } elseif(strpos($contentType, 'application/json') !== false
      && substr($body, 0, 1) == '{'
      && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1') !== false) {
      // Plain JSON only counts as a feed when the JSON Feed version URL
      // shows up near the start of the document
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'jsonfeed'
      ];
    } else {
      // Some other document was returned, parse the HTML and look for rel alternates and Microformats
      $mf2 = \mf2\Parse($body, $result['url']);

      if(isset($mf2['alternates'])) {
        // Checked in this order for every alternate link
        $alternateTypes = [
          'application/json' => 'jsonfeed',
          'application/atom+xml' => 'atom',
          'application/rss+xml' => 'rss',
        ];

        foreach($mf2['alternates'] as $alt) {
          // rel=alternate links are not required to carry a type attribute
          if(!isset($alt['type']) || !isset($alt['url']))
            continue;

          foreach($alternateTypes as $mime=>$feedType) {
            if(strpos($alt['type'], $mime) !== false) {
              $feeds[] = [
                'url' => $alt['url'],
                'type' => $feedType
              ];
            }
          }
        }
      }

      $parsed = Formats\HTML::parse($this->http, $body, $result['url'], array_merge($opts, ['expect'=>'feed']));
      if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
        $feeds[] = [
          'url' => $result['url'],
          'type' => 'microformats'
        ];
      }
    }

    // Sort feeds by priority
    $rank = ['microformats'=>0,'jsonfeed'=>1,'atom'=>2,'rss'=>3];
    usort($feeds, function($a, $b) use($rank) {
      // usort needs a negative/zero/positive integer, not a boolean
      return $rank[$a['type']] - $rank[$b['type']];
    });

    return [
      'url' => $result['url'],
      'code' => $result['code'],
      'feeds' => $feeds
    ];
  }
}
@ -0,0 +1,156 @@ | |||||
<?php | |||||
use Symfony\Component\HttpFoundation\Request; | |||||
use Symfony\Component\HttpFoundation\Response; | |||||
/**
 * Tests for the feed discovery controller.
 *
 * Every test requests a URL on feed.example.com through the Feeds controller
 * and checks which feeds are reported and in which order.
 */
class FindFeedsTest extends PHPUnit_Framework_TestCase {

  /** Feeds controller under test. */
  private $client;

  public function setUp() {
    $this->client = new Feeds();
    // Swap in the test HTTP client, which is pointed at the data/ directory
    // next to this file (presumably the canned responses - confirm).
    $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
  }

  /**
   * Runs the controller with the given query parameters.
   *
   * @param array $params query parameters for the request
   * @return Response
   */
  private function parse($params) {
    $request = new Request($params);
    $response = new Response();
    return $this->client->find($request, $response);
  }

  /**
   * Requests discovery for a URL, asserts a 200 response and returns the
   * decoded `feeds` list from the JSON body.
   *
   * @param string $url URL to run discovery on
   * @return array list of feed objects with `url` and `type` properties
   */
  private function findFeeds($url) {
    $response = $this->parse(['url' => $url]);
    $body = $response->getContent();
    $this->assertEquals(200, $response->getStatusCode());
    return json_decode($body)->feeds;
  }

  // h-feed with no alternates
  public function testMf2HFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-child-author');

    $this->assertCount(1, $feeds);
    $this->assertEquals('http://feed.example.com/h-feed-with-child-author', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
  }

  // h-feed that links to Atom alternate
  public function testMf2WithAtomAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-atom-alternate');

    $this->assertCount(2, $feeds);
    // Should rank Microformats above Atom
    $this->assertEquals('http://feed.example.com/h-feed-with-atom-alternate', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/atom', $feeds[1]->url);
    $this->assertEquals('atom', $feeds[1]->type);
  }

  // h-feed that links to RSS alternate
  public function testMf2WithRSSAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-rss-alternate');

    $this->assertCount(2, $feeds);
    // Should rank Microformats above RSS
    $this->assertEquals('http://feed.example.com/h-feed-with-rss-alternate', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/podcast.xml', $feeds[1]->url);
    $this->assertEquals('rss', $feeds[1]->type);
  }

  // No mf2 but links to Atom alternate
  public function testNoMf2() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-atom-alternate');

    $this->assertCount(1, $feeds);
    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // No mf2 but links to both JSON Feed and Atom alternates
  public function testNoMf2WithJSONAndAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-json-and-atom');

    $this->assertCount(2, $feeds);
    // Should rank JSONFeed above Atom
    $this->assertEquals('http://feed.example.com/jsonfeed', $feeds[0]->url);
    $this->assertEquals('jsonfeed', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/atom', $feeds[1]->url);
    $this->assertEquals('atom', $feeds[1]->type);
  }

  // input URL is an Atom feed
  public function testInputIsAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/atom');

    $this->assertCount(1, $feeds);
    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // input URL redirects to an Atom feed
  public function testInputIsRedirectToAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/redirect-to-atom');

    $this->assertCount(1, $feeds);
    // The reported URL is the redirect target, not the requested URL
    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // input URL is an RSS feed
  public function testInputIsRSS() {
    $feeds = $this->findFeeds('http://feed.example.com/rss');

    $this->assertCount(1, $feeds);
    $this->assertEquals('http://feed.example.com/rss', $feeds[0]->url);
    $this->assertEquals('rss', $feeds[0]->type);
  }

  // input URL is a JSON feed
  public function testInputIsJSONFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/jsonfeed');

    $this->assertCount(1, $feeds);
    $this->assertEquals('http://feed.example.com/jsonfeed', $feeds[0]->url);
    $this->assertEquals('jsonfeed', $feeds[0]->type);
  }
}
@ -0,0 +1,36 @@ | |||||
HTTP/1.1 200 OK | |||||
Server: Apache | |||||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
Content-Type: text/html; charset=utf-8 | |||||
Connection: keep-alive | |||||
<html> | |||||
<head> | |||||
<title>Test</title> | |||||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
</head> | |||||
<body> | |||||
<a href="/author" class="h-card">Author Name</a> | |||||
<ul> | |||||
<li class="h-entry"> | |||||
<a href="/1" class="u-url p-name">One</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/2" class="u-url p-name">Two</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/3" class="u-url p-name">Three</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/4" class="u-url p-name">Four</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
</ul> | |||||
</body> | |||||
</html> |
@ -0,0 +1,36 @@ | |||||
HTTP/1.1 200 OK | |||||
Server: Apache | |||||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
Content-Type: text/html; charset=utf-8 | |||||
Connection: keep-alive | |||||
<html> | |||||
<head> | |||||
<title>Test</title> | |||||
<link rel="alternate" type="application/rss+xml" href="/podcast.xml"> | |||||
</head> | |||||
<body> | |||||
<a href="/author" class="h-card">Author Name</a> | |||||
<ul> | |||||
<li class="h-entry"> | |||||
<a href="/1" class="u-url p-name">One</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/2" class="u-url p-name">Two</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/3" class="u-url p-name">Three</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
<li class="h-entry"> | |||||
<a href="/4" class="u-url p-name">Four</a> | |||||
<a href="/author" class="u-author"></a> | |||||
</li> | |||||
</ul> | |||||
</body> | |||||
</html> |
@ -0,0 +1,36 @@ | |||||
HTTP/1.1 200 OK | |||||
Server: Apache | |||||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
Content-Type: text/html; charset=utf-8 | |||||
Connection: keep-alive | |||||
<html> | |||||
<head> | |||||
<title>Test</title> | |||||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
</head> | |||||
<body> | |||||
<h1><a href="/author">Author Name</a></h1> | |||||
<ul> | |||||
<li> | |||||
<a href="/1">One</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/2">Two</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/3">Three</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/4">Four</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
</ul> | |||||
</body> | |||||
</html> |
@ -0,0 +1,37 @@ | |||||
HTTP/1.1 200 OK | |||||
Server: Apache | |||||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
Content-Type: text/html; charset=utf-8 | |||||
Connection: keep-alive | |||||
<html> | |||||
<head> | |||||
<title>Test</title> | |||||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||||
<link rel="alternate" type="application/json" href="/jsonfeed"> | |||||
</head> | |||||
<body> | |||||
<h1><a href="/author">Author Name</a></h1> | |||||
<ul> | |||||
<li> | |||||
<a href="/1">One</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/2">Two</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/3">Three</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
<li> | |||||
<a href="/4">Four</a> | |||||
<a href="/author"></a> | |||||
</li> | |||||
</ul> | |||||
</body> | |||||
</html> |
@ -0,0 +1,15 @@ | |||||
HTTP/1.1 301 Moved Permanently | |||||
Server: Apache | |||||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||||
Content-Type: text/html; charset=utf-8 | |||||
Connection: keep-alive | |||||
Location: http://feed.example.com/atom | |||||
<html> | |||||
<head> | |||||
<title>Moved</title> | |||||
</head> | |||||
<body> | |||||
This page has moved | |||||
</body> | |||||
</html> |