@ -0,0 +1,58 @@ | |||
<?php | |||
use Symfony\Component\HttpFoundation\Request; | |||
use Symfony\Component\HttpFoundation\Response; | |||
/**
 * HTTP controller for feed discovery.
 *
 * Reads the query parameters of an incoming request, asks p3k\XRay to look
 * for feeds at the given URL and writes the result back as a JSON response.
 */
class Feeds {

  /** HTTP client handed to the XRay instance; public so callers can substitute another client. */
  public $http;

  /** Whether the JSON output should be pretty-printed (enabled by the "pretty" parameter). */
  private $_pretty = false;

  public function __construct() {
    $this->http = new p3k\HTTP();
  }

  /**
   * Fill in the response with a status code, optional headers and a JSON body.
   *
   * @param Response $response Response object to populate.
   * @param int      $code     HTTP status code to send.
   * @param mixed    $params   Data to serialize as the JSON body.
   * @param array    $headers  Extra headers as name => value pairs.
   * @return Response The same response object that was passed in.
   */
  private function respond(Response $response, $code, $params, $headers=[]) {
    $response->setStatusCode($code);
    foreach($headers as $name => $value) {
      $response->headers->set($name, $value);
    }
    // Set after the custom headers so the body is always labelled as JSON
    $response->headers->set('Content-Type', 'application/json');

    // The JSON_* options are bit flags, so combine them with a bitwise OR
    $flags = JSON_UNESCAPED_SLASHES;
    if($this->_pretty) {
      $flags |= JSON_PRETTY_PRINT;
    }

    $response->setContent(json_encode($params, $flags)."\n");
    return $response;
  }

  /**
   * Discover the feeds available at the URL given in the request.
   *
   * Recognized request parameters: "url" (required), "timeout",
   * "max_redirects" and "pretty".
   *
   * @param Request  $request  Incoming request.
   * @param Response $response Response object to populate.
   * @return Response JSON response; status 400 when the URL is missing or the
   *                  lookup result contains a non-empty "error", otherwise 200.
   */
  public function find(Request $request, Response $response) {
    $opts = [];

    // Request parameters are untrusted: only do arithmetic on a numeric
    // timeout, since dividing a non-numeric string or an array raises a
    // warning or a TypeError depending on the PHP version.
    $timeout = $request->get('timeout');
    if($timeout && is_numeric($timeout)) {
      // We might make 2 HTTP requests, so each request gets half the desired timeout
      $opts['timeout'] = $timeout / 2;
    }

    if($request->get('max_redirects')) {
      $opts['max_redirects'] = (int)$request->get('max_redirects');
    }

    if($request->get('pretty')) {
      $this->_pretty = true;
    }

    // A non-string value (e.g. "url[]=...") is treated the same as a missing URL
    $url = $request->get('url');
    if(!$url || !is_string($url)) {
      return $this->respond($response, 400, [
        'error' => 'missing_url',
        'error_description' => 'Provide a URL to fetch'
      ]);
    }

    $xray = new p3k\XRay();
    $xray->http = $this->http;
    $res = $xray->feeds($url, $opts);

    $code = !empty($res['error']) ? 400 : 200;
    return $this->respond($response, $code, $res);
  }

}
@ -0,0 +1,113 @@ | |||
<?php | |||
namespace p3k\XRay; | |||
use p3k\XRay\Formats; | |||
/**
 * Finds the feeds (Microformats, JSON Feed, Atom, RSS) advertised by a URL.
 */
class Feeds {

  /** HTTP client used to fetch the URL. */
  private $http;

  public function __construct($http) {
    $this->http = $http;
  }

  /**
   * Fetch a URL and report which feeds it is, or links to.
   *
   * If the response itself has an Atom, RSS or JSON Feed content type, the
   * final URL is reported as that feed. Otherwise the body is parsed as HTML,
   * collecting rel=alternate links and checking for a Microformats feed.
   *
   * @param string $url  URL to inspect; only http and https are accepted.
   * @param array  $opts Optional "timeout" and "max_redirects" applied to the
   *                     HTTP client; the whole array is also passed on to the
   *                     HTML parser.
   * @return array On success: "url", "code" and "feeds" (a list of
   *               ['url' => ..., 'type' => ...] sorted by priority).
   *               For a rejected URL: "error" and "error_description".
   */
  public function find($url, $opts=[]) {
    if(isset($opts['timeout']))
      $this->http->set_timeout($opts['timeout']);
    if(isset($opts['max_redirects']))
      $this->http->set_max_redirects($opts['max_redirects']);

    $scheme = parse_url($url, PHP_URL_SCHEME);
    if(!in_array($scheme, ['http','https'])) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'Only http and https URLs are supported'
      ];
    }

    $host = parse_url($url, PHP_URL_HOST);
    if(!$host) {
      return [
        'error' => 'invalid_url',
        'error_description' => 'The URL provided was not valid'
      ];
    }

    $url = normalize_url($url);

    $result = $this->http->get($url);
    $body = $result['body'];

    $feeds = [];

    // First check the content type of the response
    $contentType = isset($result['headers']['Content-Type']) ? $result['headers']['Content-Type'] : '';

    // When the header is given as a list of values, use the last one
    if(is_array($contentType))
      $contentType = $contentType[count($contentType)-1];

    if(strpos($contentType, 'application/atom+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'atom'
      ];
    } elseif(strpos($contentType, 'application/rss+xml') !== false) {
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'rss'
      ];
    } elseif(strpos($contentType, 'application/json') !== false
      && substr($body, 0, 1) == '{'
      && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1') !== false) {
      // A JSON document only counts as a JSON Feed if the version URL shows
      // up within the first 100 bytes of the body
      $feeds[] = [
        'url' => $result['url'],
        'type' => 'jsonfeed'
      ];
    } else {
      // Some other document was returned, parse the HTML and look for rel alternates and Microformats
      $mf2 = \mf2\Parse($body, $result['url']);

      if(isset($mf2['alternates'])) {
        // Media type substring => reported feed type, checked in this order
        $alternateTypes = [
          'application/json' => 'jsonfeed',
          'application/atom+xml' => 'atom',
          'application/rss+xml' => 'rss',
        ];

        foreach($mf2['alternates'] as $alt) {
          // An alternate link without a type or URL cannot be classified
          if(!isset($alt['type']) || !isset($alt['url']))
            continue;

          foreach($alternateTypes as $mediaType => $feedType) {
            if(strpos($alt['type'], $mediaType) !== false) {
              $feeds[] = [
                'url' => $alt['url'],
                'type' => $feedType
              ];
            }
          }
        }
      }

      $parsed = Formats\HTML::parse($this->http, $body, $result['url'], array_merge($opts, ['expect'=>'feed']));
      if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
        $feeds[] = [
          'url' => $result['url'],
          'type' => 'microformats'
        ];
      }
    }

    // Sort feeds by priority (lowest rank first)
    $rank = ['microformats'=>0,'jsonfeed'=>1,'atom'=>2,'rss'=>3];
    usort($feeds, function($a, $b) use($rank) {
      // usort expects an integer below, equal to, or above zero, not a boolean
      return $rank[$a['type']] - $rank[$b['type']];
    });

    return [
      'url' => $result['url'],
      'code' => $result['code'],
      'feeds' => $feeds
    ];
  }

}
@ -0,0 +1,156 @@ | |||
<?php | |||
use Symfony\Component\HttpFoundation\Request; | |||
use Symfony\Component\HttpFoundation\Response; | |||
/**
 * Tests for the feed discovery controller, run against the canned HTTP
 * responses stored in the data/ directory next to this file.
 */
class FindFeedsTest extends PHPUnit_Framework_TestCase {

  /** Controller under test. */
  private $client;

  public function setUp() {
    $this->client = new Feeds();
    // Serve responses from fixture files instead of making network requests
    $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
  }

  /**
   * Run the controller's find() with the given query parameters.
   *
   * @param array $params Query parameters for the request.
   * @return Response
   */
  private function parse($params) {
    $request = new Request($params);
    $response = new Response();
    return $this->client->find($request, $response);
  }

  /**
   * Look up feeds for a URL, assert a 200 response with the expected number
   * of feeds, and return the decoded feed list.
   *
   * @param string $url           URL to look up.
   * @param int    $expectedCount Number of feeds the response must contain.
   * @return array Decoded "feeds" entries, each an object with url and type.
   */
  private function findFeeds($url, $expectedCount) {
    $response = $this->parse(['url' => $url]);
    $this->assertEquals(200, $response->getStatusCode());

    $feeds = json_decode($response->getContent())->feeds;
    $this->assertCount($expectedCount, $feeds);

    return $feeds;
  }

  // h-feed with no alternates
  public function testMf2HFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-child-author', 1);

    $this->assertEquals('http://feed.example.com/h-feed-with-child-author', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
  }

  // h-feed that links to Atom alternate
  public function testMf2WithAtomAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-atom-alternate', 2);

    // Should rank Microformats above Atom
    $this->assertEquals('http://feed.example.com/h-feed-with-atom-alternate', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/atom', $feeds[1]->url);
    $this->assertEquals('atom', $feeds[1]->type);
  }

  // h-feed that links to RSS alternate
  public function testMf2WithRSSAlternate() {
    $feeds = $this->findFeeds('http://feed.example.com/h-feed-with-rss-alternate', 2);

    // Should rank Microformats above RSS
    $this->assertEquals('http://feed.example.com/h-feed-with-rss-alternate', $feeds[0]->url);
    $this->assertEquals('microformats', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/podcast.xml', $feeds[1]->url);
    $this->assertEquals('rss', $feeds[1]->type);
  }

  // No mf2 but links to Atom alternate
  public function testNoMf2() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-atom-alternate', 1);

    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // No mf2 but links to both JSON Feed and Atom alternates
  public function testNoMf2WithJSONAndAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/html-with-json-and-atom', 2);

    // Should rank JSONFeed above Atom
    $this->assertEquals('http://feed.example.com/jsonfeed', $feeds[0]->url);
    $this->assertEquals('jsonfeed', $feeds[0]->type);
    $this->assertEquals('http://feed.example.com/atom', $feeds[1]->url);
    $this->assertEquals('atom', $feeds[1]->type);
  }

  // input URL is an Atom feed
  public function testInputIsAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/atom', 1);

    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // input URL redirects to an Atom feed
  public function testInputIsRedirectToAtom() {
    $feeds = $this->findFeeds('http://feed.example.com/redirect-to-atom', 1);

    // The reported URL is the redirect target, not the URL that was requested
    $this->assertEquals('http://feed.example.com/atom', $feeds[0]->url);
    $this->assertEquals('atom', $feeds[0]->type);
  }

  // input URL is an RSS feed
  public function testInputIsRSS() {
    $feeds = $this->findFeeds('http://feed.example.com/rss', 1);

    $this->assertEquals('http://feed.example.com/rss', $feeds[0]->url);
    $this->assertEquals('rss', $feeds[0]->type);
  }

  // input URL is a JSON feed
  public function testInputIsJSONFeed() {
    $feeds = $this->findFeeds('http://feed.example.com/jsonfeed', 1);

    $this->assertEquals('http://feed.example.com/jsonfeed', $feeds[0]->url);
    $this->assertEquals('jsonfeed', $feeds[0]->type);
  }

}
@ -0,0 +1,36 @@ | |||
HTTP/1.1 200 OK | |||
Server: Apache | |||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
Content-Type: text/html; charset=utf-8 | |||
Connection: keep-alive | |||
<html> | |||
<head> | |||
<title>Test</title> | |||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||
</head> | |||
<body> | |||
<a href="/author" class="h-card">Author Name</a> | |||
<ul> | |||
<li class="h-entry"> | |||
<a href="/1" class="u-url p-name">One</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/2" class="u-url p-name">Two</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/3" class="u-url p-name">Three</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/4" class="u-url p-name">Four</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
</ul> | |||
</body> | |||
</html> |
@ -0,0 +1,36 @@ | |||
HTTP/1.1 200 OK | |||
Server: Apache | |||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
Content-Type: text/html; charset=utf-8 | |||
Connection: keep-alive | |||
<html> | |||
<head> | |||
<title>Test</title> | |||
<link rel="alternate" type="application/rss+xml" href="/podcast.xml"> | |||
</head> | |||
<body> | |||
<a href="/author" class="h-card">Author Name</a> | |||
<ul> | |||
<li class="h-entry"> | |||
<a href="/1" class="u-url p-name">One</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/2" class="u-url p-name">Two</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/3" class="u-url p-name">Three</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
<li class="h-entry"> | |||
<a href="/4" class="u-url p-name">Four</a> | |||
<a href="/author" class="u-author"></a> | |||
</li> | |||
</ul> | |||
</body> | |||
</html> |
@ -0,0 +1,36 @@ | |||
HTTP/1.1 200 OK | |||
Server: Apache | |||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
Content-Type: text/html; charset=utf-8 | |||
Connection: keep-alive | |||
<html> | |||
<head> | |||
<title>Test</title> | |||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||
</head> | |||
<body> | |||
<h1><a href="/author">Author Name</a></h1> | |||
<ul> | |||
<li> | |||
<a href="/1">One</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/2">Two</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/3">Three</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/4">Four</a> | |||
<a href="/author"></a> | |||
</li> | |||
</ul> | |||
</body> | |||
</html> |
@ -0,0 +1,37 @@ | |||
HTTP/1.1 200 OK | |||
Server: Apache | |||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
Content-Type: text/html; charset=utf-8 | |||
Connection: keep-alive | |||
<html> | |||
<head> | |||
<title>Test</title> | |||
<link rel="alternate" type="application/atom+xml" href="/atom"> | |||
<link rel="alternate" type="application/json" href="/jsonfeed"> | |||
</head> | |||
<body> | |||
<h1><a href="/author">Author Name</a></h1> | |||
<ul> | |||
<li> | |||
<a href="/1">One</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/2">Two</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/3">Three</a> | |||
<a href="/author"></a> | |||
</li> | |||
<li> | |||
<a href="/4">Four</a> | |||
<a href="/author"></a> | |||
</li> | |||
</ul> | |||
</body> | |||
</html> |
@ -0,0 +1,15 @@ | |||
HTTP/1.1 301 Moved Permanently | |||
Server: Apache | |||
Date: Wed, 09 Dec 2015 03:29:14 GMT | |||
Content-Type: text/html; charset=utf-8 | |||
Connection: keep-alive | |||
Location: http://feed.example.com/atom | |||
<html> | |||
<head> | |||
<title>Moved</title> | |||
</head> | |||
<body> | |||
This page has moved | |||
</body> | |||
</html> |