<?php
|
|
namespace p3k\XRay;
|
|
|
|
/**
 * Retrieves the contents of a URL.
 *
 * URLs recognised by the Twitter, Facebook, GitHub or Hackernews format
 * classes are handed to those classes; every other URL is requested through
 * the injected HTTP client.
 *
 * All results are plain arrays. A successful plain fetch yields the keys
 * `url`, `body` and `code`; failures carry `error` and `error_description`
 * (validation failures additionally carry `error_code`).
 */
class Fetcher {

  // HTTP client supplied by the caller; see the constructor.
  private $http;

  /**
   * @param object $http HTTP client. This class calls set_timeout(),
   *                     set_max_redirects(), set_transport() and get() on it.
   */
  public function __construct($http) {
    $this->http = $http;
  }

  /**
   * Fetch a URL and return its body, or an error description.
   *
   * @param string      $url  The URL to fetch. Only http and https are accepted.
   * @param array|false $opts Optional settings. Keys read here: `timeout`,
   *                          `max_redirects`, `token` (sent as a Bearer token),
   *                          plus the credential keys consumed by the
   *                          service-specific fetchers.
   * @return array Either ['url' => ..., 'body' => ..., 'code' => ...] or an
   *               error array. For service-specific URLs the return value is
   *               whatever the corresponding format class returns.
   */
  public function fetch($url, $opts=[]) {
    // Tolerate callers passing false/null instead of an options array.
    if($opts == false) $opts = [];

    if(isset($opts['timeout']))
      $this->http->set_timeout($opts['timeout']);
    if(isset($opts['max_redirects']))
      $this->http->set_max_redirects($opts['max_redirects']);

    // Attempt some basic URL validation
    $scheme = parse_url($url, PHP_URL_SCHEME);
    if(!in_array($scheme, ['http','https'], true)) {
      return [
        'error_code' => 400,
        'error' => 'invalid_url',
        'error_description' => 'Only http and https URLs are supported'
      ];
    }

    $host = parse_url($url, PHP_URL_HOST);
    if(!$host) {
      return [
        'error_code' => 400,
        'error' => 'invalid_url',
        'error_description' => 'The URL provided was not valid'
      ];
    }

    $url = normalize_url($url);

    // Check if this is a Twitter URL and use the API
    if(Formats\Twitter::matches_host($url)) {
      return $this->_fetch_tweet($url, $opts);
    }

    // Check if this is a Facebook URL and use the API
    if(Formats\Facebook::matches_host($url)) {
      return $this->_fetch_facebook($url, $opts);
    }

    // Transform the HTML GitHub URL into a GitHub API request and fetch the API response
    if(Formats\GitHub::matches_host($url)) {
      return $this->_fetch_github($url, $opts);
    }

    // Check if this is a Hackernews URL and use the API
    if(Formats\Hackernews::matches($url)) {
      return Formats\Hackernews::fetch($this->http, $url, $opts);
    }

    // All other URLs are fetched normally

    // Special-case appspot.com URLs to not follow redirects.
    // https://cloud.google.com/appengine/docs/php/urlfetch/
    if(!should_follow_redirects($url)) {
      $this->http->set_max_redirects(0);
      $this->http->set_transport(new \p3k\HTTP\Stream());
    } else {
      $this->http->set_transport(new \p3k\HTTP\Curl());
    }

    $headers = [];
    if(isset($opts['token']))
      $headers[] = 'Authorization: Bearer ' . $opts['token'];

    $result = $this->http->get($url, $headers);

    // Use !empty() so a response without an `error` key is treated as success
    // instead of raising an undefined-index notice.
    if(!empty($result['error'])) {
      return [
        'error' => $result['error'],
        'error_description' => $result['error_description'],
        'url' => $result['url'],
        'code' => $result['code'],
      ];
    }

    if(trim($result['body']) == '') {
      if($result['code'] == 410) {
        // 410 Gone responses are valid and should not return an error.
        // Return the same shape as a normal successful fetch so callers can
        // handle the (empty) body themselves. This method has no response
        // object to write to, so it must return an array here.
        return [
          'url' => $result['url'],
          'body' => $result['body'],
          'code' => $result['code'],
        ];
      }

      return [
        'error' => 'no_content',
        'error_description' => 'We did not get a response body when fetching the URL',
        'url' => $result['url'],
        'code' => $result['code']
      ];
    }

    // Check for HTTP 401/403
    if($result['code'] == 401) {
      return [
        'error' => 'unauthorized',
        'error_description' => 'The URL returned "HTTP 401 Unauthorized"',
        'url' => $result['url'],
        'code' => $result['code']
      ];
    }
    if($result['code'] == 403) {
      return [
        'error' => 'forbidden',
        'error_description' => 'The URL returned "HTTP 403 Forbidden"',
        'url' => $result['url'],
        'code' => $result['code']
      ];
    }

    // If the original URL had a fragment, include it in the final URL
    if(($fragment=parse_url($url, PHP_URL_FRAGMENT)) && !parse_url($result['url'], PHP_URL_FRAGMENT)) {
      $result['url'] .= '#'.$fragment;
    }

    return [
      'url' => $result['url'],
      'body' => $result['body'],
      'code' => $result['code'],
    ];
  }

  /**
   * Copy the listed option keys into a new array, skipping any that are
   * missing or null.
   *
   * @param array    $opts   Options passed to fetch().
   * @param string[] $fields Keys to copy, in the order they should appear.
   * @return array The subset of $opts that was present.
   */
  private function _collect_creds($opts, $fields) {
    $creds = [];
    foreach($fields as $f) {
      if(isset($opts[$f]))
        $creds[$f] = $opts[$f];
    }
    return $creds;
  }

  /**
   * Fetch a tweet through Formats\Twitter, requiring all four credentials.
   *
   * @param string $url
   * @param array  $opts
   * @return array A missing_parameters error array, or the result of
   *               Formats\Twitter::fetch().
   */
  private function _fetch_tweet($url, $opts) {
    $fields = ['twitter_api_key','twitter_api_secret','twitter_access_token','twitter_access_token_secret'];
    $creds = $this->_collect_creds($opts, $fields);

    if(count($creds) < 4) {
      return [
        'error_code' => 400,
        'error' => 'missing_parameters',
        'error_description' => 'All 4 Twitter credentials must be included in the request'
      ];
    }

    return Formats\Twitter::fetch($url, $creds);
  }

  /**
   * Fetch a Facebook URL through Formats\Facebook, requiring both credentials.
   *
   * @param string $url
   * @param array  $opts
   * @return array A missing_parameters error array, or the result of
   *               Formats\Facebook::fetch().
   */
  private function _fetch_facebook($url, $opts) {
    $fields = ['facebook_app_id','facebook_app_secret'];
    $creds = $this->_collect_creds($opts, $fields);

    if(count($creds) < 2) {
      return [
        'error_code' => 400,
        'error' => 'missing_parameters',
        'error_description' => 'Both Facebook credentials must be included in the request'
      ];
    }

    // TODO: Question, should I do this like Twitter or like Github?
    return Formats\Facebook::fetch($url, $creds);
  }

  /**
   * Fetch a GitHub URL through Formats\GitHub. The access token is optional
   * here: no error is returned when it is absent.
   *
   * @param string $url
   * @param array  $opts
   * @return array The result of Formats\GitHub::fetch().
   */
  private function _fetch_github($url, $opts) {
    $creds = $this->_collect_creds($opts, ['github_access_token']);

    return Formats\GitHub::fetch($this->http, $url, $creds);
  }

}
|