|
@ -10,75 +10,81 @@ class Parser { |
|
|
$this->http = $http; |
|
|
$this->http = $http; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
public function parse($body, $url, $opts=[]) { |
|
|
|
|
|
|
|
|
public function parse($http_response, $opts=[]) { |
|
|
if(isset($opts['timeout'])) |
|
|
if(isset($opts['timeout'])) |
|
|
$this->http->set_timeout($opts['timeout']); |
|
|
$this->http->set_timeout($opts['timeout']); |
|
|
if(isset($opts['max_redirects'])) |
|
|
if(isset($opts['max_redirects'])) |
|
|
$this->http->set_max_redirects($opts['max_redirects']); |
|
|
$this->http->set_max_redirects($opts['max_redirects']); |
|
|
|
|
|
|
|
|
// Check if the URL matches a special parser
|
|
|
// Check if the URL matches a special parser
|
|
|
|
|
|
$url = $http_response['url']; |
|
|
|
|
|
|
|
|
if(Formats\Instagram::matches($url)) { |
|
|
if(Formats\Instagram::matches($url)) { |
|
|
return Formats\Instagram::parse($this->http, $body, $url, $opts); |
|
|
|
|
|
|
|
|
return Formats\Instagram::parse($this->http, $http_response, $opts); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(Formats\GitHub::matches($url)) { |
|
|
if(Formats\GitHub::matches($url)) { |
|
|
return Formats\GitHub::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\GitHub::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(Formats\Twitter::matches($url)) { |
|
|
if(Formats\Twitter::matches($url)) { |
|
|
return Formats\Twitter::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\Twitter::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(Formats\Facebook::matches($url)) { |
|
|
if(Formats\Facebook::matches($url)) { |
|
|
return Formats\Facebook::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\Facebook::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(Formats\XKCD::matches($url)) { |
|
|
if(Formats\XKCD::matches($url)) { |
|
|
return Formats\XKCD::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\XKCD::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(Formats\Hackernews::matches($url)) { |
|
|
if(Formats\Hackernews::matches($url)) { |
|
|
return Formats\Hackernews::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\Hackernews::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
$body = $http_response['body']; |
|
|
|
|
|
|
|
|
// Check if an mf2 JSON object was passed in
|
|
|
// Check if an mf2 JSON object was passed in
|
|
|
if(is_array($body) && isset($body['items'][0]['type']) && isset($body['items'][0]['properties'])) { |
|
|
if(is_array($body) && isset($body['items'][0]['type']) && isset($body['items'][0]['properties'])) { |
|
|
$data = Formats\Mf2::parse($body, $url, $this->http, $opts); |
|
|
|
|
|
|
|
|
$data = Formats\Mf2::parse($http_response, $this->http, $opts); |
|
|
$data['source-format'] = 'mf2+json'; |
|
|
$data['source-format'] = 'mf2+json'; |
|
|
return $data; |
|
|
return $data; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// Check if an ActivityStreams JSON object was passed in
|
|
|
// Check if an ActivityStreams JSON object was passed in
|
|
|
if(Formats\ActivityStreams::is_as2_json($body)) { |
|
|
if(Formats\ActivityStreams::is_as2_json($body)) { |
|
|
$data = Formats\ActivityStreams::parse($body, $url, $this->http, $opts); |
|
|
|
|
|
|
|
|
$data = Formats\ActivityStreams::parse($http_response, $this->http, $opts); |
|
|
$data['source-format'] = 'activity+json'; |
|
|
$data['source-format'] = 'activity+json'; |
|
|
return $data; |
|
|
return $data; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(substr($body, 0, 5) == '<?xml') { |
|
|
if(substr($body, 0, 5) == '<?xml') { |
|
|
return Formats\XML::parse($body, $url); |
|
|
|
|
|
|
|
|
return Formats\XML::parse($http_response); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(substr($body, 0, 1) == '{') { |
|
|
if(substr($body, 0, 1) == '{') { |
|
|
$parsed = json_decode($body, true); |
|
|
$parsed = json_decode($body, true); |
|
|
if($parsed && isset($parsed['version']) && $parsed['version'] == 'https://jsonfeed.org/version/1') { |
|
|
if($parsed && isset($parsed['version']) && $parsed['version'] == 'https://jsonfeed.org/version/1') { |
|
|
return Formats\JSONFeed::parse($parsed, $url); |
|
|
|
|
|
|
|
|
$http_response['body'] = $parsed; |
|
|
|
|
|
return Formats\JSONFeed::parse($http_response); |
|
|
} elseif($parsed && isset($parsed['items'][0]['type']) && isset($parsed['items'][0]['properties'])) { |
|
|
} elseif($parsed && isset($parsed['items'][0]['type']) && isset($parsed['items'][0]['properties'])) { |
|
|
// Check if an mf2 JSON string was passed in
|
|
|
// Check if an mf2 JSON string was passed in
|
|
|
$data = Formats\Mf2::parse($parsed, $url, $this->http, $opts); |
|
|
|
|
|
|
|
|
$http_response['body'] = $parsed; |
|
|
|
|
|
$data = Formats\Mf2::parse($http_response, $this->http, $opts); |
|
|
$data['source-format'] = 'mf2+json'; |
|
|
$data['source-format'] = 'mf2+json'; |
|
|
return $data; |
|
|
return $data; |
|
|
} elseif($parsed && Formats\ActivityStreams::is_as2_json($parsed)) { |
|
|
} elseif($parsed && Formats\ActivityStreams::is_as2_json($parsed)) { |
|
|
// Check if an ActivityStreams JSON string was passed in
|
|
|
// Check if an ActivityStreams JSON string was passed in
|
|
|
$data = Formats\ActivityStreams::parse($parsed, $url, $this->http, $opts); |
|
|
|
|
|
|
|
|
$http_response['body'] = $parsed; |
|
|
|
|
|
$data = Formats\ActivityStreams::parse($http_response, $this->http, $opts); |
|
|
$data['source-format'] = 'activity+json'; |
|
|
$data['source-format'] = 'activity+json'; |
|
|
return $data; |
|
|
return $data; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// No special parsers matched, parse for Microformats now
|
|
|
// No special parsers matched, parse for Microformats now
|
|
|
$data = Formats\HTML::parse($this->http, $body, $url, $opts); |
|
|
|
|
|
|
|
|
$data = Formats\HTML::parse($this->http, $http_response, $opts); |
|
|
if(!isset($data['source-format']) && isset($data['type']) && $data['type'] != 'unknown') |
|
|
if(!isset($data['source-format']) && isset($data['type']) && $data['type'] != 'unknown') |
|
|
$data['source-format'] = 'mf2+html'; |
|
|
$data['source-format'] = 'mf2+html'; |
|
|
return $data; |
|
|
return $data; |
|
|