diff --git a/controllers/Parse.php b/controllers/Parse.php index 485de53..b2cd612 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -98,6 +98,23 @@ class Parse { } } + $parser = new p3k\XRay\Parser($this->http); + $parsed = $parser->parse($result['body'], $result['url'], $opts); + + // Allow the parser to override the HTTP response code, e.g. a meta-equiv tag + if(isset($parsed['code'])) + $result['code'] = $parsed['code']; + + $data = [ + 'data' => $parsed['data'], + 'url' => $result['url'], + 'code' => $result['code'] + ]; + if($request->get('include_original') && isset($parsed['original'])) + $data['original'] = $parsed['original']; + + return $this->respond($response, 200, $data); + // Check for known services diff --git a/lib/XRay.php b/lib/XRay.php index f633045..7fe7192 100644 --- a/lib/XRay.php +++ b/lib/XRay.php @@ -13,15 +13,29 @@ class XRay { return $rels->parse($url, $opts); } - public function parse($url, $opts=[]) { - $fetch = new XRay\Fetch($this->http); - $response = $fetch->fetch($url, $opts); - return $this->parse_doc($response, $url, $opts); - } + public function parse($url, $opts_or_body=false, $opts_for_body=[]) { + if(!$opts_or_body || is_array($opts_or_body)) { + $fetch = new XRay\Fetcher($this->http); + $response = $fetch->fetch($url, $opts_or_body); + if(!empty($response['error'])) + return $response; + $body = $response['body']; + $url = $response['url']; + $code = $response['code']; + $opts = is_array($opts_or_body) ? $opts_or_body : $opts_for_body; + } else { + $body = $opts_or_body; + $opts = $opts_for_body; + $code = null; + } + $parser = new XRay\Parser($this->http); - public function parse_doc($response, $url=false, $opts=[]) { - - + $result = $parser->parse($body, $url, $opts); + if(!isset($opts['include_original']) || !$opts['include_original']) + unset($result['original']); + $result['url'] = $url; + $result['code'] = isset($result['code']) ? $result['code'] : $code; + return $result; } } diff --git a/lib/XRay/Fetcher.php b/lib/XRay/Fetcher.php index ea37f8b..9b82909 100644 --- a/lib/XRay/Fetcher.php +++ b/lib/XRay/Fetcher.php @@ -9,6 +9,8 @@ class Fetcher { } public function fetch($url, $opts=[]) { + if($opts == false) $opts = []; + if(isset($opts['timeout'])) $this->http->set_timeout($opts['timeout']); if(isset($opts['max_redirects'])) diff --git a/lib/XRay/Formats/GitHub.php b/lib/XRay/Formats/GitHub.php index 91c8e96..f55c184 100644 --- a/lib/XRay/Formats/GitHub.php +++ b/lib/XRay/Formats/GitHub.php @@ -19,37 +19,50 @@ class GitHub extends Format { || preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match); } - public static function fetch($http, $url, $creds) { - // Transform the GitHub URL to an API request + private static function extract_url_parts($url) { + $response = false; + if(preg_match('~https://github.com/([^/]+)/([^/]+)/pull/(\d+)$~', $url, $match)) { - $type = 'pull'; - $org = $match[1]; - $repo = $match[2]; - $pull = $match[3]; - $apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/pulls/'.$pull; + $response = []; + $response['type'] = 'pull'; + $response['org'] = $match[1]; + $response['repo'] = $match[2]; + $response['pull'] = $match[3]; + $response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/pulls/'.$response['pull']; } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)$~', $url, $match)) { - $type = 'issue'; - $org = $match[1]; - $repo = $match[2]; - $issue = $match[3]; - $apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/issues/'.$issue; + $response = []; + $response['type'] = 'issue'; + $response['org'] = $match[1]; + $response['repo'] = $match[2]; + $response['issue'] = $match[3]; + $response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/issues/'.$response['issue']; } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)$~', $url, $match)) { - $type = 'repo'; - $org = $match[1]; - $repo = $match[2]; - $apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo; + $response = []; + $response['type'] = 'repo'; + $response['org'] = $match[1]; + $response['repo'] = $match[2]; + $response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo']; } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match)) { - $type = 'comment'; - $org = $match[1]; - $repo = $match[2]; - $issue = $match[3]; - $comment = $match[4]; - $apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/issues/comments/'.$comment; - - } else { + $response = []; + $response['type'] = 'comment'; + $response['org'] = $match[1]; + $response['repo'] = $match[2]; + $response['issue'] = $match[3]; + $response['comment'] = $match[4]; + $response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/issues/comments/'.$response['comment']; + + } + + return $response; + } + + public static function fetch($http, $url, $creds) { + $parts = self::extract_url_parts($url); + + if(!$parts) { return [ 'error' => 'unsupported_url', 'error_description' => 'This GitHub URL is not supported', @@ -62,7 +75,7 @@ class GitHub extends Format { $headers[] = 'Authorization: Bearer ' . $creds['github_access_token']; } - $response = $http->get($apiurl, $headers); + $response = $http->get($parts['apiurl'], $headers); if($response['code'] != 200) { return [ 'error' => 'github_error', @@ -78,20 +91,20 @@ class GitHub extends Format { ]; } - public static function parse($http, $url, $creds, $json=null) { + public static function parse($json, $url) { + $data = @json_decode($json, true); - if(false) { - } else { - $data = json_decode($json, true); - } + if(!$data) + return self::_unknown(); - if(!$data) { - return [null, null, 0]; - } + $parts = self::extract_url_parts($url); + + if(!$parts) + return self::_unknown(); // Start building the h-entry $entry = array( - 'type' => ($type == 'repo' ? 'repo' : 'entry'), + 'type' => ($parts['type'] == 'repo' ? 'repo' : 'entry'), 'url' => $url, 'author' => [ 'type' => 'card', @@ -101,7 +114,7 @@ class GitHub extends Format { ] ); - if($type == 'repo') + if($parts['type'] == 'repo') $authorkey = 'owner'; else $authorkey = 'user'; @@ -110,20 +123,20 @@ class GitHub extends Format { $entry['author']['photo'] = $data[$authorkey]['avatar_url']; $entry['author']['url'] = $data[$authorkey]['html_url']; - if($type == 'pull') { - $entry['name'] = '#' . $pull . ' ' . $data['title']; - } elseif($type == 'issue') { - $entry['name'] = '#' . $issue . ' ' . $data['title']; - } elseif($type == 'repo') { + if($parts['type'] == 'pull') { + $entry['name'] = '#' . $parts['pull'] . ' ' . $data['title']; + } elseif($parts['type'] == 'issue') { + $entry['name'] = '#' . $parts['issue'] . ' ' . $data['title']; + } elseif($parts['type'] == 'repo') { $entry['name'] = $data['name']; } - if($type == 'repo') { + if($parts['type'] == 'repo') { if(!empty($data['description'])) $entry['summary'] = $data['description']; } - if($type != 'repo' && !empty($data['body'])) { + if($parts['type'] != 'repo' && !empty($data['body'])) { $parser = new GithubMarkdown(); $entry['content'] = [ @@ -132,8 +145,8 @@ class GitHub extends Format { ]; } - if($type == 'comment') { - $entry['in-reply-to'] = ['https://github.com/'.$org.'/'.$repo.'/issues/'.$issue]; + if($parts['type'] == 'comment') { + $entry['in-reply-to'] = ['https://github.com/'.$parts['org'].'/'.$parts['repo'].'/issues/'.$parts['issue']]; } if(!empty($data['labels'])) { @@ -144,11 +157,10 @@ class GitHub extends Format { $entry['published'] = $data['created_at']; - $r = [ - 'data' => $entry + return [ + 'data' => $entry, + 'original' => $json ]; - - return [$r, $json, $response['code']]; } } diff --git a/lib/XRay/Formats/Instagram.php b/lib/XRay/Formats/Instagram.php index 1cfaee5..2c02b51 100644 --- a/lib/XRay/Formats/Instagram.php +++ b/lib/XRay/Formats/Instagram.php @@ -3,9 +3,8 @@ namespace p3k\XRay\Formats; use DOMDocument, DOMXPath; use DateTime, DateTimeZone; -use Parse; -class Instagram { +class Instagram extends Format { public static function matches_host($url) { $host = parse_url($url, PHP_URL_HOST); @@ -16,12 +15,12 @@ class Instagram { return self::matches_host($url); } - public static function parse($html, $url, $http) { + public static function parse($http, $html, $url) { $photoData = self::_extractPhotoDataFromPhotoPage($html); if(!$photoData) - return false; + return self::_unknown(); // Start building the h-entry $entry = array( @@ -140,19 +139,18 @@ class Instagram { $entry['published'] = $published->format('c'); - $response = [ - 'data' => $entry - ]; - if(count($refs)) { - $response['refs'] = $refs; + $entry['refs'] = $refs; } - return [$response, [ - 'photo' => $photoData, - 'profiles' => $profiles, - 'locations' => $locations - ]]; + return [ + 'data' => $entry, + 'original' => json_encode([ + 'photo' => $photoData, + 'profiles' => $profiles, + 'locations' => $locations + ]) + ]; } private static function _buildHCardFromInstagramProfile($profile) { diff --git a/lib/XRay/Parser.php b/lib/XRay/Parser.php index d39369c..97fab1f 100644 --- a/lib/XRay/Parser.php +++ b/lib/XRay/Parser.php @@ -1,12 +1,38 @@ http = $http; + } - public function parse($url, $body) { + public function parse($body, $url, $opts=[]) { + if(isset($opts['timeout'])) + $this->http->set_timeout($opts['timeout']); + if(isset($opts['max_redirects'])) + $this->http->set_max_redirects($opts['max_redirects']); + + // Check if the URL matches a special parser + + if(Formats\Instagram::matches($url)) { + return Formats\Instagram::parse($this->http, $body, $url); + } + + if(Formats\GitHub::matches($url)) { + return Formats\GitHub::parse($body, $url); + } - + + return [ + 'data' => [ + 'type' => 'unknown' + ] + ]; } } diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index 2c15845..d3addc4 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -71,8 +71,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase { $this->assertEquals(2, count($data['data']['category'])); $this->assertContains('http://tinyletter.com/kmikeym', $data['data']['category']); - $this->assertArrayHasKey('http://tinyletter.com/kmikeym', $data['refs']); - $this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://tinyletter.com/kmikeym','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['refs']['http://tinyletter.com/kmikeym']); + $this->assertArrayHasKey('http://tinyletter.com/kmikeym', $data['data']['refs']); + $this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://tinyletter.com/kmikeym','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['data']['refs']['http://tinyletter.com/kmikeym']); } public function testInstagramPhotoWithVenue() { @@ -86,8 +86,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase { $this->assertEquals(1, count($data['data']['location'])); $this->assertContains('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['location']); - $this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['refs']); - $venue = $data['refs']['https://www.instagram.com/explore/locations/109284789535230/']; + $this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['refs']); + $venue = $data['data']['refs']['https://www.instagram.com/explore/locations/109284789535230/']; $this->assertEquals('XOXO Outpost', $venue['name']); $this->assertEquals('45.5261002', $venue['latitude']); $this->assertEquals('-122.6558081', $venue['longitude']);