Browse Source

refactor for parsing Instagram and GitHub complete

pull/38/head
Aaron Parecki 7 years ago
parent
commit
6b65ae1b94
No known key found for this signature in database GPG Key ID: 276C2817346D6056
7 changed files with 145 additions and 76 deletions
  1. +17
    -0
      controllers/Parse.php
  2. +22
    -8
      lib/XRay.php
  3. +2
    -0
      lib/XRay/Fetcher.php
  4. +60
    -48
      lib/XRay/Formats/GitHub.php
  5. +12
    -14
      lib/XRay/Formats/Instagram.php
  6. +28
    -2
      lib/XRay/Parser.php
  7. +4
    -4
      tests/InstagramTest.php

+ 17
- 0
controllers/Parse.php View File

@ -98,6 +98,23 @@ class Parse {
} }
} }
$parser = new p3k\XRay\Parser($this->http);
$parsed = $parser->parse($result['body'], $result['url'], $opts);
// Allow the parser to override the HTTP response code, e.g. from a <meta http-equiv> tag
if(isset($parsed['code']))
$result['code'] = $parsed['code'];
$data = [
'data' => $parsed['data'],
'url' => $result['url'],
'code' => $result['code']
];
if($request->get('include_original') && isset($parsed['original']))
$data['original'] = $parsed['original'];
return $this->respond($response, 200, $data);
// Check for known services // Check for known services

+ 22
- 8
lib/XRay.php View File

@ -13,15 +13,29 @@ class XRay {
return $rels->parse($url, $opts); return $rels->parse($url, $opts);
} }
public function parse($url, $opts=[]) {
$fetch = new XRay\Fetch($this->http);
$response = $fetch->fetch($url, $opts);
return $this->parse_doc($response, $url, $opts);
}
public function parse($url, $opts_or_body=false, $opts_for_body=[]) {
if(!$opts_or_body || is_array($opts_or_body)) {
$fetch = new XRay\Fetcher($this->http);
$response = $fetch->fetch($url, $opts_or_body);
if(!empty($response['error']))
return $response;
$body = $response['body'];
$url = $response['url'];
$code = $response['code'];
$opts = is_array($opts_or_body) ? $opts_or_body : $opts_for_body;
} else {
$body = $opts_or_body;
$opts = $opts_for_body;
$code = null;
}
$parser = new XRay\Parser($this->http);
public function parse_doc($response, $url=false, $opts=[]) {
$result = $parser->parse($body, $url, $opts);
if(!isset($opts['include_original']) || !$opts['include_original'])
unset($result['original']);
$result['url'] = $url;
$result['code'] = isset($result['code']) ? $result['code'] : $code;
return $result;
} }
} }

+ 2
- 0
lib/XRay/Fetcher.php View File

@ -9,6 +9,8 @@ class Fetcher {
} }
public function fetch($url, $opts=[]) { public function fetch($url, $opts=[]) {
if($opts == false) $opts = [];
if(isset($opts['timeout'])) if(isset($opts['timeout']))
$this->http->set_timeout($opts['timeout']); $this->http->set_timeout($opts['timeout']);
if(isset($opts['max_redirects'])) if(isset($opts['max_redirects']))

+ 60
- 48
lib/XRay/Formats/GitHub.php View File

@ -19,37 +19,50 @@ class GitHub extends Format {
|| preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match); || preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match);
} }
public static function fetch($http, $url, $creds) {
// Transform the GitHub URL to an API request
private static function extract_url_parts($url) {
$response = false;
if(preg_match('~https://github.com/([^/]+)/([^/]+)/pull/(\d+)$~', $url, $match)) { if(preg_match('~https://github.com/([^/]+)/([^/]+)/pull/(\d+)$~', $url, $match)) {
$type = 'pull';
$org = $match[1];
$repo = $match[2];
$pull = $match[3];
$apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/pulls/'.$pull;
$response = [];
$response['type'] = 'pull';
$response['org'] = $match[1];
$response['repo'] = $match[2];
$response['pull'] = $match[3];
$response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/pulls/'.$response['pull'];
} elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)$~', $url, $match)) { } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)$~', $url, $match)) {
$type = 'issue';
$org = $match[1];
$repo = $match[2];
$issue = $match[3];
$apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/issues/'.$issue;
$response = [];
$response['type'] = 'issue';
$response['org'] = $match[1];
$response['repo'] = $match[2];
$response['issue'] = $match[3];
$response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/issues/'.$response['issue'];
} elseif(preg_match('~https://github.com/([^/]+)/([^/]+)$~', $url, $match)) { } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)$~', $url, $match)) {
$type = 'repo';
$org = $match[1];
$repo = $match[2];
$apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo;
$response = [];
$response['type'] = 'repo';
$response['org'] = $match[1];
$response['repo'] = $match[2];
$response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'];
} elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match)) { } elseif(preg_match('~https://github.com/([^/]+)/([^/]+)/issues/(\d+)#issuecomment-(\d+)~', $url, $match)) {
$type = 'comment';
$org = $match[1];
$repo = $match[2];
$issue = $match[3];
$comment = $match[4];
$apiurl = 'https://api.github.com/repos/'.$org.'/'.$repo.'/issues/comments/'.$comment;
} else {
$response = [];
$response['type'] = 'comment';
$response['org'] = $match[1];
$response['repo'] = $match[2];
$response['issue'] = $match[3];
$response['comment'] = $match[4];
$response['apiurl'] = 'https://api.github.com/repos/'.$response['org'].'/'.$response['repo'].'/issues/comments/'.$response['comment'];
}
return $response;
}
public static function fetch($http, $url, $creds) {
$parts = self::extract_url_parts($url);
if(!$parts) {
return [ return [
'error' => 'unsupported_url', 'error' => 'unsupported_url',
'error_description' => 'This GitHub URL is not supported', 'error_description' => 'This GitHub URL is not supported',
@ -62,7 +75,7 @@ class GitHub extends Format {
$headers[] = 'Authorization: Bearer ' . $creds['github_access_token']; $headers[] = 'Authorization: Bearer ' . $creds['github_access_token'];
} }
$response = $http->get($apiurl, $headers);
$response = $http->get($parts['apiurl'], $headers);
if($response['code'] != 200) { if($response['code'] != 200) {
return [ return [
'error' => 'github_error', 'error' => 'github_error',
@ -78,20 +91,20 @@ class GitHub extends Format {
]; ];
} }
public static function parse($http, $url, $creds, $json=null) {
public static function parse($json, $url) {
$data = @json_decode($json, true);
if(false) {
} else {
$data = json_decode($json, true);
}
if(!$data)
return self::_unknown();
if(!$data) {
return [null, null, 0];
}
$parts = self::extract_url_parts($url);
if(!$parts)
return self::_unknown();
// Start building the h-entry // Start building the h-entry
$entry = array( $entry = array(
'type' => ($type == 'repo' ? 'repo' : 'entry'),
'type' => ($parts['type'] == 'repo' ? 'repo' : 'entry'),
'url' => $url, 'url' => $url,
'author' => [ 'author' => [
'type' => 'card', 'type' => 'card',
@ -101,7 +114,7 @@ class GitHub extends Format {
] ]
); );
if($type == 'repo')
if($parts['type'] == 'repo')
$authorkey = 'owner'; $authorkey = 'owner';
else else
$authorkey = 'user'; $authorkey = 'user';
@ -110,20 +123,20 @@ class GitHub extends Format {
$entry['author']['photo'] = $data[$authorkey]['avatar_url']; $entry['author']['photo'] = $data[$authorkey]['avatar_url'];
$entry['author']['url'] = $data[$authorkey]['html_url']; $entry['author']['url'] = $data[$authorkey]['html_url'];
if($type == 'pull') {
$entry['name'] = '#' . $pull . ' ' . $data['title'];
} elseif($type == 'issue') {
$entry['name'] = '#' . $issue . ' ' . $data['title'];
} elseif($type == 'repo') {
if($parts['type'] == 'pull') {
$entry['name'] = '#' . $parts['pull'] . ' ' . $data['title'];
} elseif($parts['type'] == 'issue') {
$entry['name'] = '#' . $parts['issue'] . ' ' . $data['title'];
} elseif($parts['type'] == 'repo') {
$entry['name'] = $data['name']; $entry['name'] = $data['name'];
} }
if($type == 'repo') {
if($parts['type'] == 'repo') {
if(!empty($data['description'])) if(!empty($data['description']))
$entry['summary'] = $data['description']; $entry['summary'] = $data['description'];
} }
if($type != 'repo' && !empty($data['body'])) {
if($parts['type'] != 'repo' && !empty($data['body'])) {
$parser = new GithubMarkdown(); $parser = new GithubMarkdown();
$entry['content'] = [ $entry['content'] = [
@ -132,8 +145,8 @@ class GitHub extends Format {
]; ];
} }
if($type == 'comment') {
$entry['in-reply-to'] = ['https://github.com/'.$org.'/'.$repo.'/issues/'.$issue];
if($parts['type'] == 'comment') {
$entry['in-reply-to'] = ['https://github.com/'.$parts['org'].'/'.$parts['repo'].'/issues/'.$parts['issue']];
} }
if(!empty($data['labels'])) { if(!empty($data['labels'])) {
@ -144,11 +157,10 @@ class GitHub extends Format {
$entry['published'] = $data['created_at']; $entry['published'] = $data['created_at'];
$r = [
'data' => $entry
return [
'data' => $entry,
'original' => $json
]; ];
return [$r, $json, $response['code']];
} }
} }

+ 12
- 14
lib/XRay/Formats/Instagram.php View File

@ -3,9 +3,8 @@ namespace p3k\XRay\Formats;
use DOMDocument, DOMXPath; use DOMDocument, DOMXPath;
use DateTime, DateTimeZone; use DateTime, DateTimeZone;
use Parse;
class Instagram {
class Instagram extends Format {
public static function matches_host($url) { public static function matches_host($url) {
$host = parse_url($url, PHP_URL_HOST); $host = parse_url($url, PHP_URL_HOST);
@ -16,12 +15,12 @@ class Instagram {
return self::matches_host($url); return self::matches_host($url);
} }
public static function parse($html, $url, $http) {
public static function parse($http, $html, $url) {
$photoData = self::_extractPhotoDataFromPhotoPage($html); $photoData = self::_extractPhotoDataFromPhotoPage($html);
if(!$photoData) if(!$photoData)
return false;
return self::_unknown();
// Start building the h-entry // Start building the h-entry
$entry = array( $entry = array(
@ -140,19 +139,18 @@ class Instagram {
$entry['published'] = $published->format('c'); $entry['published'] = $published->format('c');
$response = [
'data' => $entry
];
if(count($refs)) { if(count($refs)) {
$response['refs'] = $refs;
$entry['refs'] = $refs;
} }
return [$response, [
'photo' => $photoData,
'profiles' => $profiles,
'locations' => $locations
]];
return [
'data' => $entry,
'original' => json_encode([
'photo' => $photoData,
'profiles' => $profiles,
'locations' => $locations
])
];
} }
private static function _buildHCardFromInstagramProfile($profile) { private static function _buildHCardFromInstagramProfile($profile) {

+ 28
- 2
lib/XRay/Parser.php View File

@ -1,12 +1,38 @@
<?php <?php
namespace p3k\XRay; namespace p3k\XRay;
use p3k\XRay\Formats;
class Parser { class Parser {
private $http;
/**
 * Keep a reference to the HTTP client for later use by parse().
 *
 * @param mixed $http HTTP client object; parse() calls set_timeout() and
 *                    set_max_redirects() on it and passes it to the
 *                    Instagram format parser.
 */
public function __construct($http) {
$this->http = $http;
}
public function parse($url, $body) {
/**
 * Parse an already-fetched document body, choosing a format-specific
 * parser based on the URL.
 *
 * @param string $body Document body to parse (HTML for Instagram, JSON text
 *                     for GitHub, judging by the format parsers it is given to).
 * @param string $url  URL the body belongs to; used to select the parser.
 * @param array  $opts Optional settings. Recognized keys: 'timeout' and
 *                     'max_redirects', both forwarded to the HTTP client.
 * @return array The matched format parser's result, or
 *               ['data' => ['type' => 'unknown']] when no parser matches.
 */
public function parse($body, $url, $opts=[]) {
// Forward per-call HTTP settings to the client held by this parser.
// NOTE(review): presumably so format parsers that make follow-up requests honor them — confirm.
if(isset($opts['timeout']))
$this->http->set_timeout($opts['timeout']);
if(isset($opts['max_redirects']))
$this->http->set_max_redirects($opts['max_redirects']);
// Check if the URL matches a special parser
if(Formats\Instagram::matches($url)) {
// Instagram's parser also receives the HTTP client
return Formats\Instagram::parse($this->http, $body, $url);
}
if(Formats\GitHub::matches($url)) {
// GitHub's parser works from the body and URL alone
return Formats\GitHub::parse($body, $url);
}
// No special parser matched the URL: report an unknown document type
return [
'data' => [
'type' => 'unknown'
]
];
} }
} }

+ 4
- 4
tests/InstagramTest.php View File

@ -71,8 +71,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(2, count($data['data']['category'])); $this->assertEquals(2, count($data['data']['category']));
$this->assertContains('http://tinyletter.com/kmikeym', $data['data']['category']); $this->assertContains('http://tinyletter.com/kmikeym', $data['data']['category']);
$this->assertArrayHasKey('http://tinyletter.com/kmikeym', $data['refs']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://tinyletter.com/kmikeym','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['refs']['http://tinyletter.com/kmikeym']);
$this->assertArrayHasKey('http://tinyletter.com/kmikeym', $data['data']['refs']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://tinyletter.com/kmikeym','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['data']['refs']['http://tinyletter.com/kmikeym']);
} }
public function testInstagramPhotoWithVenue() { public function testInstagramPhotoWithVenue() {
@ -86,8 +86,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(1, count($data['data']['location'])); $this->assertEquals(1, count($data['data']['location']));
$this->assertContains('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['location']); $this->assertContains('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['location']);
$this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['refs']);
$venue = $data['refs']['https://www.instagram.com/explore/locations/109284789535230/'];
$this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['refs']);
$venue = $data['data']['refs']['https://www.instagram.com/explore/locations/109284789535230/'];
$this->assertEquals('XOXO Outpost', $venue['name']); $this->assertEquals('XOXO Outpost', $venue['name']);
$this->assertEquals('45.5261002', $venue['latitude']); $this->assertEquals('45.5261002', $venue['latitude']);
$this->assertEquals('-122.6558081', $venue['longitude']); $this->assertEquals('-122.6558081', $venue['longitude']);

Loading…
Cancel
Save