Browse Source

reorganize XRay classes, use p3k-http lib

* removes the HTTP classes from this project and uses p3k-http library instead
* reorganizes the XRay classes into a psr-4 compatible folder
* moves controller autoload into -dev in preparation for turning this into a library (#17)
pull/38/head
Aaron Parecki 7 years ago
parent
commit
35669270e3
No known key found for this signature in database GPG Key ID: 276C2817346D6056
23 changed files with 69 additions and 444 deletions
  1. +8
    -13
      composer.json
  2. +39
    -1
      composer.lock
  3. +12
    -7
      controllers/Parse.php
  4. +1
    -1
      controllers/Rels.php
  5. +1
    -1
      controllers/Token.php
  6. +0
    -56
      lib/HTTP.php
  7. +0
    -127
      lib/HTTPCurl.php
  8. +0
    -138
      lib/HTTPStream.php
  9. +0
    -92
      lib/HTTPTest.php
  10. +0
    -0
      lib/XRay/Formats/GitHub.php
  11. +0
    -0
      lib/XRay/Formats/HTMLPurifier_AttrDef_HTML_Microformats2.php
  12. +0
    -0
      lib/XRay/Formats/Instagram.php
  13. +0
    -0
      lib/XRay/Formats/Mf2.php
  14. +0
    -0
      lib/XRay/Formats/Twitter.php
  15. +0
    -0
      lib/XRay/Formats/XKCD.php
  16. +1
    -1
      tests/AuthorTest.php
  17. +1
    -1
      tests/FeedTest.php
  18. +1
    -1
      tests/FetchTest.php
  19. +1
    -1
      tests/GitHubTest.php
  20. +1
    -1
      tests/InstagramTest.php
  21. +1
    -1
      tests/ParseTest.php
  22. +1
    -1
      tests/SanitizeTest.php
  23. +1
    -1
      tests/TokenTest.php

+ 8
- 13
composer.json View File

@ -1,12 +1,14 @@
{
"name": "p3k/xray",
"type": "library",
"require": {
"mf2/mf2": "~0.3",
"ezyang/htmlpurifier": "4.*",
"indieweb/link-rel-parser": "0.1.*",
"dg/twitter-php": "^3.6",
"dg/twitter-php": "3.6.*",
"p3k/timezone": "*",
"cebe/markdown": "~1.1.1"
"p3k/http": "*",
"cebe/markdown": "1.1.*"
},
"require-dev": {
"league/plates": "3.*",
@ -14,22 +16,15 @@
"phpunit/phpunit": "5.7.*"
},
"autoload": {
"psr-4": {
"XRay\\": "lib/XRay"
},
"files": [
"lib/helpers.php",
"lib/HTTPCurl.php",
"lib/HTTPStream.php",
"lib/HTTP.php",
"lib/Formats/Mf2.php",
"lib/Formats/Instagram.php",
"lib/Formats/GitHub.php",
"lib/Formats/Twitter.php",
"lib/Formats/XKCD.php",
"lib/Formats/HTMLPurifier_AttrDef_HTML_Microformats2.php"
"lib/helpers.php"
]
},
"autoload-dev": {
"files": [
"lib/HTTPTest.php",
"controllers/Main.php",
"controllers/Parse.php",
"controllers/Token.php",

+ 39
- 1
composer.lock View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically"
],
"content-hash": "64404924cbc0d9b0e604d99859f3d673",
"content-hash": "10235592166c486bf7cf2601c9811861",
"packages": [
{
"name": "cebe/markdown",
@ -254,6 +254,44 @@
],
"time": "2016-03-14T12:13:34+00:00"
},
{
"name": "p3k/http",
"version": "0.1.1",
"source": {
"type": "git",
"url": "https://github.com/aaronpk/p3k-http.git",
"reference": "7409b0a44f190b053d694304e716de7ce5b3568b"
},
"dist": {
"type": "zip",
"url": "https://api.github.com/repos/aaronpk/p3k-http/zipball/7409b0a44f190b053d694304e716de7ce5b3568b",
"reference": "7409b0a44f190b053d694304e716de7ce5b3568b",
"shasum": ""
},
"require": {
"indieweb/link-rel-parser": "0.1.*",
"mf2/mf2": "0.3.*"
},
"type": "library",
"autoload": {
"psr-4": {
"p3k\\": "src/p3k"
}
},
"notification-url": "https://packagist.org/downloads/",
"license": [
"MIT"
],
"authors": [
{
"name": "Aaron Parecki",
"homepage": "https://aaronparecki.com"
}
],
"description": "A simple wrapper API around the PHP curl functions",
"homepage": "https://github.com/aaronpk/p3k-http",
"time": "2017-04-28T18:51:28+00:00"
},
{
"name": "p3k/timezone",
"version": "0.1.0",

+ 12
- 7
controllers/Parse.php View File

@ -12,11 +12,11 @@ class Parse {
private $_pretty = false;
public static function useragent() {
return 'User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 XRay/1.0.0 ('.\Config::$base.')';
return 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 XRay/1.0.0 ('.\Config::$base.')';
}
public function __construct() {
$this->http = new p3k\HTTP();
$this->http = new p3k\HTTP(self::useragent());
if(Config::$cache && class_exists('Memcache')) {
$this->mc = new Memcache();
$this->mc->addServer('127.0.0.1');
@ -49,11 +49,11 @@ class Parse {
if($request->get('timeout')) {
// We might make 2 HTTP requests, so each request gets half the desired timeout
$this->http->timeout = $request->get('timeout') / 2;
$this->http->set_timeout($request->get('timeout') / 2);
}
if($request->get('max_redirects')) {
$this->http->max_redirects = (int)$request->get('max_redirects');
if($request->get('max_redirects') !== null) {
$this->http->set_max_redirects((int)$request->get('max_redirects'));
}
if($request->get('pretty')) {
@ -103,6 +103,11 @@ class Parse {
return $this->parseGitHubURL($request, $response, $url);
}
if(!should_follow_redirects($url))
$this->http->set_transport(new p3k\HTTP\Stream());
else
$this->http->set_transport(new p3k\HTTP\Curl());
// Now fetch the URL and check for any curl errors
// Don't cache the response if a token is used to fetch it
if($this->mc && !$request->get('token')) {
@ -111,14 +116,14 @@ class Parse {
$result = json_decode($cached, true);
self::debug('using HTML from cache', 'X-Cache-Debug');
} else {
$result = $this->http->get($url, [self::useragent()]);
$result = $this->http->get($url);
$cacheData = json_encode($result);
// App Engine limits the size of cached items, so don't cache ones larger than that
if(strlen($cacheData) < 1000000)
$this->mc->set($cacheKey, $cacheData, MEMCACHE_COMPRESSED, $this->_cacheTime);
}
} else {
$headers = [self::useragent()];
$headers = [];
if($request->get('token')) {
$headers[] = 'Authorization: Bearer ' . $request->get('token');
}

+ 1
- 1
controllers/Rels.php View File

@ -70,7 +70,7 @@ class Rels {
$html = $result['body'];
$mf2 = mf2\Parse($html, $result['url']);
$rels = p3k\HTTP::link_rels($result['headers']);
$rels = $result['rels'];
if(isset($mf2['rels'])) {
$rels = array_merge($rels, $mf2['rels']);
}

+ 1
- 1
controllers/Token.php View File

@ -55,7 +55,7 @@ class Token {
if(is_string($head['headers']['Link']))
$head['headers']['Link'] = [$head['headers']['Link']];
$rels = p3k\HTTP::link_rels($head['headers']);
$rels = $head['rels'];
$endpoint = false;
if(array_key_exists('token_endpoint', $rels)) {

+ 0
- 56
lib/HTTP.php View File

@ -1,56 +0,0 @@
<?php
namespace p3k;
/**
 * Thin facade over the two HTTP transports in this namespace.
 *
 * For every request a fresh transport object is created (HTTPCurl or
 * HTTPStream, chosen per URL by should_follow_redirects()), the facade's
 * timeout / max_redirects settings are copied onto it, and the call is
 * forwarded unchanged.
 */
class HTTP {

  // Copied onto the transport before each request.
  public $timeout = 4;
  public $max_redirects = 8;

  /**
   * Forward a GET request to the transport selected for $url.
   *
   * @param string $url
   * @param array  $headers raw header lines, passed through untouched
   * @return array whatever the transport's get() returns
   */
  public function get($url, $headers=[]) {
    return $this->_transport($url)->get($url, $headers);
  }

  /**
   * Forward a POST request to the transport selected for $url.
   *
   * @param string $url
   * @param mixed  $body    request body, passed through untouched
   * @param array  $headers raw header lines, passed through untouched
   * @return array whatever the transport's post() returns
   */
  public function post($url, $body, $headers=[]) {
    return $this->_transport($url)->post($url, $body, $headers);
  }

  /**
   * Forward a HEAD request to the transport selected for $url.
   *
   * @param string $url
   * @return array whatever the transport's head() returns
   */
  public function head($url) {
    return $this->_transport($url)->head($url);
  }

  /**
   * Instantiate the transport class for $url and copy the current
   * timeout / redirect settings onto it.
   */
  private function _transport($url) {
    $class = $this->_class($url);
    // The URL is handed to the constructor exactly as before.
    $transport = new $class($url);
    $transport->timeout = $this->timeout;
    $transport->max_redirects = $this->max_redirects;
    return $transport;
  }

  /**
   * Name of the transport class to use for $url: the stream transport when
   * should_follow_redirects() says no, the curl transport otherwise.
   */
  private function _class($url) {
    return should_follow_redirects($url) ? 'p3k\HTTPCurl' : 'p3k\HTTPStream';
  }

  /**
   * Rebuild a raw header string from a parsed header array and return the
   * result of \IndieWeb\http_rels() on it.
   *
   * @param array $header_array header name => string, or name => list of strings
   * @return mixed the value returned by \IndieWeb\http_rels()
   */
  public static function link_rels($header_array) {
    $lines = [];
    foreach($header_array as $name => $value) {
      // A single string value is treated as a one-element list.
      $values = is_string($value) ? [$value] : $value;
      foreach($values as $v) {
        $lines[] = $name . ': ' . $v . "\r\n";
      }
    }
    return \IndieWeb\http_rels(implode('', $lines));
  }
}

+ 0
- 127
lib/HTTPCurl.php View File

@ -1,127 +0,0 @@
<?php
namespace p3k;
/**
 * HTTP transport built on the PHP curl extension.
 *
 * get(), post() and head() each return an array with the keys
 * code, headers, (body), error, error_description, error_code and url.
 */
class HTTPCurl {

  // Total request timeout in seconds (converted to milliseconds for curl).
  public $timeout = 4;
  // Passed to CURLOPT_MAXREDIRS when redirects are followed.
  public $max_redirects = 8;

  /**
   * Perform a GET request.
   *
   * @param string $url
   * @param array  $headers raw header lines for CURLOPT_HTTPHEADER
   * @return array response array including 'body'
   */
  public function get($url, $headers=[]) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    if($headers)
      curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    return $this->_execute($ch, true);
  }

  /**
   * Perform a POST request.
   *
   * @param string $url
   * @param mixed  $body    value for CURLOPT_POSTFIELDS
   * @param array  $headers raw header lines for CURLOPT_HTTPHEADER
   * @return array response array including 'body'
   */
  public function post($url, $body, $headers=[]) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
    if($headers)
      curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    return $this->_execute($ch, true);
  }

  /**
   * Perform a HEAD request (CURLOPT_NOBODY).
   *
   * @param string $url
   * @return array response array without a 'body' key
   */
  public function head($url) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    curl_setopt($ch, CURLOPT_NOBODY, true);
    return $this->_execute($ch, false);
  }

  /**
   * Run the prepared handle and assemble the response array.
   *
   * Key order is kept identical to the previous inline arrays because the
   * result may be serialised by callers.
   *
   * @param resource $ch        prepared curl handle
   * @param bool     $with_body true for get/post (split headers from body
   *                            using CURLINFO_HEADER_SIZE), false for head
   * @return array
   */
  private function _execute($ch, $with_body) {
    $response = curl_exec($ch);
    $result = [
      'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
    ];
    if($with_body) {
      $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
      $result['headers'] = self::parse_headers(trim(substr($response, 0, $header_size)));
      $result['body'] = substr($response, $header_size);
    } else {
      // With CURLOPT_NOBODY the whole response is the header block.
      $result['headers'] = self::parse_headers(trim($response));
    }
    $errno = curl_errno($ch);
    $result['error'] = self::error_string_from_code($errno);
    $result['error_description'] = curl_error($ch);
    $result['error_code'] = $errno;
    $result['url'] = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
    return $result;
  }

  /**
   * Apply the options shared by every request to the handle.
   */
  private function _set_curlopts($ch, $url) {
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, true);
    // Special-case appspot.com URLs to not follow redirects.
    // https://cloud.google.com/appengine/docs/php/urlfetch/
    if(should_follow_redirects($url)) {
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
    } else {
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    }
    curl_setopt($ch, CURLOPT_TIMEOUT_MS, round($this->timeout * 1000));
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT_MS, 2000);
  }

  /**
   * Map a curl error number to a short error identifier.
   *
   * @param int $code value of curl_errno()
   * @return string '' for 0, a named identifier for the listed codes,
   *                'unknown' for anything else
   */
  public static function error_string_from_code($code) {
    switch($code) {
      case 0:
        return '';
      case CURLE_COULDNT_RESOLVE_HOST:
        return 'dns_error';
      case CURLE_COULDNT_CONNECT:
        return 'connect_error';
      case CURLE_OPERATION_TIMEDOUT:
        return 'timeout';
      case CURLE_SSL_CONNECT_ERROR:
        return 'ssl_error';
      case CURLE_SSL_CERTPROBLEM:
        return 'ssl_cert_error';
      case CURLE_SSL_CIPHER:
        return 'ssl_unsupported_cipher';
      case CURLE_SSL_CACERT:
        return 'ssl_cert_error';
      case CURLE_TOO_MANY_REDIRECTS:
        return 'too_many_redirects';
      default:
        return 'unknown';
    }
  }

  /**
   * Parse a raw CRLF-separated header block into an associative array.
   *
   * Header names are normalised to Title-Case (e.g. "content-type" becomes
   * "Content-Type"). A header that occurs once maps to a string; a header
   * that occurs several times maps to a list of strings in order of
   * appearance. Every value is trimmed, including repeated ones.
   * Lines that do not contain "name: value" (such as the status line) are
   * ignored.
   *
   * @param string $headers
   * @return array
   */
  public static function parse_headers($headers) {
    $retVal = [];
    // Unfold continuation lines (CRLF followed by space/tab) into one line.
    $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers));
    foreach($fields as $field) {
      if(!preg_match('/([^:]+): (.+)/m', $field, $match))
        continue;
      // Lower-case the name, then upper-case the first letter of each
      // space/tab/hyphen separated part.
      $name = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
        return strtoupper($m[0]);
      }, strtolower(trim($match[1])));
      $value = trim($match[2]);
      if(!isset($retVal[$name])) {
        $retVal[$name] = $value;
        continue;
      }
      // If there's already a value set for this header, turn it into an array and add the new value
      if(!is_array($retVal[$name]))
        $retVal[$name] = [$retVal[$name]];
      $retVal[$name][] = $value;
    }
    return $retVal;
  }
}

+ 0
- 138
lib/HTTPStream.php View File

@ -1,138 +0,0 @@
<?php
namespace p3k;
/**
 * HTTP transport built on file_get_contents() and a stream context.
 *
 * get(), post() and head() each return an array with the keys
 * code, headers, body, error, error_description and url.
 */
class HTTPStream {

  // Request timeout in seconds, passed to the stream context.
  public $timeout = 4;
  // Passed to the stream context when redirects are followed.
  public $max_redirects = 8;

  /**
   * Error handler that converts PHP errors into \ErrorException so that
   * failures inside file_get_contents() can be caught.
   */
  public static function exception_error_handler($severity, $message, $file, $line) {
    if (!(error_reporting() & $severity)) {
      // This error code is not included in error_reporting
      return;
    }
    throw new \ErrorException($message, 0, $severity, $file, $line);
  }

  /**
   * Perform a GET request.
   *
   * @param string $url
   * @param array  $headers raw header lines
   * @return array
   */
  public function get($url, $headers=[]) {
    return $this->_request('GET', $url, false, $headers);
  }

  /**
   * Perform a POST request.
   *
   * @param string $url
   * @param mixed  $body    request content
   * @param array  $headers raw header lines
   * @return array
   */
  public function post($url, $body, $headers=[]) {
    return $this->_request('POST', $url, $body, $headers);
  }

  /**
   * Perform a HEAD request.
   *
   * @param string $url
   * @return array
   */
  public function head($url) {
    return $this->_request('HEAD', $url);
  }

  /**
   * Install the exception-throwing error handler, run the request, and
   * always put the previous handler back afterwards.
   *
   * Previously the handler was installed on every call and never removed,
   * so it stayed active for the rest of the process and stacked up with
   * each request.
   */
  private function _request($method, $url, $body=false, $headers=[]) {
    set_error_handler("p3k\HTTPStream::exception_error_handler");
    try {
      $context = $this->_stream_context($method, $url, $body, $headers);
      return $this->_fetch($url, $context);
    } finally {
      restore_error_handler();
    }
  }

  /**
   * Fetch $url with the given context and build the response array.
   * Errors raised by file_get_contents() are mapped to a short error code
   * based on the text of the error message.
   */
  private function _fetch($url, $context) {
    $error = false;

    try {
      $body = file_get_contents($url, false, $context);
      // This sets $http_response_header
      // see http://php.net/manual/en/reserved.variables.httpresponseheader.php
    } catch(\Exception $e) {
      $body = false;
      $http_response_header = [];
      $description = str_replace('file_get_contents(): ', '', $e->getMessage());
      $code = 'unknown';

      if(preg_match('/getaddrinfo failed/', $description)) {
        $code = 'dns_error';
        $description = str_replace('php_network_getaddresses: ', '', $description);
      }

      if(preg_match('/timed out|request failed/', $description)) {
        $code = 'timeout';
      }

      if(preg_match('/certificate/', $description)) {
        $code = 'ssl_error';
      }

      $error = [
        'description' => $description,
        'code' => $code
      ];
    }

    // Guard against the variable not having been populated at all.
    $response_headers = isset($http_response_header) ? $http_response_header : [];

    return array(
      'code' => self::parse_response_code($response_headers),
      'headers' => self::parse_headers($response_headers),
      'body' => $body,
      'error' => $error ? $error['code'] : false,
      'error_description' => $error ? $error['description'] : false,
      'url' => $url,
    );
  }

  /**
   * Build the 'http' stream context for a request.
   */
  private function _stream_context($method, $url, $body=false, $headers=[]) {
    $options = [
      'method' => $method,
      'timeout' => $this->timeout,
      // Return the body even for 4xx/5xx responses instead of failing.
      'ignore_errors' => true,
    ];

    if($body) {
      $options['content'] = $body;
    }

    if($headers) {
      $options['header'] = implode("\r\n", $headers);
    }

    // Special-case appspot.com URLs to not follow redirects.
    // https://cloud.google.com/appengine/docs/php/urlfetch/
    if(should_follow_redirects($url)) {
      $options['follow_location'] = 1;
      $options['max_redirects'] = $this->max_redirects;
    } else {
      $options['follow_location'] = 0;
    }

    return stream_context_create(['http' => $options]);
  }

  /**
   * Extract the HTTP status code from a list of response header lines.
   *
   * @param array $headers list of header lines
   * @return string|false the code from the last status line, false if none
   */
  public static function parse_response_code($headers) {
    // When a response is a redirect, we want to find the last occurrence of the HTTP code
    $code = false;
    foreach($headers as $field) {
      if(preg_match('/HTTP\/\d\.\d (\d+)/', $field, $match)) {
        $code = $match[1];
      }
    }
    return $code;
  }

  /**
   * Turn a list of header lines into an associative array.
   *
   * Header names are normalised to Title-Case. A header that occurs once
   * maps to a string; a header that occurs several times maps to a list of
   * strings in order of appearance. Every value is trimmed, including
   * repeated ones. Lines without "name: value" are ignored.
   *
   * @param array $headers list of header lines
   * @return array
   */
  public static function parse_headers($headers) {
    $retVal = [];
    foreach($headers as $field) {
      if(!preg_match('/([^:]+): (.+)/m', $field, $match))
        continue;
      // Lower-case the name, then upper-case the first letter of each
      // space/tab/hyphen separated part.
      $name = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
        return strtoupper($m[0]);
      }, strtolower(trim($match[1])));
      $value = trim($match[2]);
      if(!isset($retVal[$name])) {
        $retVal[$name] = $value;
        continue;
      }
      // If there's already a value set for this header, turn it into an array and add the new value
      if(!is_array($retVal[$name]))
        $retVal[$name] = [$retVal[$name]];
      $retVal[$name][] = $value;
    }
    return $retVal;
  }
}

+ 0
- 92
lib/HTTPTest.php View File

@ -1,92 +0,0 @@
<?php
namespace p3k;
/**
 * Test double for the HTTP transports: instead of making network requests
 * it reads canned responses from files under a test-data directory.
 *
 * Each fixture file holds a raw HTTP response (status line, headers, blank
 * line, body) with CRLF line endings. The file name is derived from the
 * request URL; when no file exists, '404.response.txt' is used.
 */
class HTTPTest extends HTTPCurl {

  // Directory prefix that fixture file names are appended to.
  private $_testDataPath;
  // Redirect budget for the request currently being served.
  private $_redirects_remaining;

  /**
   * @param string $testDataPath prefix for fixture paths; the URL-derived
   *                             name is appended directly to it
   */
  public function __construct($testDataPath) {
    $this->_testDataPath = $testDataPath;
  }

  /**
   * Serve a GET from the fixtures. The URL fragment is dropped before the
   * fixture lookup.
   */
  public function get($url, $headers=[]) {
    $this->_redirects_remaining = $this->max_redirects;
    $parts = parse_url($url);
    unset($parts['fragment']);
    $url = \build_url($parts);
    return $this->_read_file($url);
  }

  /**
   * Serve a POST from the fixtures. The request body and headers are not
   * used for the lookup.
   */
  public function post($url, $body, $headers=[]) {
    // Start every request with a full redirect budget; previously this was
    // only done in get(), so post()/head() used an unset or stale counter.
    $this->_redirects_remaining = $this->max_redirects;
    return $this->_read_file($url);
  }

  /**
   * Serve a HEAD from the fixtures: same lookup as the other verbs, but the
   * body is omitted and the error fields are always empty.
   */
  public function head($url) {
    $this->_redirects_remaining = $this->max_redirects;
    $response = $this->_read_file($url);
    return array(
      'code' => $response['code'],
      'headers' => $response['headers'],
      'error' => '',
      'error_description' => '',
      'url' => $response['url']
    );
  }

  /**
   * Load and parse the fixture for $url, following Location headers while
   * the redirect budget lasts.
   *
   * @throws \Exception when the fixture has no CRLF CRLF header/body separator
   */
  private function _read_file($url) {
    $parts = parse_url($url);
    // URLs without a path component (or that fail to parse) are used as-is.
    if(!empty($parts['path'])) {
      // Flatten the path into one file name: every '/' after the first becomes '_'.
      $parts['path'] = '/'.str_replace('/','_',substr($parts['path'],1));
      $url = \build_url($parts);
    }

    $filename = $this->_testDataPath.preg_replace('/https?:\/\//', '', $url);
    if(!file_exists($filename)) {
      $filename = $this->_testDataPath.'404.response.txt';
    }
    $response = file_get_contents($filename);

    $split = explode("\r\n\r\n", $response);
    if(count($split) < 2) {
      throw new \Exception("Invalid file contents in test data, check that newlines are CRLF: $url");
    }
    $headers = array_shift($split);
    // NOTE(review): blank lines inside the body (CRLF CRLF) come back as a
    // single CRLF here; left unchanged because fixture expectations may rely on it.
    $body = implode("\r\n", $split);

    // Defined up front so a fixture without an "HTTP/1.1 NNN" status line
    // yields a null code instead of an undefined-variable error.
    $code = null;
    if(preg_match('/HTTP\/1\.1 (\d+)/', $headers, $match)) {
      $code = $match[1];
    }
    // Strip the status line so only header fields are parsed.
    $headers = preg_replace('/HTTP\/1\.1 \d+ .+/', '', $headers);
    $parsedHeaders = self::parse_headers($headers);

    if(!array_key_exists('Location', $parsedHeaders)) {
      return array(
        'code' => $code,
        'headers' => $parsedHeaders,
        'body' => $body,
        // Fixtures can simulate a transport error with an X-Test-Error header.
        'error' => (isset($parsedHeaders['X-Test-Error']) ? $parsedHeaders['X-Test-Error'] : ''),
        'error_description' => '',
        'url' => $url
      );
    }

    $effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']);
    if($this->_redirects_remaining > 0) {
      $this->_redirects_remaining--;
      return $this->_read_file($effectiveUrl);
    }

    // Redirect budget exhausted.
    return [
      'code' => 0,
      'headers' => $parsedHeaders,
      'body' => $body,
      'error' => 'too_many_redirects',
      'error_description' => '',
      'url' => $effectiveUrl
    ];
  }
}
}

lib/Formats/GitHub.php → lib/XRay/Formats/GitHub.php View File


lib/Formats/HTMLPurifier_AttrDef_HTML_Microformats2.php → lib/XRay/Formats/HTMLPurifier_AttrDef_HTML_Microformats2.php View File


lib/Formats/Instagram.php → lib/XRay/Formats/Instagram.php View File


lib/Formats/Mf2.php → lib/XRay/Formats/Mf2.php View File


lib/Formats/Twitter.php → lib/XRay/Formats/Twitter.php View File


lib/Formats/XKCD.php → lib/XRay/Formats/XKCD.php View File


+ 1
- 1
tests/AuthorTest.php View File

@ -8,7 +8,7 @@ class AuthorTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/FeedTest.php View File

@ -8,7 +8,7 @@ class FeedTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/FetchTest.php View File

@ -8,7 +8,7 @@ class FetchTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/GitHubTest.php View File

@ -8,7 +8,7 @@ class GitHubTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/InstagramTest.php View File

@ -8,7 +8,7 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/ParseTest.php View File

@ -8,7 +8,7 @@ class ParseTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/SanitizeTest.php View File

@ -8,7 +8,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Parse();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
$this->client->mc = null;
}

+ 1
- 1
tests/TokenTest.php View File

@ -8,7 +8,7 @@ class TokenTest extends PHPUnit_Framework_TestCase {
public function setUp() {
$this->client = new Token();
$this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
$this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
}
private function token($params) {

Loading…
Cancel
Save