diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f6ad211 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +vendor/ +composer.lock + diff --git a/README.md b/README.md index d95f303..7e14ed9 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,4 @@ # p3k-http -A simple wrapper API around the PHP curl functions, used by https://p3k.io projects + +A simple wrapper around the PHP curl functions, used by https://p3k.io projects. + diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..fce58db --- /dev/null +++ b/composer.json @@ -0,0 +1,24 @@ +{ + "name": "p3k/http", + "type": "library", + "description": "A simple wrapper API around the PHP curl functions", + "license": "MIT", + "homepage": "https://github.com/aaronpk/p3k-http", + "authors": [ + { + "name": "Aaron Parecki", + "homepage": "https://aaronparecki.com" + } + ], + "require": { + "indieweb/link-rel-parser": "0.1.*", + "mf2/mf2": "0.3.*" + }, + "autoload": { + "psr-4": { + "p3k\\HTTP\\": "src/p3k" + } + }, + "autoload-dev": { + } +} diff --git a/src/p3k/HTTP.php b/src/p3k/HTTP.php new file mode 100644 index 0000000..c924d87 --- /dev/null +++ b/src/p3k/HTTP.php @@ -0,0 +1,90 @@ +_user_agent = $user_agent; + } + if(!$transport) { + $this->_transport = new HTTP\Curl(); + } else { + $this->set_transport($transport); + } + } + + public function set_max_redirects($max) { + $this->_max_redirects = $max; + } + + public function set_timeout($timeout) { + $this->_timeout = $timeout; + } + + public function set_transport(HTTP\Transport $transport) { + $this->_transport = $transport; + } + + public function get($url, $headers=[]) { + $this->_transport->set_timeout($this->_timeout); + $this->_transport->set_max_redirects($this->_max_redirects); + $response = $this->_transport->get($url, $headers); + $response = $this->_build_response($response); + return $response; + } + + public function post($url, $body, $headers=[]) { + $this->_transport->set_timeout($this->_timeout); + $this->_transport->set_max_redirects($this->_max_redirects); + $response = $this->_transport->post($url, $body, $headers); + $response = $this->_build_response($response); + return $response; + } + + public function head($url) { + $this->_transport->set_timeout($this->_timeout); + $this->_transport->set_max_redirects($this->_max_redirects); + $response = $this->_transport->head($url); + $response = $this->_build_response($response); + return $response; + } + + private function _build_response($response) { + // Parses the HTTP headers and adds the "headers" and "rels" response keys + $response['headers'] = self::_parse_headers($response['header']); + $response['rels'] = \IndieWeb\http_rels($response['header']); + unset($response['header']); + return $response; + } + + private static function _parse_headers($headers) { + $retVal = array(); + $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers)); + foreach($fields as $field) { + if(preg_match('/([^:]+): (.+)/m', $field, $match)) { + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + // If there's already a value set for the header name being returned, turn it into an array and add the new value + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + if(isset($retVal[$match[1]])) { + if(!is_array($retVal[$match[1]])) + $retVal[$match[1]] = array($retVal[$match[1]]); + $retVal[$match[1]][] = $match[2]; + } else { + $retVal[$match[1]] = trim($match[2]); + } + } + } + return $retVal; + } +} diff --git a/src/p3k/HTTP/Curl.php b/src/p3k/HTTP/Curl.php new file mode 100644 index 0000000..8eb8477 --- /dev/null +++ b/src/p3k/HTTP/Curl.php @@ -0,0 +1,105 @@ +_max_redirects = $max; + } + + public function set_timeout($timeout) { + $this->_timeout = $timeout; + } + + public function get($url, $headers=[]) { + $ch = curl_init($url); + $this->_set_curlopts($ch, $url); + if($headers) + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $response = curl_exec($ch); + $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + $header_str = trim(substr($response, 0, $header_size)); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'header' => $header_str, + 'body' => substr($response, $header_size), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), + 'debug' => $response + ); + } + + public function post($url, $body, $headers=[]) { + $ch = curl_init($url); + $this->_set_curlopts($ch, $url); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $body); + if($headers) + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $response = curl_exec($ch); + $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + $header_str = trim(substr($response, 0, $header_size)); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'header' => $header_str, + 'body' => substr($response, $header_size), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), + 'debug' => $response + ); + } + + public function head($url) { + $ch = curl_init($url); + $this->_set_curlopts($ch, $url); + curl_setopt($ch, CURLOPT_NOBODY, true); + $response = curl_exec($ch); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'header' => trim($response), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), + 'debug' => $response + ); + } + + private function _set_curlopts($ch, $url) { + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HEADER, true); + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $this->_max_redirects); + curl_setopt($ch, CURLOPT_TIMEOUT_MS, round($this->_timeout * 1000)); + curl_setopt($ch, CURLOPT_CONNECTTIMEOUT_MS, 2000); + } + + public static function error_string_from_code($code) { + switch($code) { + case 0: + return ''; + case CURLE_COULDNT_RESOLVE_HOST: + return 'dns_error'; + case CURLE_COULDNT_CONNECT: + return 'connect_error'; + case CURLE_OPERATION_TIMEDOUT: + return 'timeout'; + case CURLE_SSL_CONNECT_ERROR: + return 'ssl_error'; + case CURLE_SSL_CERTPROBLEM: + return 'ssl_cert_error'; + case CURLE_SSL_CIPHER: + return 'ssl_unsupported_cipher'; + case CURLE_SSL_CACERT: + return 'ssl_cert_error'; + case CURLE_TOO_MANY_REDIRECTS: + return 'too_many_redirects'; + default: + return 'unknown'; + } + } +} diff --git a/src/p3k/HTTP/Stream.php b/src/p3k/HTTP/Stream.php new file mode 100644 index 0000000..5cb3a9d --- /dev/null +++ b/src/p3k/HTTP/Stream.php @@ -0,0 +1,123 @@ +_max_redirects = $max; + } + + public function set_timeout($timeout) { + $this->_timeout = $timeout; + } + + public static function exception_error_handler($severity, $message, $file, $line) { + if (!(error_reporting() & $severity)) { + // This error code is not included in error_reporting + return; + } + throw new \ErrorException($message, 0, $severity, $file, $line); + } + + public function get($url, $headers=[]) { + set_error_handler("p3k\HTTPStream::exception_error_handler"); + $context = $this->_stream_context('GET', $url, false, $headers); + return $this->_fetch($url, $context); + } + + public function post($url, $body, $headers=[]) { + set_error_handler("p3k\HTTPStream::exception_error_handler"); + $context = $this->_stream_context('POST', $url, $body, $headers); + return $this->_fetch($url, $context); + } + + public function head($url) { + set_error_handler("p3k\HTTPStream::exception_error_handler"); + $context = $this->_stream_context('HEAD', $url); + return $this->_fetch($url, $context); + } + + private function _fetch($url, $context) { + $error = false; + + try { + $body = file_get_contents($url, false, $context); + // This sets $http_response_header + // see http://php.net/manual/en/reserved.variables.httpresponseheader.php + } catch(\Exception $e) { + $body = false; + $http_response_header = []; + $description = str_replace('file_get_contents(): ', '', $e->getMessage()); + $code = 'unknown'; + + if(preg_match('/getaddrinfo failed/', $description)) { + $code = 'dns_error'; + $description = str_replace('php_network_getaddresses: ', '', $description); + } + + if(preg_match('/timed out|request failed/', $description)) { + $code = 'timeout'; + } + + if(preg_match('/certificate/', $description)) { + $code = 'ssl_error'; + } + + $error = [ + 'description' => $description, + 'code' => $code + ]; + } + + return array( + 'code' => self::parse_response_code($http_response_header), + 'header' => implode("\r\n", $http_response_header), + 'body' => $body, + 'error' => $error ? $error['code'] : false, + 'error_description' => $error ? $error['description'] : false, + 'url' => $url, + ); + } + + private function _stream_context($method, $url, $body=false, $headers=[]) { + $options = [ + 'method' => $method, + 'timeout' => $this->_timeout, + 'ignore_errors' => true, + ]; + + if($body) { + $options['content'] = $body; + } + + if($headers) { + $options['header'] = implode("\r\n", $headers); + } + + // Special-case appspot.com URLs to not follow redirects. + // https://cloud.google.com/appengine/docs/php/urlfetch/ + if(should_follow_redirects($url)) { + $options['follow_location'] = 1; + $options['max_redirects'] = $this->_max_redirects; + } else { + $options['follow_location'] = 0; + } + + return stream_context_create(['http' => $options]); + } + + public static function parse_response_code($headers) { + // When a response is a redirect, we want to find the last occurrence of the HTTP code + $code = false; + foreach($headers as $field) { + if(preg_match('/HTTP\/\d\.\d (\d+)/', $field, $match)) { + $code = $match[1]; + } + } + return $code; + } + +} diff --git a/src/p3k/HTTP/Test.php b/src/p3k/HTTP/Test.php new file mode 100644 index 0000000..9a17bb8 --- /dev/null +++ b/src/p3k/HTTP/Test.php @@ -0,0 +1,138 @@ +_testDataPath = $testDataPath; + } + + protected $_timeout = 4; + protected $_max_redirects = 8; + + public function set_max_redirects($max) { + $this->_max_redirects = $max; + } + + public function set_timeout($timeout) { + $this->_timeout = $timeout; + } + + public function set_transport(Transport $transport) { + } + + public function get($url, $headers=[]) { + $this->_redirects_remaining = $this->_max_redirects; + $parts = parse_url($url); + unset($parts['fragment']); + $url = \build_url($parts); + return $this->_read_file($url); + } + + public function post($url, $body, $headers=[]) { + return $this->_read_file($url); + } + + public function head($url) { + $response = $this->_read_file($url); + return array( + 'code' => $response['code'], + 'headers' => $response['headers'], + 'rels' => $response['rels'], + 'error' => '', + 'error_description' => '', + 'url' => $response['url'] + ); + } + + private function _read_file($url) { + $parts = parse_url($url); + if($parts['path']) { + $parts['path'] = '/'.str_replace('/','_',substr($parts['path'],1)); + $url = \build_url($parts); + } + + $filename = $this->_testDataPath.preg_replace('/https?:\/\//', '', $url); + if(!file_exists($filename)) { + $filename = $this->_testDataPath.'404.response.txt'; + } + $response = file_get_contents($filename); + + $split = explode("\r\n\r\n", $response); + if(count($split) < 2) { + throw new \Exception("Invalid file contents in test data, check that newlines are CRLF: $url"); + } + $headers = array_shift($split); + $body = implode("\r\n", $split); + + if(preg_match('/HTTP\/1\.1 (\d+)/', $headers, $match)) { + $code = $match[1]; + } + + $headers = preg_replace('/HTTP\/1\.1 \d+ .+/', '', $headers); + $parsedHeaders = self::_parse_headers($headers); + + if(array_key_exists('Location', $parsedHeaders)) { + $effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']); + if($this->_redirects_remaining > 0) { + $this->_redirects_remaining--; + return $this->_read_file($effectiveUrl); + } else { + return [ + 'code' => 0, + 'headers' => $parsedHeaders, + 'rels' => \IndieWeb\http_rels($headers), + 'body' => $body, + 'error' => 'too_many_redirects', + 'error_description' => '', + 'url' => $effectiveUrl + ]; + } + } else { + $effectiveUrl = $url; + } + + return array( + 'code' => $code, + 'headers' => $parsedHeaders, + 'rels' => \IndieWeb\http_rels($headers), + 'body' => $body, + 'error' => (isset($parsedHeaders['X-Test-Error']) ? $parsedHeaders['X-Test-Error'] : ''), + 'error_description' => '', + 'url' => $effectiveUrl + ); + } + + private static function _parse_headers($headers) { + $retVal = array(); + $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers)); + foreach($fields as $field) { + if(preg_match('/([^:]+): (.+)/m', $field, $match)) { + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + // If there's already a value set for the header name being returned, turn it into an array and add the new value + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + if(isset($retVal[$match[1]])) { + if(!is_array($retVal[$match[1]])) + $retVal[$match[1]] = array($retVal[$match[1]]); + $retVal[$match[1]][] = $match[2]; + } else { + $retVal[$match[1]] = trim($match[2]); + } + } + } + return $retVal; + } + +} diff --git a/src/p3k/HTTP/Transport.php b/src/p3k/HTTP/Transport.php new file mode 100644 index 0000000..0b54f78 --- /dev/null +++ b/src/p3k/HTTP/Transport.php @@ -0,0 +1,34 @@ +