# Patch overview (preamble text before the first "diff --git" header).
#
# composer.json
#   Adds lib/HTTPCurl.php and lib/HTTPStream.php to the file list, ahead of
#   lib/HTTP.php (presumably the autoload "files" list -- the key itself is
#   outside the hunk; confirm).
#
# controllers/Parse.php
#   When the request carries a truthy "max_redirects" value, it is cast to int
#   and assigned to $this->http->max_redirects.
#
# lib/Formats/Mf2.php
#   - The "always arrays" properties are now rebuilt element by element: string
#     values are kept as-is, array values that have a 'value' key are replaced
#     by that value, and anything else is not copied.
#   - Text and HTML content are trimmed.
#   - The "name is a prefix of the content" check now compares copies of both
#     strings with all whitespace removed.
#     NOTE(review): if $nameCompare ends up empty, strpos() with an empty needle
#     behaves differently across PHP versions -- confirm the intended result.
#
# lib/HTTP.php
#   The curl-based implementation is removed; HTTP becomes an empty subclass of
#   HTTPStream.
#
# lib/HTTPCurl.php (new file)
#   Carries the curl implementation removed from lib/HTTP.php, with these
#   visible differences: post() now sets CURLOPT_POST, CURLOPT_POSTFIELDS and
#   CURLOPT_HTTPHEADER; head() now sets CURLOPT_NOBODY; _set_curlopts() no
#   longer sets CURLOPT_MAXREDIRS a second time outside the appspot.com branch.
#
# lib/HTTPStream.php (new file)
#   get()/post()/head() build a stream context and call _fetch(), which wraps
#   file_get_contents(). A caught \Exception is mapped to an error code by
#   message substring: "getaddrinfo failed" -> dns_error, "timed out" ->
#   timeout, "certificate" -> ssl_error, otherwise unknown. The result array
#   has the keys code, headers, body, error and error_description.
#   NOTE(review): post()/head() (and presumably get()) call set_error_handler()
#   but no restore_error_handler() is visible in this patch -- confirm the
#   previous handler is restored somewhere.
#   NOTE(review): exception_error_handler is referenced but its definition is
#   not visible in this text -- confirm it exists on p3k\HTTPStream.
#   NOTE(review): unlike the HTTPCurl methods, _fetch() returns no 'error_code'
#   key -- check callers that read it.
#
# Both parse_headers() implementations
#   NOTE(review): the header-name normalisation (preg_replace_callback) is
#   applied twice in a row, and the "If there's already a value set..." comment
#   sits above the second normalisation rather than the isset() check. The first
#   value stored for a header is trimmed, later appended values are not.
#
# NOTE(review): this copy of the patch has its line breaks collapsed, and the
# text that should follow three hunk headers (the PHP open tag, class header
# and the start of get()) appears to be missing, so it will presumably not
# apply as-is -- regenerate it from the repository before use.
diff --git a/composer.json b/composer.json index 60dbf6c..67b3521 100644 --- a/composer.json +++ b/composer.json @@ -9,6 +9,8 @@ "lib/helpers.php", "controllers/Main.php", "controllers/Parse.php", + "lib/HTTPCurl.php", + "lib/HTTPStream.php", "lib/HTTP.php", "lib/Formats/Mf2.php" ] diff --git a/controllers/Parse.php b/controllers/Parse.php index 6141a59..d38be86 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -33,6 +33,10 @@ class Parse { $this->http->timeout = $request->get('timeout') / 2; } + if($request->get('max_redirects')) { + $this->http->max_redirects = (int)$request->get('max_redirects'); + } + $url = $request->get('url'); if(!$url) { diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index d67bd02..6e8f2de 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -58,8 +58,15 @@ class Mf2 { // Always arrays $properties = ['photo','video','syndication','in-reply-to','like-of','repost-of','category']; foreach($properties as $p) { - if(array_key_exists($p, $item['properties'])) - $data[$p] = $item['properties'][$p]; + if(array_key_exists($p, $item['properties'])) { + $data[$p] = []; + foreach($item['properties'][$p] as $v) { + if(is_string($v)) + $data[$p][] = $v; + elseif(is_array($v) and array_key_exists('value', $v)) + $data[$p][] = $v['value']; + } + } } // Determine if the name is distinct from the content @@ -73,18 +80,22 @@ class Mf2 { $textContent = $content; } elseif(!is_string($content) && is_array($content) && array_key_exists('value', $content)) { if(array_key_exists('html', $content)) { - $textContent = strip_tags($content['html']); - $htmlContent = $content['html']; + $textContent = trim(strip_tags($content['html'])); + $htmlContent = trim($content['html']); } else { - $textContent = $content['value']; + $textContent = trim($content['value']); } } // Trim ellipses from the name $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name); + // Remove all whitespace when checking equality + $nameCompare = 
preg_replace('/\s/','',trim($name)); + $contentCompare = preg_replace('/\s/','',trim($textContent)); + // Check if the name is a prefix of the content - if(strpos($textContent, $name) === 0) { + if(strpos($contentCompare, $nameCompare) === 0) { $name = null; } } diff --git a/lib/HTTP.php b/lib/HTTP.php index fd39929..1a635f5 100644 --- a/lib/HTTP.php +++ b/lib/HTTP.php @@ -1,119 +1,6 @@ _set_curlopts($ch, $url); - $response = curl_exec($ch); - $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - return array( - 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), - 'headers' => self::parse_headers(trim(substr($response, 0, $header_size))), - 'body' => substr($response, $header_size), - 'error' => self::error_string_from_code(curl_errno($ch)), - 'error_description' => curl_error($ch), - 'error_code' => curl_errno($ch), - ); - } - - public function post($url, $body, $headers=array()) { - $ch = curl_init($url); - $this->_set_curlopts($ch, $url); - $response = curl_exec($ch); - $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - return array( - 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), - 'headers' => self::parse_headers(trim(substr($response, 0, $header_size))), - 'body' => substr($response, $header_size), - 'error' => self::error_string_from_code(curl_errno($ch)), - 'error_description' => curl_error($ch), - 'error_code' => curl_errno($ch), - ); - } - - public function head($url) { - $ch = curl_init($url); - $this->_set_curlopts($ch, $url); - $response = curl_exec($ch); - return array( - 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), - 'headers' => self::parse_headers(trim($response)), - 'error' => self::error_string_from_code(curl_errno($ch)), - 'error_description' => curl_error($ch), - 'error_code' => curl_errno($ch), - ); - } - - private function _set_curlopts($ch, $url) { - $host = parse_url($url, PHP_URL_HOST); - - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - curl_setopt($ch, CURLOPT_HEADER, true); - - // Special-case appspot.com URLs to not 
follow redirects. - // https://cloud.google.com/appengine/docs/php/urlfetch/ - if(substr($host, -12) == '.appspot.com') { - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); - } else { - curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); - curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects); - } - - curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects); - curl_setopt($ch, CURLOPT_TIMEOUT_MS, round($this->timeout * 1000)); - } - - public static function error_string_from_code($code) { - switch($code) { - case 0: - return ''; - case CURLE_COULDNT_RESOLVE_HOST: - return 'dns_error'; - case CURLE_COULDNT_CONNECT: - return 'connect_error'; - case CURLE_OPERATION_TIMEDOUT: - return 'timeout'; - case CURLE_SSL_CONNECT_ERROR: - return 'ssl_error'; - case CURLE_SSL_CERTPROBLEM: - return 'ssl_cert_error'; - case CURLE_SSL_CIPHER: - return 'ssl_unsupported_cipher'; - case CURLE_SSL_CACERT: - return 'ssl_cert_error'; - case CURLE_TOO_MANY_REDIRECTS: - return 'too_many_redirects'; - default: - return 'unknown'; - } - } - - public static function parse_headers($headers) { - $retVal = array(); - $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers)); - foreach($fields as $field) { - if(preg_match('/([^:]+): (.+)/m', $field, $match)) { - $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { - return strtoupper($m[0]); - }, strtolower(trim($match[1]))); - // If there's already a value set for the header name being returned, turn it into an array and add the new value - $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { - return strtoupper($m[0]); - }, strtolower(trim($match[1]))); - if(isset($retVal[$match[1]])) { - if(!is_array($retVal[$match[1]])) - $retVal[$match[1]] = array($retVal[$match[1]]); - $retVal[$match[1]][] = $match[2]; - } else { - $retVal[$match[1]] = trim($match[2]); - } - } - } - return $retVal; - } +class HTTP extends HTTPStream { } + diff --git a/lib/HTTPCurl.php b/lib/HTTPCurl.php 
new file mode 100644 index 0000000..000ab2c --- /dev/null +++ b/lib/HTTPCurl.php @@ -0,0 +1,122 @@ +_set_curlopts($ch, $url); + $response = curl_exec($ch); + $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'headers' => self::parse_headers(trim(substr($response, 0, $header_size))), + 'body' => substr($response, $header_size), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'error_code' => curl_errno($ch), + ); + } + + public function post($url, $body, $headers=array()) { + $ch = curl_init($url); + $this->_set_curlopts($ch, $url); + curl_setopt($ch, CURLOPT_POST, true); + curl_setopt($ch, CURLOPT_POSTFIELDS, $body); + curl_setopt($ch, CURLOPT_HTTPHEADER, $headers); + $response = curl_exec($ch); + $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'headers' => self::parse_headers(trim(substr($response, 0, $header_size))), + 'body' => substr($response, $header_size), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'error_code' => curl_errno($ch), + ); + } + + public function head($url) { + $ch = curl_init($url); + $this->_set_curlopts($ch, $url); + curl_setopt($ch, CURLOPT_NOBODY, true); + $response = curl_exec($ch); + return array( + 'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE), + 'headers' => self::parse_headers(trim($response)), + 'error' => self::error_string_from_code(curl_errno($ch)), + 'error_description' => curl_error($ch), + 'error_code' => curl_errno($ch), + ); + } + + private function _set_curlopts($ch, $url) { + $host = parse_url($url, PHP_URL_HOST); + + curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); + curl_setopt($ch, CURLOPT_HEADER, true); + + // Special-case appspot.com URLs to not follow redirects. 
+ // https://cloud.google.com/appengine/docs/php/urlfetch/ + if(substr($host, -12) == '.appspot.com') { + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); + } else { + curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true); + curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects); + } + + curl_setopt($ch, CURLOPT_TIMEOUT_MS, round($this->timeout * 1000)); + } + + public static function error_string_from_code($code) { + switch($code) { + case 0: + return ''; + case CURLE_COULDNT_RESOLVE_HOST: + return 'dns_error'; + case CURLE_COULDNT_CONNECT: + return 'connect_error'; + case CURLE_OPERATION_TIMEDOUT: + return 'timeout'; + case CURLE_SSL_CONNECT_ERROR: + return 'ssl_error'; + case CURLE_SSL_CERTPROBLEM: + return 'ssl_cert_error'; + case CURLE_SSL_CIPHER: + return 'ssl_unsupported_cipher'; + case CURLE_SSL_CACERT: + return 'ssl_cert_error'; + case CURLE_TOO_MANY_REDIRECTS: + return 'too_many_redirects'; + default: + return 'unknown'; + } + } + + public static function parse_headers($headers) { + $retVal = array(); + $fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers)); + foreach($fields as $field) { + if(preg_match('/([^:]+): (.+)/m', $field, $match)) { + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + // If there's already a value set for the header name being returned, turn it into an array and add the new value + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + if(isset($retVal[$match[1]])) { + if(!is_array($retVal[$match[1]])) + $retVal[$match[1]] = array($retVal[$match[1]]); + $retVal[$match[1]][] = $match[2]; + } else { + $retVal[$match[1]] = trim($match[2]); + } + } + } + return $retVal; + } +} diff --git a/lib/HTTPStream.php b/lib/HTTPStream.php new file mode 100644 index 0000000..23363c5 --- /dev/null +++ b/lib/HTTPStream.php @@ -0,0 +1,137 @@ 
+_stream_context('GET', $url); + return $this->_fetch($url, $context); + } + + public function post($url, $body, $headers=array()) { + set_error_handler("p3k\HTTPStream::exception_error_handler"); + $context = $this->_stream_context('POST', $url, $body, $headers); + return $this->_fetch($url, $context); + } + + public function head($url) { + set_error_handler("p3k\HTTPStream::exception_error_handler"); + $context = $this->_stream_context('HEAD', $url); + return $this->_fetch($url, $context); + } + + private function _fetch($url, $context) { + $error = false; + + try { + $body = file_get_contents($url, false, $context); + } catch(\Exception $e) { + $body = false; + $http_response_header = []; + $description = str_replace('file_get_contents(): ', '', $e->getMessage()); + $code = 'unknown'; + + if(preg_match('/getaddrinfo failed/', $description)) { + $code = 'dns_error'; + $description = str_replace('php_network_getaddresses: ', '', $description); + } + + if(preg_match('/timed out/', $description)) { + $code = 'timeout'; + } + + if(preg_match('/certificate/', $description)) { + $code = 'ssl_error'; + } + + $error = [ + 'description' => $description, + 'code' => $code + ]; + } + + return array( + 'code' => self::parse_response_code($http_response_header), + 'headers' => self::parse_headers($http_response_header), + 'body' => $body, + 'error' => $error ? $error['code'] : false, + 'error_description' => $error ? $error['description'] : false, + ); + } + + private function _stream_context($method, $url, $body=false, $headers=[]) { + $host = parse_url($url, PHP_URL_HOST); + + $options = [ + 'method' => $method, + 'timeout' => $this->timeout, + 'ignore_errors' => true, + ]; + + if($body) { + $options['content'] = $body; + } + + if($headers) { + $options['header'] = $headers; + } + + // Special-case appspot.com URLs to not follow redirects. 
+ // https://cloud.google.com/appengine/docs/php/urlfetch/ + if(substr($host, -12) == '.appspot.com') { + $options['follow_location'] = 0; + } else { + $options['follow_location'] = 1; + $options['max_redirects'] = $this->max_redirects; + } + + return stream_context_create(['http' => $options]); + } + + public static function parse_response_code($headers) { + // When a response is a redirect, we want to find the last occurrence of the HTTP code + $code = false; + foreach($headers as $field) { + if(preg_match('/HTTP\/\d\.\d (\d+)/', $field, $match)) { + $code = $match[1]; + } + } + return $code; + } + + public static function parse_headers($headers) { + $retVal = array(); + foreach($headers as $field) { + if(preg_match('/([^:]+): (.+)/m', $field, $match)) { + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + // If there's already a value set for the header name being returned, turn it into an array and add the new value + $match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) { + return strtoupper($m[0]); + }, strtolower(trim($match[1]))); + if(isset($retVal[$match[1]])) { + if(!is_array($retVal[$match[1]])) + $retVal[$match[1]] = array($retVal[$match[1]]); + $retVal[$match[1]][] = $match[2]; + } else { + $retVal[$match[1]] = trim($match[2]); + } + } + } + return $retVal; + } + +}