Browse Source

switch to using file_get_contents for appengine

pull/39/head
Aaron Parecki 8 years ago
parent
commit
82931e46bc
6 changed files with 284 additions and 121 deletions
  1. +2
    -0
      composer.json
  2. +4
    -0
      controllers/Parse.php
  3. +17
    -6
      lib/Formats/Mf2.php
  4. +2
    -115
      lib/HTTP.php
  5. +122
    -0
      lib/HTTPCurl.php
  6. +137
    -0
      lib/HTTPStream.php

+ 2
- 0
composer.json View File

@ -9,6 +9,8 @@
"lib/helpers.php", "lib/helpers.php",
"controllers/Main.php", "controllers/Main.php",
"controllers/Parse.php", "controllers/Parse.php",
"lib/HTTPCurl.php",
"lib/HTTPStream.php",
"lib/HTTP.php", "lib/HTTP.php",
"lib/Formats/Mf2.php" "lib/Formats/Mf2.php"
] ]

+ 4
- 0
controllers/Parse.php View File

@ -33,6 +33,10 @@ class Parse {
$this->http->timeout = $request->get('timeout') / 2; $this->http->timeout = $request->get('timeout') / 2;
} }
if($request->get('max_redirects')) {
$this->http->max_redirects = (int)$request->get('max_redirects');
}
$url = $request->get('url'); $url = $request->get('url');
if(!$url) { if(!$url) {

+ 17
- 6
lib/Formats/Mf2.php View File

@ -58,8 +58,15 @@ class Mf2 {
// Always arrays // Always arrays
$properties = ['photo','video','syndication','in-reply-to','like-of','repost-of','category']; $properties = ['photo','video','syndication','in-reply-to','like-of','repost-of','category'];
foreach($properties as $p) { foreach($properties as $p) {
if(array_key_exists($p, $item['properties']))
$data[$p] = $item['properties'][$p];
if(array_key_exists($p, $item['properties'])) {
$data[$p] = [];
foreach($item['properties'][$p] as $v) {
if(is_string($v))
$data[$p][] = $v;
elseif(is_array($v) and array_key_exists('value', $v))
$data[$p][] = $v['value'];
}
}
} }
// Determine if the name is distinct from the content // Determine if the name is distinct from the content
@ -73,18 +80,22 @@ class Mf2 {
$textContent = $content; $textContent = $content;
} elseif(!is_string($content) && is_array($content) && array_key_exists('value', $content)) { } elseif(!is_string($content) && is_array($content) && array_key_exists('value', $content)) {
if(array_key_exists('html', $content)) { if(array_key_exists('html', $content)) {
$textContent = strip_tags($content['html']);
$htmlContent = $content['html'];
$textContent = trim(strip_tags($content['html']));
$htmlContent = trim($content['html']);
} else { } else {
$textContent = $content['value'];
$textContent = trim($content['value']);
} }
} }
// Trim ellipses from the name // Trim ellipses from the name
$name = preg_replace('/ ?(\.\.\.|…)$/', '', $name); $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);
// Remove all whitespace when checking equality
$nameCompare = preg_replace('/\s/','',trim($name));
$contentCompare = preg_replace('/\s/','',trim($textContent));
// Check if the name is a prefix of the content // Check if the name is a prefix of the content
if(strpos($textContent, $name) === 0) {
if(strpos($contentCompare, $nameCompare) === 0) {
$name = null; $name = null;
} }
} }

+ 2
- 115
lib/HTTP.php View File

@ -1,119 +1,6 @@
<?php <?php
namespace p3k; namespace p3k;
class HTTP {
public $timeout = 4;
public $max_redirects = 8;
public function get($url) {
$ch = curl_init($url);
$this->_set_curlopts($ch, $url);
$response = curl_exec($ch);
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
return array(
'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
'headers' => self::parse_headers(trim(substr($response, 0, $header_size))),
'body' => substr($response, $header_size),
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
);
}
public function post($url, $body, $headers=array()) {
$ch = curl_init($url);
$this->_set_curlopts($ch, $url);
$response = curl_exec($ch);
$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
return array(
'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
'headers' => self::parse_headers(trim(substr($response, 0, $header_size))),
'body' => substr($response, $header_size),
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
);
}
public function head($url) {
$ch = curl_init($url);
$this->_set_curlopts($ch, $url);
$response = curl_exec($ch);
return array(
'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
'headers' => self::parse_headers(trim($response)),
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
);
}
private function _set_curlopts($ch, $url) {
$host = parse_url($url, PHP_URL_HOST);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_HEADER, true);
// Special-case appspot.com URLs to not follow redirects.
// https://cloud.google.com/appengine/docs/php/urlfetch/
if(substr($host, -12) == '.appspot.com') {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
} else {
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
}
curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
curl_setopt($ch, CURLOPT_TIMEOUT_MS, round($this->timeout * 1000));
}
public static function error_string_from_code($code) {
switch($code) {
case 0:
return '';
case CURLE_COULDNT_RESOLVE_HOST:
return 'dns_error';
case CURLE_COULDNT_CONNECT:
return 'connect_error';
case CURLE_OPERATION_TIMEDOUT:
return 'timeout';
case CURLE_SSL_CONNECT_ERROR:
return 'ssl_error';
case CURLE_SSL_CERTPROBLEM:
return 'ssl_cert_error';
case CURLE_SSL_CIPHER:
return 'ssl_unsupported_cipher';
case CURLE_SSL_CACERT:
return 'ssl_cert_error';
case CURLE_TOO_MANY_REDIRECTS:
return 'too_many_redirects';
default:
return 'unknown';
}
}
public static function parse_headers($headers) {
$retVal = array();
$fields = explode("\r\n", preg_replace('/\x0D\x0A[\x09\x20]+/', ' ', $headers));
foreach($fields as $field) {
if(preg_match('/([^:]+): (.+)/m', $field, $match)) {
$match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
return strtoupper($m[0]);
}, strtolower(trim($match[1])));
// If there's already a value set for the header name being returned, turn it into an array and add the new value
$match[1] = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
return strtoupper($m[0]);
}, strtolower(trim($match[1])));
if(isset($retVal[$match[1]])) {
if(!is_array($retVal[$match[1]]))
$retVal[$match[1]] = array($retVal[$match[1]]);
$retVal[$match[1]][] = $match[2];
} else {
$retVal[$match[1]] = trim($match[2]);
}
}
}
return $retVal;
}
class HTTP extends HTTPStream {
} }

+ 122
- 0
lib/HTTPCurl.php View File

@ -0,0 +1,122 @@
<?php
namespace p3k;
/**
 * HTTP client backed by the cURL extension.
 *
 * get() and post() return an array with the keys `code`, `headers`, `body`,
 * `error`, `error_description` and `error_code`; head() returns the same
 * array without `body`.
 */
class HTTPCurl {

  // Total request timeout in seconds (converted to milliseconds for cURL, so fractions work).
  public $timeout = 4;

  // Value handed to CURLOPT_MAXREDIRS when redirects are followed.
  public $max_redirects = 8;

  /**
   * Performs a GET request.
   *
   * @param string $url
   * @return array result array including `body`
   */
  public function get($url) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    return $this->_execute($ch, true);
  }

  /**
   * Performs a POST request.
   *
   * @param string $url
   * @param mixed $body passed to CURLOPT_POSTFIELDS unchanged
   * @param array $headers list of "Name: value" request header lines
   * @return array result array including `body`
   */
  public function post($url, $body, $headers=array()) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    return $this->_execute($ch, true);
  }

  /**
   * Performs a HEAD request.
   *
   * @param string $url
   * @return array result array without a `body` key
   */
  public function head($url) {
    $ch = curl_init($url);
    $this->_set_curlopts($ch, $url);
    curl_setopt($ch, CURLOPT_NOBODY, true);
    return $this->_execute($ch, false);
  }

  /**
   * Runs a prepared handle and builds the result array shared by get(), post() and head().
   *
   * @param resource $ch configured cURL handle
   * @param bool $include_body whether the response carries a body that must be split from the headers
   * @return array
   */
  private function _execute($ch, $include_body) {
    $response = curl_exec($ch);
    $errno = curl_errno($ch);

    $result = array(
      'code' => curl_getinfo($ch, CURLINFO_HTTP_CODE),
    );

    if($include_body) {
      // CURLOPT_HEADER is on, so the raw response starts with the header block(s).
      $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
      $result['headers'] = self::parse_headers(trim(substr($response, 0, $header_size)));
      $result['body'] = substr($response, $header_size);
    } else {
      $result['headers'] = self::parse_headers(trim($response));
    }

    $result['error'] = self::error_string_from_code($errno);
    $result['error_description'] = curl_error($ch);
    $result['error_code'] = $errno;

    return $result;
  }

  /**
   * Applies the options common to every request.
   */
  private function _set_curlopts($ch, $url) {
    $host = (string)parse_url($url, PHP_URL_HOST);

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_HEADER, true);

    // Special-case appspot.com URLs to not follow redirects.
    // https://cloud.google.com/appengine/docs/php/urlfetch/
    if(substr($host, -12) === '.appspot.com') {
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false);
    } else {
      curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
      curl_setopt($ch, CURLOPT_MAXREDIRS, $this->max_redirects);
    }

    curl_setopt($ch, CURLOPT_TIMEOUT_MS, (int)round($this->timeout * 1000));
  }

  /**
   * Maps a cURL error number to a short error identifier.
   *
   * @param int $code value of curl_errno()
   * @return string '' when there was no error, 'unknown' for unmapped codes
   */
  public static function error_string_from_code($code) {
    switch($code) {
      case 0:
        return '';
      case CURLE_COULDNT_RESOLVE_HOST:
        return 'dns_error';
      case CURLE_COULDNT_CONNECT:
        return 'connect_error';
      case CURLE_OPERATION_TIMEDOUT:
        return 'timeout';
      case CURLE_SSL_CONNECT_ERROR:
        return 'ssl_error';
      case CURLE_SSL_CERTPROBLEM:
        return 'ssl_cert_error';
      case CURLE_SSL_CIPHER:
        return 'ssl_unsupported_cipher';
      case CURLE_SSL_CACERT:
        return 'ssl_cert_error';
      case CURLE_TOO_MANY_REDIRECTS:
        return 'too_many_redirects';
      default:
        return 'unknown';
    }
  }

  /**
   * Parses a raw header block into an associative array.
   *
   * Header names are normalized to Capitalized-Dash-Case. A header that
   * occurs once maps to a string; a header that occurs several times maps
   * to a list of its values in order of appearance. Lines without a colon
   * (such as the status line) are ignored.
   *
   * @param string $headers raw header text
   * @return array
   */
  public static function parse_headers($headers) {
    $retVal = array();

    // Join folded continuation lines onto the field they belong to.
    $unfolded = preg_replace('/\r?\n[\t ]+/', ' ', (string)$headers);

    foreach(preg_split('/\r?\n/', $unfolded) as $field) {
      // Anchored so the name is always the text before the FIRST colon;
      // whitespace after the colon is optional.
      if(!preg_match('/^([^:]+):[\t ]*(.+)$/', $field, $match))
        continue;

      $name = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
        return strtoupper($m[0]);
      }, strtolower(trim($match[1])));
      $value = trim($match[2]);

      // If there's already a value set for this header name, turn it into an array and add the new value
      if(isset($retVal[$name])) {
        if(!is_array($retVal[$name]))
          $retVal[$name] = array($retVal[$name]);
        $retVal[$name][] = $value;
      } else {
        $retVal[$name] = $value;
      }
    }

    return $retVal;
  }
}

+ 137
- 0
lib/HTTPStream.php View File

@ -0,0 +1,137 @@
<?php
namespace p3k;
/**
 * HTTP client built on file_get_contents() and HTTP stream contexts.
 *
 * Every request method returns an array with the keys `code`, `headers`,
 * `body`, `error` and `error_description`. `error` and `error_description`
 * are false when the request succeeded.
 */
class HTTPStream {

  // Value of the stream context `timeout` option, in seconds.
  public $timeout = 4;

  // Value of the stream context `max_redirects` option when redirects are followed.
  public $max_redirects = 8;

  /**
   * Error handler that turns PHP errors into ErrorException so that
   * file_get_contents() failures can be caught.
   *
   * @throws \ErrorException when $severity is included in error_reporting()
   */
  public static function exception_error_handler($severity, $message, $file, $line) {
    if (!(error_reporting() & $severity)) {
      // This error code is not included in error_reporting
      return;
    }
    throw new \ErrorException($message, 0, $severity, $file, $line);
  }

  /**
   * Performs a GET request.
   *
   * @param string $url
   * @return array
   */
  public function get($url) {
    return $this->_fetch($url, $this->_stream_context('GET', $url));
  }

  /**
   * Performs a POST request.
   *
   * @param string $url
   * @param string $body request body
   * @param array $headers list of "Name: value" request header lines
   * @return array
   */
  public function post($url, $body, $headers=array()) {
    return $this->_fetch($url, $this->_stream_context('POST', $url, $body, $headers));
  }

  /**
   * Performs a HEAD request.
   *
   * @param string $url
   * @return array
   */
  public function head($url) {
    return $this->_fetch($url, $this->_stream_context('HEAD', $url));
  }

  /**
   * Executes the request and builds the result array.
   *
   * The exception-throwing error handler is installed only for the duration
   * of the request and is always restored afterwards, so it does not leak
   * into the rest of the application.
   */
  private function _fetch($url, $context) {
    $error = false;
    $response_headers = array();

    set_error_handler(array(__CLASS__, 'exception_error_handler'));
    try {
      // file_get_contents() must be called in this scope: the http wrapper
      // populates the local variable $http_response_header of its caller.
      $body = file_get_contents($url, false, $context);
      if(isset($http_response_header) && is_array($http_response_header)) {
        $response_headers = $http_response_header;
      }
    } catch(\Exception $e) {
      $body = false;
      $response_headers = array();
      $description = str_replace('file_get_contents(): ', '', $e->getMessage());
      $code = 'unknown';

      // Later matches intentionally override earlier ones.
      if(preg_match('/getaddrinfo failed/', $description)) {
        $code = 'dns_error';
        $description = str_replace('php_network_getaddresses: ', '', $description);
      }
      if(preg_match('/timed out/', $description)) {
        $code = 'timeout';
      }
      if(preg_match('/certificate/', $description)) {
        $code = 'ssl_error';
      }

      $error = array(
        'description' => $description,
        'code' => $code
      );
    } finally {
      restore_error_handler();
    }

    // The request can fail without an exception when the warning's severity
    // is excluded from error_reporting; do not report that as success.
    if($body === false && $error === false) {
      $error = array(
        'description' => 'The request failed without reporting an error',
        'code' => 'unknown'
      );
    }

    return array(
      'code' => self::parse_response_code($response_headers),
      'headers' => self::parse_headers($response_headers),
      'body' => $body,
      'error' => $error ? $error['code'] : false,
      'error_description' => $error ? $error['description'] : false,
    );
  }

  /**
   * Builds the HTTP stream context for a request.
   */
  private function _stream_context($method, $url, $body=false, $headers=[]) {
    $host = (string)parse_url($url, PHP_URL_HOST);

    $options = [
      'method' => $method,
      'timeout' => $this->timeout,
      // Return the body of 4xx/5xx responses instead of failing.
      'ignore_errors' => true,
    ];

    // Compare explicitly so that a body of "0" is still sent.
    if($body !== false && $body !== null && $body !== '') {
      $options['content'] = $body;
    }

    if($headers) {
      $options['header'] = $headers;
    }

    // Special-case appspot.com URLs to not follow redirects.
    // https://cloud.google.com/appengine/docs/php/urlfetch/
    if(substr($host, -12) === '.appspot.com') {
      $options['follow_location'] = 0;
    } else {
      $options['follow_location'] = 1;
      $options['max_redirects'] = $this->max_redirects;
    }

    return stream_context_create(['http' => $options]);
  }

  /**
   * Extracts the HTTP status code from a list of response header lines.
   *
   * @param array $headers header lines, possibly spanning several responses
   * @return string|false the status code as a numeric string, or false when no status line is present
   */
  public static function parse_response_code($headers) {
    $code = false;
    if(!is_array($headers))
      return $code;

    // When a response is a redirect, we want to find the last occurrence of the HTTP code
    foreach($headers as $field) {
      // Anchored so only real status lines count, not header values that
      // happen to contain "HTTP/1.1 nnn".
      if(preg_match('/^HTTP\/\d(?:\.\d)? (\d+)/', $field, $match)) {
        $code = $match[1];
      }
    }
    return $code;
  }

  /**
   * Parses a list of header lines into an associative array.
   *
   * Header names are normalized to Capitalized-Dash-Case. A header that
   * occurs once maps to a string; a header that occurs several times maps
   * to a list of its values in order of appearance. Lines without a colon
   * (such as status lines) are ignored.
   *
   * @param array $headers header lines
   * @return array
   */
  public static function parse_headers($headers) {
    $retVal = array();
    if(!is_array($headers))
      return $retVal;

    foreach($headers as $field) {
      // Anchored so the name is always the text before the FIRST colon;
      // whitespace after the colon is optional.
      if(!preg_match('/^([^:]+):[\t ]*(.+)$/', $field, $match))
        continue;

      $name = preg_replace_callback('/(?<=^|[\x09\x20\x2D])./', function($m) {
        return strtoupper($m[0]);
      }, strtolower(trim($match[1])));
      $value = trim($match[2]);

      // If there's already a value set for this header name, turn it into an array and add the new value
      if(isset($retVal[$name])) {
        if(!is_array($retVal[$name]))
          $retVal[$name] = array($retVal[$name]);
        $retVal[$name][] = $value;
      } else {
        $retVal[$name] = $value;
      }
    }

    return $retVal;
  }
}

Loading…
Cancel
Save