From c4b80506da3481739083cff39e14794480d46f3a Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Mon, 29 Feb 2016 19:33:44 -0800 Subject: [PATCH] support parsing posted HTML --- controllers/Parse.php | 56 ++++++++++++++++++++++++------------------- lib/Formats/Mf2.php | 4 +++- public/index.php | 1 + 3 files changed, 35 insertions(+), 26 deletions(-) diff --git a/controllers/Parse.php b/controllers/Parse.php index ed074a8..5cca349 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -45,41 +45,47 @@ class Parse { } $url = $request->get('url'); + $html = $request->get('html'); - if(!$url) { + if(!$url && !$html) { return $this->respond($response, 400, [ 'error' => 'missing_url', - 'error_description' => 'Provide a URL to fetch' + 'error_description' => 'Provide a URL or HTML to fetch' ]); } - // Attempt some basic URL validation - $scheme = parse_url($url, PHP_URL_SCHEME); - if(!in_array($scheme, ['http','https'])) { - return $this->respond($response, 400, [ - 'error' => 'invalid_url', - 'error_description' => 'Only http and https URLs are supported' - ]); - } + if($html) { + // If HTML is provided in the request, parse that, and use the URL provided as the base URL for mf2 resolving + $result['body'] = $html; + } else { + // Attempt some basic URL validation + $scheme = parse_url($url, PHP_URL_SCHEME); + if(!in_array($scheme, ['http','https'])) { + return $this->respond($response, 400, [ + 'error' => 'invalid_url', + 'error_description' => 'Only http and https URLs are supported' + ]); + } - $host = parse_url($url, PHP_URL_HOST); - if(!$host) { - return $this->respond($response, 400, [ - 'error' => 'invalid_url', - 'error_description' => 'The URL provided was not valid' - ]); - } + $host = parse_url($url, PHP_URL_HOST); + if(!$host) { + return $this->respond($response, 400, [ + 'error' => 'invalid_url', + 'error_description' => 'The URL provided was not valid' + ]); + } - $url = \normalize_url($url); + $url = \normalize_url($url); - // Now fetch the URL and check for any curl errors - $result = $this->http->get($url); + // Now fetch the URL and check for any curl errors + $result = $this->http->get($url); - if($result['error']) { - return $this->respond($response, 400, [ - 'error' => $result['error'], - 'error_description' => $result['error_description'] - ]); + if($result['error']) { + return $this->respond($response, 400, [ + 'error' => $result['error'], + 'error_description' => $result['error_description'] + ]); + } } // attempt to parse the page as HTML diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index f0137f0..f115b60 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -78,7 +78,7 @@ class Mf2 { } // Always returned as arrays, and may also create external references - $properties = ['in-reply-to','like-of','repost-of','bookmark-of','category']; + $properties = ['in-reply-to','like-of','repost-of','bookmark-of','category','invitee']; foreach($properties as $p) { if(array_key_exists($p, $item['properties'])) { $data[$p] = []; @@ -132,6 +132,8 @@ class Mf2 { if($name) { $data['name'] = $name; } + + // If there is content, always return the plaintext content, and return HTML content if it's different if($content) { $data['content'] = [ 'text' => $textContent diff --git a/public/index.php b/public/index.php index 899a7bf..c579bfe 100644 --- a/public/index.php +++ b/public/index.php @@ -10,6 +10,7 @@ $templates = new League\Plates\Engine(dirname(__FILE__).'/../views'); $router->addRoute('GET', '/', 'Main::index'); $router->addRoute('GET', '/parse', 'Parse::parse'); +$router->addRoute('POST', '/parse', 'Parse::parse'); $dispatcher = $router->getDispatcher(); $request = Request::createFromGlobals();