From 13bb06d2c93992375aa39eef4691407c1bb6c9cc Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 19 Feb 2016 15:29:05 -0800 Subject: [PATCH] stub mf2 parsing --- README.md | 24 ++++++++++++++++++------ composer.json | 3 ++- controllers/Parse.php | 16 ++++++++++++++-- lib/Formats/Mf2.php | 12 ++++++++++++ tests/ParseTest.php | 3 +-- 5 files changed, 47 insertions(+), 11 deletions(-) create mode 100644 lib/Formats/Mf2.php diff --git a/README.md b/README.md index eca805a..7fb01cb 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,18 @@ Percolator ========== -API ---- + +## Discovering Content + +The contents of the URL are checked in the following order: + +* A supported silo URL +* h-entry, h-event, h-card +* OEmbed +* OGP + + +## API To parse a page and return structured data for the contents of the page, simply pass a url to the parse route. @@ -31,14 +41,14 @@ In both cases, the response will be a JSON object containing a key of "type". If Other possible errors are listed below: * not_found: The URL provided was not found. (Returned 404 when fetching) -* invalid_ssl: There was an error validating the SSL certificate. This may happen if the SSL certificate has expired, or was signed by a root authority not recognized by this service. +* ssl_cert_error: There was an error validating the SSL certificate. This may happen if the SSL certificate has expired. +* ssl_unsupported_cipher: The web server does not support any of the SSL ciphers known by the service. * timeout: The service timed out trying to connect to the URL. * invalid_content: The content at the URL was not valid. For example, providing a URL to an image will return this error. * no_link_found: The target link was not found on the page. When a target parameter is provided, this is the error that will be returned if the target could not be found on the page. +* no_content: No usable content could be found at the given URL. 
- -Response Format ---------------- +## Response Format ```json { @@ -75,6 +85,8 @@ The content will be an object that always contains a "text" property and may con The author will always be set in the entry if available. The service follows the [authorship discovery](http://indiewebcamp.com/authorship) algorithm to try to find the author information elsewhere on the page if it is not inside the entry in the source document. +All URLs provided in the output are absolute URLs. If the source document contains a relative URL, it will be resolved first. + Replies, likes, reposts, etc. of this post will be included if they are listed on the page. ```json diff --git a/composer.json b/composer.json index d6c8533..60dbf6c 100644 --- a/composer.json +++ b/composer.json @@ -9,7 +9,8 @@ "lib/helpers.php", "controllers/Main.php", "controllers/Parse.php", - "lib/HTTP.php" + "lib/HTTP.php", + "lib/Formats/Mf2.php" ] }, "autoload-dev": { diff --git a/controllers/Parse.php b/controllers/Parse.php index d9d1314..bf58ae2 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -2,6 +2,8 @@ use Symfony\Component\HttpFoundation\Request; use Symfony\Component\HttpFoundation\Response; +use Percolator\Formats; + class Parse { public $http; @@ -103,10 +105,20 @@ class Parse { } } + // Now start pulling in the data from the page. 
Start by looking for microformats2 + $mf2 = mf2\Parse($result['body']); + if($mf2 && count($mf2['items']) > 0) { + $data = Formats\Mf2::parse($mf2); + return $this->respond($response, 200, $data); + } + + // TODO: look for other content like OEmbed or known services later - return $this->respond($response, 200, [ - 'url' => $url, + return $this->respond($response, 400, [ + 'type' => 'error', + 'error' => 'no_content', + 'error_description' => 'No usable content could be found at the given URL' ]); } diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php new file mode 100644 index 0000000..95c4b83 --- /dev/null +++ b/lib/Formats/Mf2.php @@ -0,0 +1,12 @@ + 'entry' + ]; + } + +} diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 1360440..f103179 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -54,11 +54,10 @@ class ParseTest extends PHPUnit_Framework_TestCase { $response = $this->parse(['url' => $url, 'target' => 'http://target.example.com']); $body = $response->getContent(); - print_r($body); $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body); $this->assertNotEquals('error', $data->type); - $this->assertNotEquals('no_link_found', $data->error); + $this->assertObjectNotHasAttribute('error', $data); } } \ No newline at end of file