Browse Source

stub mf2 parsing

pull/39/head
Aaron Parecki 8 years ago
parent
commit
13bb06d2c9
5 changed files with 47 additions and 11 deletions
  1. +18
    -6
      README.md
  2. +2
    -1
      composer.json
  3. +14
    -2
      controllers/Parse.php
  4. +12
    -0
      lib/Formats/Mf2.php
  5. +1
    -2
      tests/ParseTest.php

+ 18
- 6
README.md View File

@ -1,8 +1,18 @@
Percolator
==========
API
---
## Discovering Content
The contents of the URL are checked in the following order:
* A supported silo URL
* h-entry, h-event, h-card
* OEmbed
* OGP
## API
To parse a page and return structured data for the contents of the page, simply pass a URL to the parse route.
@ -31,14 +41,14 @@ In both cases, the response will be a JSON object containing a key of "type". If
Other possible errors are listed below:
* not_found: The URL provided was not found. (Returned 404 when fetching)
* invalid_ssl: There was an error validating the SSL certificate. This may happen if the SSL certificate has expired, or was signed by a root authority not recognized by this service.
* ssl_cert_error: There was an error validating the SSL certificate. This may happen if the SSL certificate has expired.
* ssl_unsupported_cipher: The web server does not support any of the SSL ciphers known by the service.
* timeout: The service timed out trying to connect to the URL.
* invalid_content: The content at the URL was not valid. For example, providing a URL to an image will return this error.
* no_link_found: The target link was not found on the page. When a target parameter is provided, this is the error that will be returned if the target could not be found on the page.
* no_content: No usable content could be found at the given URL.
Response Format
---------------
## Response Format
```json
{
@ -75,6 +85,8 @@ The content will be an object that always contains a "text" property and may con
The author will always be set in the entry if available. The service follows the [authorship discovery](http://indiewebcamp.com/authorship) algorithm to try to find the author information elsewhere on the page if it is not inside the entry in the source document.
All URLs provided in the output are absolute URLs. If the source document contains a relative URL, it will be resolved first.
Replies, likes, reposts, etc. of this post will be included if they are listed on the page.
```json

+ 2
- 1
composer.json View File

@ -9,7 +9,8 @@
"lib/helpers.php",
"controllers/Main.php",
"controllers/Parse.php",
"lib/HTTP.php"
"lib/HTTP.php",
"lib/Formats/Mf2.php"
]
},
"autoload-dev": {

+ 14
- 2
controllers/Parse.php View File

@ -2,6 +2,8 @@
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
use Percolator\Formats;
class Parse {
public $http;
@ -103,10 +105,20 @@ class Parse {
}
}
// Now start pulling in the data from the page. Start by looking for microformats2
$mf2 = mf2\Parse($result['body']);
if($mf2 && count($mf2['items']) > 0) {
$data = Formats\Mf2::parse($mf2);
return $this->respond($response, 200, $data);
}
// TODO: look for other content like OEmbed or known services later
return $this->respond($response, 200, [
'url' => $url,
return $this->respond($response, 400, [
'type' => 'error',
'error' => 'no_content',
'error_description' => 'No usable content could be found at the given URL'
]);
}

+ 12
- 0
lib/Formats/Mf2.php View File

@ -0,0 +1,12 @@
<?php
namespace Percolator\Formats;
/**
 * Turns parsed microformats2 data into the structure returned by the parse route.
 *
 * This is currently a stub: the input is not inspected and a fixed
 * "entry" result is always returned.
 */
class Mf2 {

  /**
   * Build the response data for a parsed document.
   *
   * @param mixed $mf2 Parsed microformats2 data; presently ignored.
   *                   NOTE(review): presumably the array produced by the mf2 parser — confirm against caller.
   * @return array Array containing a single "type" key with the value "entry".
   */
  public static function parse($mf2) {
    // Placeholder result; nothing is read from $mf2 yet.
    $result = [];
    $result['type'] = 'entry';

    return $result;
  }

}

+ 1
- 2
tests/ParseTest.php View File

@ -54,11 +54,10 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$response = $this->parse(['url' => $url, 'target' => 'http://target.example.com']);
$body = $response->getContent();
print_r($body);
$this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body);
$this->assertNotEquals('error', $data->type);
$this->assertNotEquals('no_link_found', $data->error);
$this->assertObjectNotHasAttribute('error', $data);
}
}

Loading…
Cancel
Save