diff --git a/README.md b/README.md index d5ed8f2..75cb991 100644 --- a/README.md +++ b/README.md @@ -105,10 +105,15 @@ Possible errors are listed below: "html": "Now that @MozillaPersona is shutting down, the only good way to do email-based login is how @poetica does it.", "text": "Now that @MozillaPersona is shutting down, the only good way to do email-based login is how @poetica does it." }, - } + }, + "url": "https://aaronparecki.com/2016/01/16/11/" } ``` +#### Primary Data + +The primary object on the page is returned in the `data` property. This will indicate the type of object (e.g. `entry`), and will contain the vocabulary's properties that it was able to parse from the page. + If a property supports multiple values, it will always be returned as an array. The following properties support multiple values: * in-reply-to @@ -161,6 +166,14 @@ In a future version, replies, likes, reposts, etc. of this post will be included ``` +#### Other Properties + +Other properties are returned in the response at the same level as the `data` property. + +* `url` - The effective URL that the document was retrieved from. This will be the final URL after following any redirects. +* `code` - The HTTP response code returned by the URL. Typically this will be 200, but if the URL returned an alternate HTTP code that also included an h-entry (such as a 410 deleted notice with a stub h-entry), you can use this to find out that the original URL was actually deleted. + + ## Token API When verifying [Private Webmentions](https://indieweb.org/Private-Webmention#How_to_Receive_Private_Webmentions), you will need to exchange a code for an access token at the token endpoint specified by the source URL. diff --git a/controllers/Parse.php b/controllers/Parse.php index e172a53..4ed5ab6 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -119,12 +119,16 @@ class Parse { if($data) { if($request->get('include_original')) $data['original'] = $parsed; + $data['url'] = $url; + $data['code'] = 200; return $this->respond($response, 200, $data); } else { return $this->respond($response, 200, [ 'data' => [ 'type' => 'unknown' - ] + ], + 'url' => $url, + 'code' => 0 ]); } } @@ -155,14 +159,29 @@ class Parse { if($result['error']) { return $this->respond($response, 200, [ 'error' => $result['error'], - 'error_description' => $result['error_description'] + 'error_description' => $result['error_description'], + 'url' => $result['url'], + 'code' => $result['code'] ]); } if(trim($result['body']) == '') { + if($result['code'] == 410) { + // 410 Gone responses are valid and should not return an error + return $this->respond($response, 200, [ + 'data' => [ + 'type' => 'unknown' + ], + 'url' => $result['url'], + 'code' => $result['code'] + ]); + } + return $this->respond($response, 200, [ 'error' => 'no_content', - 'error_description' => 'We did not get a response body when fetching the URL' + 'error_description' => 'We did not get a response body when fetching the URL', + 'url' => $result['url'], + 'code' => $result['code'] ]); } @@ -171,12 +190,16 @@ class Parse { return $this->respond($response, 200, [ 'error' => 'unauthorized', 'error_description' => 'The URL returned "HTTP 401 Unauthorized"', + 'url' => $result['url'], + 'code' => 401 ]); } if($result['code'] == 403) { return $this->respond($response, 200, [ 'error' => 'forbidden', 'error_description' => 'The URL returned "HTTP 403 Forbidden"', + 'url' => $result['url'], + 'code' => 403 ]); } @@ -189,6 +212,8 @@ class Parse { list($data, $parsed) = Formats\Instagram::parse($result['body'], $result['url'], $this->http); if($request->get('include_original')) $data['original'] = $parsed; + $data['url'] = $result['url']; + $data['code'] = $result['code']; return $this->respond($response, 200, $data); } @@ -266,6 +291,8 @@ class Parse { } if($request->get('include_original')) $data['original'] = $html; + $data['url'] = $result['url']; // this will be the effective URL after following redirects + $data['code'] = $result['code']; return $this->respond($response, 200, $data); } } @@ -275,7 +302,9 @@ class Parse { return $this->respond($response, 200, [ 'data' => [ 'type' => 'unknown', - ] + ], + 'url' => $result['url'], + 'code' => $result['code'] ]); } diff --git a/lib/Formats/Twitter.php b/lib/Formats/Twitter.php index 5eca10a..fb6a364 100644 --- a/lib/Formats/Twitter.php +++ b/lib/Formats/Twitter.php @@ -24,12 +24,12 @@ class Twitter { try { $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']); } catch(\TwitterException $e) { - return false; + return [false, false]; } } if(!$tweet) - return false; + return [false, false]; $entry = array( 'type' => 'entry', diff --git a/lib/HTTPTest.php b/lib/HTTPTest.php index 966c722..e607086 100644 --- a/lib/HTTPTest.php +++ b/lib/HTTPTest.php @@ -4,12 +4,14 @@ namespace p3k; class HTTPTest extends HTTPCurl { private $_testDataPath; + private $_redirects_remaining; public function __construct($testDataPath) { $this->_testDataPath = $testDataPath; } public function get($url, $headers=[]) { + $this->_redirects_remaining = $this->max_redirects; $parts = parse_url($url); unset($parts['fragment']); $url = \build_url($parts); @@ -60,6 +62,19 @@ class HTTPTest extends HTTPCurl { if(array_key_exists('Location', $parsedHeaders)) { $effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']); + if($this->_redirects_remaining > 0) { + $this->_redirects_remaining--; + return $this->_read_file($effectiveUrl); + } else { + return [ + 'code' => 0, + 'headers' => $parsedHeaders, + 'body' => $body, + 'error' => 'too_many_redirects', + 'error_description' => '', + 'url' => $effectiveUrl + ]; + } } else { $effectiveUrl = $url; } diff --git a/tests/FetchTest.php b/tests/FetchTest.php new file mode 100644 index 0000000..cf661b2 --- /dev/null +++ b/tests/FetchTest.php @@ -0,0 +1,106 @@ +client = new Parse(); + $this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/'); + $this->client->mc = null; + } + + private function parse($params) { + $request = new Request($params); + $response = new Response(); + return $this->client->parse($request, $response); + } + + public function testRedirectLimit() { + $url = 'http://redirect.example.com/3'; + $response = $this->parse([ + 'url' => $url, + 'max_redirects' => 1 + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectHasAttribute('error', $data); + $this->assertEquals('too_many_redirects', $data->error); + + $url = 'http://redirect.example.com/2'; + $response = $this->parse([ + 'url' => $url, + 'max_redirects' => 1 + ]); + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectHasAttribute('error', $data); + $this->assertEquals('too_many_redirects', $data->error); + } + + public function testRedirectUnderLimit() { + $url = 'http://redirect.example.com/2'; + $response = $this->parse([ + 'url' => $url, + 'max_redirects' => 2 + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectNotHasAttribute('error', $data); + $this->assertEquals(200, $data->code); + $this->assertEquals('The Final Page', $data->data->name); + $this->assertEquals('http://redirect.example.com/0', $data->url); + } + + public function testReturnsHTTPStatusCode() { + $url = 'http://redirect.example.com/code-418'; + $response = $this->parse([ + 'url' => $url + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectNotHasAttribute('error', $data); + $this->assertEquals($url, $data->url); + $this->assertEquals(418, $data->code); + } + + public function testReturnsForbidden() { + $url = 'http://redirect.example.com/code-403'; + $response = $this->parse([ + 'url' => $url + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectHasAttribute('error', $data); + $this->assertEquals('forbidden', $data->error); + $this->assertEquals($url, $data->url); + $this->assertEquals(403, $data->code); + } + + public function testReturnsUnauthorized() { + $url = 'http://redirect.example.com/code-401'; + $response = $this->parse([ + 'url' => $url + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectHasAttribute('error', $data); + $this->assertEquals('unauthorized', $data->error); + $this->assertEquals($url, $data->url); + $this->assertEquals(401, $data->code); + } + +} diff --git a/tests/TwitterTest.php b/tests/TwitterTest.php index de4e30b..91decbf 100644 --- a/tests/TwitterTest.php +++ b/tests/TwitterTest.php @@ -21,6 +21,8 @@ class TwitterTest extends PHPUnit_Framework_TestCase { private function loadTweet($id) { $url = 'https://twitter.com/_/status/'.$id; $json = file_get_contents(dirname(__FILE__).'/data/api.twitter.com/'.$id.'.json'); + $parsed = json_decode($json); + $url = 'https://twitter.com/'.$parsed->user->screen_name.'/status/'.$id; return [$url, $json]; } @@ -51,6 +53,8 @@ class TwitterTest extends PHPUnit_Framework_TestCase { $data = $this->parse(['url' => $url, 'json' => $json]); + $this->assertEquals(200, $data['code']); + $this->assertEquals('https://twitter.com/pkdev/status/818913630569664512', $data['url']); $this->assertEquals('entry', $data['data']['type']); $this->assertEquals('A tweet with a URL https://indieweb.org/ #and #some #hashtags', $data['data']['content']['text']); $this->assertContains('and', $data['data']['category']); diff --git a/tests/data/redirect.example.com/0 b/tests/data/redirect.example.com/0 new file mode 100644 index 0000000..06e3870 --- /dev/null +++ b/tests/data/redirect.example.com/0 @@ -0,0 +1,16 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + The Final Page + + +

The Final Page

+

This is the final page.

+ + + diff --git a/tests/data/redirect.example.com/1 b/tests/data/redirect.example.com/1 new file mode 100644 index 0000000..fcde0fa --- /dev/null +++ b/tests/data/redirect.example.com/1 @@ -0,0 +1,15 @@ +HTTP/1.1 301 Moved Permanently +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive +Location: http://redirect.example.com/0 + + + + Moved + + + This page has moved + + diff --git a/tests/data/redirect.example.com/2 b/tests/data/redirect.example.com/2 new file mode 100644 index 0000000..c5d4c27 --- /dev/null +++ b/tests/data/redirect.example.com/2 @@ -0,0 +1,15 @@ +HTTP/1.1 301 Moved Permanently +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive +Location: http://redirect.example.com/1 + + + + Moved + + + This page has moved + + diff --git a/tests/data/redirect.example.com/3 b/tests/data/redirect.example.com/3 new file mode 100644 index 0000000..800b773 --- /dev/null +++ b/tests/data/redirect.example.com/3 @@ -0,0 +1,15 @@ +HTTP/1.1 301 Moved Permanently +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive +Location: http://redirect.example.com/2 + + + + Moved + + + This page has moved + + diff --git a/tests/data/redirect.example.com/code-401 b/tests/data/redirect.example.com/code-401 new file mode 100644 index 0000000..ba1ad06 --- /dev/null +++ b/tests/data/redirect.example.com/code-401 @@ -0,0 +1,14 @@ +HTTP/1.1 401 Unauthorized +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Unauthorized + + + Unauthorized + + diff --git a/tests/data/redirect.example.com/code-403 b/tests/data/redirect.example.com/code-403 new file mode 100644 index 0000000..9b5c682 --- /dev/null +++ b/tests/data/redirect.example.com/code-403 @@ -0,0 +1,14 @@ +HTTP/1.1 403 Forbidden +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Forbidden + + + Forbidden + + diff --git a/tests/data/redirect.example.com/code-418 b/tests/data/redirect.example.com/code-418 new file mode 100644 index 0000000..8507634 --- /dev/null +++ b/tests/data/redirect.example.com/code-418 @@ -0,0 +1,14 @@ +HTTP/1.1 418 I'm a Teapot +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + I'm a Teapot + + + I'm a Teapot + +