Browse Source

return status code and final URL in response

* closes #14
* updated readme with details of the response
* includes `url` and `code` in the response with the final URL after following redirects and the HTTP status code returned
pull/39/head
Aaron Parecki 8 years ago
parent
commit
bc74919ade
No known key found for this signature in database GPG Key ID: 276C2817346D6056
13 changed files with 277 additions and 7 deletions
  1. +14
    -1
      README.md
  2. +33
    -4
      controllers/Parse.php
  3. +2
    -2
      lib/Formats/Twitter.php
  4. +15
    -0
      lib/HTTPTest.php
  5. +106
    -0
      tests/FetchTest.php
  6. +4
    -0
      tests/TwitterTest.php
  7. +16
    -0
      tests/data/redirect.example.com/0
  8. +15
    -0
      tests/data/redirect.example.com/1
  9. +15
    -0
      tests/data/redirect.example.com/2
  10. +15
    -0
      tests/data/redirect.example.com/3
  11. +14
    -0
      tests/data/redirect.example.com/code-401
  12. +14
    -0
      tests/data/redirect.example.com/code-403
  13. +14
    -0
      tests/data/redirect.example.com/code-418

+ 14
- 1
README.md View File

@ -105,10 +105,15 @@ Possible errors are listed below:
"html": "Now that <a href=\"https://twitter.com/MozillaPersona\">@MozillaPersona</a> is shutting down, the only good way to do email-based login is how <a href=\"https://twitter.com/poetica\">@poetica</a> does it.", "html": "Now that <a href=\"https://twitter.com/MozillaPersona\">@MozillaPersona</a> is shutting down, the only good way to do email-based login is how <a href=\"https://twitter.com/poetica\">@poetica</a> does it.",
"text": "Now that @MozillaPersona is shutting down, the only good way to do email-based login is how @poetica does it." "text": "Now that @MozillaPersona is shutting down, the only good way to do email-based login is how @poetica does it."
}, },
}
},
"url": "https://aaronparecki.com/2016/01/16/11/"
} }
``` ```
#### Primary Data
The primary object on the page is returned in the `data` property. This object indicates its type (e.g. `entry`) and contains the properties of that vocabulary that were successfully parsed from the page.
If a property supports multiple values, it will always be returned as an array. The following properties support multiple values: If a property supports multiple values, it will always be returned as an array. The following properties support multiple values:
* in-reply-to * in-reply-to
@ -161,6 +166,14 @@ In a future version, replies, likes, reposts, etc. of this post will be included
``` ```
#### Other Properties
Other properties are returned in the response at the same level as the `data` property.
* `url` - The effective URL that the document was retrieved from. This will be the final URL after following any redirects.
* `code` - The HTTP response code returned by the URL. Typically this will be 200. If the URL returned a different HTTP code along with an h-entry (such as a 410 deleted notice with a stub h-entry), you can use this value to find out that the original URL was actually deleted.
## Token API ## Token API
When verifying [Private Webmentions](https://indieweb.org/Private-Webmention#How_to_Receive_Private_Webmentions), you will need to exchange a code for an access token at the token endpoint specified by the source URL. When verifying [Private Webmentions](https://indieweb.org/Private-Webmention#How_to_Receive_Private_Webmentions), you will need to exchange a code for an access token at the token endpoint specified by the source URL.

+ 33
- 4
controllers/Parse.php View File

@ -119,12 +119,16 @@ class Parse {
if($data) { if($data) {
if($request->get('include_original')) if($request->get('include_original'))
$data['original'] = $parsed; $data['original'] = $parsed;
$data['url'] = $url;
$data['code'] = 200;
return $this->respond($response, 200, $data); return $this->respond($response, 200, $data);
} else { } else {
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'data' => [ 'data' => [
'type' => 'unknown' 'type' => 'unknown'
]
],
'url' => $url,
'code' => 0
]); ]);
} }
} }
@ -155,14 +159,29 @@ class Parse {
if($result['error']) { if($result['error']) {
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'error' => $result['error'], 'error' => $result['error'],
'error_description' => $result['error_description']
'error_description' => $result['error_description'],
'url' => $result['url'],
'code' => $result['code']
]); ]);
} }
if(trim($result['body']) == '') { if(trim($result['body']) == '') {
if($result['code'] == 410) {
// 410 Gone responses are valid and should not return an error
return $this->respond($response, 200, [
'data' => [
'type' => 'unknown'
],
'url' => $result['url'],
'code' => $result['code']
]);
}
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'error' => 'no_content', 'error' => 'no_content',
'error_description' => 'We did not get a response body when fetching the URL'
'error_description' => 'We did not get a response body when fetching the URL',
'url' => $result['url'],
'code' => $result['code']
]); ]);
} }
@ -171,12 +190,16 @@ class Parse {
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'error' => 'unauthorized', 'error' => 'unauthorized',
'error_description' => 'The URL returned "HTTP 401 Unauthorized"', 'error_description' => 'The URL returned "HTTP 401 Unauthorized"',
'url' => $result['url'],
'code' => 401
]); ]);
} }
if($result['code'] == 403) { if($result['code'] == 403) {
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'error' => 'forbidden', 'error' => 'forbidden',
'error_description' => 'The URL returned "HTTP 403 Forbidden"', 'error_description' => 'The URL returned "HTTP 403 Forbidden"',
'url' => $result['url'],
'code' => 403
]); ]);
} }
@ -189,6 +212,8 @@ class Parse {
list($data, $parsed) = Formats\Instagram::parse($result['body'], $result['url'], $this->http); list($data, $parsed) = Formats\Instagram::parse($result['body'], $result['url'], $this->http);
if($request->get('include_original')) if($request->get('include_original'))
$data['original'] = $parsed; $data['original'] = $parsed;
$data['url'] = $result['url'];
$data['code'] = $result['code'];
return $this->respond($response, 200, $data); return $this->respond($response, 200, $data);
} }
@ -266,6 +291,8 @@ class Parse {
} }
if($request->get('include_original')) if($request->get('include_original'))
$data['original'] = $html; $data['original'] = $html;
$data['url'] = $result['url']; // this will be the effective URL after following redirects
$data['code'] = $result['code'];
return $this->respond($response, 200, $data); return $this->respond($response, 200, $data);
} }
} }
@ -275,7 +302,9 @@ class Parse {
return $this->respond($response, 200, [ return $this->respond($response, 200, [
'data' => [ 'data' => [
'type' => 'unknown', 'type' => 'unknown',
]
],
'url' => $result['url'],
'code' => $result['code']
]); ]);
} }

+ 2
- 2
lib/Formats/Twitter.php View File

@ -24,12 +24,12 @@ class Twitter {
try { try {
$tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']); $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']);
} catch(\TwitterException $e) { } catch(\TwitterException $e) {
return false;
return [false, false];
} }
} }
if(!$tweet) if(!$tweet)
return false;
return [false, false];
$entry = array( $entry = array(
'type' => 'entry', 'type' => 'entry',

+ 15
- 0
lib/HTTPTest.php View File

@ -4,12 +4,14 @@ namespace p3k;
class HTTPTest extends HTTPCurl { class HTTPTest extends HTTPCurl {
private $_testDataPath; private $_testDataPath;
private $_redirects_remaining;
public function __construct($testDataPath) { public function __construct($testDataPath) {
$this->_testDataPath = $testDataPath; $this->_testDataPath = $testDataPath;
} }
public function get($url, $headers=[]) { public function get($url, $headers=[]) {
$this->_redirects_remaining = $this->max_redirects;
$parts = parse_url($url); $parts = parse_url($url);
unset($parts['fragment']); unset($parts['fragment']);
$url = \build_url($parts); $url = \build_url($parts);
@ -60,6 +62,19 @@ class HTTPTest extends HTTPCurl {
if(array_key_exists('Location', $parsedHeaders)) { if(array_key_exists('Location', $parsedHeaders)) {
$effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']); $effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']);
if($this->_redirects_remaining > 0) {
$this->_redirects_remaining--;
return $this->_read_file($effectiveUrl);
} else {
return [
'code' => 0,
'headers' => $parsedHeaders,
'body' => $body,
'error' => 'too_many_redirects',
'error_description' => '',
'url' => $effectiveUrl
];
}
} else { } else {
$effectiveUrl = $url; $effectiveUrl = $url;
} }

+ 106
- 0
tests/FetchTest.php View File

@ -0,0 +1,106 @@
<?php
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
/**
 * Exercises the fetch path of the Parse controller against canned HTTP
 * responses: redirect limits, the effective URL after redirects, and the
 * HTTP status code reported back in the `url` / `code` response properties.
 */
class FetchTest extends PHPUnit_Framework_TestCase {

  /**
   * Parse controller under test. Declared explicitly so setUp() does not
   * create an undeclared dynamic property (deprecated as of PHP 8.2).
   */
  private $client;

  /**
   * Builds a fresh controller for every test and replaces its HTTP client
   * with p3k\HTTPTest pointed at the fixtures under tests/data/.
   */
  public function setUp() {
    $this->client = new Parse();
    $this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/');
    // NOTE(review): `mc` is presumably the cache handle; null appears to disable it - confirm against Parse
    $this->client->mc = null;
  }

  /**
   * Runs the controller's parse() action with the given query parameters.
   *
   * @param array $params Request parameters (e.g. `url`, `max_redirects`).
   * @return Response The response object returned by the controller.
   */
  private function parse($params) {
    $request = new Request($params);
    $response = new Response();
    return $this->client->parse($request, $response);
  }

  /**
   * Runs parse(), asserts that the API itself answered with HTTP 200, and
   * returns the decoded JSON body.
   *
   * @param array $params Request parameters passed through to parse().
   * @return mixed The JSON-decoded response body (an object for valid responses).
   */
  private function fetch($params) {
    $response = $this->parse($params);

    $body = $response->getContent();
    $this->assertEquals(200, $response->getStatusCode());
    return json_decode($body);
  }

  /**
   * Fixtures /3 and /2 each need more than one redirect to reach /0, so a
   * request with `max_redirects` of 1 must fail with `too_many_redirects`.
   */
  public function testRedirectLimit() {
    $urls = [
      'http://redirect.example.com/3',
      'http://redirect.example.com/2',
    ];

    foreach($urls as $url) {
      $data = $this->fetch([
        'url' => $url,
        'max_redirects' => 1
      ]);

      $this->assertObjectHasAttribute('error', $data);
      $this->assertEquals('too_many_redirects', $data->error);
    }
  }

  /**
   * Fixture /2 redirects to /1 and then to /0; with `max_redirects` of 2 the
   * final page is parsed and `url` reports the effective (final) URL.
   */
  public function testRedirectUnderLimit() {
    $url = 'http://redirect.example.com/2';
    $data = $this->fetch([
      'url' => $url,
      'max_redirects' => 2
    ]);

    $this->assertObjectNotHasAttribute('error', $data);
    $this->assertEquals(200, $data->code);
    $this->assertEquals('The Final Page', $data->data->name);
    $this->assertEquals('http://redirect.example.com/0', $data->url);
  }

  /**
   * A status code without dedicated error handling (418 here) is passed
   * through in `code` without an `error` property.
   */
  public function testReturnsHTTPStatusCode() {
    $url = 'http://redirect.example.com/code-418';
    $data = $this->fetch([
      'url' => $url
    ]);

    $this->assertObjectNotHasAttribute('error', $data);
    $this->assertEquals($url, $data->url);
    $this->assertEquals(418, $data->code);
  }

  /**
   * A 403 from the remote URL yields the `forbidden` error, still including
   * `url` and `code`.
   */
  public function testReturnsForbidden() {
    $url = 'http://redirect.example.com/code-403';
    $data = $this->fetch([
      'url' => $url
    ]);

    $this->assertObjectHasAttribute('error', $data);
    $this->assertEquals('forbidden', $data->error);
    $this->assertEquals($url, $data->url);
    $this->assertEquals(403, $data->code);
  }

  /**
   * A 401 from the remote URL yields the `unauthorized` error, still
   * including `url` and `code`.
   */
  public function testReturnsUnauthorized() {
    $url = 'http://redirect.example.com/code-401';
    $data = $this->fetch([
      'url' => $url
    ]);

    $this->assertObjectHasAttribute('error', $data);
    $this->assertEquals('unauthorized', $data->error);
    $this->assertEquals($url, $data->url);
    $this->assertEquals(401, $data->code);
  }

}

+ 4
- 0
tests/TwitterTest.php View File

@ -21,6 +21,8 @@ class TwitterTest extends PHPUnit_Framework_TestCase {
private function loadTweet($id) { private function loadTweet($id) {
$url = 'https://twitter.com/_/status/'.$id; $url = 'https://twitter.com/_/status/'.$id;
$json = file_get_contents(dirname(__FILE__).'/data/api.twitter.com/'.$id.'.json'); $json = file_get_contents(dirname(__FILE__).'/data/api.twitter.com/'.$id.'.json');
$parsed = json_decode($json);
$url = 'https://twitter.com/'.$parsed->user->screen_name.'/status/'.$id;
return [$url, $json]; return [$url, $json];
} }
@ -51,6 +53,8 @@ class TwitterTest extends PHPUnit_Framework_TestCase {
$data = $this->parse(['url' => $url, 'json' => $json]); $data = $this->parse(['url' => $url, 'json' => $json]);
$this->assertEquals(200, $data['code']);
$this->assertEquals('https://twitter.com/pkdev/status/818913630569664512', $data['url']);
$this->assertEquals('entry', $data['data']['type']); $this->assertEquals('entry', $data['data']['type']);
$this->assertEquals('A tweet with a URL https://indieweb.org/ #and #some #hashtags', $data['data']['content']['text']); $this->assertEquals('A tweet with a URL https://indieweb.org/ #and #some #hashtags', $data['data']['content']['text']);
$this->assertContains('and', $data['data']['category']); $this->assertContains('and', $data['data']['category']);

+ 16
- 0
tests/data/redirect.example.com/0 View File

@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>The Final Page</title>
</head>
<body class="h-entry">
<h2 class="p-name">The Final Page</h2>
<p class="e-content">This is the final page.</p>
<a href="" class="u-url"></a>
</body>
</html>

+ 15
- 0
tests/data/redirect.example.com/1 View File

@ -0,0 +1,15 @@
HTTP/1.1 301 Moved Permanently
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: http://redirect.example.com/0
<html>
<head>
<title>Moved</title>
</head>
<body>
This page has moved
</body>
</html>

+ 15
- 0
tests/data/redirect.example.com/2 View File

@ -0,0 +1,15 @@
HTTP/1.1 301 Moved Permanently
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: http://redirect.example.com/1
<html>
<head>
<title>Moved</title>
</head>
<body>
This page has moved
</body>
</html>

+ 15
- 0
tests/data/redirect.example.com/3 View File

@ -0,0 +1,15 @@
HTTP/1.1 301 Moved Permanently
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: http://redirect.example.com/2
<html>
<head>
<title>Moved</title>
</head>
<body>
This page has moved
</body>
</html>

+ 14
- 0
tests/data/redirect.example.com/code-401 View File

@ -0,0 +1,14 @@
HTTP/1.1 401 Unauthorized
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Unauthorized</title>
</head>
<body>
Unauthorized
</body>
</html>

+ 14
- 0
tests/data/redirect.example.com/code-403 View File

@ -0,0 +1,14 @@
HTTP/1.1 403 Forbidden
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Forbidden</title>
</head>
<body>
Forbidden
</body>
</html>

+ 14
- 0
tests/data/redirect.example.com/code-418 View File

@ -0,0 +1,14 @@
HTTP/1.1 418 I'm a Teapot
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>I'm a Teapot</title>
</head>
<body>
I'm a Teapot
</body>
</html>

Loading…
Cancel
Save