From 8dc0caa4d0b619c93b653dfde7c6f33964e9f96e Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Wed, 2 Mar 2016 16:46:27 -0800 Subject: [PATCH] use effective URL after following redirects when comparing URLs --- controllers/Parse.php | 4 ++-- lib/HTTPCurl.php | 3 +++ lib/HTTPStream.php | 1 + lib/HTTPTest.php | 15 ++++++++++--- tests/ParseTest.php | 22 +++++++++++++++++++ .../h-entry-redirect-with-h-card-sibling | 20 +++++++++++++++++ .../h-entry-with-h-card-sibling | 18 +++++++++++++++ 7 files changed, 78 insertions(+), 5 deletions(-) create mode 100644 tests/data/source.example.com/h-entry-redirect-with-h-card-sibling create mode 100644 tests/data/source.example.com/h-entry-with-h-card-sibling diff --git a/controllers/Parse.php b/controllers/Parse.php index 5cca349..3abfa1b 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -124,10 +124,10 @@ class Parse { } // Now start pulling in the data from the page. Start by looking for microformats2 - $mf2 = mf2\Parse($result['body'], $url); + $mf2 = mf2\Parse($result['body'], $result['url']); if($mf2 && count($mf2['items']) > 0) { - $data = Formats\Mf2::parse($mf2, $url, $this->http); + $data = Formats\Mf2::parse($mf2, $result['url'], $this->http); if($data) { return $this->respond($response, 200, $data); } diff --git a/lib/HTTPCurl.php b/lib/HTTPCurl.php index 6c0ed29..c9d8c60 100644 --- a/lib/HTTPCurl.php +++ b/lib/HTTPCurl.php @@ -18,6 +18,7 @@ class HTTPCurl { 'error' => self::error_string_from_code(curl_errno($ch)), 'error_description' => curl_error($ch), 'error_code' => curl_errno($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), ); } @@ -36,6 +37,7 @@ class HTTPCurl { 'error' => self::error_string_from_code(curl_errno($ch)), 'error_description' => curl_error($ch), 'error_code' => curl_errno($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), ); } @@ -50,6 +52,7 @@ class HTTPCurl { 'error' => self::error_string_from_code(curl_errno($ch)), 'error_description' => curl_error($ch), 'error_code' => curl_errno($ch), + 'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL), ); } diff --git a/lib/HTTPStream.php b/lib/HTTPStream.php index cba227b..9ca5636 100644 --- a/lib/HTTPStream.php +++ b/lib/HTTPStream.php @@ -68,6 +68,7 @@ class HTTPStream { 'body' => $body, 'error' => $error ? $error['code'] : false, 'error_description' => $error ? $error['description'] : false, + 'url' => $url, ); } diff --git a/lib/HTTPTest.php b/lib/HTTPTest.php index b005a2e..e27d1ad 100644 --- a/lib/HTTPTest.php +++ b/lib/HTTPTest.php @@ -23,7 +23,8 @@ class HTTPTest extends HTTPCurl { 'code' => $response['code'], 'headers' => $response['headers'], 'error' => '', - 'error_description' => '' + 'error_description' => '', + 'url' => $response['url'] ); } @@ -46,13 +47,21 @@ class HTTPTest extends HTTPCurl { } $headers = preg_replace('/HTTP\/1\.1 \d+ .+/', '', $headers); + $parsedHeaders = self::parse_headers($headers); + + if(array_key_exists('Location', $parsedHeaders)) { + $effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']); + } else { + $effectiveUrl = $url; + } return array( 'code' => $code, - 'headers' => self::parse_headers($headers), + 'headers' => $parsedHeaders, 'body' => $body, 'error' => '', - 'error_description' => '' + 'error_description' => '', + 'url' => $effectiveUrl ); } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index a97eaea..5714dd5 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -213,4 +213,26 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('Primary Post', $data['data']['name']); } + public function testHEntryWithHCardSibling() { + $url = 'http://source.example.com/h-entry-with-h-card-sibling'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('Hello World', $data['data']['content']['text']); + } + + public function testHEntryRedirectWithHCardSibling() { + $url = 'http://source.example.com/h-entry-redirect-with-h-card-sibling'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('entry', $data['data']['type']); + $this->assertEquals('Hello World', $data['data']['content']['text']); + } + } diff --git a/tests/data/source.example.com/h-entry-redirect-with-h-card-sibling b/tests/data/source.example.com/h-entry-redirect-with-h-card-sibling new file mode 100644 index 0000000..afdd7b2 --- /dev/null +++ b/tests/data/source.example.com/h-entry-redirect-with-h-card-sibling @@ -0,0 +1,20 @@ +HTTP/1.1 301 Moved Permanently +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive +Location: /h-entry-with-h-card-sibling + + + + Test + + +
+

Hello World

+ permalink + +
+ Author Name + + diff --git a/tests/data/source.example.com/h-entry-with-h-card-sibling b/tests/data/source.example.com/h-entry-with-h-card-sibling new file mode 100644 index 0000000..c117536 --- /dev/null +++ b/tests/data/source.example.com/h-entry-with-h-card-sibling @@ -0,0 +1,18 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +
+

Hello World

+ permalink +
+ Author Name + +