Browse Source

use effective URL after following redirects when comparing URLs

pull/39/head
Aaron Parecki 9 years ago
parent
commit
8dc0caa4d0
7 changed files with 78 additions and 5 deletions
  1. +2
    -2
      controllers/Parse.php
  2. +3
    -0
      lib/HTTPCurl.php
  3. +1
    -0
      lib/HTTPStream.php
  4. +12
    -3
      lib/HTTPTest.php
  5. +22
    -0
      tests/ParseTest.php
  6. +20
    -0
      tests/data/source.example.com/h-entry-redirect-with-h-card-sibling
  7. +18
    -0
      tests/data/source.example.com/h-entry-with-h-card-sibling

+ 2
- 2
controllers/Parse.php View File

@ -124,10 +124,10 @@ class Parse {
}
// Now start pulling in the data from the page. Start by looking for microformats2
$mf2 = mf2\Parse($result['body'], $url);
$mf2 = mf2\Parse($result['body'], $result['url']);
if($mf2 && count($mf2['items']) > 0) {
$data = Formats\Mf2::parse($mf2, $url, $this->http);
$data = Formats\Mf2::parse($mf2, $result['url'], $this->http);
if($data) {
return $this->respond($response, 200, $data);
}

+ 3
- 0
lib/HTTPCurl.php View File

@ -18,6 +18,7 @@ class HTTPCurl {
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
);
}
@ -36,6 +37,7 @@ class HTTPCurl {
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
);
}
@ -50,6 +52,7 @@ class HTTPCurl {
'error' => self::error_string_from_code(curl_errno($ch)),
'error_description' => curl_error($ch),
'error_code' => curl_errno($ch),
'url' => curl_getinfo($ch, CURLINFO_EFFECTIVE_URL),
);
}

+ 1
- 0
lib/HTTPStream.php View File

@ -68,6 +68,7 @@ class HTTPStream {
'body' => $body,
'error' => $error ? $error['code'] : false,
'error_description' => $error ? $error['description'] : false,
'url' => $url,
);
}

+ 12
- 3
lib/HTTPTest.php View File

@ -23,7 +23,8 @@ class HTTPTest extends HTTPCurl {
'code' => $response['code'],
'headers' => $response['headers'],
'error' => '',
'error_description' => ''
'error_description' => '',
'url' => $response['url']
);
}
@ -46,13 +47,21 @@ class HTTPTest extends HTTPCurl {
}
$headers = preg_replace('/HTTP\/1\.1 \d+ .+/', '', $headers);
$parsedHeaders = self::parse_headers($headers);
if(array_key_exists('Location', $parsedHeaders)) {
$effectiveUrl = \mf2\resolveUrl($url, $parsedHeaders['Location']);
} else {
$effectiveUrl = $url;
}
return array(
'code' => $code,
'headers' => self::parse_headers($headers),
'headers' => $parsedHeaders,
'body' => $body,
'error' => '',
'error_description' => ''
'error_description' => '',
'url' => $effectiveUrl
);
}

+ 22
- 0
tests/ParseTest.php View File

@ -213,4 +213,26 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('Primary Post', $data['data']['name']);
}
/**
 * Parses a page whose h-entry has an h-card as a sibling element and
 * verifies the response is a 200 describing an entry with the text
 * "Hello World".
 */
public function testHEntryWithHCardSibling() {
  $response = $this->parse(['url' => 'http://source.example.com/h-entry-with-h-card-sibling']);
  $this->assertEquals(200, $response->getStatusCode());

  // Decode the JSON body into an associative array before inspecting it.
  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('entry', $parsed['data']['type']);
  $this->assertEquals('Hello World', $parsed['data']['content']['text']);
}
/**
 * Same checks as the sibling h-card case, but the requested URL's fixture
 * is a 301 response carrying a Location header, so the URL that was asked
 * for differs from the one the page reports. The result must still be a
 * 200 describing an entry with the text "Hello World".
 */
public function testHEntryRedirectWithHCardSibling() {
  $response = $this->parse(['url' => 'http://source.example.com/h-entry-redirect-with-h-card-sibling']);
  $this->assertEquals(200, $response->getStatusCode());

  // Decode the JSON body into an associative array before inspecting it.
  $parsed = json_decode($response->getContent(), true);
  $this->assertEquals('entry', $parsed['data']['type']);
  $this->assertEquals('Hello World', $parsed['data']['content']['text']);
}
}

+ 20
- 0
tests/data/source.example.com/h-entry-redirect-with-h-card-sibling View File

@ -0,0 +1,20 @@
HTTP/1.1 301 Moved Permanently
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: /h-entry-with-h-card-sibling
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="h-entry">
<p class="p-content">Hello World</p>
<a href="/h-entry-with-h-card-sibling">permalink</a>
<!-- testing when the URL to this page is a redirect to the actual URL, so that the reported URL is different from what was fetched -->
</div>
<a href="/" class="h-card">Author Name</a>
</body>
</html>

+ 18
- 0
tests/data/source.example.com/h-entry-with-h-card-sibling View File

@ -0,0 +1,18 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="h-entry">
<p class="p-content">Hello World</p>
<a href="/h-entry-with-h-card-sibling">permalink</a>
</div>
<a href="/" class="h-card">Author Name</a>
</body>
</html>

Loading…
Cancel
Save