Browse Source

return original input URL for feed discovery when 302 is found

closes #86
pull/93/head
Aaron Parecki 4 years ago
parent
commit
5b2b4f3142
No known key found for this signature in database GPG Key ID: 276C2817346D6056
8 changed files with 132 additions and 7 deletions
  1. +1
    -1
      composer.json
  2. +6
    -6
      composer.lock
  3. +9
    -0
      lib/XRay/Feeds.php
  4. +28
    -0
      tests/FindFeedsTest.php
  5. +15
    -0
      tests/data/feed.example.com/permanent-redirect
  6. +29
    -0
      tests/data/feed.example.com/permanent-redirect-target
  7. +15
    -0
      tests/data/feed.example.com/temporary-redirect
  8. +29
    -0
      tests/data/feed.example.com/temporary-redirect-target

+ 1
- 1
composer.json View File

@ -10,7 +10,7 @@
"indieweb/link-rel-parser": "0.1.*", "indieweb/link-rel-parser": "0.1.*",
"dg/twitter-php": "3.6.*", "dg/twitter-php": "3.6.*",
"p3k/timezone": "*", "p3k/timezone": "*",
"p3k/http": ">=0.1.7",
"p3k/http": ">=0.1.8",
"cebe/markdown": "1.1.*", "cebe/markdown": "1.1.*",
"p3k/picofeed": ">=0.1.38", "p3k/picofeed": ">=0.1.38",
"facebook/graph-sdk": "^5.5", "facebook/graph-sdk": "^5.5",

+ 6
- 6
composer.lock View File

@ -4,7 +4,7 @@
"Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file",
"This file is @generated automatically" "This file is @generated automatically"
], ],
"content-hash": "38eef403bd3151bf73b7794372f4d8cd",
"content-hash": "fe5c8f1b6a8a559b0b76aa623ca3aef4",
"packages": [ "packages": [
{ {
"name": "cebe/markdown", "name": "cebe/markdown",
@ -382,16 +382,16 @@
}, },
{ {
"name": "p3k/http", "name": "p3k/http",
"version": "0.1.7",
"version": "0.1.8",
"source": { "source": {
"type": "git", "type": "git",
"url": "https://github.com/aaronpk/p3k-http.git", "url": "https://github.com/aaronpk/p3k-http.git",
"reference": "1826647c4902a18dea5ec532f21509ba4d51210b"
"reference": "a43977636d7a930080009eddda06994037c88fd7"
}, },
"dist": { "dist": {
"type": "zip", "type": "zip",
"url": "https://api.github.com/repos/aaronpk/p3k-http/zipball/1826647c4902a18dea5ec532f21509ba4d51210b",
"reference": "1826647c4902a18dea5ec532f21509ba4d51210b",
"url": "https://api.github.com/repos/aaronpk/p3k-http/zipball/a43977636d7a930080009eddda06994037c88fd7",
"reference": "a43977636d7a930080009eddda06994037c88fd7",
"shasum": "" "shasum": ""
}, },
"require": { "require": {
@ -416,7 +416,7 @@
], ],
"description": "A simple wrapper API around the PHP curl functions", "description": "A simple wrapper API around the PHP curl functions",
"homepage": "https://github.com/aaronpk/p3k-http", "homepage": "https://github.com/aaronpk/p3k-http",
"time": "2018-03-04T15:21:58+00:00"
"time": "2019-06-15T20:49:26+00:00"
}, },
{ {
"name": "p3k/picofeed", "name": "p3k/picofeed",

+ 9
- 0
lib/XRay/Feeds.php View File

@ -103,6 +103,15 @@ class Feeds {
} }
} }
// Check if the feed URL was a temporary redirect.
// A final URL that differs from the input URL presumably means a redirect was followed — TODO confirm against p3k\http.
if($url != $result['url']) {
// p3k\http doesn't return the intermediate HTTP codes, so we have to fetch the input URL again without following redirects
// NOTE(review): the redirect limit is set to 0 here and is not restored in the visible code — confirm that
// later requests made through $this->http are not expected to follow redirects.
$this->http->set_max_redirects(0);
$check = $this->http->get($url);
// Only a 302 restores the original input URL; any other status keeps the final URL already in $result.
// NOTE(review): 307 is also a temporary redirect — confirm whether it should be handled the same way.
if($check['code'] == 302)
$result['url'] = $url;
}
$parsed = Formats\HTML::parse($this->http, $result, array_merge($opts, ['expect'=>'feed'])); $parsed = Formats\HTML::parse($this->http, $result, array_merge($opts, ['expect'=>'feed']));
if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') { if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
$feeds[] = [ $feeds[] = [

+ 28
- 0
tests/FindFeedsTest.php View File

@ -138,6 +138,34 @@ class FindFeedsTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('atom', $feeds[0]->type); $this->assertEquals('atom', $feeds[0]->type);
} }
// Input URL is a temporary (302) redirect to another page.
// Feed discovery must report the original input URL, not the redirect target.
public function testInputIsTemporaryRedirect() {
  $url = 'http://feed.example.com/temporary-redirect';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $feeds = json_decode($body)->feeds;

  // assertCount fails explicitly on a non-countable value and reports the actual size on mismatch
  $this->assertCount(1, $feeds);
  $this->assertEquals('http://feed.example.com/temporary-redirect', $feeds[0]->url);
  $this->assertEquals('microformats', $feeds[0]->type);
}
// Input URL is a permanent (301) redirect to another page.
// Feed discovery must report the redirect target, since the original URL has moved for good.
public function testInputIsPermanentRedirect() {
  $url = 'http://feed.example.com/permanent-redirect';
  $response = $this->parse(['url' => $url]);

  $body = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $feeds = json_decode($body)->feeds;

  // assertCount fails explicitly on a non-countable value and reports the actual size on mismatch
  $this->assertCount(1, $feeds);
  $this->assertEquals('http://feed.example.com/permanent-redirect-target', $feeds[0]->url);
  $this->assertEquals('microformats', $feeds[0]->type);
}
// input URL is an RSS feed // input URL is an RSS feed
public function testInputIsRSS() { public function testInputIsRSS() {
$url = 'http://feed.example.com/rss'; $url = 'http://feed.example.com/rss';

+ 15
- 0
tests/data/feed.example.com/permanent-redirect View File

@ -0,0 +1,15 @@
HTTP/1.1 301 Moved Permanently
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: http://feed.example.com/permanent-redirect-target
<html>
<head>
<title>Moved</title>
</head>
<body>
This page has moved
</body>
</html>

+ 29
- 0
tests/data/feed.example.com/permanent-redirect-target View File

@ -0,0 +1,29 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<ul>
<li class="h-entry">
<a href="/1" class="u-url p-name">One</a>
</li>
<li class="h-entry">
<a href="/2" class="u-url p-name">Two</a>
</li>
<li class="h-entry">
<a href="/3" class="u-url p-name">Three</a>
</li>
<li class="h-entry">
<a href="/4" class="u-url p-name">Four</a>
</li>
</ul>
</body>
</html>

+ 15
- 0
tests/data/feed.example.com/temporary-redirect View File

@ -0,0 +1,15 @@
HTTP/1.1 302 Found
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
Location: http://feed.example.com/temporary-redirect-target
<html>
<head>
<title>Moved</title>
</head>
<body>
This page has moved
</body>
</html>

+ 29
- 0
tests/data/feed.example.com/temporary-redirect-target View File

@ -0,0 +1,29 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<ul>
<li class="h-entry">
<a href="/1" class="u-url p-name">One</a>
</li>
<li class="h-entry">
<a href="/2" class="u-url p-name">Two</a>
</li>
<li class="h-entry">
<a href="/3" class="u-url p-name">Three</a>
</li>
<li class="h-entry">
<a href="/4" class="u-url p-name">Four</a>
</li>
</ul>
</body>
</html>

Loading…
Cancel
Save