From 227311faa94323a0274ce1fe30011f39e4645c27 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Mon, 16 Jan 2017 13:03:04 -0800 Subject: [PATCH] check for meta-equiv HTTP deleted closes #16 --- controllers/Parse.php | 10 ++++++++++ tests/FetchTest.php | 13 +++++++++++++ tests/data/source.example.com/deleted | 15 +++++++++++++++ 3 files changed, 38 insertions(+) create mode 100644 tests/data/source.example.com/deleted diff --git a/controllers/Parse.php b/controllers/Parse.php index b7c2a60..1dc5322 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -226,6 +226,16 @@ class Parse { } } + // Check for meta http equiv and replace the status code if present + foreach($xpath->query('//meta[@http-equiv=\'status\']') as $el) { + $equivStatus = ''.$el->getAttribute('content'); + if($equivStatus && is_string($equivStatus)) { + if(preg_match('/^(\d+)/', $equivStatus, $match)) { + $result['code'] = (int)$match[1]; + } + } + } + // If the URL has a fragment ID, find the DOM starting at that node and parse it instead $html = $result['body']; diff --git a/tests/FetchTest.php b/tests/FetchTest.php index cf661b2..fe84f0c 100644 --- a/tests/FetchTest.php +++ b/tests/FetchTest.php @@ -103,4 +103,17 @@ class FetchTest extends PHPUnit_Framework_TestCase { $this->assertEquals(401, $data->code); } + public function testMetaEquivDeleted() { + $url = 'http://source.example.com/deleted'; + $response = $this->parse([ + 'url' => $url + ]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + $this->assertObjectNotHasAttribute('error', $data); + $this->assertEquals(410, $data->code); + $this->assertEquals('This post has been deleted.', $data->data->content->text); + } } diff --git a/tests/data/source.example.com/deleted b/tests/data/source.example.com/deleted new file mode 100644 index 0000000..868a1e2 --- /dev/null +++ b/tests/data/source.example.com/deleted @@ -0,0 +1,15 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + + +

This post has been deleted.

+ +