Browse Source

also check img/video/audio for target URL

pull/39/head
Aaron Parecki 8 years ago
parent
commit
c59ab9a2d6
No known key found for this signature in database GPG Key ID: 276C2817346D6056
5 changed files with 103 additions and 6 deletions
  1. +25
    -6
      controllers/Parse.php
  2. +33
    -0
      tests/ParseTest.php
  3. +15
    -0
      tests/data/source.example.com/link-is-audio
  4. +15
    -0
      tests/data/source.example.com/link-is-img
  5. +15
    -0
      tests/data/source.example.com/link-is-video

+ 25
- 6
controllers/Parse.php View File

@ -159,15 +159,27 @@ class Parse {
$xpath = new DOMXPath($doc); $xpath = new DOMXPath($doc);
$found = []; $found = [];
foreach($xpath->query('//a[@href]') as $href) {
$u = $href->getAttribute('href');
if($target) {
# target parameter was provided
if($target) {
self::xPathFindNodeWithAttribute($xpath, 'a', 'href', function($u) use($target, &$found){
if($u == $target) { if($u == $target) {
$found[$u] = null; $found[$u] = null;
} }
}
});
self::xPathFindNodeWithAttribute($xpath, 'img', 'src', function($u) use($target, &$found){
if($u == $target) {
$found[$u] = null;
}
});
self::xPathFindNodeWithAttribute($xpath, 'video', 'src', function($u) use($target, &$found){
if($u == $target) {
$found[$u] = null;
}
});
self::xPathFindNodeWithAttribute($xpath, 'audio', 'src', function($u) use($target, &$found){
if($u == $target) {
$found[$u] = null;
}
});
} }
if(!$found) { if(!$found) {
@ -198,4 +210,11 @@ class Parse {
]); ]);
} }
/**
 * Runs $callback once for every element named $node that has the attribute
 * $attr, handing it that attribute's value.
 *
 * @param DOMXPath $xpath    evaluator the query is run against
 * @param string   $node     element name to look for at any depth
 * @param string   $attr     attribute the element must carry
 * @param callable $callback called with each attribute value; its return value is ignored
 */
private static function xPathFindNodeWithAttribute($xpath, $node, $attr, $callback) {
  // $node and $attr are spliced into the expression verbatim (no escaping).
  $expression = '//'.$node.'[@'.$attr.']';
  $matches = $xpath->query($expression);
  foreach($matches as $element) {
    $callback($element->getAttribute($attr));
  }
}
} }

+ 33
- 0
tests/ParseTest.php View File

@ -73,6 +73,39 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('This page has a link to <a href="http://target.example.com">target.example.com</a> and some <b>formatted text</b>.', $data->data->content->html); $this->assertEquals('This page has a link to <a href="http://target.example.com">target.example.com</a> and some <b>formatted text</b>.', $data->data->content->html);
} }
/**
 * Parses the link-is-img source page with a target of
 * http://target.example.com/photo.jpg and expects a 200 response whose
 * data has no "name" property and whose content text is the page sentence.
 */
public function testFindTargetLinkIsImage() {
  $result = $this->parse([
    'url' => 'http://source.example.com/link-is-img',
    'target' => 'http://target.example.com/photo.jpg'
  ]);
  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());

  $parsed = json_decode($json);
  $this->assertObjectNotHasAttribute('name', $parsed->data);
  $this->assertEquals('This page has an img tag with the target URL.', $parsed->data->content->text);
}
/**
 * Parses the link-is-video source page with a target of
 * http://target.example.com/movie.mp4 and expects a 200 response whose
 * data has no "name" property and whose content text is the page sentence.
 */
public function testFindTargetLinkIsVideo() {
  $result = $this->parse([
    'url' => 'http://source.example.com/link-is-video',
    'target' => 'http://target.example.com/movie.mp4'
  ]);
  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());

  $parsed = json_decode($json);
  $this->assertObjectNotHasAttribute('name', $parsed->data);
  $this->assertEquals('This page has a video tag with the target URL.', $parsed->data->content->text);
}
/**
 * Parses the link-is-audio source page with a target of
 * http://target.example.com/media.mp3 and expects a 200 response whose
 * data has no "name" property and whose content text is the page sentence.
 */
public function testFindTargetLinkIsAudio() {
  $result = $this->parse([
    'url' => 'http://source.example.com/link-is-audio',
    'target' => 'http://target.example.com/media.mp3'
  ]);
  $json = $result->getContent();
  $this->assertEquals(200, $result->getStatusCode());

  $parsed = json_decode($json);
  $this->assertObjectNotHasAttribute('name', $parsed->data);
  $this->assertEquals('This page has an audio tag with the target URL.', $parsed->data->content->text);
}
public function testTextContent() { public function testTextContent() {
$url = 'http://source.example.com/text-content'; $url = 'http://source.example.com/text-content';
$response = $this->parse(['url' => $url]); $response = $this->parse(['url' => $url]);

+ 15
- 0
tests/data/source.example.com/link-is-audio View File

@ -0,0 +1,15 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">This page has an audio tag with the target URL.</p>
<audio class="u-audio" src="http://target.example.com/media.mp3"></audio>
</body>
</html>

+ 15
- 0
tests/data/source.example.com/link-is-img View File

@ -0,0 +1,15 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">This page has an img tag with the target URL.</p>
<img class="u-photo" src="http://target.example.com/photo.jpg">
</body>
</html>

+ 15
- 0
tests/data/source.example.com/link-is-video View File

@ -0,0 +1,15 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<p class="e-content">This page has a video tag with the target URL.</p>
<video class="u-video" src="http://target.example.com/movie.mp4"></video>
</body>
</html>

Loading…
Cancel
Save