Browse Source

accept webmention verification for pages with only mf1

If the document contains a link to the target, but that link is not in the parsed result, don't return an error, but also don't return the parsed document: when the page has no mf2, the response data is reduced to type `unknown` instead.

closes #150
pull/97/head v1.10.5
Aaron Parecki 3 years ago
parent
commit
37e297d114
No known key found for this signature in database GPG Key ID: 276C2817346D6056
8 changed files with 163 additions and 1 deletions
  1. +4
    -0
      controllers/Parse.php
  2. +5
    -1
      lib/XRay/Formats/HTML.php
  3. +19
    -0
      lib/XRay/Parser.php
  4. +69
    -0
      tests/ParseTest.php
  5. +17
    -0
      tests/data/source.example.com/target-test-link-outside-h-entry
  6. +17
    -0
      tests/data/source.example.com/target-test-link-outside-valid-mf1
  7. +16
    -0
      tests/data/source.example.com/target-test-only-bad-mf1
  8. +16
    -0
      tests/data/source.example.com/target-test-only-good-mf1

+ 4
- 0
controllers/Parse.php View File

@ -67,6 +67,10 @@ class Parse {
$this->_pretty = true;
}
if($request->get('include-mf1')) {
$opts['include-mf1'] = $request->get('include-mf1') == 'false' ? false : true;
}
$url = $request->get('url');
$html = $request->get('html') ?: $request->get('body');

+ 5
- 1
lib/XRay/Formats/HTML.php View File

@ -58,7 +58,11 @@ class HTML extends Format {
}
}
$mf2 = \mf2\Parse($html, $url);
$includeMF1 = true;
if(isset($opts['include-mf1']) && $opts['include-mf1'] == false)
$includeMF1 = false;
$mf2 = \Mf2\parse($html, $url, $includeMF1);
$canonical = false;

+ 19
- 0
lib/XRay/Parser.php View File

@ -29,6 +29,25 @@ class Parser {
} else {
$found = $this->_findLinkInTree($opts['target'], $document['data']);
$error_description = 'The Microformats at the source URL do not contain a link to the target URL. Check the source URL in a Microformats parser such as php.microformats.io';
if(!$found && isset($document['html'])) {
// If no link was found in the parsed mf2 tree, check for a link in the HTML
$found = $this->_findLinkInHTML($opts['target'], $document['html']);
// If there is a link, and if the HTML document has no mf2, then downgrade to a regular mention
if($found) {
$mf2Data = Formats\HTML::parse($this->http, $http_response, ['include-mf1'=>false]);
if(isset($mf2Data['data']['type']) && $mf2Data['data']['type'] == 'unknown') {
// Since the link was found in the HTML, but not in the parsed tree, it shouldn't return the parsed document
$document['data'] = [
'type' => 'unknown'
];
} else {
// Otherwise, the document did have mf2, but the link wasn't in it (checked earlier), so set found=false
$found = false;
}
}
}
}
if(!$found) {

+ 69
- 0
tests/ParseTest.php View File

@ -1135,4 +1135,73 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('https://aaronparecki.com/2019/12/01/10/homeautomation', $data['data']['url']);
$this->assertEquals('https://aaronparecki.com/2019/12/01/10/homeautomation', $data['data']['rels']['canonical']);
}
/**
 * Parses the "target-test-link-outside-h-entry" fixture with a target URL
 * and expects a 200 response whose JSON body reports a no_link_found error.
 */
public function testTargetLinkOutsideHEntry() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-link-outside-h-entry',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  $this->assertEquals('no_link_found', $result['error']);
}
/**
 * Parses the "target-test-only-bad-mf1" fixture with a target URL and
 * expects a 200 response whose parsed data has type "unknown".
 */
public function testTargetLinkWithBadMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-bad-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  $this->assertEquals('unknown', $result['data']['type']);
}
/**
 * Parses the "target-test-only-good-mf1" fixture with a target URL and
 * expects a 200 response describing an entry whose HTML content is the
 * link to the target.
 */
public function testTargetLinkWithValidMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  $entry = $result['data'];
  $this->assertEquals('entry', $entry['type']);
  $this->assertEquals('<a href="https://target.example.com/">target</a>', $entry['content']['html']);
}
/**
 * Parses the "target-test-link-outside-valid-mf1" fixture with a target URL
 * and expects a 200 response whose parsed data has type "unknown".
 */
public function testTargetLinkOutsideValidMf1() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-link-outside-valid-mf1',
    'target' => 'https://target.example.com/',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  // The link exists in the HTML but not in the parsed tree, so the parsed
  // document must not be returned.
  $this->assertEquals('unknown', $result['data']['type']);
}
/**
 * Parses the "target-test-only-good-mf1" fixture with include-mf1 set to
 * the string "false" and expects a 200 response of type "unknown".
 */
public function testDisableMf1Parsing() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'include-mf1' => 'false',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  $this->assertEquals('unknown', $result['data']['type']);
}
/**
 * Parses the "target-test-only-good-mf1" fixture with include-mf1 set to
 * the string "true" and expects a 200 response of type "entry".
 */
public function testEnableMf1Parsing() {
  $response = $this->parse([
    'url' => 'http://source.example.com/target-test-only-good-mf1',
    'include-mf1' => 'true',
  ]);

  $this->assertEquals(200, $response->getStatusCode());

  $result = json_decode($response->getContent(), true);
  $this->assertEquals('entry', $result['data']['type']);
}
}

+ 17
- 0
tests/data/source.example.com/target-test-link-outside-h-entry View File

@ -0,0 +1,17 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="h-entry">
<p class="e-content">hello world</p>
</div>
<nav><a href="https://target.example.com/">target</a></nav>
</body>
</html>

+ 17
- 0
tests/data/source.example.com/target-test-link-outside-valid-mf1 View File

@ -0,0 +1,17 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p class="entry-content">hello world</p>
</div>
<nav><a href="https://target.example.com/">target</a></nav>
</body>
</html>

+ 16
- 0
tests/data/source.example.com/target-test-only-bad-mf1 View File

@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p><a href="https://target.example.com/">target</a></p>
</div>
</body>
</html>

+ 16
- 0
tests/data/source.example.com/target-test-only-good-mf1 View File

@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body>
<div class="hentry">
<p class="entry-content"><a href="https://target.example.com/">target</a></p>
</div>
</body>
</html>

Loading…
Cancel
Save