Browse Source

fix whitespace handling for br tags in html

pull/64/head v1.4.20
Aaron Parecki 3 years ago
parent
commit
a50cd6284b
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 46 additions and 14 deletions
  1. +2
    -1
      lib/XRay/Formats/Format.php
  2. +26
    -13
      tests/SanitizeTest.php
  3. +16
    -0
      tests/data/sanitize.example/entry-with-br-tags
  4. +2
    -0
      tests/data/sanitize.example/entry-with-valid-tags

+ 2
- 1
lib/XRay/Formats/Format.php View File

@ -39,6 +39,7 @@ abstract class Format implements iFormat {
'a',
'abbr',
'b',
'br',
'code',
'del',
'em',
@ -93,7 +94,7 @@ abstract class Format implements iFormat {
$sanitized = $purifier->purify($html);
$sanitized = str_replace("&#xD;","\r",$sanitized);
$sanitized = html_entity_decode($sanitized);
return trim(str_replace('<br>',"\n", $sanitized));
return trim(str_replace(['<br>','<br />'],"\n", $sanitized));
}

+ 26
- 13
tests/SanitizeTest.php View File

@ -28,15 +28,15 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$html = $data['data']['content']['html'];
$this->assertEquals('entry', $data['data']['type']);
$this->assertContains('This content has only valid tags.', $html);
$this->assertContains('<a href="http://sanitize.example/example">links</a>,', $html, '<a> missing');
$this->assertContains('<abbr>abbreviations</abbr>,', $html, '<abbr> missing');
$this->assertContains('<b>bold</b>,', $html, '<b> missing');
$this->assertContains('<code>inline code</code>,', $html, '<code> missing');
$this->assertContains('<del>delete</del>,', $html, '<del> missing');
$this->assertContains('<em>emphasis</em>,', $html, '<em> missing');
$this->assertContains('<i>italics</i>,', $html, '<i> missing');
$this->assertContains('<img src="http://sanitize.example/example.jpg" alt="images are allowed" />', $html, '<img> missing');
$this->assertContains('This content has only valid tags.', $html);
$this->assertContains('<a href="http://sanitize.example/example">links</a>,', $html, '<a> missing');
$this->assertContains('<abbr>abbreviations</abbr>,', $html, '<abbr> missing');
$this->assertContains('<b>bold</b>,', $html, '<b> missing');
$this->assertContains('<code>inline code</code>,', $html, '<code> missing');
$this->assertContains('<del>delete</del>,', $html, '<del> missing');
$this->assertContains('<em>emphasis</em>,', $html, '<em> missing');
$this->assertContains('<i>italics</i>,', $html, '<i> missing');
$this->assertContains('<img src="http://sanitize.example/example.jpg" alt="images are allowed" />', $html, '<img> missing');
$this->assertContains('<q>inline quote</q>,', $html, '<q> missing');
$this->assertContains('<strike>strikethrough</strike>,', $html, '<strike> missing');
$this->assertContains('<strong>strong text</strong>,', $html, '<strong> missing');
@ -53,6 +53,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertContains('<h6>Six</h6>', $html, '<h6> missing');
$this->assertContains('<ul>', $html, '<ul> missing');
$this->assertContains('<li>One</li>', $html, '<li> missing');
$this->assertContains('<p>We should allow<br />break<br />tags too</p>', $html, '<br> missing');
}
public function testRemovesUnsafeTags() {
@ -187,7 +188,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
public function testPhotoInContentEmptyAltAttribute() {
// https://github.com/aaronpk/XRay/issues/52
$url = 'http://sanitize.example/photo-in-content-empty-alt';
$response = $this->parse(['url' => $url]);
@ -203,7 +204,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
public function testPhotoInContentWithAlt() {
// https://github.com/aaronpk/XRay/issues/52
$url = 'http://sanitize.example/photo-in-content-with-alt';
$response = $this->parse(['url' => $url]);
@ -214,7 +215,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);
$this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text);
$this->assertEquals('This is a photo post with an <code>img</code> tag inside the content.', $data->data->content->html);
$this->assertEquals('This is a photo post with an <code>img</code> tag inside the content.', $data->data->content->html);
}
public function testPhotoInContentWithNoText() {
@ -274,7 +275,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
#print_r($data->data);
$this->assertEquals('🌆 Made it to the first #NPSF #earlygang of the year, did in-betweeners abs, and 6:30 workout with a brutal burnout that was really its own workout. But wow pretty sunrise. Plus 50+ deg F? I’ll take it. #100PDPD#justshowup #darknesstodawn #wakeupthesun #fromwhereirun #NovemberProject #sunrise #latergram #nofilter', $data->data->content->text);
$this->assertEquals('🌆 Made it to the first #NPSF #earlygang of the year, did in-betweeners abs, and 6:30 workout with a brutal burnout that was really its own workout. But wow pretty sunrise. Plus 50+ deg F? I’ll take it. #100PDPD'."\n\n".'#justshowup #darknesstodawn #wakeupthesun #fromwhereirun #NovemberProject #sunrise #latergram #nofilter', $data->data->content->text);
$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('https://igx.4sqi.net/img/general/original/476_g7yruXflacsGr7PyVmECefyTBMB_R99zmPQxW7pftzA.jpg', $data->data->photo[0]);
$this->assertEquals('https://igx.4sqi.net/img/general/original/476_zM3UgU9JHNhom907Ac_1WCEcUhGOJZaNWGlRmev86YA.jpg', $data->data->photo[1]);
@ -302,4 +303,16 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
}
*/
/**
 * Parses the entry-with-br-tags fixture and checks both renderings of its content:
 * the HTML value must contain two consecutive `<br />` tags, and the plain-text
 * value must contain two newline characters in the same position.
 */
public function testWhitespaceWithBreakTags() {
  $response = $this->parse(['url' => 'http://sanitize.example/entry-with-br-tags']);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $content = $parsed->data->content;

  // The HTML output keeps the break tags, written in the self-closing form.
  $this->assertEquals(
    'This content has two break tags to indicate a paragraph break.<br /><br />This is how tantek\'s autolinker works.',
    $content->html
  );

  // The text output has one newline per break tag.
  $this->assertEquals(
    "This content has two break tags to indicate a paragraph break.\n\nThis is how tantek's autolinker works.",
    $content->text
  );
}
}

+ 16
- 0
tests/data/sanitize.example/entry-with-br-tags View File

@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 02 Mar 2018 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<div class="e-content">
This content has two break tags to indicate a paragraph break.<br class="auto-break"><br class="auto-break">This is how tantek's autolinker works.
</div>
</body>
</html>

+ 2
- 0
tests/data/sanitize.example/entry-with-valid-tags View File

@ -19,6 +19,8 @@ for code examples and such
<p>Paragraph tags are allowed</p>
<p>We should allow<br>break<br />tags too</p>
<h1>One</h1>
<h2>Two</h2>
<h3>Three</h3>

Loading…
Cancel
Save