From a50cd6284b67560c57cd6f35c40e5d96ea1589e3 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Fri, 2 Mar 2018 18:59:36 -0800 Subject: [PATCH] fix whitespace handling for br tags in html --- lib/XRay/Formats/Format.php | 3 +- tests/SanitizeTest.php | 39 ++++++++++++------- .../data/sanitize.example/entry-with-br-tags | 16 ++++++++ .../sanitize.example/entry-with-valid-tags | 2 + 4 files changed, 46 insertions(+), 14 deletions(-) create mode 100644 tests/data/sanitize.example/entry-with-br-tags diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php index 41637ef..eba6d98 100644 --- a/lib/XRay/Formats/Format.php +++ b/lib/XRay/Formats/Format.php @@ -39,6 +39,7 @@ abstract class Format implements iFormat { 'a', 'abbr', 'b', + 'br', 'code', 'del', 'em', @@ -93,7 +94,7 @@ abstract class Format implements iFormat { $sanitized = $purifier->purify($html); $sanitized = str_replace(" ","\r",$sanitized); $sanitized = html_entity_decode($sanitized); - return trim(str_replace('
',"\n", $sanitized)); + return trim(str_replace(['
','
'],"\n", $sanitized)); } diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php index 4158c6b..b6e8cdc 100644 --- a/tests/SanitizeTest.php +++ b/tests/SanitizeTest.php @@ -28,15 +28,15 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { $html = $data['data']['content']['html']; $this->assertEquals('entry', $data['data']['type']); - $this->assertContains('This content has only valid tags.', $html); - $this->assertContains('links,', $html, ' missing'); - $this->assertContains('abbreviations,', $html, ' missing'); - $this->assertContains('bold,', $html, ' missing'); - $this->assertContains('inline code,', $html, ' missing'); - $this->assertContains('delete,', $html, ' missing'); - $this->assertContains('emphasis,', $html, ' missing'); - $this->assertContains('italics,', $html, ' missing'); - $this->assertContains('images are allowed', $html, ' missing'); + $this->assertContains('This content has only valid tags.', $html); + $this->assertContains('links,', $html, ' missing'); + $this->assertContains('abbreviations,', $html, ' missing'); + $this->assertContains('bold,', $html, ' missing'); + $this->assertContains('inline code,', $html, ' missing'); + $this->assertContains('delete,', $html, ' missing'); + $this->assertContains('emphasis,', $html, ' missing'); + $this->assertContains('italics,', $html, ' missing'); + $this->assertContains('images are allowed', $html, ' missing'); $this->assertContains('inline quote,', $html, ' missing'); $this->assertContains('strikethrough,', $html, ' missing'); $this->assertContains('strong text,', $html, ' missing'); @@ -53,6 +53,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { $this->assertContains('
Six
', $html, '
missing'); $this->assertContains('
    ', $html, '
      missing'); $this->assertContains('
    • One
    • ', $html, '
    • missing'); + $this->assertContains('

      We should allow
      break
      tags too

      ', $html, '
      missing'); } public function testRemovesUnsafeTags() { @@ -187,7 +188,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { public function testPhotoInContentEmptyAltAttribute() { // https://github.com/aaronpk/XRay/issues/52 - + $url = 'http://sanitize.example/photo-in-content-empty-alt'; $response = $this->parse(['url' => $url]); @@ -203,7 +204,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { public function testPhotoInContentWithAlt() { // https://github.com/aaronpk/XRay/issues/52 - + $url = 'http://sanitize.example/photo-in-content-with-alt'; $response = $this->parse(['url' => $url]); @@ -214,7 +215,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { $this->assertObjectNotHasAttribute('name', $data->data); $this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]); $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text); - $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->html); + $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->html); } public function testPhotoInContentWithNoText() { @@ -274,7 +275,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { #print_r($data->data); - $this->assertEquals('🌆 Made it to the first #NPSF #earlygang of the year, did in-betweeners abs, and 6:30 workout with a brutal burnout that was really its own workout. But wow pretty sunrise. Plus 50+ deg F? I’ll take it. #100PDPD#justshowup #darknesstodawn #wakeupthesun #fromwhereirun #NovemberProject #sunrise #latergram #nofilter', $data->data->content->text); + $this->assertEquals('🌆 Made it to the first #NPSF #earlygang of the year, did in-betweeners abs, and 6:30 workout with a brutal burnout that was really its own workout. But wow pretty sunrise. Plus 50+ deg F? I’ll take it. #100PDPD'."\n\n".'#justshowup #darknesstodawn #wakeupthesun #fromwhereirun #NovemberProject #sunrise #latergram #nofilter', $data->data->content->text); $this->assertObjectNotHasAttribute('name', $data->data); $this->assertEquals('https://igx.4sqi.net/img/general/original/476_g7yruXflacsGr7PyVmECefyTBMB_R99zmPQxW7pftzA.jpg', $data->data->photo[0]); $this->assertEquals('https://igx.4sqi.net/img/general/original/476_zM3UgU9JHNhom907Ac_1WCEcUhGOJZaNWGlRmev86YA.jpg', $data->data->photo[1]); @@ -302,4 +303,16 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { } */ + public function testWhitespaceWithBreakTags() { + $url = 'http://sanitize.example/entry-with-br-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body); + + $this->assertEquals('This content has two break tags to indicate a paragraph break.

      This is how tantek\'s autolinker works.', $data->data->content->html); + $this->assertEquals("This content has two break tags to indicate a paragraph break.\n\nThis is how tantek's autolinker works.", $data->data->content->text); + } + } diff --git a/tests/data/sanitize.example/entry-with-br-tags b/tests/data/sanitize.example/entry-with-br-tags new file mode 100644 index 0000000..19cca25 --- /dev/null +++ b/tests/data/sanitize.example/entry-with-br-tags @@ -0,0 +1,16 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 02 Mar 2018 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Test + + +
      + This content has two break tags to indicate a paragraph break.

      This is how tantek's autolinker works. +
      + + diff --git a/tests/data/sanitize.example/entry-with-valid-tags b/tests/data/sanitize.example/entry-with-valid-tags index a14006f..4bd621d 100644 --- a/tests/data/sanitize.example/entry-with-valid-tags +++ b/tests/data/sanitize.example/entry-with-valid-tags @@ -19,6 +19,8 @@ for code examples and such

      Paragraph tags are allowed

      +

      We should allow
      break
      tags too

      +

      One

      Two

      Three