diff --git a/composer.json b/composer.json index 67b3521..3588d01 100644 --- a/composer.json +++ b/composer.json @@ -2,7 +2,8 @@ "require": { "league/plates": "3.*", "league/route": "1.*", - "mf2/mf2": "0.2.*" + "mf2/mf2": "0.2.*", + "ezyang/htmlpurifier": "4.*" }, "autoload": { "files": [ diff --git a/composer.lock b/composer.lock index 0e4a93b..066c143 100644 --- a/composer.lock +++ b/composer.lock @@ -4,9 +4,53 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "a620639fa69cc332e4d4f49436f81ecc", - "content-hash": "b26a0c76296b944624f36bbb163392ae", + "hash": "9ca7e7a96c33dc8c293a42cbcd4c1d2f", + "content-hash": "c1c0c63887a953998208639cd85555a3", "packages": [ + { + "name": "ezyang/htmlpurifier", + "version": "v4.7.0", + "source": { + "type": "git", + "url": "https://github.com/ezyang/htmlpurifier.git", + "reference": "ae1828d955112356f7677c465f94f7deb7d27a40" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/ae1828d955112356f7677c465f94f7deb7d27a40", + "reference": "ae1828d955112356f7677c465f94f7deb7d27a40", + "shasum": "" + }, + "require": { + "php": ">=5.2" + }, + "type": "library", + "autoload": { + "psr-0": { + "HTMLPurifier": "library/" + }, + "files": [ + "library/HTMLPurifier.composer.php" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "LGPL" + ], + "authors": [ + { + "name": "Edward Z. Yang", + "email": "admin@htmlpurifier.org", + "homepage": "http://ezyang.com" + } + ], + "description": "Standards compliant HTML filter written in PHP", + "homepage": "http://htmlpurifier.org/", + "keywords": [ + "html" + ], + "time": "2015-08-05 01:03:42" + }, { "name": "ircmaxell/password-compat", "version": "v1.0.4", diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index 845a5cf..a1797ed 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -1,6 +1,8 @@ set('Cache.DefinitionImpl', null); + $config->set('HTML.AllowedElements', [ + 'a', + 'abbr', + 'b', + 'code', + 'del', + 'em', + 'i', + 'img', + 'q', + 'strike', + 'strong', + 'time', + 'blockquote', + 'pre', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + ]); + $def = $config->getHTMLDefinition(true); + $def->addElement( + 'time', + 'Inline', + 'Inline', + 'Common', + [ + 'datetime' => 'Text' + ] + ); + $purifier = new HTMLPurifier($config); + return $purifier->purify($html); + } + private static function responseDisplayText($name, $summary, $content) { // Build a fake h-entry to pass to the comments parser diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php new file mode 100644 index 0000000..8db456a --- /dev/null +++ b/tests/SanitizeTest.php @@ -0,0 +1,71 @@ +client = new Parse(); + $this->client->http = new p3k\HTTPTest(dirname(__FILE__).'/data/'); + } + + private function parse($params) { + $request = new Request($params); + $response = new Response(); + return $this->client->parse($request, $response); + } + + public function testAllowsWhitelistedTags() { + $url = 'http://sanitize.example/entry-with-valid-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertContains('This content has only valid tags.', $html); + $this->assertContains('links,', $html, ' missing'); + $this->assertContains('abbreviations,', $html, ' missing'); + $this->assertContains('bold,', $html, ' missing'); + $this->assertContains('inline code,', $html, ' missing'); + $this->assertContains('delete,', $html, ' missing'); + $this->assertContains('emphasis,', $html, ' missing'); + $this->assertContains('italics,', $html, ' missing'); + $this->assertContains('', $html, ' missing'); + $this->assertContains('inline quote,', $html, ' missing'); + $this->assertContains('strikethrough,', $html, ' missing'); + $this->assertContains('strong text,', $html, ' missing'); + $this->assertContains('time elements', $html, ' missing'); + $this->assertContains('Blockquote tags are okay', $html); + $this->assertContains('preformatted text is okay too', $html, ' missing'); + $this->assertContains('for code examples and such', $html, ' missing'); + $this->assertContains('One', $html, ' missing'); + $this->assertContains('Two', $html, ' missing'); + $this->assertContains('Three', $html, ' missing'); + $this->assertContains('Four', $html, ' missing'); + $this->assertContains('Five', $html, ' missing'); + $this->assertContains('Six', $html, ' missing'); + } + + public function testRemovesUnsafeTags() { + $url = 'http://sanitize.example/entry-with-unsafe-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertNotContains('', $html); + $this->assertNotContains(' + + Hello World + +
inline code
missing'); + $this->assertContains('delete,', $html, ' missing'); + $this->assertContains('emphasis,', $html, ' missing'); + $this->assertContains('italics,', $html, ' missing'); + $this->assertContains('', $html, ' missing'); + $this->assertContains('inline quote,', $html, ' missing'); + $this->assertContains('strikethrough,', $html, ' missing'); + $this->assertContains('strong text,', $html, ' missing'); + $this->assertContains('time elements', $html, ' missing'); + $this->assertContains('Blockquote tags are okay', $html); + $this->assertContains('preformatted text is okay too', $html, ' missing'); + $this->assertContains('for code examples and such', $html, ' missing'); + $this->assertContains('One', $html, ' missing'); + $this->assertContains('Two', $html, ' missing'); + $this->assertContains('Three', $html, ' missing'); + $this->assertContains('Four', $html, ' missing'); + $this->assertContains('Five', $html, ' missing'); + $this->assertContains('Six', $html, ' missing'); + } + + public function testRemovesUnsafeTags() { + $url = 'http://sanitize.example/entry-with-unsafe-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertNotContains('', $html); + $this->assertNotContains(' + + Hello World + +
inline quote
missing'); + $this->assertContains('strikethrough,', $html, ' missing'); + $this->assertContains('strong text,', $html, ' missing'); + $this->assertContains('time elements', $html, ' missing'); + $this->assertContains('Blockquote tags are okay', $html); + $this->assertContains('preformatted text is okay too', $html, ' missing'); + $this->assertContains('for code examples and such', $html, ' missing'); + $this->assertContains('One', $html, ' missing'); + $this->assertContains('Two', $html, ' missing'); + $this->assertContains('Three', $html, ' missing'); + $this->assertContains('Four', $html, ' missing'); + $this->assertContains('Five', $html, ' missing'); + $this->assertContains('Six', $html, ' missing'); + } + + public function testRemovesUnsafeTags() { + $url = 'http://sanitize.example/entry-with-unsafe-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertNotContains('', $html); + $this->assertNotContains(' + + Hello World + +
Blockquote tags are okay
preformatted text is okay too', $html, ' missing'); + $this->assertContains('for code examples and such', $html, ' missing'); + $this->assertContains('One', $html, ' missing'); + $this->assertContains('Two', $html, ' missing'); + $this->assertContains('Three', $html, ' missing'); + $this->assertContains('Four', $html, ' missing'); + $this->assertContains('Five', $html, ' missing'); + $this->assertContains('Six', $html, ' missing'); + } + + public function testRemovesUnsafeTags() { + $url = 'http://sanitize.example/entry-with-unsafe-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertNotContains('', $html); + $this->assertNotContains(' + + Hello World + +
missing'); + $this->assertContains('for code examples and such
missing'); + $this->assertContains('One', $html, ' missing'); + $this->assertContains('Two', $html, ' missing'); + $this->assertContains('Three', $html, ' missing'); + $this->assertContains('Four', $html, ' missing'); + $this->assertContains('Five', $html, ' missing'); + $this->assertContains('Six', $html, ' missing'); + } + + public function testRemovesUnsafeTags() { + $url = 'http://sanitize.example/entry-with-unsafe-tags'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $html = $data['data']['content']['html']; + + $this->assertEquals('entry', $data['data']['type']); + $this->assertNotContains('', $html); + $this->assertNotContains(' + + Hello World + +
', $html); + $this->assertNotContains(' + +
Hello World