diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php index dff38d4..41637ef 100644 --- a/lib/XRay/Formats/Format.php +++ b/lib/XRay/Formats/Format.php @@ -84,8 +84,16 @@ abstract class Format implements iFormat { return trim($sanitized); } + // Return a plaintext version of the input HTML protected static function stripHTML($html) { - return trim(strip_tags($html)); + $config = HTMLPurifier_Config::createDefault(); + $config->set('Cache.DefinitionImpl', null); + $config->set('HTML.AllowedElements', ['br']); + $purifier = new HTMLPurifier($config); + $sanitized = $purifier->purify($html); + $sanitized = str_replace(" ","\r",$sanitized); + $sanitized = html_entity_decode($sanitized); + return trim(str_replace('
',"\n", $sanitized)); } diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php index 675a328..bc25753 100644 --- a/lib/XRay/Formats/Mf2.php +++ b/lib/XRay/Formats/Mf2.php @@ -227,7 +227,8 @@ class Mf2 extends Format { $allowImg = true; $htmlContent = trim(self::sanitizeHTML($content['html'], $allowImg)); - $textContent = trim(str_replace(" ","\r",$content['value'])); + #$textContent = trim(str_replace(" ","\r",$content['value'])); + $textContent = trim(self::stripHTML($htmlContent)); } else { $textContent = trim($content['value']); } @@ -339,10 +340,13 @@ class Mf2 extends Format { $textContent = null; $htmlContent = null; - $content = self::parseHTMLValue('content', $item); - if($content) { + $content = self::getHTMLValue($item, 'content'); + + if(is_string($content)) { + $textContent = $content; + } elseif($content) { $htmlContent = array_key_exists('html', $content) ? $content['html'] : null; - $textContent = array_key_exists('text', $content) ? $content['text'] : null; + $textContent = array_key_exists('value', $content) ? $content['value'] : null; } if($content) { @@ -365,8 +369,9 @@ class Mf2 extends Format { // If there is content, always return the plaintext content, and return HTML content if it's different if($content) { + $content = self::parseHTMLValue('content', $item); $data['content']['text'] = $content['text']; - if(array_key_exists('html', $content)) + if(isset($content['html'])) $data['content']['html'] = $content['html']; } } @@ -762,6 +767,20 @@ class Mf2 extends Format { return $fallback; } + private static function getHTMLValue($mf2, $k, $fallback=null) { + // Return an array with html and value if the value is html, otherwise return a string + if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) { + // $mf2['properties'][$v] will always be an array since the input was from the mf2 parser + $value = $mf2['properties'][$k][0]; + if(is_string($value)) { + return $value; + } elseif(isset($value['html'])) { + return $value; + } + } + return $fallback; + } + private static function getPlaintextValues($mf2, $k, $values=[]) { if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) { foreach($mf2['properties'][$k] as $value) { diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php index a06a84a..56851a7 100644 --- a/tests/SanitizeTest.php +++ b/tests/SanitizeTest.php @@ -151,7 +151,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { $this->assertEquals('http://sanitize.example/photo.jpg', $data->data->author->photo); } - public function testPhotoInContent() { + public function testPhotoInContentNoAlt() { // https://github.com/aaronpk/XRay/issues/52 $url = 'http://sanitize.example/photo-in-content'; @@ -161,7 +161,11 @@ class SanitizeTest extends PHPUnit_Framework_TestCase { $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body); - #print_r($data->data); + $this->assertObjectNotHasAttribute('name', $data->data); + $this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]); + $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text); + $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->html); + } $this->assertObjectNotHasAttribute('name', $data->data); $this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);