diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php
index dff38d4..41637ef 100644
--- a/lib/XRay/Formats/Format.php
+++ b/lib/XRay/Formats/Format.php
@@ -84,8 +84,16 @@ abstract class Format implements iFormat {
return trim($sanitized);
}
+ // Return a plaintext version of the input HTML: only <br> survives purification and becomes "\n", then entities are decoded
protected static function stripHTML($html) {
- return trim(strip_tags($html));
+ $config = HTMLPurifier_Config::createDefault();
+ $config->set('Cache.DefinitionImpl', null); // null turns off HTMLPurifier's definition cache
+ $config->set('HTML.AllowedElements', ['br']); // <br> is the only element kept, so line breaks can be mapped below
+ $purifier = new HTMLPurifier($config);
+ $sanitized = $purifier->purify($html);
+ $sanitized = str_replace("&#xD;","\r",$sanitized); // carriage returns can come back as a character reference
+ $sanitized = str_replace(['<br>','<br />'],"\n",$sanitized); // either spelling may be emitted; map real tags before decoding so an escaped "&lt;br&gt;" stays literal text
+ return trim(html_entity_decode($sanitized));
}
diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php
index 675a328..bc25753 100644
--- a/lib/XRay/Formats/Mf2.php
+++ b/lib/XRay/Formats/Mf2.php
@@ -227,7 +227,8 @@ class Mf2 extends Format {
$allowImg = true;
$htmlContent = trim(self::sanitizeHTML($content['html'], $allowImg));
- $textContent = trim(str_replace("
","\r",$content['value']));
+ #$textContent = trim(str_replace("&#xD;","\r",$content['value']));
+ $textContent = trim(self::stripHTML($htmlContent));
} else {
$textContent = trim($content['value']);
}
@@ -339,10 +340,13 @@ class Mf2 extends Format {
$textContent = null;
$htmlContent = null;
- $content = self::parseHTMLValue('content', $item);
- if($content) {
+ $content = self::getHTMLValue($item, 'content');
+
+ if(is_string($content)) {
+ $textContent = $content;
+ } elseif($content) {
$htmlContent = array_key_exists('html', $content) ? $content['html'] : null;
- $textContent = array_key_exists('text', $content) ? $content['text'] : null;
+ $textContent = array_key_exists('value', $content) ? $content['value'] : null;
}
if($content) {
@@ -365,8 +369,9 @@ class Mf2 extends Format {
// If there is content, always return the plaintext content, and return HTML content if it's different
if($content) {
+ $content = self::parseHTMLValue('content', $item);
$data['content']['text'] = $content['text'];
- if(array_key_exists('html', $content))
+ if(isset($content['html']))
$data['content']['html'] = $content['html'];
}
}
@@ -762,6 +767,20 @@ class Mf2 extends Format {
return $fallback;
}
+ private static function getHTMLValue($mf2, $k, $fallback=null) {
+ // Return the first value of property $k: the string itself when it is plain text, the value array when it has an 'html' key, otherwise $fallback
+ if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) {
+ // $mf2['properties'][$k] will always be an array since the input was from the mf2 parser
+ $value = $mf2['properties'][$k][0]; // only the first value of the property is considered
+ if(is_string($value)) {
+ return $value;
+ } elseif(isset($value['html'])) {
+ return $value;
+ }
+ }
+ return $fallback;
+ }
+
private static function getPlaintextValues($mf2, $k, $values=[]) {
if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) {
foreach($mf2['properties'][$k] as $value) {
diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php
index a06a84a..56851a7 100644
--- a/tests/SanitizeTest.php
+++ b/tests/SanitizeTest.php
@@ -151,7 +151,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('http://sanitize.example/photo.jpg', $data->data->author->photo);
}
- public function testPhotoInContent() {
+ public function testPhotoInContentNoAlt() {
// https://github.com/aaronpk/XRay/issues/52
$url = 'http://sanitize.example/photo-in-content';
@@ -161,7 +161,11 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body);
- #print_r($data->data);
+ $this->assertObjectNotHasAttribute('name', $data->data);
+ $this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);
+ $this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text);
+ $this->assertEquals('This is a photo post with an img
tag inside the content.', $data->data->content->html);
+ }
$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);