Browse Source

run name/content dedupe before munging HTML

fix for #53
pull/60/head
Aaron Parecki 7 years ago
parent
commit
66adfbe2f8
No known key found for this signature in database GPG Key ID: 276C2817346D6056
3 changed files with 39 additions and 8 deletions
  1. +9
    -1
      lib/XRay/Formats/Format.php
  2. +24
    -5
      lib/XRay/Formats/Mf2.php
  3. +6
    -2
      tests/SanitizeTest.php

+ 9
- 1
lib/XRay/Formats/Format.php View File

@ -84,8 +84,16 @@ abstract class Format implements iFormat {
return trim($sanitized); return trim($sanitized);
} }
// Return a plaintext version of the input HTML
protected static function stripHTML($html) { protected static function stripHTML($html) {
return trim(strip_tags($html));
$config = HTMLPurifier_Config::createDefault();
$config->set('Cache.DefinitionImpl', null);
$config->set('HTML.AllowedElements', ['br']);
$purifier = new HTMLPurifier($config);
$sanitized = $purifier->purify($html);
$sanitized = str_replace("&#xD;","\r",$sanitized);
$sanitized = html_entity_decode($sanitized);
return trim(str_replace('<br>',"\n", $sanitized));
} }

+ 24
- 5
lib/XRay/Formats/Mf2.php View File

@ -227,7 +227,8 @@ class Mf2 extends Format {
$allowImg = true; $allowImg = true;
$htmlContent = trim(self::sanitizeHTML($content['html'], $allowImg)); $htmlContent = trim(self::sanitizeHTML($content['html'], $allowImg));
$textContent = trim(str_replace("&#xD;","\r",$content['value']));
#$textContent = trim(str_replace("&#xD;","\r",$content['value']));
$textContent = trim(self::stripHTML($htmlContent));
} else { } else {
$textContent = trim($content['value']); $textContent = trim($content['value']);
} }
@ -339,10 +340,13 @@ class Mf2 extends Format {
$textContent = null; $textContent = null;
$htmlContent = null; $htmlContent = null;
$content = self::parseHTMLValue('content', $item);
if($content) {
$content = self::getHTMLValue($item, 'content');
if(is_string($content)) {
$textContent = $content;
} elseif($content) {
$htmlContent = array_key_exists('html', $content) ? $content['html'] : null; $htmlContent = array_key_exists('html', $content) ? $content['html'] : null;
$textContent = array_key_exists('text', $content) ? $content['text'] : null;
$textContent = array_key_exists('value', $content) ? $content['value'] : null;
} }
if($content) { if($content) {
@ -365,8 +369,9 @@ class Mf2 extends Format {
// If there is content, always return the plaintext content, and return HTML content if it's different // If there is content, always return the plaintext content, and return HTML content if it's different
if($content) { if($content) {
$content = self::parseHTMLValue('content', $item);
$data['content']['text'] = $content['text']; $data['content']['text'] = $content['text'];
if(array_key_exists('html', $content))
if(isset($content['html']))
$data['content']['html'] = $content['html']; $data['content']['html'] = $content['html'];
} }
} }
@ -762,6 +767,20 @@ class Mf2 extends Format {
return $fallback; return $fallback;
} }
/**
 * Look up the first value of property $k and hand it back in its native form.
 *
 * Only $mf2['properties'][$k][0] is inspected. The input is expected to come
 * from the mf2 parser, so the property is expected to be a list of values.
 *
 * @param mixed $mf2      Item to read; accessed as $mf2['properties'][$k]
 * @param mixed $k        Key of the property to read
 * @param mixed $fallback Value returned when nothing usable is found
 * @return mixed The first value when it is a string, the first value unchanged
 *               when it has an 'html' entry, otherwise $fallback
 */
private static function getHTMLValue($mf2, $k, $fallback=null) {
  // Bail out early unless the property is present as a non-empty array
  if(empty($mf2['properties'][$k]) or !is_array($mf2['properties'][$k])) {
    return $fallback;
  }

  // Later values of the property are ignored
  $first = $mf2['properties'][$k][0];

  // Plain strings and values carrying an 'html' entry are returned untouched
  if(is_string($first) or isset($first['html'])) {
    return $first;
  }

  return $fallback;
}
private static function getPlaintextValues($mf2, $k, $values=[]) { private static function getPlaintextValues($mf2, $k, $values=[]) {
if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) { if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) {
foreach($mf2['properties'][$k] as $value) { foreach($mf2['properties'][$k] as $value) {

+ 6
- 2
tests/SanitizeTest.php View File

@ -151,7 +151,7 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('http://sanitize.example/photo.jpg', $data->data->author->photo); $this->assertEquals('http://sanitize.example/photo.jpg', $data->data->author->photo);
} }
public function testPhotoInContent() {
public function testPhotoInContentNoAlt() {
// https://github.com/aaronpk/XRay/issues/52 // https://github.com/aaronpk/XRay/issues/52
$url = 'http://sanitize.example/photo-in-content'; $url = 'http://sanitize.example/photo-in-content';
@ -161,7 +161,11 @@ class SanitizeTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(200, $response->getStatusCode()); $this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body); $data = json_decode($body);
#print_r($data->data);
$this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);
$this->assertEquals('This is a photo post with an img tag inside the content.', $data->data->content->text);
$this->assertEquals('This is a photo post with an <code>img</code> tag inside the content.', $data->data->content->html);
}
$this->assertObjectNotHasAttribute('name', $data->data); $this->assertObjectNotHasAttribute('name', $data->data);
$this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]); $this->assertEquals('http://target.example.com/photo.jpg', $data->data->photo[0]);

Loading…
Cancel
Save