Browse Source

fix multiline posts with smashed together paragraph tags

closes #117
main v1.13.0
Aaron Parecki 3 months ago
parent
commit
ea1f054bdc
6 changed files with 67 additions and 3 deletions
  1. +9
    -1
      lib/XRay/Formats/Format.php
  2. +1
    -1
      tests/ActivityStreamsTest.php
  3. +12
    -0
      tests/ParseTest.php
  4. +15
    -1
      tests/SanitizeTest.php
  5. +16
    -0
      tests/data/sanitize.example/entry-with-p-tags
  6. +14
    -0
      tests/data/source.example.com/text-content-with-p-tags

+ 9
- 1
lib/XRay/Formats/Format.php View File

@ -141,10 +141,18 @@ abstract class Format implements iFormat {
$config->set('Cache.DefinitionImpl', null);
$config->set('HTML.AllowedElements', ['br']);
$purifier = new HTMLPurifier($config);
// Insert two br tags between smashed together paragraph tags.
// The paragraph tags will be removed by the HTMLPurifier, leaving just the br's, which
// will then be replaced by newlines.
$html = trim(str_replace('</p><p>', "</p><br><br><p>", $html));
$sanitized = $purifier->purify($html);
$sanitized = str_replace("&#xD;","\r",$sanitized);
$sanitized = html_entity_decode($sanitized);
return trim(str_replace(['<br>','<br />'],"\n", $sanitized));
$sanitized = trim(str_replace(['<br>','<br />'],"\n", $sanitized));
return $sanitized;
}
}

+ 1
- 1
tests/ActivityStreamsTest.php View File

@ -236,7 +236,7 @@ class ActivityStreamsTest extends PHPUnit\Framework\TestCase
$this->assertEquals('reply', $data['data']['post-type']);
$this->assertEquals('https://toot.cat/@jamey/100471682482196371', $data['data']['url']);
$this->assertEquals('2018-07-31T22:30:09+00:00', $data['data']['published']);
$this->assertEquals('@darius Huh, I just have never encountered anyone using the phrase generically like that.But you might consider writing IndieWeb.org-style bots (Atom+WebSub, and optionally WebMention if you want them to be interactive), and then using https://fed.brid.gy/ as an alternative to implementing ActivityPub yourself...', $data['data']['content']['text']);
$this->assertEquals("@darius Huh, I just have never encountered anyone using the phrase generically like that.\n\nBut you might consider writing IndieWeb.org-style bots (Atom+WebSub, and optionally WebMention if you want them to be interactive), and then using https://fed.brid.gy/ as an alternative to implementing ActivityPub yourself...", $data['data']['content']['text']);
$this->assertEquals('https://social.tinysubversions.com/users/darius/statuses/100471614681787834', $data['data']['in-reply-to'][0]);
$this->assertEquals('Jamey Sharp', $data['data']['author']['name']);
$this->assertEquals('https://s3-us-west-2.amazonaws.com/tootcatapril2017/accounts/avatars/000/013/259/original/c904452a8411e4f5.jpg', $data['data']['author']['photo']);

+ 12
- 0
tests/ParseTest.php View File

@ -315,6 +315,18 @@ class ParseTest extends PHPUnit\Framework\TestCase
$this->assertEquals('This page has a link to target.example.com and some formatted text but is in a p-content element so is plaintext.', $data->data->content->text);
}
/**
 * Parses a fixture whose p-content property wraps two adjacent <p> elements
 * and checks that the plaintext value keeps a newline between the paragraphs.
 */
public function testNewlinesInTextContent() {
  $response = $this->parse(['url' => 'http://source.example.com/text-content-with-p-tags']);
  $json = $response->getContent();

  $this->assertEquals(200, $response->getStatusCode());

  $parsed = json_decode($json);
  $this->assertEquals('mf2+html', $parsed->{'source-format'});

  // The entry must not expose a "name" property; only its content text is checked.
  $entry = $parsed->data;
  $this->assertObjectNotHasAttribute('name', $entry);
  $this->assertEquals("Hello\nWorld", $entry->content->text);
}
public function testArticleWithFeaturedImage()
{
$url = 'http://source.example.com/article-with-featured-image';

+ 15
- 1
tests/SanitizeTest.php View File

@ -424,7 +424,7 @@ class SanitizeTest extends PHPUnit\Framework\TestCase
$this->assertEquals('This is a photo post with an <code>img</code> tag inside the content, which does not have a u-photo class so should not be removed. <img src="http://target.example.com/photo.jpg" alt="a photo" />', $data->data->content->html);
}
public function testWhitespaceWithBreakTags()
public function testEntryWithBreakTags()
{
$url = 'http://sanitize.example/entry-with-br-tags';
$response = $this->parse(['url' => $url]);
@ -437,4 +437,18 @@ class SanitizeTest extends PHPUnit\Framework\TestCase
$this->assertEquals("This content has two break tags to indicate a paragraph break.\n\nThis is how tantek's autolinker works.", $data->data->content->text);
}
/**
 * An e-content value made of back-to-back <p> elements (no whitespace between
 * them) must keep its HTML as-is, while the plaintext version separates the
 * two paragraphs with a blank line.
 */
public function testEntryWithParagraphTags()
{
  $response = $this->parse(['url' => 'http://sanitize.example/entry-with-p-tags']);
  $json = $response->getContent();

  $this->assertEquals(200, $response->getStatusCode());

  $content = json_decode($json)->data->content;

  // HTML output: the paragraph markup is passed through unchanged.
  $this->assertEquals('<p>This is a multiline post separated by paragraph tags with no space between them.</p><p>This is how Mastodon formats HTML.</p>', $content->html);

  // Text output: the paragraph boundary becomes two newlines.
  $this->assertEquals("This is a multiline post separated by paragraph tags with no space between them.\n\nThis is how Mastodon formats HTML.", $content->text);
}
}

+ 16
- 0
tests/data/sanitize.example/entry-with-p-tags View File

@ -0,0 +1,16 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 02 Mar 2018 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<div class="e-content">
<p>This is a multiline post separated by paragraph tags with no space between them.</p><p>This is how Mastodon formats HTML.</p>
</div>
</body>
</html>

+ 14
- 0
tests/data/source.example.com/text-content-with-p-tags View File

@ -0,0 +1,14 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<div class="p-content"><p>Hello</p><p>World</p></div>
</body>
</html>

Loading…
Cancel
Save