From ea1f054bdc1e1008fc064d7cb3d44352992eb03b Mon Sep 17 00:00:00 2001
From: Aaron Parecki
', "
", $html));
+
$sanitized = $purifier->purify($html);
$sanitized = str_replace("
","\r",$sanitized);
$sanitized = html_entity_decode($sanitized);
- return trim(str_replace(['
','
'],"\n", $sanitized));
+ $sanitized = trim(str_replace(['
','
'],"\n", $sanitized));
+
+ return $sanitized;
}
}
diff --git a/tests/ActivityStreamsTest.php b/tests/ActivityStreamsTest.php
index 27df6c3..2c4c098 100644
--- a/tests/ActivityStreamsTest.php
+++ b/tests/ActivityStreamsTest.php
@@ -236,7 +236,7 @@ class ActivityStreamsTest extends PHPUnit\Framework\TestCase
$this->assertEquals('reply', $data['data']['post-type']);
$this->assertEquals('https://toot.cat/@jamey/100471682482196371', $data['data']['url']);
$this->assertEquals('2018-07-31T22:30:09+00:00', $data['data']['published']);
- $this->assertEquals('@darius Huh, I just have never encountered anyone using the phrase generically like that.But you might consider writing IndieWeb.org-style bots (Atom+WebSub, and optionally WebMention if you want them to be interactive), and then using https://fed.brid.gy/ as an alternative to implementing ActivityPub yourself...', $data['data']['content']['text']);
+ $this->assertEquals("@darius Huh, I just have never encountered anyone using the phrase generically like that.\n\nBut you might consider writing IndieWeb.org-style bots (Atom+WebSub, and optionally WebMention if you want them to be interactive), and then using https://fed.brid.gy/ as an alternative to implementing ActivityPub yourself...", $data['data']['content']['text']);
$this->assertEquals('https://social.tinysubversions.com/users/darius/statuses/100471614681787834', $data['data']['in-reply-to'][0]);
$this->assertEquals('Jamey Sharp', $data['data']['author']['name']);
$this->assertEquals('https://s3-us-west-2.amazonaws.com/tootcatapril2017/accounts/avatars/000/013/259/original/c904452a8411e4f5.jpg', $data['data']['author']['photo']);
diff --git a/tests/ParseTest.php b/tests/ParseTest.php
index e793555..bcb274e 100644
--- a/tests/ParseTest.php
+++ b/tests/ParseTest.php
@@ -315,6 +315,18 @@ class ParseTest extends PHPUnit\Framework\TestCase
$this->assertEquals('This page has a link to target.example.com and some formatted text but is in a p-content element so is plaintext.', $data->data->content->text);
}
+ public function testNewlinesInTextContent() { // The plaintext content of the parsed entry must keep a line break between "Hello" and "World".
+ $url = 'http://source.example.com/text-content-with-p-tags'; // fixture name suggests the words sit in separate <p> elements (presumably; verify against the fixture)
+ $response = $this->parse(['url' => $url]);
+
+ $body = $response->getContent();
+ $this->assertEquals(200, $response->getStatusCode());
+ $data = json_decode($body);
+ $this->assertEquals('mf2+html', $data->{'source-format'});
+ $this->assertFalse(property_exists($data->data, 'name'), 'data should not contain a name property'); // property_exists() replaces assertObjectNotHasAttribute(), which PHPUnit 9.6 deprecates and PHPUnit 10 removes
+ $this->assertEquals("Hello\nWorld", $data->data->content->text); // exactly one "\n" is expected between the two words
+ }
+
public function testArticleWithFeaturedImage()
{
$url = 'http://source.example.com/article-with-featured-image';
diff --git a/tests/SanitizeTest.php b/tests/SanitizeTest.php
index 5bf84c1..7ce4e4f 100644
--- a/tests/SanitizeTest.php
+++ b/tests/SanitizeTest.php
@@ -424,7 +424,7 @@ class SanitizeTest extends PHPUnit\Framework\TestCase
$this->assertEquals('This is a photo post with an img
tag inside the content, which does not have a u-photo class so should not be removed. ', $data->data->content->html);
}
- public function testWhitespaceWithBreakTags()
+ public function testEntryWithBreakTags()
{
$url = 'http://sanitize.example/entry-with-br-tags';
$response = $this->parse(['url' => $url]);
@@ -437,4 +437,18 @@ class SanitizeTest extends PHPUnit\Framework\TestCase
$this->assertEquals("This content has two break tags to indicate a paragraph break.\n\nThis is how tantek's autolinker works.", $data->data->content->text);
}
+ public function testEntryWithParagraphTags()
+ {
+ $url = 'http://sanitize.example/entry-with-p-tags';
+ $response = $this->parse(['url' => $url]);
+
+ $body = $response->getContent();
+ $this->assertEquals(200, $response->getStatusCode());
+ $data = json_decode($body);
+
+ $this->assertEquals('
This is a multiline post separated by paragraph tags with no space between them.
This is how Mastodon formats HTML.
', $data->data->content->html); + $this->assertEquals("This is a multiline post separated by paragraph tags with no space between them.\n\nThis is how Mastodon formats HTML.", $data->data->content->text); + } + + } diff --git a/tests/data/sanitize.example/entry-with-p-tags b/tests/data/sanitize.example/entry-with-p-tags new file mode 100644 index 0000000..04ad142 --- /dev/null +++ b/tests/data/sanitize.example/entry-with-p-tags @@ -0,0 +1,16 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 02 Mar 2018 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + +This is a multiline post separated by paragraph tags with no space between them.
This is how Mastodon formats HTML.
+Hello
World