diff --git a/lib/XRay/Fetcher.php b/lib/XRay/Fetcher.php
index b1c3223..2ff3e4a 100644
--- a/lib/XRay/Fetcher.php
+++ b/lib/XRay/Fetcher.php
@@ -71,7 +71,7 @@ class Fetcher {
$headers = [];
- $headers[] = 'Accept: text/html, application/json, application/xml, text/xml';
+ $headers[] = 'Accept: application/mf2+json, application/activity+json, text/html, application/json, application/xml, text/xml';
if(isset($opts['token']))
$headers[] = 'Authorization: Bearer ' . $opts['token'];
diff --git a/lib/XRay/Formats/ActivityStreams.php b/lib/XRay/Formats/ActivityStreams.php
new file mode 100644
index 0000000..8dd3f52
--- /dev/null
+++ b/lib/XRay/Formats/ActivityStreams.php
@@ -0,0 +1,179 @@
+ [
+ 'type' => 'unknown',
+ ],
+ 'url' => $url,
+ ];
+ return $result;
+ }
+
+ /**
+  * Convert a decoded ActivityStreams 2 object into an "entry" result.
+  *
+  * Copies the URL, published date, content, hashtags, reply target and
+  * photo/video attachments out of $as2, then performs one HTTP request to
+  * fetch the author profile when "attributedTo" is a string.
+  *
+  * @param array  $as2  Decoded AS2 object (accessed with array keys throughout)
+  * @param string $url  Not used by this method; presumably the URL the document came from
+  * @param object $http HTTP client; only its get($url, $headers) method is called here
+  * @param array  $opts Options, passed through unchanged to parseAsHCard()
+  * @return array ['data' => [...]] where data always has 'type' => 'entry' and 'post-type'
+  */
+ private static function parseAsHEntry($as2, $url, $http, $opts) {
+   $data = [
+     'type' => 'entry'
+   ];
+   $refs = [];
+
+   // Prefer the "url" property, falling back to the object's "id"
+   if(isset($as2['url']))
+     $data['url'] = $as2['url'];
+   elseif(isset($as2['id']))
+     $data['url'] = $as2['id'];
+
+   if(isset($as2['published']) && is_string($as2['published'])) {
+     try {
+       $date = new DateTime($as2['published']);
+       $data['published'] = $date->format('c');
+     } catch(\Exception $e) {
+       // An unparseable date is deliberately ignored: the entry just has no "published" value
+     }
+   }
+
+   if(isset($as2['content'])) {
+     $html = trim(self::sanitizeHTML($as2['content']));
+     $text = trim(self::stripHTML($html));
+
+     $data['content'] = [
+       'text' => $text
+     ];
+
+     // Only report HTML when it differs from the plain text version
+     if($html && $text && $text != $html) {
+       $data['content']['html'] = $html;
+     }
+   }
+
+   if(isset($as2['tag']) && is_array($as2['tag'])) {
+     $emoji = [];
+     $category = [];
+     foreach($as2['tag'] as $tag) {
+       // Both tag kinds handled below need a type and a string name; skip anything else
+       // (previously an Emoji tag without "name" triggered an undefined index notice)
+       if(!is_array($tag) || !isset($tag['type']) || !isset($tag['name']) || !is_string($tag['name']))
+         continue;
+
+       if($tag['type'] == 'Hashtag') {
+         $category[] = trim($tag['name'], '#');
+       } elseif($tag['type'] == 'Emoji' && isset($tag['icon']['url'])) {
+         $emoji[$tag['name']] = $tag['icon']['url'];
+       }
+     }
+
+     if(count($category))
+       $data['category'] = $category;
+
+     // Remove custom emoji shortcodes from the HTML content. The plain text is left untouched.
+     if(count($emoji) && isset($data['content']['html'])) {
+       $data['content']['html'] = str_replace(array_keys($emoji), '', $data['content']['html']);
+     }
+   }
+
+   if(isset($as2['inReplyTo'])) {
+     $data['in-reply-to'] = [$as2['inReplyTo']];
+   }
+
+   // Photos and Videos
+   if(isset($as2['attachment']) && is_array($as2['attachment'])) {
+     $photos = [];
+     $videos = [];
+     foreach($as2['attachment'] as $attachment) {
+       // Ignore attachments that do not declare both a media type and a URL
+       if(!is_array($attachment) || !isset($attachment['url'])
+         || !isset($attachment['mediaType']) || !is_string($attachment['mediaType']))
+         continue;
+
+       if(strpos($attachment['mediaType'], 'image/') !== false) {
+         $photos[] = $attachment['url'];
+       }
+       if(strpos($attachment['mediaType'], 'video/') !== false) {
+         $videos[] = $attachment['url'];
+       }
+     }
+     if(count($photos))
+       $data['photo'] = $photos;
+     if(count($videos))
+       $data['video'] = $videos;
+   }
+
+   // Fetch the author info, which requires an HTTP request
+   if(isset($as2['attributedTo']) && is_string($as2['attributedTo'])) {
+     $authorResponse = $http->get($as2['attributedTo'], ['Accept: application/activity+json,application/json']);
+     if($authorResponse && !empty($authorResponse['body'])) {
+       $authorProfile = json_decode($authorResponse['body'], true);
+       // Only build an author card when the body decoded to a JSON object;
+       // otherwise the entry would get an author whose fields are all null
+       if(is_array($authorProfile)) {
+         $author = self::parseAsHCard($authorProfile, $as2['attributedTo'], $http, $opts);
+         if($author && !empty($author['data']))
+           $data['author'] = $author['data'];
+       }
+     }
+   }
+
+   $data['post-type'] = PostType::discover($data);
+
+   $response = [
+     'data' => $data,
+   ];
+
+   // $refs is never populated in this method, so this branch is currently inactive
+   if(count($refs)) {
+     $response['data']['refs'] = $refs;
+   }
+
+   return $response;
+ }
+
+ /**
+  * Build a "card" result from a decoded ActivityStreams 2 actor object.
+  *
+  * The name is taken from "name" when it is non-empty, otherwise from
+  * "preferredUsername". "nickname" is only present when "preferredUsername"
+  * is set. $url, $http and $opts are accepted but not used here.
+  *
+  * @return array ['data' => card array with type, name, url, photo and optionally nickname]
+  */
+ private static function parseAsHCard($as2, $url, $http, $opts) {
+   $card = [
+     'type' => 'card',
+     'name' => null,
+     'url' => null,
+     'photo' => null
+   ];
+
+   $hasUsername = isset($as2['preferredUsername']);
+
+   if(!empty($as2['name'])) {
+     $card['name'] = $as2['name'];
+   } elseif($hasUsername) {
+     $card['name'] = $as2['preferredUsername'];
+   }
+
+   if($hasUsername) {
+     $card['nickname'] = $as2['preferredUsername'];
+   }
+
+   if(isset($as2['url'])) {
+     $card['url'] = $as2['url'];
+   }
+
+   // A nested isset() is false when either "icon" or its "url" is missing
+   if(isset($as2['icon']['url'])) {
+     $card['photo'] = $as2['icon']['url'];
+   }
+
+   // TODO: featured image for h-cards?
+   // if(isset($as2['image']) && isset($as2['image']['url']))
+   // $data['featured'] = $as2['image']['url'];
+
+   return [
+     'data' => $card
+   ];
+ }
+
+}
diff --git a/lib/XRay/Formats/Format.php b/lib/XRay/Formats/Format.php
index eba6d98..2b9bcf8 100644
--- a/lib/XRay/Formats/Format.php
+++ b/lib/XRay/Formats/Format.php
@@ -59,7 +59,8 @@ abstract class Format implements iFormat {
'h6',
'ul',
'li',
- 'ol'
+ 'ol',
+ 'span',
];
if($allowImg)
$allowed[] = 'img';
diff --git a/lib/XRay/Formats/HTML.php b/lib/XRay/Formats/HTML.php
index d9b561b..37c2b06 100644
--- a/lib/XRay/Formats/HTML.php
+++ b/lib/XRay/Formats/HTML.php
@@ -95,26 +95,63 @@ class HTML extends Format {
// Check for a rel=alternate link to a Microformats JSON representation, and use that instead
if(isset($mf2['rel-urls'])) {
+ $alternates = [
+ 'mf2' => [],
+ 'as2' => [],
+ ];
+
foreach($mf2['rel-urls'] as $relurl => $reltype) {
+
if(in_array('alternate', $reltype['rels']) && $reltype['type'] == 'application/mf2+json') {
- // Fetch and parse the MF2 JSON link instead
- $jsonpage = $http->get($relurl, [
- 'Accept' => 'application/mf2+json,application/json'
- ]);
- // Skip and fall back to parsing the HTML if anything about this request fails
- if(!$jsonpage['error'] && $jsonpage['body']) {
- $jsondata = json_decode($jsonpage['body'],true);
- if($jsondata) {
- $data = Formats\Mf2::parse($jsondata, $url, $http, $opts);
- if($data && is_array($data) && isset($data['data']['type'])) {
- $data['url'] = $relurl;
- $data['source-format'] = 'mf2+json';
- return $data;
- }
+ $alternates['mf2'][] = $relurl;
+ }
+
+ if(in_array('alternate', $reltype['rels']) && $reltype['type'] == 'application/activity+json') {
+ $alternates['as2'][] = $relurl;
+ }
+
+ }
+
+ if(count($alternates['mf2'])) {
+ // Fetch and parse the MF2 JSON link
+ $relurl = $alternates['mf2'][0];
+ $jsonpage = $http->get($relurl, [
+ 'Accept' => 'application/mf2+json,application/json'
+ ]);
+ // Skip and fall back to parsing the HTML if anything about this request fails
+ if(!$jsonpage['error'] && $jsonpage['body']) {
+ $jsondata = json_decode($jsonpage['body'],true);
+ if($jsondata) {
+ $data = Formats\Mf2::parse($jsondata, $url, $http, $opts);
+ if($data && is_array($data) && isset($data['data']['type'])) {
+ $data['url'] = $relurl;
+ $data['source-format'] = 'mf2+json';
+ return $data;
}
}
}
}
+
+ if(count($alternates['as2'])) {
+ $relurl = $alternates['as2'][0];
+ // Fetch and parse the ActivityStreams JSON link
+ $jsonpage = $http->get($relurl, [
+ 'Accept' => 'application/activity+json,application/json'
+ ]);
+ // Skip and fall back to parsing the HTML if anything about this request fails
+ if(!$jsonpage['error'] && $jsonpage['body']) {
+ $jsondata = json_decode($jsonpage['body'],true);
+ if($jsondata) {
+ $data = Formats\ActivityStreams::parse($jsondata, $url, $http, $opts);
+ if($data && is_array($data) && isset($data['data']['type'])) {
+ $data['url'] = $relurl;
+ $data['source-format'] = 'activity+json';
+ return $data;
+ }
+ }
+ }
+ }
+
}
// Now start pulling in the data from the page. Start by looking for microformats2
diff --git a/lib/XRay/Formats/Mf2.php b/lib/XRay/Formats/Mf2.php
index a2d331f..47d5903 100644
--- a/lib/XRay/Formats/Mf2.php
+++ b/lib/XRay/Formats/Mf2.php
@@ -1,8 +1,6 @@
http, $opts);
+ $data['source-format'] = 'activity+json';
+ return $data;
+ }
+
if(substr($body, 0, 5) == 'http, $opts);
$data['source-format'] = 'mf2+json';
return $data;
+ } elseif($parsed && Formats\ActivityStreams::is_as2_json($parsed)) {
+ // Check if an ActivityStreams JSON string was passed in
+ $data = Formats\ActivityStreams::parse($parsed, $url, $this->http, $opts);
+ $data['source-format'] = 'activity+json';
+ return $data;
}
}
// No special parsers matched, parse for Microformats now
$data = Formats\HTML::parse($this->http, $body, $url, $opts);
- if(!isset($data['source-format']))
+ if(!isset($data['source-format']) && isset($data['type']) && $data['type'] != 'unknown')
$data['source-format'] = 'mf2+html';
return $data;
}
diff --git a/tests/ActivityStreamsTest.php b/tests/ActivityStreamsTest.php
new file mode 100644
index 0000000..31342c0
--- /dev/null
+++ b/tests/ActivityStreamsTest.php
@@ -0,0 +1,142 @@
+client = new Parse();
+ $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/');
+ $this->client->mc = null;
+ }
+
+ /**
+  * Run the client's parse() for the given request parameters and return what it returns.
+  */
+ private function parse($params) {
+   return $this->client->parse(new Request($params), new Response());
+ }
+
+ /**
+  * An actor profile document is reported as an activity+json card.
+  */
+ public function testAuthorProfile() {
+   $profileUrl = 'http://activitystreams.example/aaronpk';
+   $response = $this->parse(['url' => $profileUrl]);
+
+   $json = $response->getContent();
+   $this->assertEquals(200, $response->getStatusCode());
+   $parsed = json_decode($json, true);
+
+   $this->assertEquals('activity+json', $parsed['source-format']);
+
+   $card = $parsed['data'];
+   $this->assertEquals('card', $card['type']);
+   $this->assertEquals('aaronpk', $card['name']);
+   $this->assertEquals('https://aaronparecki.com/images/profile.jpg', $card['photo']);
+   $this->assertEquals('https://aaronparecki.com/', $card['url']);
+ }
+
+ /**
+  * A note with a hashtag yields a text-only entry with a category and author details.
+  */
+ public function testNoteWithTags() {
+   $noteUrl = 'http://activitystreams.example/note.json';
+   $response = $this->parse(['url' => $noteUrl]);
+
+   $json = $response->getContent();
+   $this->assertEquals(200, $response->getStatusCode());
+   $parsed = json_decode($json, true);
+
+   $this->assertEquals('activity+json', $parsed['source-format']);
+
+   $entry = $parsed['data'];
+   $this->assertEquals('note', $entry['post-type']);
+   $this->assertEquals($noteUrl, $entry['url']);
+   $this->assertEquals('2018-07-12T13:02:04-07:00', $entry['published']);
+   $this->assertEquals('This is the text content of an ActivityStreams note', $entry['content']['text']);
+   $this->assertArrayNotHasKey('html', $entry['content']);
+   $this->assertSame(['activitystreams'], $entry['category']);
+
+   $author = $entry['author'];
+   $this->assertEquals('aaronpk', $author['name']);
+   $this->assertEquals('https://aaronparecki.com/images/profile.jpg', $author['photo']);
+   $this->assertEquals('https://aaronparecki.com/', $author['url']);
+ }
+
+ /**
+  * An image attachment makes the entry a photo post and is listed under "photo".
+  */
+ public function testPhoto() {
+   $photoUrl = 'http://activitystreams.example/photo.json';
+   $response = $this->parse(['url' => $photoUrl]);
+
+   $json = $response->getContent();
+   $this->assertEquals(200, $response->getStatusCode());
+   $parsed = json_decode($json, true);
+
+   $this->assertEquals('activity+json', $parsed['source-format']);
+
+   $entry = $parsed['data'];
+   $this->assertEquals($photoUrl, $entry['url']);
+   $this->assertEquals('photo', $entry['post-type']);
+   $this->assertEquals('2018-07-12T13:02:04-07:00', $entry['published']);
+   $this->assertEquals('This is the text content of an ActivityStreams photo', $entry['content']['text']);
+   $this->assertArrayNotHasKey('html', $entry['content']);
+   $this->assertSame(['activitystreams'], $entry['category']);
+   $this->assertSame(['https://aaronparecki.com/2018/06/28/26/photo.jpg'], $entry['photo']);
+ }
+
+ /**
+  * A video attachment makes the entry a video post and is listed under "video".
+  */
+ public function testVideo() {
+   $videoUrl = 'http://activitystreams.example/video.json';
+   $response = $this->parse(['url' => $videoUrl]);
+
+   $json = $response->getContent();
+   $this->assertEquals(200, $response->getStatusCode());
+   $parsed = json_decode($json, true);
+
+   $this->assertEquals('activity+json', $parsed['source-format']);
+
+   $entry = $parsed['data'];
+   $this->assertEquals('video', $entry['post-type']);
+   $this->assertEquals('2018-07-12T13:02:04-07:00', $entry['published']);
+   $this->assertSame(['https://aaronparecki.com/2018/07/21/19/video.mp4'], $entry['video']);
+ }
+
+ /**
+  * A reply exposes its target under "in-reply-to" and reports no category.
+  */
+ public function testReply() {
+   $replyUrl = 'http://activitystreams.example/reply.json';
+   $response = $this->parse(['url' => $replyUrl]);
+
+   $json = $response->getContent();
+   $this->assertEquals(200, $response->getStatusCode());
+   $parsed = json_decode($json, true);
+
+   $this->assertEquals('activity+json', $parsed['source-format']);
+
+   $entry = $parsed['data'];
+   $this->assertEquals('reply', $entry['post-type']);
+   $this->assertEquals('2018-07-12T13:02:04-07:00', $entry['published']);
+   // The person-tag must not be turned into a category
+   $this->assertArrayNotHasKey('category', $entry);
+   // For now, don't fetch the reply context
+   $this->assertEquals(['http://activitystreams.example/note.json'], $entry['in-reply-to']);
+ }
+
+ public function testCustomEmoji() {
+ $url = 'http://activitystreams.example/custom-emoji.json';
+ $response = $this->parse(['url' => $url]);
+
+ $body = $response->getContent();
+ $this->assertEquals(200, $response->getStatusCode());
+ $data = json_decode($body, true);
+
+ $this->assertEquals('activity+json', $data['source-format']);
+ $this->assertEquals('note', $data['data']['post-type']);
+ $this->assertEquals("https://mastodon.social/@Gargron/100465999501820229", $data['data']['url']);
+ $this->assertEquals('2018-07-30T22:24:54+00:00', $data['data']['published']);
+ $this->assertEquals(':yikes:', $data['data']['content']['text']);
+ $this->assertEquals('
This should not be the content from XRay
+This should not be the content from XRay
+