From 6de9be2567e92e2dbe05998f4be8aa640ef47d83 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Sat, 5 Mar 2016 09:57:19 -0800 Subject: [PATCH] parse h-event closes #9 --- controllers/Parse.php | 1 + lib/Formats/Mf2.php | 120 ++++++++++++++++-- tests/ParseTest.php | 53 ++++++++ tests/data/source.example.com/h-event | 24 ++++ .../h-event-text-description | 24 ++++ .../h-event-with-h-card-location | 20 +++ 6 files changed, 230 insertions(+), 12 deletions(-) create mode 100644 tests/data/source.example.com/h-event create mode 100644 tests/data/source.example.com/h-event-text-description create mode 100644 tests/data/source.example.com/h-event-with-h-card-location diff --git a/controllers/Parse.php b/controllers/Parse.php index 8bbe3fa..3701005 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -69,6 +69,7 @@ class Parse { if($html) { // If HTML is provided in the request, parse that, and use the URL provided as the base URL for mf2 resolving $result['body'] = $html; + $result['url'] = $url; } else { // Attempt some basic URL validation $scheme = parse_url($url, PHP_URL_SCHEME); diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index 0da9307..15cde47 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -13,9 +13,13 @@ class Mf2 { if(count($mf2['items']) == 1) { $item = $mf2['items'][0]; if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { - Parse::debug("mf2.0: Recognized $url as an h-entry it is the only item on the page"); + Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page"); return self::parseAsHEntry($mf2, $item, $http, $url); } + if(in_array('h-event', $item['type'])) { + Parse::debug("mf2:0: Recognized $url as an h-event it is the only item on the page"); + return self::parseAsHEvent($mf2, $item, $http, $url); + } } // Check if the list of items is a bunch of h-entrys and return as a feed @@ -28,7 +32,7 @@ class Mf2 { $urls = $item['properties']['url']; $urls = 
array_map('\normalize_url', $urls); if(in_array($url, $urls)) { - Parse::debug("mf2.1: Recognized $url as an h-entry because an h-entry on the page matched the URL of the request"); + Parse::debug("mf2:1: Recognized $url as an h-entry because an h-entry on the page matched the URL of the request"); return self::parseAsHEntry($mf2, $item, $http, $url); } $lastSeenEntry = $item; @@ -39,20 +43,20 @@ class Mf2 { // If there was more than one h-entry on the page, treat the whole page as a feed if($hentrys > 1) { - Parse::debug("mf2.2: Recognized $url as an h-feed because there are more than one h-entry on the page"); + Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one h-entry on the page"); return self::parseAsHFeed($mf2, $http); } // If the first item is an h-feed, parse as a feed $first = $mf2['items'][0]; if(in_array('h-feed', $first['type'])) { - Parse::debug("mf2.3: Recognized $url as an h-feed because the first item is an h-feed"); + Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed"); return self::parseAsHFeed($mf2, $http); } - // Check each top-level h-card, and if there is one that matches this URL, the page is an h-card + // Check each top-level h-card and h-event, and if there is one that matches this URL, the page is an h-card foreach($mf2['items'] as $item) { - if(in_array('h-card', $item['type']) + if((in_array('h-card', $item['type']) or in_array('h-event', $item['type'])) and array_key_exists('url', $item['properties']) ) { $urls = $item['properties']['url']; @@ -60,8 +64,13 @@ class Mf2 { if(in_array($url, $urls)) { // TODO: check for children h-entrys (like tantek.com), or sibling h-entries (like aaronparecki.com) // and return the result as a feed instead - Parse::debug("mf2.4: Recognized $url as an h-card because an h-card on the page matched the URL of the request"); - return self::parseAsHCard($item, $http, $url); + if(in_array('h-card', $item['type'])) { + Parse::debug("mf2:4: 
Recognized $url as an h-card because an h-card on the page matched the URL of the request");
+            return self::parseAsHCard($item, $http, $url);
+          } else {
+            Parse::debug("mf2:4: Recognized $url as an h-event because an h-event on the page matched the URL of the request");
+            return self::parseAsHEvent($mf2, $item, $http, $url);
+          }
         }
       }
     }
@@ -72,7 +81,7 @@ class Mf2 {
       $urls = $lastSeenEntry['properties']['url'];
       $urls = array_map('\normalize_url', $urls);
       if(count($urls) && !in_array($url, $urls)) {
-        Parse::debug("mf2.5: Recognized $url as an h-feed no h-entrys on the page matched the URL of the request");
+        Parse::debug("mf2:5: Recognized $url as an h-feed no h-entrys on the page matched the URL of the request");
         return self::parseAsHFeed($mf2, $http);
       }
     }
@@ -82,12 +91,12 @@ class Mf2 {
     foreach($mf2['items'] as $item) {
       // Otherwise check for an h-entry
       if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
-        Parse::debug("mf2.6: $url is falling back to the first h-entry on the page");
+        Parse::debug("mf2:6: $url is falling back to the first h-entry on the page");
         return self::parseAsHEntry($mf2, $item, $http);
       }
     }
 
-    Parse::debug("mf2.E: No object at $url was recognized");
+    Parse::debug("mf2:E: No object at $url was recognized");
 
     return false;
   }
@@ -199,6 +208,104 @@ class Mf2 {
     return $response;
   }
 
+  /**
+   * Builds the response for an h-event item.
+   *
+   * Copies the single-value plaintext properties, collects photo/video/syndication
+   * as arrays, resolves category/location/attendee (nested microformats are parsed
+   * recursively into "refs", keyed by their URL) and normalises the description
+   * into a plaintext value plus an optional sanitized HTML value.
+   *
+   * @param array $mf2  Parsed document the item came from (not read by this method).
+   * @param array $item The h-event item; its 'properties' array is read.
+   * @param mixed $http HTTP client, only forwarded to self::parse() for nested objects.
+   * @return array ['data' => array], plus a 'refs' key when nested objects were parsed.
+   */
+  private static function parseAsHEvent($mf2, $item, $http) {
+    $data = [
+      'type' => 'event'
+    ];
+    $refs = [];
+
+    // Single plaintext values
+    $properties = ['name','summary','url','published','start','end','duration'];
+    foreach($properties as $p) {
+      if($v = self::getPlaintext($item, $p))
+        $data[$p] = $v;
+    }
+
+    // Always arrays
+    $properties = ['photo','video','syndication'];
+    foreach($properties as $p) {
+      if(array_key_exists($p, $item['properties'])) {
+        $data[$p] = [];
+        foreach($item['properties'][$p] as $v) {
+          if(is_string($v))
+            $data[$p][] = $v;
+          elseif(is_array($v) and array_key_exists('value', $v))
+            $data[$p][] = $v['value'];
+        }
+      }
+    }
+
+    // Always returned as arrays, and may also create external references
+    $properties = ['category','location','attendee'];
+    foreach($properties as $p) {
+      if(array_key_exists($p, $item['properties'])) {
+        $data[$p] = [];
+        foreach($item['properties'][$p] as $v) {
+          if(is_string($v))
+            $data[$p][] = $v;
+          elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url'))) {
+            $data[$p][] = $u;
+            // parse the object and put the result in the "refs" object
+            $ref = self::parse(['items'=>[$v]], $u, $http);
+            if($ref) {
+              $refs[$u] = $ref['data'];
+            }
+          }
+        }
+      }
+    }
+
+    // If there is a description, always return the plaintext description, and return HTML description if it's different
+    $textDescription = null;
+    $htmlDescription = null;
+    if(array_key_exists('description', $item['properties'])) {
+      $description = $item['properties']['description'][0];
+      if(is_string($description)) {
+        $textDescription = $description;
+      } elseif(is_array($description) && array_key_exists('value', $description)) {
+        if(array_key_exists('html', $description)) {
+          $htmlDescription = trim(self::sanitizeHTML($description['html']));
+          // Plaintext is the parser-provided 'value'. NOTE(review): the needle is assumed to be the "&#xD;" entity - confirm.
+          $textDescription = trim(str_replace("&#xD;","\r",$description['value']));
+        } else {
+          $textDescription = trim($description['value']);
+        }
+      }
+    }
+
+    if($textDescription) {
+      $data['description'] = [
+        'text' => $textDescription
+      ];
+      if($htmlDescription && $textDescription != $htmlDescription) {
+        $data['description']['html'] = $htmlDescription;
+      }
+    }
+
+    $response = [
+      'data' => $data
+    ];
+
+    if(count($refs)) {
+      $response['refs'] = $refs;
+    }
+
+    return $response;
+  }
+
   private static function parseAsHFeed($mf2, $http) {
     $data = [
       'type' => 'feed',
@@ -389,7 +496,9 @@ class Mf2 {
     // Override the allowed classes to only support Microformats2 classes
     $def->manager->attrTypes->set('Class', new \HTMLPurifier_AttrDef_HTML_Microformats2());
$purifier = new HTMLPurifier($config); - return $purifier->purify($html); + $sanitized = $purifier->purify($html); + $sanitized = str_replace(" ","\r",$sanitized); + return $sanitized; } private static function responseDisplayText($name, $summary, $content) { diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 4e628ca..2725d2c 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -256,4 +256,57 @@ class ParseTest extends PHPUnit_Framework_TestCase { $this->assertEquals('entry', $data['data']['type']); } + public function testEventWithHTMLDescription() { + $url = 'http://source.example.com/h-event'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('event', $data['data']['type']); + $this->assertEquals('Homebrew Website Club', $data['data']['name']); + $this->assertEquals($url, $data['data']['url']); + $this->assertEquals('2016-03-09T18:30', $data['data']['start']); + $this->assertEquals('2016-03-09T19:30', $data['data']['end']); + $this->assertStringStartsWith("Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...", $data['data']['description']['text']); + $this->assertStringEndsWith("See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.", $data['data']['description']['text']); + $this->assertStringStartsWith("

Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...

", $data['data']['description']['html']); + $this->assertStringEndsWith('

See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.

', $data['data']['description']['html']); + } + + public function testEventWithTextDescription() { + $url = 'http://source.example.com/h-event-text-description'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('event', $data['data']['type']); + $this->assertEquals('Homebrew Website Club', $data['data']['name']); + $this->assertEquals($url, $data['data']['url']); + $this->assertEquals('2016-03-09T18:30', $data['data']['start']); + $this->assertEquals('2016-03-09T19:30', $data['data']['end']); + $this->assertStringStartsWith("Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...", $data['data']['description']['text']); + $this->assertStringEndsWith("See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.", $data['data']['description']['text']); + $this->assertArrayNotHasKey('html', $data['data']['description']); + } + + public function testEventWithHCardLocation() { + $url = 'http://source.example.com/h-event-with-h-card-location'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + $this->assertEquals('event', $data['data']['type']); + $this->assertEquals('Homebrew Website Club', $data['data']['name']); + $this->assertEquals($url, $data['data']['url']); + $this->assertEquals('2016-02-09T18:30', $data['data']['start']); + $this->assertEquals('2016-02-09T19:30', $data['data']['end']); + $this->assertArrayHasKey('http://source.example.com/venue', $data['refs']); + $this->assertEquals('card', 
$data['refs']['http://source.example.com/venue']['type']); + $this->assertEquals('http://source.example.com/venue', $data['refs']['http://source.example.com/venue']['url']); + $this->assertEquals('Venue', $data['refs']['http://source.example.com/venue']['name']); + } + } diff --git a/tests/data/source.example.com/h-event b/tests/data/source.example.com/h-event new file mode 100644 index 0000000..600eead --- /dev/null +++ b/tests/data/source.example.com/h-event @@ -0,0 +1,24 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Homebrew Website Club + + +

Homebrew Website Club

+ + permalink + + - + +
+

Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...

+

See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.

+
+ + + diff --git a/tests/data/source.example.com/h-event-text-description b/tests/data/source.example.com/h-event-text-description new file mode 100644 index 0000000..c271505 --- /dev/null +++ b/tests/data/source.example.com/h-event-text-description @@ -0,0 +1,24 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Homebrew Website Club + + +

Homebrew Website Club

+ + permalink + + - + +
+

Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...

+

See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.

+
+ + + diff --git a/tests/data/source.example.com/h-event-with-h-card-location b/tests/data/source.example.com/h-event-with-h-card-location new file mode 100644 index 0000000..d1ef5df --- /dev/null +++ b/tests/data/source.example.com/h-event-with-h-card-location @@ -0,0 +1,20 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 02:29:12 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Homebrew Website Club + + +

Homebrew Website Club

+ + permalink + + - + + Venue + + \ No newline at end of file