Browse Source

parse h-event

closes #9
pull/39/head
Aaron Parecki 8 years ago
parent
commit
6de9be2567
6 changed files with 230 additions and 12 deletions
  1. +1
    -0
      controllers/Parse.php
  2. +108
    -12
      lib/Formats/Mf2.php
  3. +53
    -0
      tests/ParseTest.php
  4. +24
    -0
      tests/data/source.example.com/h-event
  5. +24
    -0
      tests/data/source.example.com/h-event-text-description
  6. +20
    -0
      tests/data/source.example.com/h-event-with-h-card-location

+ 1
- 0
controllers/Parse.php View File

@ -69,6 +69,7 @@ class Parse {
if($html) { if($html) {
// If HTML is provided in the request, parse that, and use the URL provided as the base URL for mf2 resolving // If HTML is provided in the request, parse that, and use the URL provided as the base URL for mf2 resolving
$result['body'] = $html; $result['body'] = $html;
$result['url'] = $url;
} else { } else {
// Attempt some basic URL validation // Attempt some basic URL validation
$scheme = parse_url($url, PHP_URL_SCHEME); $scheme = parse_url($url, PHP_URL_SCHEME);

+ 108
- 12
lib/Formats/Mf2.php View File

@ -13,9 +13,13 @@ class Mf2 {
if(count($mf2['items']) == 1) { if(count($mf2['items']) == 1) {
$item = $mf2['items'][0]; $item = $mf2['items'][0];
if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
Parse::debug("mf2.0: Recognized $url as an h-entry it is the only item on the page");
Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page");
return self::parseAsHEntry($mf2, $item, $http, $url); return self::parseAsHEntry($mf2, $item, $http, $url);
} }
if(in_array('h-event', $item['type'])) {
Parse::debug("mf2:0: Recognized $url as an h-event it is the only item on the page");
return self::parseAsHEvent($mf2, $item, $http, $url);
}
} }
// Check if the list of items is a bunch of h-entrys and return as a feed // Check if the list of items is a bunch of h-entrys and return as a feed
@ -28,7 +32,7 @@ class Mf2 {
$urls = $item['properties']['url']; $urls = $item['properties']['url'];
$urls = array_map('\normalize_url', $urls); $urls = array_map('\normalize_url', $urls);
if(in_array($url, $urls)) { if(in_array($url, $urls)) {
Parse::debug("mf2.1: Recognized $url as an h-entry because an h-entry on the page matched the URL of the request");
Parse::debug("mf2:1: Recognized $url as an h-entry because an h-entry on the page matched the URL of the request");
return self::parseAsHEntry($mf2, $item, $http, $url); return self::parseAsHEntry($mf2, $item, $http, $url);
} }
$lastSeenEntry = $item; $lastSeenEntry = $item;
@ -39,20 +43,20 @@ class Mf2 {
// If there was more than one h-entry on the page, treat the whole page as a feed // If there was more than one h-entry on the page, treat the whole page as a feed
if($hentrys > 1) { if($hentrys > 1) {
Parse::debug("mf2.2: Recognized $url as an h-feed because there are more than one h-entry on the page");
Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one h-entry on the page");
return self::parseAsHFeed($mf2, $http); return self::parseAsHFeed($mf2, $http);
} }
// If the first item is an h-feed, parse as a feed // If the first item is an h-feed, parse as a feed
$first = $mf2['items'][0]; $first = $mf2['items'][0];
if(in_array('h-feed', $first['type'])) { if(in_array('h-feed', $first['type'])) {
Parse::debug("mf2.3: Recognized $url as an h-feed because the first item is an h-feed");
Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
return self::parseAsHFeed($mf2, $http); return self::parseAsHFeed($mf2, $http);
} }
// Check each top-level h-card, and if there is one that matches this URL, the page is an h-card
// Check each top-level h-card and h-event, and if there is one that matches this URL, the page is treated as that h-card or h-event
foreach($mf2['items'] as $item) { foreach($mf2['items'] as $item) {
if(in_array('h-card', $item['type'])
if((in_array('h-card', $item['type']) or in_array('h-event', $item['type']))
and array_key_exists('url', $item['properties']) and array_key_exists('url', $item['properties'])
) { ) {
$urls = $item['properties']['url']; $urls = $item['properties']['url'];
@ -60,8 +64,13 @@ class Mf2 {
if(in_array($url, $urls)) { if(in_array($url, $urls)) {
// TODO: check for children h-entrys (like tantek.com), or sibling h-entries (like aaronparecki.com) // TODO: check for children h-entrys (like tantek.com), or sibling h-entries (like aaronparecki.com)
// and return the result as a feed instead // and return the result as a feed instead
Parse::debug("mf2.4: Recognized $url as an h-card because an h-card on the page matched the URL of the request");
return self::parseAsHCard($item, $http, $url);
if(in_array('h-card', $item['type'])) {
Parse::debug("mf2:4: Recognized $url as an h-card because an h-card on the page matched the URL of the request");
return self::parseAsHCard($item, $http, $url);
} else {
Parse::debug("mf2:4: Recognized $url as an h-event because an h-event on the page matched the URL of the request");
return self::parseAsHEvent($item, $http, $url);
}
} }
} }
} }
@ -72,7 +81,7 @@ class Mf2 {
$urls = $lastSeenEntry['properties']['url']; $urls = $lastSeenEntry['properties']['url'];
$urls = array_map('\normalize_url', $urls); $urls = array_map('\normalize_url', $urls);
if(count($urls) && !in_array($url, $urls)) { if(count($urls) && !in_array($url, $urls)) {
Parse::debug("mf2.5: Recognized $url as an h-feed no h-entrys on the page matched the URL of the request");
Parse::debug("mf2:5: Recognized $url as an h-feed no h-entrys on the page matched the URL of the request");
return self::parseAsHFeed($mf2, $http); return self::parseAsHFeed($mf2, $http);
} }
} }
@ -82,12 +91,12 @@ class Mf2 {
foreach($mf2['items'] as $item) { foreach($mf2['items'] as $item) {
// Otherwise check for an h-entry // Otherwise check for an h-entry
if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) { if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
Parse::debug("mf2.6: $url is falling back to the first h-entry on the page");
Parse::debug("mf2:6: $url is falling back to the first h-entry on the page");
return self::parseAsHEntry($mf2, $item, $http); return self::parseAsHEntry($mf2, $item, $http);
} }
} }
Parse::debug("mf2.E: No object at $url was recognized");
Parse::debug("mf2:E: No object at $url was recognized");
return false; return false;
} }
@ -199,6 +208,91 @@ class Mf2 {
return $response; return $response;
} }
/**
 * Build the response for a single h-event item.
 *
 * The result always contains a 'data' array with 'type' => 'event'. Other keys
 * are copied from $item['properties'] only when present:
 *  - name, summary, url, published, start, end, duration: one value each, as
 *    returned by self::getPlaintext()
 *  - photo, video, syndication: always lists; string values are kept as-is and
 *    array values contribute their 'value' entry
 *  - category, location, attendee: always lists; a nested microformat with a
 *    url is replaced by that url and its parsed form is stored under 'refs'
 *  - description: ['text' => ...] plus 'html' when a sanitized HTML version
 *    exists and differs from the text
 *
 * NOTE(review): one call site in this file invokes this method as
 * ($item, $http, $url), which does not line up with the parameter order
 * below - confirm and fix at the call site.
 *
 * @param array $mf2  Parsed microformats document; not read by this method.
 * @param array $item The h-event item; only its 'properties' array is read here.
 * @param mixed $http Passed through to self::parse() when resolving nested items.
 * @return array ['data' => array] plus 'refs' => array when nested items were resolved.
 */
private static function parseAsHEvent($mf2, $item, $http) {
  $data = [
    'type' => 'event'
  ];
  $refs = [];

  // Single plaintext values; falsy results from getPlaintext() are omitted
  $properties = ['name','summary','url','published','start','end','duration'];
  foreach($properties as $p) {
    if($v = self::getPlaintext($item, $p))
      $data[$p] = $v;
  }

  // Always arrays
  $properties = ['photo','video','syndication'];
  foreach($properties as $p) {
    if(array_key_exists($p, $item['properties'])) {
      $data[$p] = [];
      foreach($item['properties'][$p] as $v) {
        if(is_string($v))
          $data[$p][] = $v;
        elseif(is_array($v) and array_key_exists('value', $v))
          $data[$p][] = $v['value'];
      }
    }
  }

  // Always returned as arrays, and may also create external references
  $properties = ['category','location','attendee'];
  foreach($properties as $p) {
    if(array_key_exists($p, $item['properties'])) {
      $data[$p] = [];
      foreach($item['properties'][$p] as $v) {
        if(is_string($v))
          $data[$p][] = $v;
        elseif(self::isMicroformat($v) && ($u=self::getPlaintext($v, 'url'))) {
          $data[$p][] = $u;
          // parse the object and put the result in the "refs" object
          $ref = self::parse(['items'=>[$v]], $u, $http);
          if($ref) {
            $refs[$u] = $ref['data'];
          }
        }
        // Non-string values that are not microformats, or have no url, are dropped
      }
    }
  }

  // If there is a description, always return the plaintext description, and return HTML description if it's different
  $textDescription = null;
  $htmlDescription = null;
  // isset() also covers an empty 'description' list, which would otherwise raise an undefined-offset notice
  if(isset($item['properties']['description'][0])) {
    $description = $item['properties']['description'][0];
    if(is_string($description)) {
      $textDescription = $description;
    } elseif(is_array($description) && array_key_exists('value', $description)) {
      if(array_key_exists('html', $description)) {
        $htmlDescription = trim(self::sanitizeHTML($description['html']));
        // The text version comes from the parser-provided 'value', not from stripping the sanitized HTML.
        // NOTE(review): the search string below is reproduced exactly as found (a bare line break);
        // it looks like a decoded carriage-return entity - confirm against the repository copy.
        $textDescription = trim(str_replace("
","\r",$description['value']));
      } else {
        $textDescription = trim($description['value']);
      }
    }
  }

  if($textDescription) {
    $data['description'] = [
      'text' => $textDescription
    ];
    if($htmlDescription && $textDescription != $htmlDescription) {
      $data['description']['html'] = $htmlDescription;
    }
  }

  $response = [
    'data' => $data
  ];

  if(count($refs)) {
    $response['refs'] = $refs;
  }

  return $response;
}
private static function parseAsHFeed($mf2, $http) { private static function parseAsHFeed($mf2, $http) {
$data = [ $data = [
'type' => 'feed', 'type' => 'feed',
@ -389,7 +483,9 @@ class Mf2 {
// Override the allowed classes to only support Microformats2 classes // Override the allowed classes to only support Microformats2 classes
$def->manager->attrTypes->set('Class', new \HTMLPurifier_AttrDef_HTML_Microformats2()); $def->manager->attrTypes->set('Class', new \HTMLPurifier_AttrDef_HTML_Microformats2());
$purifier = new HTMLPurifier($config); $purifier = new HTMLPurifier($config);
return $purifier->purify($html);
$sanitized = $purifier->purify($html);
$sanitized = str_replace("
","\r",$sanitized);
return $sanitized;
} }
private static function responseDisplayText($name, $summary, $content) { private static function responseDisplayText($name, $summary, $content) {

+ 53
- 0
tests/ParseTest.php View File

@ -256,4 +256,57 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertEquals('entry', $data['data']['type']); $this->assertEquals('entry', $data['data']['type']);
} }
/**
 * An h-event whose description is marked up with e-description should expose
 * both a plaintext and an HTML form of the description.
 */
public function testEventWithHTMLDescription() {
  $eventUrl = 'http://source.example.com/h-event';
  $textStart = "Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...";
  $textEnd = "See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.";
  $htmlStart = "<p>Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...</p>";
  $htmlEnd = '<p>See the <a href="http://tantek.com/2013/332/b1/homebrew-website-club-newsletter">Homebrew Website Club Newsletter Volume 1 Issue 1</a> for a description of the first meeting.</p>';

  $response = $this->parse(['url' => $eventUrl]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $decoded = json_decode($json, true);
  $event = $decoded['data'];

  // Core event properties
  $this->assertEquals('event', $event['type']);
  $this->assertEquals('Homebrew Website Club', $event['name']);
  $this->assertEquals($eventUrl, $event['url']);
  $this->assertEquals('2016-03-09T18:30', $event['start']);
  $this->assertEquals('2016-03-09T19:30', $event['end']);

  // Plaintext description has the markup removed
  $this->assertStringStartsWith($textStart, $event['description']['text']);
  $this->assertStringEndsWith($textEnd, $event['description']['text']);

  // HTML description keeps paragraphs and links
  $this->assertStringStartsWith($htmlStart, $event['description']['html']);
  $this->assertStringEndsWith($htmlEnd, $event['description']['html']);
}
/**
 * An h-event whose description is marked up with p-description should expose
 * only a plaintext description and no 'html' key.
 */
public function testEventWithTextDescription() {
  $eventUrl = 'http://source.example.com/h-event-text-description';
  $textStart = "Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...";
  $textEnd = "See the Homebrew Website Club Newsletter Volume 1 Issue 1 for a description of the first meeting.";

  $response = $this->parse(['url' => $eventUrl]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $decoded = json_decode($json, true);
  $event = $decoded['data'];

  // Core event properties
  $this->assertEquals('event', $event['type']);
  $this->assertEquals('Homebrew Website Club', $event['name']);
  $this->assertEquals($eventUrl, $event['url']);
  $this->assertEquals('2016-03-09T18:30', $event['start']);
  $this->assertEquals('2016-03-09T19:30', $event['end']);

  // Only the text form of the description is expected
  $this->assertStringStartsWith($textStart, $event['description']['text']);
  $this->assertStringEndsWith($textEnd, $event['description']['text']);
  $this->assertArrayNotHasKey('html', $event['description']);
}
/**
 * An h-event with an embedded h-card location should report the venue as a
 * parsed card in the top-level 'refs' map, keyed by the venue URL.
 */
public function testEventWithHCardLocation() {
  $eventUrl = 'http://source.example.com/h-event-with-h-card-location';
  $venueUrl = 'http://source.example.com/venue';

  $response = $this->parse(['url' => $eventUrl]);
  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());

  $decoded = json_decode($json, true);
  $event = $decoded['data'];

  // Core event properties
  $this->assertEquals('event', $event['type']);
  $this->assertEquals('Homebrew Website Club', $event['name']);
  $this->assertEquals($eventUrl, $event['url']);
  $this->assertEquals('2016-02-09T18:30', $event['start']);
  $this->assertEquals('2016-02-09T19:30', $event['end']);

  // The venue is only dereferenced once its key is known to exist
  $this->assertArrayHasKey($venueUrl, $decoded['refs']);
  $venue = $decoded['refs'][$venueUrl];
  $this->assertEquals('card', $venue['type']);
  $this->assertEquals($venueUrl, $venue['url']);
  $this->assertEquals('Venue', $venue['name']);
}
} }

+ 24
- 0
tests/data/source.example.com/h-event View File

@ -0,0 +1,24 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Homebrew Website Club</title>
</head>
<body class="h-event">
<h2 class="p-name">Homebrew Website Club</h2>
<a href="/h-event" class="u-url">permalink</a>
<time class="dt-start" datetime="2016-03-09T18:30">March 9, 6:30pm</time> - <time class="dt-end" datetime="2016-03-09T19:30">7:30pm</time>
<div class="e-description">
<p>Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...</p>
<p>See the <a href="http://tantek.com/2013/332/b1/homebrew-website-club-newsletter">Homebrew Website Club Newsletter Volume 1 Issue 1</a> for a description of the first meeting.</p>
</div>
</body>
</html>

+ 24
- 0
tests/data/source.example.com/h-event-text-description View File

@ -0,0 +1,24 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Homebrew Website Club</title>
</head>
<body class="h-event">
<h2 class="p-name">Homebrew Website Club</h2>
<a href="/h-event-text-description" class="u-url">permalink</a>
<time class="dt-start" datetime="2016-03-09T18:30">March 9, 6:30pm</time> - <time class="dt-end" datetime="2016-03-09T19:30">7:30pm</time>
<div class="p-description">
<p>Are you building your own website? Indie reader? Personal publishing web app? Or some other digital magic-cloud proxy? If so, come on by and join a gathering of people with likeminded interests. Bring your friends that want to start a personal web site. Exchange information, swap ideas, talk shop, help work on a project...</p>
<p>See the <a href="http://tantek.com/2013/332/b1/homebrew-website-club-newsletter">Homebrew Website Club Newsletter Volume 1 Issue 1</a> for a description of the first meeting.</p>
</div>
</body>
</html>

+ 20
- 0
tests/data/source.example.com/h-event-with-h-card-location View File

@ -0,0 +1,20 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 02:29:12 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Homebrew Website Club</title>
</head>
<body class="h-event">
<h2 class="p-name">Homebrew Website Club</h2>
<a href="/h-event-with-h-card-location" class="u-url">permalink</a>
<time class="dt-start" datetime="2016-02-09T18:30">Feb 9, 6:30pm</time> - <time class="dt-end" datetime="2016-02-09T19:30">7:30pm</time>
<a href="/venue" class="p-location h-card">Venue</a>
</body>
</html>

Loading…
Cancel
Save