Browse Source

adds support for parsing checkins

checkin data is returned embedded like author data rather than in the `refs` object

closes #35
pull/49/head
Aaron Parecki 4 years ago
parent
commit
d50231142a
No known key found for this signature in database GPG Key ID: 276C2817346D6056
4 changed files with 120 additions and 4 deletions
  1. +32
    -4
      lib/XRay/Formats/Mf2.php
  2. +36
    -0
      tests/ParseTest.php
  3. +28
    -0
      tests/data/source.example.com/checkin
  4. +24
    -0
      tests/data/source.example.com/checkin-url

+ 32
- 4
lib/XRay/Formats/Mf2.php View File

@ -235,6 +235,31 @@ class Mf2 extends Format {
}
}
/**
 * Builds a simplified "card" array from the first value of an item property.
 *
 * Two input shapes are recognised:
 *  - a plain URL string, which yields ['type' => 'card', 'url' => <the URL>]
 *  - an embedded microformat whose type list contains 'h-card', which yields
 *    ['type' => 'card'] plus whichever of name, latitude, longitude, locality,
 *    region, country and url have a truthy plaintext value.
 *
 * @param string $property Key to look up in $item['properties'] (e.g. 'checkin')
 * @param array  $item     Parsed item; must contain a 'properties' array
 * @param mixed  $http     Accepted by reference; not used by this method
 * @return array|false The card array, or false when the property is absent,
 *                     has no first value, or is neither a URL nor an h-card
 */
private static function parseEmbeddedHCard($property, $item, &$http) {
  if(!array_key_exists($property, $item['properties'])) {
    return false;
  }

  // Guard against an empty or malformed value list so that reading index 0
  // cannot raise an "undefined offset" notice.
  $values = $item['properties'][$property];
  if(!is_array($values) || !isset($values[0])) {
    return false;
  }

  $mf2 = $values[0];

  // The property is just a link to the venue: only the URL is known
  if(is_string($mf2) && self::isURL($mf2)) {
    return [
      'type' => 'card',
      'url' => $mf2
    ];
  }

  // The property is an embedded h-card: copy over the supported properties
  if(self::isMicroformat($mf2) && in_array('h-card', $mf2['type'], true)) {
    $hcard = [
      'type' => 'card',
    ];
    $properties = ['name','latitude','longitude','locality','region','country','url'];
    foreach($properties as $p) {
      // NOTE(review): this is a truthiness check, so a value of "0" is skipped;
      // confirm against getPlaintext() whether that is intended for coordinates.
      if($v = self::getPlaintext($mf2, $p)) {
        $hcard[$p] = $v;
      }
    }
    return $hcard;
  }

  return false;
}
private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
foreach($properties as $p) {
if(array_key_exists($p, $item['properties'])) {
@ -303,7 +328,7 @@ class Mf2 extends Format {
$refs = [];
// Single plaintext and URL values
self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data);
self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data, $http);
// These properties are always returned as arrays and may contain plaintext content
// First strip leading hashtags from category values if present
@ -324,6 +349,9 @@ class Mf2 extends Format {
if($author = self::findAuthor($mf2, $item, $http))
$data['author'] = $author;
if($checkin = self::parseEmbeddedHCard('checkin', $item, $http))
$data['checkin'] = $checkin;
$response = [
'data' => $data
];
@ -341,7 +369,7 @@ class Mf2 extends Format {
];
$refs = [];
self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data);
self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data, $http);
// Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
$description = self::parseHTMLValue('description', $item);
@ -405,7 +433,7 @@ class Mf2 extends Format {
'type' => 'product'
];
self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);
self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data, $http);
$description = self::parseHTMLValue('description', $item);
if($description) {
@ -454,7 +482,7 @@ class Mf2 extends Format {
$refs = [];
// Single plaintext and URL values
self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);
self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data, $http);
// These properties are always returned as arrays and may contain plaintext content
self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

+ 36
- 0
tests/ParseTest.php View File

@ -499,6 +499,42 @@ class ParseTest extends PHPUnit_Framework_TestCase {
$this->assertFalse($data['info']['found_fragment']);
}
/**
 * An h-entry containing an embedded "u-checkin h-card" is reported as an
 * entry whose "checkin" value exposes the venue url, name and latitude,
 * alongside the entry's own content, photo and published date.
 */
public function testCheckin() {
  $response = $this->parse(['url' => 'http://source.example.com/checkin']);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($json, true);
  $entry = $parsed['data'];

  $this->assertEquals('entry', $entry['type']);

  // Venue details come from the embedded h-card
  $checkin = $entry['checkin'];
  $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $checkin['url']);
  $this->assertEquals('DreamHost', $checkin['name']);
  $this->assertEquals('45.518716', $checkin['latitude']);

  // Remaining entry properties
  $this->assertEquals('Homebrew Website Club!', $entry['content']['text']);
  $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $entry['photo'][0]);
  $this->assertEquals('2017-06-07T17:14:40-07:00', $entry['published']);
  $this->assertArrayNotHasKey('name', $entry);
}
/**
 * An h-entry whose "u-checkin" is a bare link (no embedded h-card) is
 * reported as an entry whose "checkin" value carries the linked url,
 * alongside the entry's own content, photo and published date.
 */
public function testCheckinURLOnly() {
  $response = $this->parse(['url' => 'http://source.example.com/checkin-url']);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($json, true);
  $entry = $parsed['data'];

  $this->assertEquals('entry', $entry['type']);

  // Only the URL of the venue is asserted for a link-only checkin
  $checkin = $entry['checkin'];
  $this->assertEquals('https://foursquare.com/v/57104d2e498ece022e169dca', $checkin['url']);

  // Remaining entry properties
  $this->assertEquals('Homebrew Website Club!', $entry['content']['text']);
  $this->assertEquals('https://aaronparecki.com/2017/06/07/12/photo.jpg', $entry['photo'][0]);
  $this->assertEquals('2017-06-07T17:14:40-07:00', $entry['published']);
  $this->assertArrayNotHasKey('name', $entry);
}
public function testXKCD() {
$url = 'http://xkcd.com/1810/';
$response = $this->parse(['url' => $url]);

+ 28
- 0
tests/data/source.example.com/checkin View File

@ -0,0 +1,28 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<div class="u-checkin h-card">
at <a href="https://foursquare.com/v/57104d2e498ece022e169dca" class="u-url p-name">DreamHost</a>
<div style="display:none;">
<span class="p-latitude">45.518716</span>
<span class="p-longitude">-122.679614</span>
</div>
</div>
<p class="e-content p-name">Homebrew Website Club!</p>
<img src="https://aaronparecki.com/2017/06/07/12/photo.jpg" class="u-photo">
<a href="http://source.example.com/checkin" class="u-url">
<time class="dt-published" datetime="2017-06-07T17:14:40-07:00">
Wed, Jun 7, 2017 5:14pm -07:00
</time>
</a>
</body>
</html>

+ 24
- 0
tests/data/source.example.com/checkin-url View File

@ -0,0 +1,24 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Test</title>
</head>
<body class="h-entry">
<a class="u-checkin" href="https://foursquare.com/v/57104d2e498ece022e169dca">
at DreamHost
</a>
<p class="e-content p-name">Homebrew Website Club!</p>
<img src="https://aaronparecki.com/2017/06/07/12/photo.jpg" class="u-photo">
<a href="http://source.example.com/checkin" class="u-url">
<time class="dt-published" datetime="2017-06-07T17:14:40-07:00">
Wed, Jun 7, 2017 5:14pm -07:00
</time>
</a>
</body>
</html>

Loading…
Cancel
Save