Browse Source

add h-recipe

closes #24
pull/39/head
Aaron Parecki 4 years ago
parent
commit
5e60e13b5a
No known key found for this signature in database GPG Key ID: 276C2817346D6056
3 changed files with 155 additions and 23 deletions
  1. +97
    -22
      lib/Formats/Mf2.php
  2. +18
    -1
      tests/ParseTest.php
  3. +40
    -0
      tests/data/source.example.com/h-recipe

+ 97
- 22
lib/Formats/Mf2.php View File

@ -25,6 +25,10 @@ class Mf2 {
Parse::debug("mf2:0: Recognized $url as an h-review it is the only item on the page");
return self::parseAsHReview($mf2, $item, $http);
}
if(in_array('h-recipe', $item['type'])) {
Parse::debug("mf2:0: Recognized $url as an h-recipe it is the only item on the page");
return self::parseAsHRecipe($mf2, $item, $http);
}
if(in_array('h-product', $item['type'])) {
Parse::debug("mf2:0: Recognized $url as an h-product it is the only item on the page");
return self::parseAsHProduct($mf2, $item, $http);
@ -51,6 +55,8 @@ class Mf2 {
return self::parseAsHEvent($mf2, $item, $http);
} elseif(in_array('h-review', $item['type'])) {
return self::parseAsHReview($mf2, $item, $http);
} elseif(in_array('h-recipe', $item['type'])) {
return self::parseAsHRecipe($mf2, $item, $http);
} elseif(in_array('h-product', $item['type'])) {
return self::parseAsHProduct($mf2, $item, $http);
} else {
@ -79,6 +85,8 @@ class Mf2 {
return self::parseAsHEvent($mf2, $item, $http);
} elseif(in_array('h-review', $item['type'])) {
return self::parseAsHReview($mf2, $item, $http);
} elseif(in_array('h-recipe', $item['type'])) {
return self::parseAsHRecipe($mf2, $item, $http);
} elseif(in_array('h-product', $item['type'])) {
return self::parseAsHProduct($mf2, $item, $http);
}
@ -118,6 +126,9 @@ class Mf2 {
} elseif(in_array('h-review', $item['type'])) {
Parse::debug("mf2:6: $url is falling back to the first h-review on the page");
return self::parseAsHReview($mf2, $item, $http);
} elseif(in_array('h-recipe', $item['type'])) {
  // Dispatch to the recipe parser (not the review parser) so the result is
  // built as type "recipe" with its ingredients and instructions.
  Parse::debug("mf2:6: $url is falling back to the first h-recipe on the page");
  return self::parseAsHRecipe($mf2, $item, $http);
} elseif(in_array('h-product', $item['type'])) {
Parse::debug("mf2:6: $url is falling back to the first h-product on the page");
return self::parseAsHProduct($mf2, $item, $http);
@ -143,6 +154,35 @@ class Mf2 {
}
}
// Reads the first value of $property from $item['properties'] and returns it
// as plaintext, plus sanitized HTML when the value carries an 'html' key.
//
// Returns null when the property is absent or has no first value.
// Otherwise returns ['text' => ...] and, only when sanitized HTML is non-empty
// and differs from the text, an additional 'html' key.
// 'text' stays false when the first value is neither a string nor an array
// containing a 'value' key.
private static function parseHTMLValue($property, $item) {
  if(!array_key_exists($property, $item['properties']))
    return null;

  // Guard against a property that is present but holds no first value,
  // which would otherwise raise an undefined-offset notice below.
  $values = $item['properties'][$property];
  if(!is_array($values) || !array_key_exists(0, $values))
    return null;

  $textContent = false;
  $htmlContent = false;

  $content = $values[0];
  if(is_string($content)) {
    $textContent = $content;
  } elseif(is_array($content) && array_key_exists('value', $content)) {
    if(array_key_exists('html', $content)) {
      $htmlContent = trim(self::sanitizeHTML($content['html']));
      // The plaintext comes from the parser-provided 'value', not from
      // stripping tags out of the sanitized HTML.
      // NOTE(review): the search string showed up as a raw line break in the
      // rendered diff; it is restored here as the "&#xD;" carriage-return
      // entity - confirm against the repository copy.
      $textContent = trim(str_replace("&#xD;", "\r", $content['value']));
    } else {
      $textContent = trim($content['value']);
    }
  }

  $data = [
    'text' => $textContent
  ];

  // Strict comparison: both sides are strings here, and loose comparison
  // would treat distinct numeric-looking strings as equal.
  if($htmlContent && $textContent !== $htmlContent) {
    $data['html'] = $htmlContent;
  }

  return $data;
}
// Always return arrays, and may contain plaintext content
// Nested objects are added to refs and the URL is used as the value if present
private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
@ -195,23 +235,17 @@ class Mf2 {
private static function determineNameAndContent($item, &$data) {
// Determine if the name is distinct from the content
$name = self::getPlaintext($item, 'name');
$content = null;
$textContent = null;
$htmlContent = null;
if(array_key_exists('content', $item['properties'])) {
$content = $item['properties']['content'][0];
if(is_string($content)) {
$textContent = $content;
} elseif(!is_string($content) && is_array($content) && array_key_exists('value', $content)) {
if(array_key_exists('html', $content)) {
$htmlContent = trim(self::sanitizeHTML($content['html']));
$textContent = trim(str_replace("
","\r",strip_tags($htmlContent)));
$textContent = trim(str_replace("
","\r",$content['value']));
} else {
$textContent = trim($content['value']);
}
}
$content = self::parseHTMLValue('content', $item);
if($content) {
$htmlContent = array_key_exists('html', $content) ? $content['html'] : null;
$textContent = array_key_exists('text', $content) ? $content['text'] : null;
}
if($content) {
// Trim ellipses from the name
$name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);
@ -231,13 +265,9 @@ class Mf2 {
// If there is content, always return the plaintext content, and return HTML content if it's different
if($content) {
$data['content'] = [
'text' => $textContent
];
if($htmlContent && $textContent != $htmlContent) {
$data['content']['html'] = $htmlContent;
}
// TODO: If no HTML content was included in the post, create HTML by autolinking?
$data['content']['text'] = $content['text'];
if(array_key_exists('html', $content))
$data['content']['html'] = $content['html'];
}
}
@ -279,9 +309,14 @@ class Mf2 {
];
$refs = [];
// TODO: add description as an HTML value
self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data);
// Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
$description = self::parseHTMLValue('description', $item);
if($description) {
$data['content'] = $description;
}
self::collectArrayValues(['category'], $item, $data, $refs, $http);
self::collectArrayURLValues(['item'], $item, $data, $refs, $http);
@ -302,6 +337,37 @@ class Mf2 {
return $response;
}
// Builds the response for an h-recipe item.
//
// The result always has a 'data' key whose 'type' is "recipe". Single-valued
// properties, the instructions (text plus optional HTML), category/ingredient
// lists, photo URLs and the author are filled in by the sibling helpers.
// A 'refs' key is included only when the helpers collected at least one ref.
private static function parseAsHRecipe($mf2, $item, $http) {
  $refs = [];
  $data = ['type' => 'recipe'];

  $singleProperties = ['name','summary','published','duration','yield','nutrition'];
  self::collectSingleValues($singleProperties, ['url'], $item, $data);

  // Instructions may carry HTML, so they go through the HTML-aware reader
  $instructions = self::parseHTMLValue('instructions', $item);
  if($instructions) {
    $data['instructions'] = $instructions;
  }

  self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
  self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

  $author = self::findAuthor($mf2, $item, $http);
  if($author) {
    $data['author'] = $author;
  }

  return count($refs)
    ? ['data' => $data, 'refs' => $refs]
    : ['data' => $data];
}
private static function parseAsHProduct($mf2, $item, $http) {
$data = [
'type' => 'product'
@ -309,6 +375,11 @@ class Mf2 {
self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);
$description = self::parseHTMLValue('description', $item);
if($description) {
$data['description'] = $description;
}
self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);
self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);
@ -317,6 +388,10 @@ class Mf2 {
'data' => $data
];
if(count($refs)) {
$response['refs'] = $refs;
}
return $response;
}

+ 18
- 1
tests/ParseTest.php View File

@ -414,7 +414,7 @@ class ParseTest extends PHPUnit_Framework_TestCase {
#$this->assertEquals('Not great', $data['data']['summary']);
$this->assertEquals('3', $data['data']['rating']);
$this->assertEquals('5', $data['data']['best']);
#$this->assertEquals('This is the full text of the review', $data['data']['content']['text']);
$this->assertEquals('This is the full text of the review', $data['data']['content']['text']);
// $this->assertContains('http://product.example.com/', $data['data']['item']);
// $this->assertArrayHasKey('http://product.example.com/', $data['refs']);
// $this->assertEquals('product', $data['refs']['http://product.example.com/']['type']);
@ -423,6 +423,23 @@ class ParseTest extends PHPUnit_Framework_TestCase {
}
// Parses the h-recipe fixture and verifies the scalar recipe fields and that
// the expected ingredients are present in the result.
public function testMf2Recipe() {
  $response = $this->parse(['url' => 'http://source.example.com/h-recipe']);

  $json = $response->getContent();
  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($json, true);
  $recipe = $parsed['data'];

  $expectedFields = [
    'type' => 'recipe',
    'name' => 'Cookie Recipe',
    'yield' => '12 Cookies',
    'duration' => 'PT30M',
    'summary' => 'The best chocolate chip cookie recipe',
  ];
  foreach($expectedFields as $field => $expected) {
    $this->assertEquals($expected, $recipe[$field]);
  }

  foreach(['3 cups flour', 'chocolate chips'] as $ingredient) {
    $this->assertContains($ingredient, $recipe['ingredient']);
  }
}
public function testEntryIsAnInvitee() {
$url = 'http://source.example.com/bridgy-invitee';
$response = $this->parse(['url' => $url]);

+ 40
- 0
tests/data/source.example.com/h-recipe View File

@ -0,0 +1,40 @@
HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: text/html; charset=utf-8
Connection: keep-alive
<html>
<head>
<title>Cookies</title>
</head>
<body class="h-recipe">
<h2 class="p-name">Cookie Recipe</h2>
<a href="/h-recipe" class="u-url">permalink</a>
<div class="p-yield">12 Cookies</div>
<time class="dt-duration" datetime="PT30M">30 Minutes</time>
<div class="p-summary">The best chocolate chip cookie recipe</div>
<h3>Ingredients</h3>
<ul>
<li class="p-ingredient">3 cups flour</li>
<li class="p-ingredient">1/2 cup sugar</li>
<li class="p-ingredient">1 cup butter</li>
<li class="p-ingredient">chocolate chips</li>
</ul>
<h3>Instructions</h3>
<div class="e-instructions">
<ul>
<li>Mix the ingredients</li>
<li>Spoon onto a cookie sheet</li>
<li>Bake until they're done</li>
</ul>
</div>
</body>
</html>

Loading…
Cancel
Save