From 5e60e13b5af610c34e8d0aeacf3fabdd781e6720 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 26 Jan 2017 11:04:51 -0800 Subject: [PATCH] add h-recipe closes #24 --- lib/Formats/Mf2.php | 119 ++++++++++++++++++++----- tests/ParseTest.php | 19 +++- tests/data/source.example.com/h-recipe | 40 +++++++++ 3 files changed, 155 insertions(+), 23 deletions(-) create mode 100644 tests/data/source.example.com/h-recipe diff --git a/lib/Formats/Mf2.php b/lib/Formats/Mf2.php index 4d74881..cee28e3 100644 --- a/lib/Formats/Mf2.php +++ b/lib/Formats/Mf2.php @@ -25,6 +25,10 @@ class Mf2 { Parse::debug("mf2:0: Recognized $url as an h-review it is the only item on the page"); return self::parseAsHReview($mf2, $item, $http); } + if(in_array('h-recipe', $item['type'])) { + Parse::debug("mf2:0: Recognized $url as an h-recipe it is the only item on the page"); + return self::parseAsHRecipe($mf2, $item, $http); + } if(in_array('h-product', $item['type'])) { Parse::debug("mf2:0: Recognized $url as an h-product it is the only item on the page"); return self::parseAsHProduct($mf2, $item, $http); @@ -51,6 +55,8 @@ class Mf2 { return self::parseAsHEvent($mf2, $item, $http); } elseif(in_array('h-review', $item['type'])) { return self::parseAsHReview($mf2, $item, $http); + } elseif(in_array('h-recipe', $item['type'])) { + return self::parseAsHRecipe($mf2, $item, $http); } elseif(in_array('h-product', $item['type'])) { return self::parseAsHProduct($mf2, $item, $http); } else { @@ -79,6 +85,8 @@ class Mf2 { return self::parseAsHEvent($mf2, $item, $http); } elseif(in_array('h-review', $item['type'])) { return self::parseAsHReview($mf2, $item, $http); + } elseif(in_array('h-recipe', $item['type'])) { + return self::parseAsHRecipe($mf2, $item, $http); } elseif(in_array('h-product', $item['type'])) { return self::parseAsHProduct($mf2, $item, $http); } @@ -118,6 +126,9 @@ class Mf2 { } elseif(in_array('h-review', $item['type'])) { Parse::debug("mf2:6: $url is falling back to the first h-review 
on the page"); return self::parseAsHReview($mf2, $item, $http); + } elseif(in_array('h-recipe', $item['type'])) { + Parse::debug("mf2:6: $url is falling back to the first h-recipe on the page"); + return self::parseAsHRecipe($mf2, $item, $http); } elseif(in_array('h-product', $item['type'])) { Parse::debug("mf2:6: $url is falling back to the first h-product on the page"); return self::parseAsHProduct($mf2, $item, $http); @@ -143,6 +154,35 @@ class Mf2 { } } + private static function parseHTMLValue($property, $item) { + if(!array_key_exists($property, $item['properties'])) + return null; + + $textContent = false; + $htmlContent = false; + + $content = $item['properties'][$property][0]; + if(is_string($content)) { + $textContent = $content; + } elseif(!is_string($content) && is_array($content) && array_key_exists('value', $content)) { + if(array_key_exists('html', $content)) { + $htmlContent = trim(self::sanitizeHTML($content['html'])); + #$textContent = trim(str_replace(" ","\r",strip_tags($htmlContent))); + $textContent = trim(str_replace(" ","\r",$content['value'])); + } else { + $textContent = trim($content['value']); + } + } + + $data = [ + 'text' => $textContent + ]; + if($htmlContent && $textContent != $htmlContent) { + $data['html'] = $htmlContent; + } + return $data; + } + // Always return arrays, and may contain plaintext content // Nested objects are added to refs and the URL is used as the value if present private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) { @@ -195,23 +235,17 @@ class Mf2 { private static function determineNameAndContent($item, &$data) { // Determine if the name is distinct from the content $name = self::getPlaintext($item, 'name'); - $content = null; + $textContent = null; $htmlContent = null; - if(array_key_exists('content', $item['properties'])) { - $content = $item['properties']['content'][0]; - if(is_string($content)) { - $textContent = $content; - } elseif(!is_string($content) &&
is_array($content) && array_key_exists('value', $content)) { - if(array_key_exists('html', $content)) { - $htmlContent = trim(self::sanitizeHTML($content['html'])); - $textContent = trim(str_replace(" ","\r",strip_tags($htmlContent))); - $textContent = trim(str_replace(" ","\r",$content['value'])); - } else { - $textContent = trim($content['value']); - } - } + $content = self::parseHTMLValue('content', $item); + if($content) { + $htmlContent = array_key_exists('html', $content) ? $content['html'] : null; + $textContent = array_key_exists('text', $content) ? $content['text'] : null; + } + + if($content) { // Trim ellipses from the name $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name); @@ -231,13 +265,9 @@ class Mf2 { // If there is content, always return the plaintext content, and return HTML content if it's different if($content) { - $data['content'] = [ - 'text' => $textContent - ]; - if($htmlContent && $textContent != $htmlContent) { - $data['content']['html'] = $htmlContent; - } - // TODO: If no HTML content was included in the post, create HTML by autolinking? + $data['content']['text'] = $content['text']; + if(array_key_exists('html', $content)) + $data['content']['html'] = $content['html']; } } @@ -279,9 +309,14 @@ class Mf2 { ]; $refs = []; - // TODO: add description as an HTML value self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data); + // Fallback for Mf1 "description" as content. 
The PHP parser does not properly map this to "content" + $description = self::parseHTMLValue('description', $item); + if($description) { + $data['content'] = $description; + } + self::collectArrayValues(['category'], $item, $data, $refs, $http); self::collectArrayURLValues(['item'], $item, $data, $refs, $http); @@ -302,6 +337,37 @@ class Mf2 { return $response; } + private static function parseAsHRecipe($mf2, $item, $http) { + $data = [ + 'type' => 'recipe' + ]; + $refs = []; + + self::collectSingleValues(['name','summary','published','duration','yield','nutrition'], ['url'], $item, $data); + + $instructions = self::parseHTMLValue('instructions', $item); + if($instructions) { + $data['instructions'] = $instructions; + } + + self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http); + + self::collectArrayURLValues(['photo'], $item, $data, $refs, $http); + + if($author = self::findAuthor($mf2, $item, $http)) + $data['author'] = $author; + + $response = [ + 'data' => $data + ]; + + if(count($refs)) { + $response['refs'] = $refs; + } + + return $response; + } + private static function parseAsHProduct($mf2, $item, $http) { $data = [ 'type' => 'product' @@ -309,6 +375,11 @@ class Mf2 { self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data); + $description = self::parseHTMLValue('description', $item); + if($description) { + $data['description'] = $description; + } + self::collectArrayValues(['category','brand'], $item, $data, $refs, $http); self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http); @@ -317,6 +388,10 @@ class Mf2 { 'data' => $data ]; + if(count($refs)) { + $response['refs'] = $refs; + } + return $response; } diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 22acbd3..5c328ea 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -414,7 +414,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { #$this->assertEquals('Not great', $data['data']['summary']); 
$this->assertEquals('3', $data['data']['rating']); $this->assertEquals('5', $data['data']['best']); - #$this->assertEquals('This is the full text of the review', $data['data']['content']['text']); + $this->assertEquals('This is the full text of the review', $data['data']['content']['text']); // $this->assertContains('http://product.example.com/', $data['data']['item']); // $this->assertArrayHasKey('http://product.example.com/', $data['refs']); // $this->assertEquals('product', $data['refs']['http://product.example.com/']['type']); @@ -423,6 +423,23 @@ class ParseTest extends PHPUnit_Framework_TestCase { } + public function testMf2Recipe() { + $url = 'http://source.example.com/h-recipe'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('recipe', $data['data']['type']); + $this->assertEquals('Cookie Recipe', $data['data']['name']); + $this->assertEquals('12 Cookies', $data['data']['yield']); + $this->assertEquals('PT30M', $data['data']['duration']); + $this->assertEquals('The best chocolate chip cookie recipe', $data['data']['summary']); + $this->assertContains('3 cups flour', $data['data']['ingredient']); + $this->assertContains('chocolate chips', $data['data']['ingredient']); + } + public function testEntryIsAnInvitee() { $url = 'http://source.example.com/bridgy-invitee'; $response = $this->parse(['url' => $url]); diff --git a/tests/data/source.example.com/h-recipe b/tests/data/source.example.com/h-recipe new file mode 100644 index 0000000..b957848 --- /dev/null +++ b/tests/data/source.example.com/h-recipe @@ -0,0 +1,40 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + Cookies + + +

Cookie Recipe

+ + permalink + +
12 Cookies
+ + + +
The best chocolate chip cookie recipe
+ +

Ingredients

+ + +

Instructions

+
+ +
+ + +