You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

725 lines
24 KiB

8 years ago
8 years ago
  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. class Mf2 {
  // Choose the primary object on a parsed microformats2 page and convert it
  // into XRay's simplified format.
  //
  // $mf2  - parsed microformats2 array; must contain an 'items' list, and may contain 'rels'
  // $url  - URL of the page, compared against each item's normalized 'url' values
  // $http - HTTP client, passed through to the per-type parsers
  //
  // Returns the array produced by one of the parseAsH* methods, or false when
  // the page has no items or no recognized object.
  public static function parse($mf2, $url, $http) {
    if(count($mf2['items']) == 0)
      return false;

    // If there is only one item on the page, just use that
    if(count($mf2['items']) == 1) {
      $item = $mf2['items'][0];
      $parsed = self::parseRecognizedItem($mf2, $item, $http);
      if($parsed !== null) {
        return $parsed;
      }
      if(in_array('h-feed', $item['type'])) {
        return self::parseAsHFeed($mf2, $http);
      }
      if(in_array('h-card', $item['type'])) {
        return self::parseAsHCard($item, $http, $url);
      }
    }

    // Check the list of items on the page to see if one matches the URL of the page,
    // and treat as a permalink for that object if so. Otherwise, parse as a feed.
    foreach($mf2['items'] as $item) {
      if(array_key_exists('url', $item['properties'])) {
        $urls = array_map('\p3k\XRay\normalize_url', $item['properties']['url']);
        if(in_array($url, $urls)) {
          // For a permalink match an h-card takes precedence over the post types
          if(in_array('h-card', $item['type'])) {
            return self::parseAsHCard($item, $http, $url);
          }
          $parsed = self::parseRecognizedItem($mf2, $item, $http);
          // The matching object was not a recognized type
          return $parsed !== null ? $parsed : false;
        }
      }
    }

    // Check for an h-card matching rel=author or the author URL of any h-* on the page,
    // and return the h-* object if so
    if(isset($mf2['rels']['author'])) {
      foreach($mf2['items'] as $card) {
        if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) {
          $urls = array_map('\p3k\XRay\normalize_url', $card['properties']['url']);
          if(count(array_intersect($urls, $mf2['rels']['author'])) > 0) {
            // There is an author h-card on this page
            // Now look for the first h-* object other than an h-card and use that as the object
            foreach($mf2['items'] as $item) {
              if(!in_array('h-card', $item['type'])) {
                $parsed = self::parseRecognizedItem($mf2, $item, $http);
                if($parsed !== null) {
                  return $parsed;
                }
              }
            }
          }
        }
      }
    }

    // If there was more than one h-entry on the page, treat the whole page as a feed
    if(count($mf2['items']) > 1) {
      $entries = array_filter($mf2['items'], function($item){
        return in_array('h-entry', $item['type']);
      });
      if(count($entries) > 1) {
        return self::parseAsHFeed($mf2, $http);
      }
    }

    // If the first item is an h-feed, parse as a feed
    $first = $mf2['items'][0];
    if(in_array('h-feed', $first['type'])) {
      return self::parseAsHFeed($mf2, $http);
    }

    // Fallback case, but hopefully we have found something before this point:
    // use the first item of a recognized type. An h-recipe is parsed as a
    // recipe here, like in every other branch.
    foreach($mf2['items'] as $item) {
      $parsed = self::parseRecognizedItem($mf2, $item, $http);
      if($parsed !== null) {
        return $parsed;
      }
    }

    // No object on the page was recognized
    return false;
  }

  // Dispatch a single item to the parser for its post type. Types are checked
  // in a fixed order (h-entry/h-cite, h-event, h-review, h-recipe, h-product)
  // because an item may carry several types. h-card and h-feed are left to the
  // caller, since their precedence differs between call sites.
  //
  // Returns the parser's result array, or null when the item has none of these types.
  private static function parseRecognizedItem($mf2, $item, $http) {
    $types = $item['type'];
    if(in_array('h-entry', $types) || in_array('h-cite', $types)) {
      return self::parseAsHEntry($mf2, $item, $http);
    }
    if(in_array('h-event', $types)) {
      return self::parseAsHEvent($mf2, $item, $http);
    }
    if(in_array('h-review', $types)) {
      return self::parseAsHReview($mf2, $item, $http);
    }
    if(in_array('h-recipe', $types)) {
      return self::parseAsHRecipe($mf2, $item, $http);
    }
    if(in_array('h-product', $types)) {
      return self::parseAsHProduct($mf2, $item, $http);
    }
    return null;
  }
  // Copy the first plaintext value of each listed property into $data.
  //
  // $properties    - property names copied whenever a plaintext value exists
  // $urlProperties - property names copied only when the value passes isURL()
  // $item          - microformats item to read from
  // $data          - result array, modified in place
  private static function collectSingleValues($properties, $urlProperties, $item, &$data) {
    foreach($properties as $name) {
      $value = self::getPlaintext($item, $name);
      if($value === null) {
        continue;
      }
      $data[$name] = $value;
    }

    foreach($urlProperties as $name) {
      $value = self::getPlaintext($item, $name);
      if($value !== null && self::isURL($value)) {
        $data[$name] = $value;
      }
    }
  }
  // Read the first value of $property and return it as ['text' => ...], plus
  // an 'html' key when sanitized HTML exists and differs from the text.
  //
  // Returns null when the item does not have the property at all. When the
  // first value is neither a string nor an array with a 'value' key, 'text'
  // is false.
  private static function parseHTMLValue($property, $item) {
    if(!array_key_exists($property, $item['properties']))
      return null;

    $first = $item['properties'][$property][0];
    $text = false;
    $html = false;

    if(is_string($first)) {
      $text = $first;
    } elseif(is_array($first) && array_key_exists('value', $first)) {
      if(array_key_exists('html', $first)) {
        $html = trim(self::sanitizeHTML($first['html']));
        // The plaintext comes from the parser's 'value', not from stripping the HTML
        $text = trim(str_replace("&#xD;", "\r", $first['value']));
      } else {
        $text = trim($first['value']);
      }
    }

    $result = ['text' => $text];
    if($html && $text != $html) {
      $result['html'] = $html;
    }

    return $result;
  }
  // Collect the listed properties into $data, always as arrays without duplicates.
  // String values are used as-is. A nested microformat contributes its URL when
  // it has a valid one (and is also parsed into $refs under that URL); otherwise
  // its plaintext 'value' is used.
  private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $name) {
      if(!array_key_exists($name, $item['properties'])) {
        continue;
      }

      foreach($item['properties'][$name] as $value) {
        // Set only when the value is a nested object with a usable URL
        $nestedURL = null;

        if(is_string($value)) {
          $entry = $value;
        } elseif(self::isMicroformat($value)) {
          $candidate = self::getPlaintext($value, 'url');
          if($candidate && self::isURL($candidate)) {
            $nestedURL = $candidate;
            $entry = $candidate;
          } else {
            $entry = $value['value'];
          }
        } else {
          // Neither a string nor a microformat: ignored
          continue;
        }

        if(!array_key_exists($name, $data)) {
          $data[$name] = [];
        }
        if(!in_array($entry, $data[$name])) {
          $data[$name][] = $entry;
        }

        if($nestedURL !== null) {
          // Parse the nested object and store the result in the "refs" object
          $ref = self::parse(['items' => [$value]], $nestedURL, $http);
          if($ref) {
            $refs[$nestedURL] = $ref['data'];
          }
        }
      }
    }
  }
  // Collect the listed properties into $data as arrays of URLs. Strings are
  // kept only when they pass isURL(). A nested microformat with a valid URL
  // contributes that URL and is also parsed into $refs under it. Anything
  // else is skipped. Duplicates are not removed.
  private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $name) {
      if(!array_key_exists($name, $item['properties'])) {
        continue;
      }

      foreach($item['properties'][$name] as $value) {
        if(is_string($value)) {
          if(!self::isURL($value)) {
            continue;
          }
          if(!array_key_exists($name, $data)) {
            $data[$name] = [];
          }
          $data[$name][] = $value;
          continue;
        }

        if(!self::isMicroformat($value)) {
          continue;
        }

        $nestedURL = self::getPlaintext($value, 'url');
        if(!$nestedURL || !self::isURL($nestedURL)) {
          continue;
        }

        if(!array_key_exists($name, $data)) {
          $data[$name] = [];
        }
        $data[$name][] = $nestedURL;

        // parse the object and put the result in the "refs" object
        $ref = self::parse(['items' => [$value]], $nestedURL, $http);
        if($ref) {
          $refs[$nestedURL] = $ref['data'];
        }
      }
    }
  }
  // Set $data['name'] and $data['content'] from the item.
  // The name is dropped when, ignoring whitespace and a trailing ellipsis,
  // it is merely a prefix of the plaintext content.
  private static function determineNameAndContent($item, &$data) {
    $name = self::getPlaintext($item, 'name');
    $content = self::parseHTMLValue('content', $item);

    if($content) {
      $plainContent = array_key_exists('text', $content) ? $content['text'] : null;

      // Trim ellipses from the name
      $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);

      // Remove all whitespace when checking equality
      $squashedName = preg_replace('/\s/', '', trim($name));
      $squashedContent = preg_replace('/\s/', '', trim($plainContent));

      // Check if the name is a prefix of the content
      $nameIsPrefix = $squashedContent && $squashedName
        && strpos($squashedContent, $squashedName) === 0;
      if($nameIsPrefix) {
        $name = null;
      }
    }

    if($name) {
      $data['name'] = $name;
    }

    // If there is content, always return the plaintext content, and return HTML content if it's different
    if($content) {
      $data['content']['text'] = $content['text'];
      if(array_key_exists('html', $content)) {
        $data['content']['html'] = $content['html'];
      }
    }
  }
  // Convert an h-entry (or h-cite) item into ['data' => [...]] with type 'entry'.
  // Nested objects referenced by URL are returned under data.refs.
  private static function parseAsHEntry($mf2, $item, $http) {
    $data = ['type' => 'entry'];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data);

    // Strip leading hashtags from plain string category values before collecting them
    if(array_key_exists('category', $item['properties'])) {
      $item['properties']['category'] = array_map(function($category) {
        return is_string($category) ? ltrim($category, '#') : $category;
      }, $item['properties']['category']);
    }

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','invitee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    $urlProperties = ['photo','video','audio','syndication','in-reply-to','like-of','repost-of','bookmark-of'];
    self::collectArrayURLValues($urlProperties, $item, $data, $refs, $http);

    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    if(count($refs) > 0) {
      $data['refs'] = $refs;
    }

    return ['data' => $data];
  }
  // Convert an h-review item into ['data' => [...]] with type 'review'.
  // Nested objects referenced by URL are returned under data.refs.
  private static function parseAsHReview($mf2, $item, $http) {
    $data = ['type' => 'review'];
    $refs = [];

    self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data);

    // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
    $fallbackContent = self::parseHTMLValue('description', $item);
    if($fallbackContent) {
      $data['content'] = $fallbackContent;
    }

    self::collectArrayValues(['category'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['item'], $item, $data, $refs, $http);

    // A real "content" property overrides the description fallback above
    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    if(count($refs) > 0) {
      $data['refs'] = $refs;
    }

    return ['data' => $data];
  }
  // Convert an h-recipe item into ['data' => [...]] with type 'recipe'.
  // Nested objects referenced by URL are returned under data.refs.
  private static function parseAsHRecipe($mf2, $item, $http) {
    $data = ['type' => 'recipe'];
    $refs = [];

    $plainProperties = ['name','summary','published','duration','yield','nutrition'];
    self::collectSingleValues($plainProperties, ['url'], $item, $data);

    // Instructions may carry HTML, so they are returned as text (+ html when different)
    $steps = self::parseHTMLValue('instructions', $item);
    if($steps) {
      $data['instructions'] = $steps;
    }

    self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    if(count($refs) > 0) {
      $data['refs'] = $refs;
    }

    return ['data' => $data];
  }
  // Convert an h-product item into ['data' => [...]] with type 'product'.
  // Nested objects referenced by URL are returned under data.refs.
  private static function parseAsHProduct($mf2, $item, $http) {
    $data = [
      'type' => 'product'
    ];
    // Must start as an array: it is passed by reference below and then counted.
    // Left undefined it would be null whenever no nested object is found, and
    // count(null) is a warning on PHP 7.2+ and a TypeError on PHP 8.
    $refs = [];

    self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);

    $description = self::parseHTMLValue('description', $item);
    if($description) {
      $data['description'] = $description;
    }

    self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['data']['refs'] = $refs;
    }

    return $response;
  }
  // Convert an h-event item into ['data' => [...]] with type 'event'.
  // Nested objects referenced by URL are returned under data.refs.
  private static function parseAsHEvent($mf2, $item, $http) {
    $data = [
      'type' => 'event'
    ];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    self::collectArrayURLValues(['photo','video','audio','syndication'], $item, $data, $refs, $http);

    // If there is a description, always return the plaintext description, and return HTML description if it's different.
    // parseHTMLValue() applies the same extraction rules as the other HTML-valued properties;
    // the description is only included when its plaintext is non-empty.
    $description = self::parseHTMLValue('description', $item);
    if($description && $description['text']) {
      $data['description'] = $description;
    }

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['data']['refs'] = $refs;
    }

    return $response;
  }
  // Placeholder for h-feed parsing: returns a feed with an empty author card,
  // a "todo" notice and no entries. Neither argument is used yet.
  private static function parseAsHFeed($mf2, $http) {
    $emptyAuthor = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    return [
      'data' => [
        'type' => 'feed',
        'author' => $emptyAuthor,
        'todo' => 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1'
      ],
      'entries' => []
    ];
  }
  // Convert an h-card item into ['data' => [...]] with type 'card' and the
  // keys name, url and photo (each null when not found).
  //
  // $item      - the h-card microformats item
  // $http      - HTTP client (not used here)
  // $authorURL - when given, a 'url' value that normalizes to this URL is preferred,
  //              and it is also the fallback when the card has no usable URL
  private static function parseAsHCard($item, $http, $authorURL=false) {
    $data = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    // The card may have no 'url' property at all; treat that as an empty list
    // rather than reading an undefined index.
    $urls = [];
    if(isset($item['properties']['url']) && is_array($item['properties']['url'])) {
      $urls = $item['properties']['url'];
    }

    $properties = ['url','name','photo'];
    foreach($properties as $p) {
      if($p == 'url' && $authorURL) {
        // If there is a matching author URL, use that one
        $found = false;
        foreach($urls as $url) {
          // Values may be nested objects; only plain strings can be URLs here
          if(is_string($url) && self::isURL($url)) {
            $url = \p3k\XRay\normalize_url($url);
            if($url == $authorURL) {
              $data['url'] = $url;
              $found = true;
              break;
            }
          }
        }
        // Otherwise fall back to the first URL listed on the card
        if(!$found && isset($urls[0]) && is_string($urls[0]) && self::isURL($urls[0])) {
          $data['url'] = $urls[0];
        }
      } else if(($v = self::getPlaintext($item, $p)) !== null) {
        // Make sure the URL property is actually a URL
        if($p == 'url' || $p == 'photo') {
          if(self::isURL($v))
            $data[$p] = $v;
        } else {
          $data[$p] = $v;
        }
      }
    }

    // If no URL property was found, use the $authorURL provided
    if(!$data['url'])
      $data['url'] = $authorURL;

    $response = [
      'data' => $data
    ];

    return $response;
  }
  // Author Discovery
  // http://indiewebcamp.com/authorship
  //
  // Returns a card array (type, name, url, photo) for the author of $item,
  // or null when no author information could be determined.
  // The numbered comments refer to the steps of the authorship algorithm.
  private static function findAuthor($mf2, $item, $http) {
    $author = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $authorPage = false;

    if(array_key_exists('author', $item['properties'])) {
      // Check if any of the values of the author property are an h-card
      foreach($item['properties']['author'] as $a) {
        // 5.1 "if it has an h-card, use it, exit."
        if(self::isHCard($a)) {
          return self::parseAsHCard($a, $http)['data'];
        }

        // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL"
        if(is_string($a) && self::isURL($a)) {
          $authorPage = $a;
          continue;
        }

        // 5.3 "otherwise use the author property as the author name, exit"
        // Reached for a non-URL string, and for an mf2 object that is not an h-card.
        // We can only set the name, no h-card or URL was found
        $author['name'] = self::getPlaintext($item, 'author');
        return $author;
      }
    }

    // 6. "if no author page was found" ... check for rel-author link
    if(!$authorPage && isset($mf2['rels']) && isset($mf2['rels']['author'])) {
      $authorPage = $mf2['rels']['author'][0];
    }

    // 7. "if there is an author-page URL" ...
    if($authorPage) {
      // 7.1 "get the author-page from that URL and parse it for microformats2"
      $authorPageContents = self::getURL($authorPage, $http);

      if($authorPageContents) {
        $hasRelMe = isset($authorPageContents['rels']) && isset($authorPageContents['rels']['me']);
        $relMeLinks = $hasRelMe ? $authorPageContents['rels']['me'] : [];

        foreach($authorPageContents['items'] as $i) {
          if(!self::isHCard($i)) {
            continue;
          }

          $hasURL = array_key_exists('url', $i['properties']);

          // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
          if($hasURL
            && in_array($authorPage, $i['properties']['url'])
            && array_key_exists('uid', $i['properties'])
            && in_array($authorPage, $i['properties']['uid'])
          ) {
            return self::parseAsHCard($i, $http, $authorPage)['data'];
          }

          // 7.3 "else if author-page has 1+ h-card with url property which matches the href of a rel-me link on the author-page"
          if($hasURL
            && count($relMeLinks) > 0
            && count(array_intersect($i['properties']['url'], $relMeLinks)) > 0
          ) {
            return self::parseAsHCard($i, $http, $authorPage)['data'];
          }
        }
      }

      // 7.4 "if the h-entry's page has 1+ h-card with url == author-page URL, use first such h-card, exit."
      foreach($mf2['items'] as $i) {
        if(!self::isHCard($i)) {
          continue;
        }
        if(array_key_exists('url', $i['properties']) && in_array($authorPage, $i['properties']['url'])) {
          return self::parseAsHCard($i, $http)['data'];
        }
      }
    }

    // Nothing was filled in: report "no author" rather than an empty card
    if($author['name'] || $author['photo'] || $author['url']) {
      return $author;
    }

    return null;
  }
  // Run $html through HTMLPurifier, keeping only the elements listed below,
  // and return the purified markup with "&#xD;" turned back into carriage returns.
  private static function sanitizeHTML($html) {
    $allowedElements = [
      'a', 'abbr', 'b', 'code', 'del', 'em', 'i', 'img', 'q', 'strike', 'strong', 'time',
      'blockquote', 'pre', 'p',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'ul', 'li', 'ol'
    ];

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Cache.DefinitionImpl', null);
    $config->set('HTML.AllowedElements', $allowedElements);

    $definition = $config->getHTMLDefinition(true);

    // Register <time> with its datetime attribute
    $definition->addElement('time', 'Inline', 'Inline', 'Common', ['datetime' => 'Text']);

    // Override the allowed classes to only support Microformats2 classes
    $definition->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

    $purifier = new HTMLPurifier($config);

    return str_replace("&#xD;", "\r", $purifier->purify($html));
  }
  // True when at least one key of $arr is numeric (as judged by is_numeric).
  private static function hasNumericKeys(array $arr) {
    $numericKeys = array_filter(array_keys($arr), 'is_numeric');
    return count($numericKeys) > 0;
  }
  // True when $mf looks like a microformats object: an array without numeric
  // keys that has a non-empty 'type' and a 'properties' entry.
  private static function isMicroformat($mf) {
    if(!is_array($mf) || self::hasNumericKeys($mf)) {
      return false;
    }
    return !empty($mf['type']) && isset($mf['properties']);
  }
  // True when $mf is an array whose 'type' list contains 'h-card'.
  private static function isHCard($mf) {
    if(!is_array($mf) || empty($mf['type']) || !is_array($mf['type'])) {
      return false;
    }
    return in_array('h-card', $mf['type']);
  }
  // True when $string is a string that looks like an http(s) URL
  // (scheme, then something containing a dot).
  // Non-string input, such as a nested microformat array, is never a URL;
  // checking the type first keeps preg_match() from being handed an array or null.
  private static function isURL($string) {
    return is_string($string) && preg_match('/^https?:\/\/.+\..+$/', $string) === 1;
  }
  // Given a microformats item and a property name, return the plaintext of the
  // first value of that property, or $fallback when there is none.
  // e.g.
  // {"properties":{"published":["foo"]}} results in "foo"
  // A nested microformat value yields its 'value' string.
  private static function getPlaintext($mf2, $k, $fallback=null) {
    if(empty($mf2['properties'][$k]) || !is_array($mf2['properties'][$k])) {
      return $fallback;
    }

    // Only the first value is considered
    $first = $mf2['properties'][$k][0];

    if(is_string($first)) {
      return $first;
    }

    if(self::isMicroformat($first) && array_key_exists('value', $first)) {
      return $first['value'];
    }

    return $fallback;
  }
  // Fetch $url with the given HTTP client and parse the body for microformats.
  // Returns null when no URL is given, the client reports an error, or the body is empty.
  private static function getURL($url, $http) {
    if(!$url) {
      return null;
    }

    // TODO: consider adding caching here
    $response = $http->get($url);
    $failed = $response['error'] || !$response['body'];

    return $failed ? null : \mf2\Parse($response['body'], $url);
  }
  622. }