You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

747 lines
25 KiB

8 years ago
8 years ago
8 years ago
  1. <?php
  2. namespace XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. use Parse;
  /**
   * Turns the array produced by a Microformats2 parser into XRay's simplified
   * response structure.
   *
   * The only public entry point is parse(). It decides which object on the page
   * is the "primary" one (entry, event, review, recipe, product, feed or card)
   * and delegates to the matching parseAsH*() method. Every parseAsH*() method
   * returns an array with a 'data' key and, when nested objects were found, a
   * 'refs' key mapping their URLs to parsed data.
   */
  class Mf2 {

    /**
     * Picks the primary object in a parsed mf2 document and converts it.
     *
     * Resolution order:
     *   0. a page with exactly one item uses that item
     *   1. an item whose "url" property equals $url is treated as the permalink object
     *   2. if an h-card on the page matches rel=author, the first non-card object is used
     *   3. more than one h-entry means the page is a feed
     *   4. a leading h-feed means the page is a feed
     *   5. otherwise the first recognized object on the page is used
     *
     * @param array  $mf2  Parsed document; expected to have an 'items' list and optionally 'rels'
     * @param string $url  URL the document was retrieved from
     * @param mixed  $http HTTP client, only used (via get()) when an author page has to be fetched
     * @return array|false Parsed response, or false when nothing on the page was recognized
     */
    public static function parse($mf2, $url, $http) {
      // A document without a usable item list cannot contain anything to parse
      if(!is_array($mf2) || empty($mf2['items']) || !is_array($mf2['items']))
        return false;

      $items = array_values($mf2['items']);

      // If there is only one item on the page, just use that
      if(count($items) == 1) {
        $item = $items[0];
        $type = self::recognizedObjectType($item);
        if($type) {
          Parse::debug("mf2:0: Recognized $url as an $type it is the only item on the page");
          return self::parseObjectAs($type, $mf2, $item, $http);
        }
        if(self::hasType($item, 'h-feed')) {
          Parse::debug("mf2:0: Recognized $url as an h-feed because it is the only item on the page");
          return self::parseAsHFeed($mf2, $http);
        }
        if(self::hasType($item, 'h-card')) {
          Parse::debug("mf2:0: Recognized $url as an h-card it is the only item on the page");
          return self::parseAsHCard($item, $http, $url);
        }
      }

      // Check the list of items on the page to see if one matches the URL of the page,
      // and treat as a permalink for that object if so.
      foreach($items as $item) {
        if(!isset($item['properties']['url']))
          continue;

        $urls = self::normalizeURLs($item['properties']['url']);
        if(!in_array($url, $urls, true))
          continue;

        Parse::debug("mf2:1: Recognized $url as a permalink because an object on the page matched the URL of the request");
        // An h-card takes priority here when the item carries several types
        if(self::hasType($item, 'h-card'))
          return self::parseAsHCard($item, $http, $url);

        $type = self::recognizedObjectType($item);
        if($type)
          return self::parseObjectAs($type, $mf2, $item, $http);

        Parse::debug('This object was not a recognized type.');
        return false;
      }

      // Check for an h-card matching rel=author, and if there is one, return the
      // first recognized non-card object on the page
      if(isset($mf2['rels']['author'])) {
        $relAuthors = (array)$mf2['rels']['author'];
        foreach($items as $card) {
          if(!self::hasType($card, 'h-card') || !isset($card['properties']['url']))
            continue;

          $urls = self::normalizeURLs($card['properties']['url']);
          if(count(array_intersect($urls, $relAuthors)) == 0)
            continue;

          // There is an author h-card on this page
          foreach($items as $item) {
            if(self::hasType($item, 'h-card'))
              continue;
            $type = self::recognizedObjectType($item);
            if($type)
              return self::parseObjectAs($type, $mf2, $item, $http);
          }
        }
      }

      // If there was more than one h-entry on the page, treat the whole page as a feed
      $entryCount = 0;
      foreach($items as $item) {
        if(self::hasType($item, 'h-entry'))
          $entryCount++;
      }
      if($entryCount > 1) {
        Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page");
        return self::parseAsHFeed($mf2, $http);
      }

      // If the first item is an h-feed, parse as a feed
      if(self::hasType($items[0], 'h-feed')) {
        Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
        return self::parseAsHFeed($mf2, $http);
      }

      // Fallback case, but hopefully we have found something before this point
      foreach($items as $item) {
        $type = self::recognizedObjectType($item);
        if($type) {
          Parse::debug("mf2:6: $url is falling back to the first $type on the page");
          return self::parseObjectAs($type, $mf2, $item, $http);
        }
      }

      Parse::debug("mf2:E: No object at $url was recognized");
      return false;
    }

    /**
     * Tells whether $item lists $type in its 'type' array.
     *
     * @param mixed  $item
     * @param string $type e.g. 'h-entry'
     * @return bool
     */
    private static function hasType($item, $type) {
      return isset($item['type'])
        and is_array($item['type'])
        and in_array($type, $item['type'], true);
    }

    /**
     * Returns which of the standalone object types $item should be parsed as.
     *
     * The checks run in a fixed priority order so that an item carrying several
     * types always resolves the same way. h-cite is handled by the h-entry
     * parser and is therefore reported as 'h-entry'.
     *
     * @param mixed $item
     * @return string|null 'h-entry', 'h-event', 'h-review', 'h-recipe', 'h-product' or null
     */
    private static function recognizedObjectType($item) {
      if(self::hasType($item, 'h-entry') || self::hasType($item, 'h-cite'))
        return 'h-entry';

      foreach(['h-event', 'h-review', 'h-recipe', 'h-product'] as $type) {
        if(self::hasType($item, $type))
          return $type;
      }

      return null;
    }

    /**
     * Runs the parser that belongs to a type returned by recognizedObjectType().
     *
     * @param string $type
     * @param array  $mf2  The whole parsed document (needed for author discovery)
     * @param array  $item The item to convert
     * @param mixed  $http
     * @return array|false
     */
    private static function parseObjectAs($type, $mf2, $item, $http) {
      switch($type) {
        case 'h-entry':
          return self::parseAsHEntry($mf2, $item, $http);
        case 'h-event':
          return self::parseAsHEvent($mf2, $item, $http);
        case 'h-review':
          return self::parseAsHReview($mf2, $item, $http);
        case 'h-recipe':
          return self::parseAsHRecipe($mf2, $item, $http);
        case 'h-product':
          return self::parseAsHProduct($mf2, $item, $http);
      }
      return false;
    }

    /**
     * Wraps $data in the response envelope, adding 'refs' only when there are any.
     *
     * @param array $data
     * @param array $refs
     * @return array
     */
    private static function buildResponse($data, $refs) {
      $response = [
        'data' => $data
      ];
      if(is_array($refs) && count($refs)) {
        $response['refs'] = $refs;
      }
      return $response;
    }

    /**
     * Copies the first plaintext value of each listed property into $data.
     *
     * @param array $properties    Property names copied as-is
     * @param array $urlProperties Property names copied only when the value passes isURL()
     * @param array $item
     * @param array $data          Output array, modified in place
     */
    private static function collectSingleValues($properties, $urlProperties, $item, &$data) {
      foreach($properties as $p) {
        $v = self::getPlaintext($item, $p);
        if($v !== null) {
          $data[$p] = $v;
        }
      }

      foreach($urlProperties as $p) {
        $v = self::getPlaintext($item, $p);
        if($v !== null && self::isURL($v)) {
          $data[$p] = $v;
        }
      }
    }

    /**
     * Reads the first value of $property as text plus optional sanitized HTML.
     *
     * @param string $property
     * @param array  $item
     * @return array|null ['text' => ..., 'html' => ...]; 'html' is present only when the
     *                    sanitized markup differs from the text. Null when the property
     *                    is missing or has no values.
     */
    private static function parseHTMLValue($property, $item) {
      if(!isset($item['properties'][$property]) || !is_array($item['properties'][$property]))
        return null;

      $values = $item['properties'][$property];
      if(count($values) == 0)
        return null;

      $content = reset($values);
      $textContent = false;
      $htmlContent = false;

      if(is_string($content)) {
        $textContent = $content;
      } elseif(is_array($content) && array_key_exists('value', $content)) {
        if(array_key_exists('html', $content)) {
          $htmlContent = trim(self::sanitizeHTML($content['html']));
          // The plaintext comes from the parser's own "value", not from stripping the HTML
          $textContent = trim(str_replace("&#xD;","\r",$content['value']));
        } else {
          $textContent = trim($content['value']);
        }
      }

      $data = [
        'text' => $textContent
      ];

      if($htmlContent && $textContent !== $htmlContent) {
        $data['html'] = $htmlContent;
      }

      return $data;
    }

    /**
     * Collects properties that are always returned as arrays and may hold plaintext.
     *
     * String values are added as-is. A nested microformat with a valid URL
     * contributes that URL and is itself parsed into $refs; a nested microformat
     * without one contributes its 'value'. Duplicates are dropped.
     *
     * @param array $properties
     * @param array $item
     * @param array $data Output array, modified in place
     * @param array $refs Map of URL => parsed nested object, modified in place
     * @param mixed $http
     */
    private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
      foreach($properties as $p) {
        if(!isset($item['properties'][$p]) || !is_array($item['properties'][$p]))
          continue;

        foreach($item['properties'][$p] as $v) {
          if(is_string($v)) {
            $value = $v;
          } elseif(self::isMicroformat($v)) {
            $u = self::getPlaintext($v, 'url');
            if($u && self::isURL($u)) {
              $value = $u;
              // Parse the nested object on its own and expose it through "refs"
              $ref = self::parse(['items'=>[$v]], $u, $http);
              if($ref) {
                $refs[$u] = $ref['data'];
              }
            } elseif(isset($v['value'])) {
              $value = $v['value'];
            } else {
              continue;
            }
          } else {
            continue;
          }

          if(!array_key_exists($p, $data)) $data[$p] = [];
          if(!in_array($value, $data[$p], true))
            $data[$p][] = $value;
        }
      }
    }

    /**
     * Collects properties that are always returned as arrays of URLs.
     *
     * Only values passing isURL() are kept. A nested microformat with a valid
     * URL contributes that URL and is itself parsed into $refs.
     *
     * @param array $properties
     * @param array $item
     * @param array $data Output array, modified in place
     * @param array $refs Map of URL => parsed nested object, modified in place
     * @param mixed $http
     */
    private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
      foreach($properties as $p) {
        if(!isset($item['properties'][$p]) || !is_array($item['properties'][$p]))
          continue;

        foreach($item['properties'][$p] as $v) {
          if(is_string($v)) {
            if(self::isURL($v)) {
              if(!array_key_exists($p, $data)) $data[$p] = [];
              $data[$p][] = $v;
            }
            continue;
          }

          if(!self::isMicroformat($v))
            continue;

          $u = self::getPlaintext($v, 'url');
          if($u && self::isURL($u)) {
            if(!array_key_exists($p, $data)) $data[$p] = [];
            $data[$p][] = $u;
            // parse the object and put the result in the "refs" object
            $ref = self::parse(['items'=>[$v]], $u, $http);
            if($ref) {
              $refs[$u] = $ref['data'];
            }
          }
        }
      }
    }

    /**
     * Sets $data['name'] and $data['content'] from the item.
     *
     * The name is dropped when, ignoring whitespace and a trailing ellipsis, it
     * is a prefix of the plaintext content.
     *
     * @param array $item
     * @param array $data Output array, modified in place
     */
    private static function determineNameAndContent($item, &$data) {
      $name = self::getPlaintext($item, 'name');
      $content = self::parseHTMLValue('content', $item);

      if($content && is_string($name)) {
        // Trim ellipses from the name
        $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);

        // Remove all whitespace when checking equality
        $nameCompare = preg_replace('/\s/','',trim($name));
        $contentCompare = preg_replace('/\s/','',trim((string)$content['text']));

        // Check if the name is a prefix of the content
        if($contentCompare && $nameCompare && strpos($contentCompare, $nameCompare) === 0) {
          $name = null;
        }
      }

      if($name) {
        $data['name'] = $name;
      }

      // If there is content, always return the plaintext content, and return HTML content if it's different
      if($content) {
        $data['content']['text'] = $content['text'];
        if(array_key_exists('html', $content))
          $data['content']['html'] = $content['html'];
      }
    }

    /**
     * Converts an h-entry (or h-cite) item.
     *
     * @param array $mf2  The whole parsed document
     * @param array $item
     * @param mixed $http
     * @return array
     */
    private static function parseAsHEntry($mf2, $item, $http) {
      $data = [
        'type' => 'entry'
      ];
      $refs = [];

      // Single plaintext and URL values
      self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data);

      // Strip leading hashtags from category values before collecting them
      if(isset($item['properties']['category']) && is_array($item['properties']['category'])) {
        foreach($item['properties']['category'] as $i=>$c) {
          if(is_string($c))
            $item['properties']['category'][$i] = ltrim($c, '#');
        }
      }

      // These properties are always returned as arrays and may contain plaintext content
      self::collectArrayValues(['category','invitee'], $item, $data, $refs, $http);

      // These properties are always returned as arrays and always URLs
      // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
      self::collectArrayURLValues(['photo','video','audio','syndication','in-reply-to','like-of','repost-of','bookmark-of'], $item, $data, $refs, $http);

      self::determineNameAndContent($item, $data);

      $author = self::findAuthor($mf2, $item, $http);
      if($author)
        $data['author'] = $author;

      return self::buildResponse($data, $refs);
    }

    /**
     * Converts an h-review item.
     *
     * @param array $mf2  The whole parsed document
     * @param array $item
     * @param mixed $http
     * @return array
     */
    private static function parseAsHReview($mf2, $item, $http) {
      $data = [
        'type' => 'review'
      ];
      $refs = [];

      self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data);

      // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
      $description = self::parseHTMLValue('description', $item);
      if($description) {
        $data['content'] = $description;
      }

      self::collectArrayValues(['category'], $item, $data, $refs, $http);
      self::collectArrayURLValues(['item'], $item, $data, $refs, $http);

      self::determineNameAndContent($item, $data);

      $author = self::findAuthor($mf2, $item, $http);
      if($author)
        $data['author'] = $author;

      return self::buildResponse($data, $refs);
    }

    /**
     * Converts an h-recipe item.
     *
     * @param array $mf2  The whole parsed document
     * @param array $item
     * @param mixed $http
     * @return array
     */
    private static function parseAsHRecipe($mf2, $item, $http) {
      $data = [
        'type' => 'recipe'
      ];
      $refs = [];

      self::collectSingleValues(['name','summary','published','duration','yield','nutrition'], ['url'], $item, $data);

      $instructions = self::parseHTMLValue('instructions', $item);
      if($instructions) {
        $data['instructions'] = $instructions;
      }

      self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
      self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

      $author = self::findAuthor($mf2, $item, $http);
      if($author)
        $data['author'] = $author;

      return self::buildResponse($data, $refs);
    }

    /**
     * Converts an h-product item.
     *
     * @param array $mf2  The whole parsed document (unused by this parser)
     * @param array $item
     * @param mixed $http
     * @return array
     */
    private static function parseAsHProduct($mf2, $item, $http) {
      $data = [
        'type' => 'product'
      ];
      // Must start out as an array: it is counted below even when nothing was collected
      $refs = [];

      self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);

      $description = self::parseHTMLValue('description', $item);
      if($description) {
        $data['description'] = $description;
      }

      self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);
      self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);

      return self::buildResponse($data, $refs);
    }

    /**
     * Converts an h-event item.
     *
     * @param array $mf2  The whole parsed document (unused by this parser)
     * @param array $item
     * @param mixed $http
     * @return array
     */
    private static function parseAsHEvent($mf2, $item, $http) {
      $data = [
        'type' => 'event'
      ];
      $refs = [];

      // Single plaintext and URL values
      self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);

      // These properties are always returned as arrays and may contain plaintext content
      self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

      // These properties are always returned as arrays and always URLs
      // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
      self::collectArrayURLValues(['photo','video','audio','syndication'], $item, $data, $refs, $http);

      // If there is a description, always return the plaintext description, and return HTML description if it's different.
      // An empty plaintext description is left out entirely.
      $description = self::parseHTMLValue('description', $item);
      if($description && $description['text']) {
        $data['description'] = $description;
      }

      return self::buildResponse($data, $refs);
    }

    /**
     * Returns the placeholder response for a feed; feed parsing is not implemented.
     *
     * @param array $mf2
     * @param mixed $http
     * @return array
     */
    private static function parseAsHFeed($mf2, $http) {
      $data = [
        'type' => 'feed',
        'author' => [
          'type' => 'card',
          'name' => null,
          'url' => null,
          'photo' => null
        ],
        'todo' => 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1'
      ];

      return [
        'data' => $data,
        'entries' => []
      ];
    }

    /**
     * Converts an h-card item into a card with 'name', 'url' and 'photo'.
     *
     * @param array        $item
     * @param mixed        $http
     * @param string|false $authorURL When given, the card URL that normalizes to this value is
     *                                preferred; it is also used as the URL when the card has none
     * @return array ['data' => card]
     */
    private static function parseAsHCard($item, $http, $authorURL=false) {
      $data = [
        'type' => 'card',
        'name' => null,
        'url' => null,
        'photo' => null
      ];

      if($authorURL) {
        // The card may not have a url property at all
        $urls = (isset($item['properties']['url']) && is_array($item['properties']['url']))
          ? array_values($item['properties']['url'])
          : [];

        // If there is a matching author URL, use that one
        foreach($urls as $candidate) {
          if(!self::isURL($candidate))
            continue;
          $candidate = self::normalize_url($candidate);
          if($candidate == $authorURL) {
            $data['url'] = $candidate;
            break;
          }
        }

        // Otherwise fall back to the first URL listed on the card
        if($data['url'] === null && count($urls) > 0 && self::isURL($urls[0])) {
          $data['url'] = $urls[0];
        }
      } else {
        $v = self::getPlaintext($item, 'url');
        if($v !== null && self::isURL($v))
          $data['url'] = $v;
      }

      $name = self::getPlaintext($item, 'name');
      if($name !== null)
        $data['name'] = $name;

      // Make sure the photo property is actually a URL
      $photo = self::getPlaintext($item, 'photo');
      if($photo !== null && self::isURL($photo))
        $data['photo'] = $photo;

      // If no URL property was found, use the $authorURL provided
      if(!$data['url'])
        $data['url'] = $authorURL;

      return [
        'data' => $data
      ];
    }

    /**
     * Finds the author of $item following the numbered steps of the authorship
     * algorithm referenced below.
     *
     * May perform an HTTP request through $http to fetch the author page.
     *
     * @param array $mf2  The whole parsed document the item came from
     * @param array $item
     * @param mixed $http
     * @return array|null Card data, or null when no author information was found
     */
    private static function findAuthor($mf2, $item, $http) {
      $author = [
        'type' => 'card',
        'name' => null,
        'url' => null,
        'photo' => null
      ];

      // Author Discovery
      // http://indiewebcamp.com/authorship

      $authorPage = false;
      if(isset($item['properties']['author']) && is_array($item['properties']['author'])) {
        // Check if any of the values of the author property are an h-card
        foreach($item['properties']['author'] as $a) {
          if(self::isHCard($a)) {
            // 5.1 "if it has an h-card, use it, exit."
            return self::parseAsHCard($a, $http)['data'];
          }

          if(is_string($a) && self::isURL($a)) {
            // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL"
            $authorPage = $a;
            continue;
          }

          // 5.3 "otherwise use the author property as the author name, exit"
          // Reached for a plain-text author and for an mf2 object that is not an h-card;
          // in both cases only the name can be set.
          $author['name'] = self::getPlaintext($item, 'author');
          return $author;
        }
      }

      // 6. "if no author page was found" ... check for rel-author link
      if(!$authorPage
        and isset($mf2['rels']['author'])
        and is_array($mf2['rels']['author'])
        and isset($mf2['rels']['author'][0])
      ) {
        $authorPage = $mf2['rels']['author'][0];
      }

      // 7. "if there is an author-page URL" ...
      if($authorPage) {

        // 7.1 "get the author-page from that URL and parse it for microformats2"
        $authorPageContents = self::getURL($authorPage, $http);

        if($authorPageContents && isset($authorPageContents['items']) && is_array($authorPageContents['items'])) {
          $relMeLinks = (isset($authorPageContents['rels']['me']) && is_array($authorPageContents['rels']['me']))
            ? $authorPageContents['rels']['me']
            : [];

          foreach($authorPageContents['items'] as $i) {
            if(!self::isHCard($i))
              continue;

            $cardURLs = (isset($i['properties']['url']) && is_array($i['properties']['url']))
              ? self::stringValues($i['properties']['url'])
              : null;

            // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
            if($cardURLs !== null
              and in_array($authorPage, $cardURLs, true)
              and isset($i['properties']['uid'])
              and is_array($i['properties']['uid'])
              and in_array($authorPage, $i['properties']['uid'], true)
            ) {
              return self::parseAsHCard($i, $http, $authorPage)['data'];
            }

            // 7.3 "else if author-page has 1+ h-card with url property which matches the href of a rel-me link on the author-page"
            if(count($relMeLinks) > 0
              and $cardURLs !== null
              and count(array_intersect($cardURLs, $relMeLinks)) > 0
            ) {
              return self::parseAsHCard($i, $http, $authorPage)['data'];
            }
          }
        }

        // 7.4 "if the h-entry's page has 1+ h-card with url == author-page URL, use first such h-card, exit."
        $pageItems = (isset($mf2['items']) && is_array($mf2['items'])) ? $mf2['items'] : [];
        foreach($pageItems as $i) {
          if(self::isHCard($i)
            and isset($i['properties']['url'])
            and is_array($i['properties']['url'])
            and in_array($authorPage, $i['properties']['url'], true)
          ) {
            return self::parseAsHCard($i, $http)['data'];
          }
        }
      }

      if(!$author['name'] && !$author['photo'] && !$author['url'])
        return null;

      return $author;
    }

    /**
     * Runs $html through HTMLPurifier with a fixed whitelist of elements.
     *
     * A "time" element with a "datetime" attribute is registered, and the class
     * attribute is validated by HTMLPurifier_AttrDef_HTML_Microformats2.
     *
     * @param string $html
     * @return string Purified HTML with "&#xD;" replaced by a carriage return
     */
    private static function sanitizeHTML($html) {
      $config = HTMLPurifier_Config::createDefault();
      $config->set('Cache.DefinitionImpl', null);
      $config->set('HTML.AllowedElements', [
        'a',
        'abbr',
        'b',
        'code',
        'del',
        'em',
        'i',
        'img',
        'q',
        'strike',
        'strong',
        'time',
        'blockquote',
        'pre',
        'p',
        'h1',
        'h2',
        'h3',
        'h4',
        'h5',
        'h6',
        'ul',
        'li',
        'ol'
      ]);
      $def = $config->getHTMLDefinition(true);
      $def->addElement(
        'time',
        'Inline',
        'Inline',
        'Common',
        [
          'datetime' => 'Text'
        ]
      );
      // Override the allowed classes to only support Microformats2 classes
      $def->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());
      $purifier = new HTMLPurifier($config);
      $sanitized = $purifier->purify($html);
      $sanitized = str_replace("&#xD;","\r",$sanitized);
      return $sanitized;
    }

    /**
     * Tells whether any key of $arr is numeric.
     *
     * @param array $arr
     * @return bool
     */
    private static function hasNumericKeys(array $arr) {
      foreach($arr as $key=>$val)
        if (is_numeric($key))
          return true;
      return false;
    }

    /**
     * Tells whether $mf looks like a microformat object: an array without numeric
     * keys that has a non-empty 'type' and a 'properties' entry.
     *
     * @param mixed $mf
     * @return bool
     */
    private static function isMicroformat($mf) {
      return is_array($mf)
        and !self::hasNumericKeys($mf)
        and !empty($mf['type'])
        and isset($mf['properties']);
    }

    /**
     * Tells whether $mf is an array whose 'type' list contains 'h-card'.
     *
     * @param mixed $mf
     * @return bool
     */
    private static function isHCard($mf) {
      return is_array($mf)
        and !empty($mf['type'])
        and is_array($mf['type'])
        and in_array('h-card', $mf['type'], true);
    }

    /**
     * Tells whether $string is an http(s) URL containing at least one dot.
     * Anything that is not a string is rejected.
     *
     * @param mixed $string
     * @return bool
     */
    private static function isURL($string) {
      return is_string($string)
        and preg_match('/^https?:\/\/.+\..+$/', $string) === 1;
    }

    /**
     * Keeps only the string members of $values, reindexed from 0.
     *
     * @param mixed $values
     * @return array
     */
    private static function stringValues($values) {
      $strings = [];
      foreach((array)$values as $value) {
        if(is_string($value))
          $strings[] = $value;
      }
      return $strings;
    }

    /**
     * Normalizes every string in $urls with normalize_url(); other values are skipped.
     *
     * @param mixed $urls
     * @return array
     */
    private static function normalizeURLs($urls) {
      $normalized = [];
      foreach(self::stringValues($urls) as $u) {
        $normalized[] = self::normalize_url($u);
      }
      return $normalized;
    }

    /**
     * Given a microformats object and a property name, returns the plaintext
     * value of the first entry of that property.
     *
     * e.g. {"properties":{"published":["foo"]}} results in "foo"
     *
     * A nested microformat contributes its 'value'.
     *
     * @param mixed  $mf2
     * @param string $k
     * @param mixed  $fallback Returned when no plaintext value is available
     * @return mixed
     */
    private static function getPlaintext($mf2, $k, $fallback=null) {
      if(!empty($mf2['properties'][$k]) and is_array($mf2['properties'][$k])) {
        $values = $mf2['properties'][$k];
        $value = reset($values);
        if(is_string($value)) {
          return $value;
        } elseif(self::isMicroformat($value) && array_key_exists('value', $value)) {
          return $value['value'];
        }
      }
      return $fallback;
    }

    /**
     * Fetches $url through $http->get() and parses the body as Microformats2.
     *
     * @param string $url
     * @param mixed  $http
     * @return array|null Parsed document, or null on an empty URL, a reported error or an empty body
     */
    private static function getURL($url, $http) {
      if(!$url) return null;
      // TODO: consider adding caching here
      $result = $http->get($url);
      if(!empty($result['error']) || empty($result['body'])) {
        return null;
      }
      return \mf2\Parse($result['body'], $url);
    }

    /**
     * Lower-cases the host of $url and gives a URL without a path the path "/".
     *
     * @param string $url
     * @return string The normalized URL; $url itself when parse_url() cannot parse it
     */
    private static function normalize_url($url) {
      $parts = parse_url($url);
      // parse_url() returns false for seriously malformed URLs
      if(!is_array($parts))
        return $url;
      if(empty($parts['path']))
        $parts['path'] = '/';
      if(isset($parts['host']))
        $parts['host'] = strtolower($parts['host']);
      return self::build_url($parts);
    }

    /**
     * Reassembles the array returned by parse_url() into a URL string.
     *
     * @param array $parsed_url
     * @return string
     */
    private static function build_url($parsed_url) {
      $scheme   = isset($parsed_url['scheme']) ? $parsed_url['scheme'] . '://' : '';
      $host     = isset($parsed_url['host']) ? $parsed_url['host'] : '';
      $port     = isset($parsed_url['port']) ? ':' . $parsed_url['port'] : '';
      $user     = isset($parsed_url['user']) ? $parsed_url['user'] : '';
      $pass     = isset($parsed_url['pass']) ? ':' . $parsed_url['pass']  : '';
      // Only emit the "@" separator when there are credentials
      $pass     = ($user || $pass) ? "$pass@" : '';
      $path     = isset($parsed_url['path']) ? $parsed_url['path'] : '';
      $query    = isset($parsed_url['query']) ? '?' . $parsed_url['query'] : '';
      $fragment = isset($parsed_url['fragment']) ? '#' . $parsed_url['fragment'] : '';
      return "$scheme$user$pass$host$port$path$query$fragment";
    }
  }