You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

747 lines
25 KiB

10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
10 years ago
  1. <?php
  2. namespace XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. use Parse;
  5. class Mf2 {
  // Decide which microformats2 object on the page best represents $url and parse it.
  //
  // $mf2  - parsed microformats2 document; 'items' is read, and 'rels' when present
  // $url  - the URL the document was fetched from, used for permalink matching
  // $http - HTTP client, handed through to the parseAs* methods
  //
  // Returns whatever the selected parseAs* method returns, or false when the
  // document has no items or none of them is a recognized type.
  public static function parse($mf2, $url, $http) {
    if(count($mf2['items']) == 0)
      return false;

    // If there is only one item on the page, just use that
    if(count($mf2['items']) == 1) {
      $item = $mf2['items'][0];
      if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page");
        return self::parseAsHEntry($mf2, $item, $http);
      }
      if(in_array('h-event', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-event it is the only item on the page");
        return self::parseAsHEvent($mf2, $item, $http);
      }
      if(in_array('h-review', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-review it is the only item on the page");
        return self::parseAsHReview($mf2, $item, $http);
      }
      if(in_array('h-recipe', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-recipe it is the only item on the page");
        return self::parseAsHRecipe($mf2, $item, $http);
      }
      if(in_array('h-product', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-product it is the only item on the page");
        return self::parseAsHProduct($mf2, $item, $http);
      }
      if(in_array('h-feed', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-feed because it is the only item on the page");
        return self::parseAsHFeed($mf2, $http);
      }
      if(in_array('h-card', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-card it is the only item on the page");
        return self::parseAsHCard($item, $http, $url);
      }
    }

    // Check the list of items on the page to see if one matches the URL of the page,
    // and treat as a permalink for that object if so. Otherwise, parse as a feed.
    foreach($mf2['items'] as $item) {
      if(array_key_exists('url', $item['properties'])) {
        $urls = $item['properties']['url'];
        // Array callable rather than the 'self::method' string form, which PHP 8.2 deprecates
        $urls = array_map([__CLASS__, 'normalize_url'], $urls);
        if(in_array($url, $urls)) {
          Parse::debug("mf2:1: Recognized $url as a permalink because an object on the page matched the URL of the request");
          if(in_array('h-card', $item['type'])) {
            return self::parseAsHCard($item, $http, $url);
          } elseif(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
            return self::parseAsHEntry($mf2, $item, $http);
          } elseif(in_array('h-event', $item['type'])) {
            return self::parseAsHEvent($mf2, $item, $http);
          } elseif(in_array('h-review', $item['type'])) {
            return self::parseAsHReview($mf2, $item, $http);
          } elseif(in_array('h-recipe', $item['type'])) {
            return self::parseAsHRecipe($mf2, $item, $http);
          } elseif(in_array('h-product', $item['type'])) {
            return self::parseAsHProduct($mf2, $item, $http);
          } else {
            Parse::debug('This object was not a recognized type.');
            return false;
          }
        }
      }
    }

    // Check for an h-card matching rel=author or the author URL of any h-* on the page,
    // and return the h-* object if so
    if(isset($mf2['rels']['author'])) {
      foreach($mf2['items'] as $card) {
        if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) {
          $urls = $card['properties']['url'];
          $urls = array_map([__CLASS__, 'normalize_url'], $urls);
          if(count(array_intersect($urls, $mf2['rels']['author'])) > 0) {
            // There is an author h-card on this page
            // Now look for the first h-* object other than an h-card and use that as the object
            foreach($mf2['items'] as $item) {
              if(!in_array('h-card', $item['type'])) {
                if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
                  return self::parseAsHEntry($mf2, $item, $http);
                } elseif(in_array('h-event', $item['type'])) {
                  return self::parseAsHEvent($mf2, $item, $http);
                } elseif(in_array('h-review', $item['type'])) {
                  return self::parseAsHReview($mf2, $item, $http);
                } elseif(in_array('h-recipe', $item['type'])) {
                  return self::parseAsHRecipe($mf2, $item, $http);
                } elseif(in_array('h-product', $item['type'])) {
                  return self::parseAsHProduct($mf2, $item, $http);
                }
              }
            }
          }
        }
      }
    }

    // If there was more than one h-entry on the page, treat the whole page as a feed
    if(count($mf2['items']) > 1) {
      if(count(array_filter($mf2['items'], function($item){
        return in_array('h-entry', $item['type']);
      })) > 1) {
        Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page");
        return self::parseAsHFeed($mf2, $http);
      }
    }

    // If the first item is an h-feed, parse as a feed
    $first = $mf2['items'][0];
    if(in_array('h-feed', $first['type'])) {
      Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
      return self::parseAsHFeed($mf2, $http);
    }

    // Fallback case, but hopefully we have found something before this point
    foreach($mf2['items'] as $item) {
      // Otherwise check for a recognized h-entr* object
      if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-entry on the page");
        return self::parseAsHEntry($mf2, $item, $http);
      } elseif(in_array('h-event', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-event on the page");
        return self::parseAsHEvent($mf2, $item, $http);
      } elseif(in_array('h-review', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-review on the page");
        return self::parseAsHReview($mf2, $item, $http);
      } elseif(in_array('h-recipe', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-recipe on the page");
        // Recipes must go through the recipe parser (this previously called parseAsHReview)
        return self::parseAsHRecipe($mf2, $item, $http);
      } elseif(in_array('h-product', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-product on the page");
        return self::parseAsHProduct($mf2, $item, $http);
      }
    }

    Parse::debug("mf2:E: No object at $url was recognized");
    return false;
  }
  // Copy the first plaintext value of each listed property from $item into $data.
  // Properties named in $urlProperties are only copied when the value passes isURL().
  // Properties without a plaintext value are left out of $data.
  private static function collectSingleValues($properties, $urlProperties, $item, &$data) {
    foreach($properties as $key) {
      $value = self::getPlaintext($item, $key);
      if($value === null)
        continue;
      $data[$key] = $value;
    }

    foreach($urlProperties as $key) {
      $value = self::getPlaintext($item, $key);
      if($value === null)
        continue;
      if(self::isURL($value)) {
        $data[$key] = $value;
      }
    }
  }
  // Read the first value of $property and return it as ['text' => ..., 'html' => ...].
  // Returns null when the property is absent. 'html' is only included when the value
  // carried HTML and the sanitized HTML differs from the plaintext.
  private static function parseHTMLValue($property, $item) {
    if(!array_key_exists($property, $item['properties']))
      return null;

    $first = $item['properties'][$property][0];

    $text = false;
    $html = false;

    if(is_string($first)) {
      $text = $first;
    } elseif(is_array($first) && array_key_exists('value', $first)) {
      if(!array_key_exists('html', $first)) {
        $text = trim($first['value']);
      } else {
        $html = trim(self::sanitizeHTML($first['html']));
        // The plaintext comes from the parser's 'value', not from stripping the HTML
        $text = trim(str_replace("&#xD;","\r",$first['value']));
      }
    }

    $result = ['text' => $text];

    if($html && $text != $html) {
      $result['html'] = $html;
    }

    return $result;
  }
  // Collect the listed properties from $item into $data, always as arrays of unique values.
  // String values are stored as-is. A nested microformats object with a valid URL is
  // stored by that URL and its parsed form is added to $refs under the same URL.
  // A nested object without a URL falls back to its 'value' when it has one;
  // anything else is skipped.
  private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $p) {
      if(!array_key_exists($p, $item['properties']))
        continue;

      foreach($item['properties'][$p] as $v) {
        $nestedURL = null;

        if(is_string($v)) {
          $value = $v;
        } elseif(self::isMicroformat($v)) {
          $u = self::getPlaintext($v, 'url');
          if($u && self::isURL($u)) {
            $value = $u;
            $nestedURL = $u;
          } elseif(isset($v['value'])) {
            $value = $v['value'];
          } else {
            // Nested object with neither a usable URL nor a 'value': nothing to record
            continue;
          }
        } else {
          continue;
        }

        if(!array_key_exists($p, $data)) $data[$p] = [];
        // Strict comparison so numeric-looking strings such as "7" and "007" stay distinct
        if(!in_array($value, $data[$p], true))
          $data[$p][] = $value;

        if($nestedURL !== null) {
          // Parse the nested object on its own and expose it through "refs"
          $ref = self::parse(['items'=>[$v]], $nestedURL, $http);
          if($ref) {
            $refs[$nestedURL] = $ref['data'];
          }
        }
      }
    }
  }
  // Collect the listed properties from $item into $data as arrays of URLs.
  // Only string values that pass isURL(), or nested microformats objects whose
  // 'url' passes isURL(), are kept. Nested objects are also parsed and stored
  // in $refs keyed by their URL.
  private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $p) {
      if(!array_key_exists($p, $item['properties']))
        continue;

      foreach($item['properties'][$p] as $v) {
        $isNested = false;

        if(is_string($v) && self::isURL($v)) {
          $target = $v;
        } elseif(self::isMicroformat($v) && ($target = self::getPlaintext($v, 'url')) && self::isURL($target)) {
          $isNested = true;
        } else {
          continue;
        }

        if(!array_key_exists($p, $data)) {
          $data[$p] = [];
        }
        $data[$p][] = $target;

        if($isNested) {
          // parse the object and put the result in the "refs" object
          $ref = self::parse(['items'=>[$v]], $target, $http);
          if($ref) {
            $refs[$target] = $ref['data'];
          }
        }
      }
    }
  }
  // Fill in $data['name'] and $data['content'] from $item.
  // The name is dropped when, ignoring whitespace and a trailing ellipsis,
  // it is just a prefix of the plaintext content.
  private static function determineNameAndContent($item, &$data) {
    $name = self::getPlaintext($item, 'name');
    $content = self::parseHTMLValue('content', $item);

    if($content) {
      $plain = array_key_exists('text', $content) ? $content['text'] : null;

      // Trim ellipses from the name
      $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);

      // Remove all whitespace when checking equality
      $squashedName = preg_replace('/\s/','',trim($name));
      $squashedContent = preg_replace('/\s/','',trim($plain));

      // Check if the name is a prefix of the content
      $nameIsPrefix = $squashedContent && $squashedName && strpos($squashedContent, $squashedName) === 0;
      if($nameIsPrefix) {
        $name = null;
      }
    }

    if($name) {
      $data['name'] = $name;
    }

    if(!$content)
      return;

    // Always return the plaintext content; the HTML is only present when parseHTMLValue kept it.
    // Keys are assigned one by one so an existing $data['content'] array is updated, not replaced.
    $data['content']['text'] = $content['text'];
    if(array_key_exists('html', $content)) {
      $data['content']['html'] = $content['html'];
    }
  }
  // Build the 'entry' response for an h-entry / h-cite item.
  // Returns ['data' => ...] plus 'refs' when nested objects were collected.
  private static function parseAsHEntry($mf2, $item, $http) {
    $data = ['type' => 'entry'];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['published','summary','rsvp','swarm-coins'], ['url'], $item, $data);

    // Strip leading hashtags from plain-string category values before collecting them
    if(array_key_exists('category', $item['properties'])) {
      $item['properties']['category'] = array_map(function($category) {
        return is_string($category) ? ltrim($category, '#') : $category;
      }, $item['properties']['category']);
    }

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','invitee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    self::collectArrayURLValues(['photo','video','audio','syndication','in-reply-to','like-of','repost-of','bookmark-of'], $item, $data, $refs, $http);

    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  // Build the 'review' response for an h-review item.
  // Returns ['data' => ...] plus 'refs' when nested objects were collected.
  private static function parseAsHReview($mf2, $item, $http) {
    $data = ['type' => 'review'];
    $refs = [];

    self::collectSingleValues(['summary','published','rating','best','worst'], ['url'], $item, $data);

    // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
    $fallbackContent = self::parseHTMLValue('description', $item);
    if($fallbackContent) {
      $data['content'] = $fallbackContent;
    }

    self::collectArrayValues(['category'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['item'], $item, $data, $refs, $http);

    // A real "content" property, when present, is written on top of the fallback above
    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  // Build the 'recipe' response for an h-recipe item.
  // Returns ['data' => ...] plus 'refs' when nested objects were collected.
  private static function parseAsHRecipe($mf2, $item, $http) {
    $data = ['type' => 'recipe'];
    $refs = [];

    self::collectSingleValues(['name','summary','published','duration','yield','nutrition'], ['url'], $item, $data);

    // Instructions may carry HTML, so they go through the text/html value parser
    $steps = self::parseHTMLValue('instructions', $item);
    if($steps) {
      $data['instructions'] = $steps;
    }

    self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  // Build the 'product' response for an h-product item.
  // Returns ['data' => ...] plus 'refs' when nested objects were collected.
  private static function parseAsHProduct($mf2, $item, $http) {
    $data = [
      'type' => 'product'
    ];
    // Must start as an array: the collectors only assign into it when a nested object
    // is found, and count() on an unset/null value fails on modern PHP.
    $refs = [];

    self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);

    $description = self::parseHTMLValue('description', $item);
    if($description) {
      $data['description'] = $description;
    }

    self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);

    self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  // Build the 'event' response for an h-event item.
  // Returns ['data' => ...] plus 'refs' when nested objects were collected.
  private static function parseAsHEvent($mf2, $item, $http) {
    $data = [
      'type' => 'event'
    ];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    self::collectArrayURLValues(['photo','video','audio','syndication'], $item, $data, $refs, $http);

    // If there is a description, always return the plaintext description, and return HTML
    // description if it's different. parseHTMLValue() does the same text/html extraction
    // that used to be duplicated here (including a dead strip_tags() assignment).
    $description = self::parseHTMLValue('description', $item);
    if($description && $description['text']) {
      // Descriptions with empty plaintext are omitted entirely
      $data['description'] = $description;
    }

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  // Placeholder feed response: feed parsing is not implemented, so this returns a fixed
  // 'feed' structure with an empty author card and no entries. Both arguments are unused.
  private static function parseAsHFeed($mf2, $http) {
    $emptyAuthor = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $feed = [
      'type' => 'feed',
      'author' => $emptyAuthor,
      'todo' => 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1'
    ];

    return ['data' => $feed, 'entries' => []];
  }
  // Build the 'card' response for an h-card item.
  //
  // $authorURL - when given, a url value on the card that normalizes to this URL is
  //              preferred; it is also used as the card URL when the card yields none.
  //
  // Returns ['data' => ['type' => 'card', 'name' => ..., 'url' => ..., 'photo' => ...]].
  private static function parseAsHCard($item, $http, $authorURL=false) {
    $data = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $properties = ['url','name','photo'];

    foreach($properties as $p) {
      if($p == 'url' && $authorURL) {
        // The card may have no url property at all; treat that as an empty list
        $urls = (isset($item['properties']['url']) && is_array($item['properties']['url']))
          ? $item['properties']['url']
          : [];

        // If there is a matching author URL, use that one
        $found = false;
        foreach($urls as $url) {
          // url values can be nested objects; only plain strings are candidates
          if(is_string($url) && self::isURL($url)) {
            $url = self::normalize_url($url);
            if($url == $authorURL) {
              $data['url'] = $url;
              $found = true;
              break;
            }
          }
        }

        // Otherwise fall back to the first url value when it is a valid URL
        if(!$found && isset($urls[0]) && is_string($urls[0]) && self::isURL($urls[0])) {
          $data['url'] = $urls[0];
        }
      } else if(($v = self::getPlaintext($item, $p)) !== null) {
        // Make sure the URL property is actually a URL
        if($p == 'url' || $p == 'photo') {
          if(self::isURL($v))
            $data[$p] = $v;
        } else {
          $data[$p] = $v;
        }
      }
    }

    // If no URL property was found, use the $authorURL provided
    if(!$data['url'])
      $data['url'] = $authorURL;

    $response = [
      'data' => $data
    ];

    return $response;
  }
  // Author discovery for $item, following the numbered steps of
  // http://indiewebcamp.com/authorship
  //
  // Returns a card array (type/name/url/photo), or null when no author was found.
  private static function findAuthor($mf2, $item, $http) {
    $author = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $authorPage = false;

    if(array_key_exists('author', $item['properties'])) {
      foreach($item['properties']['author'] as $candidate) {
        // 5.1 "if it has an h-card, use it, exit."
        if(self::isHCard($candidate)) {
          return self::parseAsHCard($candidate, $http)['data'];
        }

        // 5.3 "otherwise use the author property as the author name, exit"
        // Covers a non-URL string as well as an mf2 object that is not an h-card:
        // only the name can be set, no h-card or URL was found
        if(!is_string($candidate) || !self::isURL($candidate)) {
          $author['name'] = self::getPlaintext($item, 'author');
          return $author;
        }

        // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL"
        $authorPage = $candidate;
      }
    }

    // 6. "if no author page was found" ... check for rel-author link
    if(!$authorPage && isset($mf2['rels']['author'])) {
      $authorPage = $mf2['rels']['author'][0];
    }

    // 7. "if there is an author-page URL" ...
    if($authorPage) {
      // 7.1 "get the author-page from that URL and parse it for microformats2"
      $page = self::getURL($authorPage, $http);

      if($page) {
        $relMeLinks = isset($page['rels']['me']) ? $page['rels']['me'] : [];

        foreach($page['items'] as $card) {
          if(!self::isHCard($card))
            continue;

          $hasURL = array_key_exists('url', $card['properties']);

          // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
          if($hasURL
            and in_array($authorPage, $card['properties']['url'])
            and array_key_exists('uid', $card['properties'])
            and in_array($authorPage, $card['properties']['uid'])
          ) {
            return self::parseAsHCard($card, $http, $authorPage)['data'];
          }

          // 7.3 "else if author-page has 1+ h-card with url property which matches the href of a rel-me link on the author-page"
          if(count($relMeLinks) > 0
            and $hasURL
            and count(array_intersect($card['properties']['url'], $relMeLinks)) > 0
          ) {
            return self::parseAsHCard($card, $http, $authorPage)['data'];
          }
        }
      }

      // 7.4 "if the h-entry's page has 1+ h-card with url == author-page URL, use first such h-card, exit."
      foreach($mf2['items'] as $card) {
        if(self::isHCard($card)
          and array_key_exists('url', $card['properties'])
          and in_array($authorPage, $card['properties']['url'])
        ) {
          return self::parseAsHCard($card, $http)['data'];
        }
      }
    }

    $isEmpty = !$author['name'] && !$author['photo'] && !$author['url'];
    return $isEmpty ? null : $author;
  }
  // Run $html through HTMLPurifier with a fixed allow-list of elements, a custom
  // <time datetime> element and the Microformats2 class attribute definition, then
  // turn "&#xD;" entities back into carriage returns.
  private static function sanitizeHTML($html) {
    $allowedElements = [
      'a',
      'abbr',
      'b',
      'code',
      'del',
      'em',
      'i',
      'img',
      'q',
      'strike',
      'strong',
      'time',
      'blockquote',
      'pre',
      'p',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'ul',
      'li',
      'ol'
    ];
    $timeAttributes = ['datetime' => 'Text'];

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Cache.DefinitionImpl', null);
    $config->set('HTML.AllowedElements', $allowedElements);

    $definition = $config->getHTMLDefinition(true);
    $definition->addElement('time', 'Inline', 'Inline', 'Common', $timeAttributes);

    // Override the allowed classes to only support Microformats2 classes
    $definition->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

    $purifier = new HTMLPurifier($config);
    $clean = $purifier->purify($html);

    return str_replace("&#xD;","\r",$clean);
  }
  // True when at least one key of $arr is numeric according to is_numeric().
  private static function hasNumericKeys(array $arr) {
    foreach(array_keys($arr) as $key) {
      if(is_numeric($key)) {
        return true;
      }
    }

    return false;
  }
  // True when $mf is an array without numeric keys that has a non-empty 'type'
  // and a 'properties' entry.
  private static function isMicroformat($mf) {
    if(!is_array($mf)) {
      return false;
    }

    if(self::hasNumericKeys($mf)) {
      return false;
    }

    return !empty($mf['type']) && isset($mf['properties']);
  }
  // True when $mf is an array whose 'type' is a non-empty array containing 'h-card'.
  private static function isHCard($mf) {
    if(!is_array($mf) || empty($mf['type']) || !is_array($mf['type'])) {
      return false;
    }

    return in_array('h-card', $mf['type']);
  }
  // True when $string is a string that starts with http:// or https:// and is followed
  // by text containing at least one dot. Non-string input (null, nested microformat
  // arrays) is rejected up front instead of being handed to preg_match().
  private static function isURL($string) {
    if(!is_string($string)) {
      return false;
    }

    return preg_match('/^https?:\/\/.+\..+$/', $string) === 1;
  }
  // Given a microformats object and a property name, return the plaintext value
  // of the first entry of that property, e.g.
  //   {"properties":{"published":["foo"]}} results in "foo"
  // A nested microformats object yields its 'value'. $fallback is returned when the
  // property is missing, empty, or its first entry has no plaintext form.
  private static function getPlaintext($mf2, $k, $fallback=null) {
    if(empty($mf2['properties'][$k]) || !is_array($mf2['properties'][$k])) {
      return $fallback;
    }

    $first = $mf2['properties'][$k][0];

    if(is_string($first)) {
      return $first;
    }

    if(self::isMicroformat($first) && array_key_exists('value', $first)) {
      return $first['value'];
    }

    return $fallback;
  }
  // Fetch $url with the given HTTP client and parse the body as microformats2.
  // Returns null when $url is empty, the response reports an error, or the body is empty.
  private static function getURL($url, $http) {
    if(!$url) return null;

    // TODO: consider adding caching here
    $response = $http->get($url);

    $failed = $response['error'] || !$response['body'];

    return $failed ? null : \mf2\Parse($response['body'], $url);
  }
  // Normalize a URL for comparison: lowercase the host and use "/" when the path is empty.
  // Values that parse_url() cannot parse, or that have no host component (relative
  // paths, mailto: links, ...), are returned unchanged rather than being rebuilt
  // from incomplete parts.
  private static function normalize_url($url) {
    $parts = parse_url($url);

    if(!is_array($parts) || empty($parts['host'])) {
      return $url;
    }

    if(empty($parts['path']))
      $parts['path'] = '/';

    $parts['host'] = strtolower($parts['host']);

    return self::build_url($parts);
  }
  // Reassemble a URL string from an array shaped like the result of parse_url().
  // Missing components contribute an empty string.
  private static function build_url($parsed_url) {
    // Returns the component wrapped in its delimiters, or '' when it is not set
    $part = function($key, $before = '', $after = '') use ($parsed_url) {
      return isset($parsed_url[$key]) ? $before . $parsed_url[$key] . $after : '';
    };

    $user = $part('user');
    $pass = $part('pass', ':');
    // The "@" separator is only emitted when there is a user or a password
    $credentials = ($user || $pass) ? $user . $pass . '@' : $user;

    return $part('scheme', '', '://')
      . $credentials
      . $part('host')
      . $part('port', ':')
      . $part('path')
      . $part('query', '?')
      . $part('fragment', '#');
  }
  642. }