You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

729 lines
24 KiB

8 years ago
8 years ago
8 years ago
  1. <?php
  2. namespace XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. use Parse;
  5. class Mf2 {
  /**
   * Choose the primary microformats2 object on a page and convert it.
   *
   * The strategies are tried in this order:
   *   0. the page has exactly one item of a recognized type;
   *   1. an item's "url" property matches the requested URL (permalink);
   *   2. an h-card on the page matches a rel=author link, in which case the
   *      first recognized non-h-card item is used;
   *   3. more than one h-entry is present, or the first item is an h-feed,
   *      in which case the page is treated as a feed;
   *   4. fall back to the first item of a recognized type.
   *
   * @param array  $mf2  Parsed mf2 document; must contain an "items" list
   * @param string $url  URL the document was retrieved from
   * @param mixed  $http HTTP client, passed through to the type-specific parsers
   * @return array|false The parsed response, or false when nothing was recognized
   */
  public static function parse($mf2, $url, $http) {
    if(count($mf2['items']) == 0)
      return false;

    // If there is only one item on the page, just use that
    if(count($mf2['items']) == 1) {
      $item = $mf2['items'][0];
      if(($type = self::recognizedObjectType($item)) !== null) {
        Parse::debug("mf2:0: Recognized $url as an h-$type it is the only item on the page");
        return self::parseAsObjectType($type, $mf2, $item, $http);
      }
      if(in_array('h-feed', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-feed because it is the only item on the page");
        return self::parseAsHFeed($mf2, $http);
      }
    }

    // Check the list of items on the page to see if one matches the URL of the page,
    // and treat as a permalink for that object if so. Otherwise, parse as a feed.
    foreach($mf2['items'] as $item) {
      if(array_key_exists('url', $item['properties'])) {
        // Array callable instead of the 'self::method' string form, which is deprecated in PHP 8.2
        $urls = array_map([__CLASS__, 'normalize_url'], $item['properties']['url']);
        if(in_array($url, $urls)) {
          Parse::debug("mf2:1: Recognized $url as a permalink because an object on the page matched the URL of the request");
          if(in_array('h-card', $item['type'])) {
            return self::parseAsHCard($item, $http, $url);
          }
          if(($type = self::recognizedObjectType($item)) !== null) {
            return self::parseAsObjectType($type, $mf2, $item, $http);
          }
          Parse::debug('This object was not a recognized type.');
          return false;
        }
      }
    }

    // Check for an h-card matching rel=author or the author URL of any h-* on the page,
    // and return the h-* object if so
    if(isset($mf2['rels']['author'])) {
      foreach($mf2['items'] as $card) {
        if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) {
          $urls = array_map([__CLASS__, 'normalize_url'], $card['properties']['url']);
          if(count(array_intersect($urls, $mf2['rels']['author'])) > 0) {
            // There is an author h-card on this page
            // Now look for the first h-* object other than an h-card and use that as the object
            foreach($mf2['items'] as $item) {
              if(in_array('h-card', $item['type']))
                continue;
              if(($type = self::recognizedObjectType($item)) !== null) {
                return self::parseAsObjectType($type, $mf2, $item, $http);
              }
            }
          }
        }
      }
    }

    // If there was more than one h-entry on the page, treat the whole page as a feed
    if(count($mf2['items']) > 1) {
      $entries = array_filter($mf2['items'], function($item){
        return in_array('h-entry', $item['type']);
      });
      if(count($entries) > 1) {
        Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page");
        return self::parseAsHFeed($mf2, $http);
      }
    }

    // If the first item is an h-feed, parse as a feed
    $first = $mf2['items'][0];
    if(in_array('h-feed', $first['type'])) {
      Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
      return self::parseAsHFeed($mf2, $http);
    }

    // Fallback case, but hopefully we have found something before this point
    foreach($mf2['items'] as $item) {
      if(($type = self::recognizedObjectType($item)) !== null) {
        Parse::debug("mf2:6: $url is falling back to the first h-$type on the page");
        // Dispatching on the detected type sends an h-recipe to parseAsHRecipe
        // (this branch previously called parseAsHReview for recipes)
        return self::parseAsObjectType($type, $mf2, $item, $http);
      }
    }

    Parse::debug("mf2:E: No object at $url was recognized");
    return false;
  }

  /**
   * Map an item's "type" list onto the short name of the parser that handles it.
   *
   * h-cite is handled by the entry parser, so it reports "entry". The order of
   * the checks decides which parser wins when an item carries several types.
   *
   * @param array $item A single mf2 item with a "type" list
   * @return string|null One of entry, event, review, recipe, product; null if none match
   */
  private static function recognizedObjectType($item) {
    if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type']))
      return 'entry';
    foreach(['event', 'review', 'recipe', 'product'] as $type) {
      if(in_array('h-'.$type, $item['type']))
        return $type;
    }
    return null;
  }

  /**
   * Run the parser that corresponds to a name returned by recognizedObjectType().
   *
   * @param string $type Short type name
   * @param array  $mf2  The full parsed document
   * @param array  $item The item to convert
   * @param mixed  $http HTTP client, passed through unchanged
   * @return array|null The parser's response, or null for an unknown type name
   */
  private static function parseAsObjectType($type, $mf2, $item, $http) {
    switch($type) {
      case 'entry':   return self::parseAsHEntry($mf2, $item, $http);
      case 'event':   return self::parseAsHEvent($mf2, $item, $http);
      case 'review':  return self::parseAsHReview($mf2, $item, $http);
      case 'recipe':  return self::parseAsHRecipe($mf2, $item, $http);
      case 'product': return self::parseAsHProduct($mf2, $item, $http);
    }
    return null;
  }
  /**
   * Copy the first plaintext value of each listed property into $data.
   *
   * @param array $properties    Property names copied as-is when a value is present
   * @param array $urlProperties Property names copied only when the value passes isURL()
   * @param array $item          The mf2 item to read from
   * @param array $data          Output array, modified in place
   */
  private static function collectSingleValues($properties, $urlProperties, $item, &$data) {
    foreach($properties as $key) {
      $text = self::getPlaintext($item, $key);
      if($text === null)
        continue;
      $data[$key] = $text;
    }

    foreach($urlProperties as $key) {
      $candidate = self::getPlaintext($item, $key);
      // Values that are present but not URLs are dropped silently
      if($candidate !== null && self::isURL($candidate)) {
        $data[$key] = $candidate;
      }
    }
  }
  /**
   * Read the first value of a property that may carry HTML (an e-* property).
   *
   * @param string $property Property name, e.g. "content"
   * @param array  $item     The mf2 item to read from
   * @return array|null ['text' => string] plus 'html' when sanitized HTML exists
   *                    and differs from the text; null when the property is
   *                    missing, empty, or not in a recognized shape
   */
  private static function parseHTMLValue($property, $item) {
    if(!array_key_exists($property, $item['properties']))
      return null;

    // An empty list has no first value to read
    $values = $item['properties'][$property];
    if(!is_array($values) || !isset($values[0]))
      return null;

    $textContent = false;
    $htmlContent = false;

    $content = $values[0];
    if(is_string($content)) {
      $textContent = $content;
    } elseif(is_array($content) && array_key_exists('value', $content)) {
      if(array_key_exists('html', $content)) {
        $htmlContent = trim(self::sanitizeHTML($content['html']));
        // The plaintext comes from the parser's "value", not from stripping the HTML
        $textContent = trim(str_replace("&#xD;","\r",$content['value']));
      } else {
        $textContent = trim($content['value']);
      }
    } else {
      // Unrecognized shape: report "no value" rather than ['text' => false]
      return null;
    }

    $data = [
      'text' => $textContent
    ];

    // Strict comparison so numeric-looking strings such as "1e1" and "10" are not treated as equal
    if($htmlContent && $textContent !== $htmlContent) {
      $data['html'] = $htmlContent;
    }

    return $data;
  }
  /**
   * Collect multi-valued properties that may hold plaintext or nested objects.
   *
   * The result for each property is always an array. A nested object with a
   * valid URL is represented by that URL and its parsed form is stored in
   * $refs under the same URL. A nested object without a URL contributes its
   * "value" when it has one; otherwise it is skipped.
   *
   * @param array $properties Property names to collect
   * @param array $item       The mf2 item to read from
   * @param array $data       Output array, modified in place
   * @param array $refs       Map of URL => parsed nested object, modified in place
   * @param mixed $http       HTTP client, passed through to parse()
   */
  private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $p) {
      if(!array_key_exists($p, $item['properties']))
        continue;

      foreach($item['properties'][$p] as $v) {
        if(is_string($v)) {
          if(!array_key_exists($p, $data)) $data[$p] = [];
          $data[$p][] = $v;
        } elseif(self::isMicroformat($v)) {
          if(($u=self::getPlaintext($v, 'url')) && self::isURL($u)) {
            if(!array_key_exists($p, $data)) $data[$p] = [];
            $data[$p][] = $u;
            // Parse the nested object on its own and expose it through "refs"
            $ref = self::parse(['items'=>[$v]], $u, $http);
            if($ref) {
              $refs[$u] = $ref['data'];
            }
          } elseif(array_key_exists('value', $v)) {
            // isMicroformat() does not guarantee a "value" key, so check before reading it
            if(!array_key_exists($p, $data)) $data[$p] = [];
            $data[$p][] = $v['value'];
          }
        }
      }
    }
  }
  /**
   * Collect multi-valued properties whose values must be URLs.
   *
   * String values are kept only when they pass isURL(). A nested object is
   * represented by its "url" value when that is a URL, and its parsed form is
   * stored in $refs under that URL. Everything else is ignored.
   *
   * @param array $properties Property names to collect
   * @param array $item       The mf2 item to read from
   * @param array $data       Output array, modified in place
   * @param array $refs       Map of URL => parsed nested object, modified in place
   * @param mixed $http       HTTP client, passed through to parse()
   */
  private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $name) {
      if(!array_key_exists($name, $item['properties']))
        continue;

      foreach($item['properties'][$name] as $value) {
        if(is_string($value)) {
          if(self::isURL($value)) {
            if(!array_key_exists($name, $data)) $data[$name] = [];
            $data[$name][] = $value;
          }
          continue;
        }

        if(!self::isMicroformat($value))
          continue;

        $nestedURL = self::getPlaintext($value, 'url');
        if(!$nestedURL || !self::isURL($nestedURL))
          continue;

        if(!array_key_exists($name, $data)) $data[$name] = [];
        $data[$name][] = $nestedURL;

        // parse the object and put the result in the "refs" object
        $parsed = self::parse(['items'=>[$value]], $nestedURL, $http);
        if($parsed) {
          $refs[$nestedURL] = $parsed['data'];
        }
      }
    }
  }
  /**
   * Fill in "name" and "content" for an item, dropping a redundant name.
   *
   * When the name (minus a trailing ellipsis, ignoring whitespace) is a prefix
   * of the plaintext content, the name is omitted.
   *
   * @param array $item The mf2 item to read from
   * @param array $data Output array, modified in place
   */
  private static function determineNameAndContent($item, &$data) {
    $name = self::getPlaintext($item, 'name');
    $content = self::parseHTMLValue('content', $item);

    // Only compare when there is a string name; passing null to preg_replace()/trim()
    // is deprecated as of PHP 8.1
    if($content && is_string($name)) {
      // Trim ellipses from the name
      $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);

      // Remove all whitespace when checking equality
      $nameCompare = preg_replace('/\s/','',trim($name));
      $contentCompare = preg_replace('/\s/','',trim((string)$content['text']));

      // Check if the name is a prefix of the content
      if($contentCompare && $nameCompare && strpos($contentCompare, $nameCompare) === 0) {
        $name = null;
      }
    }

    if($name) {
      $data['name'] = $name;
    }

    // If there is content, always return the plaintext content, and return HTML content if it's different
    if($content) {
      // Replace any earlier "content" wholesale so HTML from a previous fallback value
      // cannot end up next to text taken from this one
      $data['content'] = [
        'text' => $content['text']
      ];
      if(array_key_exists('html', $content))
        $data['content']['html'] = $content['html'];
    }
  }
  /**
   * Convert an h-entry (or h-cite) item into an "entry" response.
   *
   * @param array $mf2  The full parsed document, used for author discovery
   * @param array $item The item to convert
   * @param mixed $http HTTP client, passed through to the helpers
   * @return array ['data' => ...] plus 'refs' when nested objects were found
   */
  private static function parseAsHEntry($mf2, $item, $http) {
    $refs = [];
    $data = ['type' => 'entry'];

    // Single plaintext and URL values
    $plainProps = ['published','summary','rsvp'];
    self::collectSingleValues($plainProps, ['url'], $item, $data);

    // These properties are always returned as arrays and may contain plaintext content
    $listProps = ['category','invitee'];
    self::collectArrayValues($listProps, $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    $urlListProps = ['photo','video','audio','syndication','in-reply-to','like-of','repost-of','bookmark-of'];
    self::collectArrayURLValues($urlListProps, $item, $data, $refs, $http);

    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    // "refs" is only present when at least one nested object was collected
    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  /**
   * Convert an h-review item into a "review" response.
   *
   * @param array $mf2  The full parsed document, used for author discovery
   * @param array $item The item to convert
   * @param mixed $http HTTP client, passed through to the helpers
   * @return array ['data' => ...] plus 'refs' when nested objects were found
   */
  private static function parseAsHReview($mf2, $item, $http) {
    $refs = [];
    $data = ['type' => 'review'];

    $plainProps = ['summary','published','rating','best','worst'];
    self::collectSingleValues($plainProps, ['url'], $item, $data);

    // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
    $fallbackContent = self::parseHTMLValue('description', $item);
    if($fallbackContent) {
      $data['content'] = $fallbackContent;
    }

    self::collectArrayValues(['category'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['item'], $item, $data, $refs, $http);

    // Runs after the description fallback, so a real "content" property takes precedence
    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  /**
   * Convert an h-recipe item into a "recipe" response.
   *
   * @param array $mf2  The full parsed document, used for author discovery
   * @param array $item The item to convert
   * @param mixed $http HTTP client, passed through to the helpers
   * @return array ['data' => ...] plus 'refs' when nested objects were found
   */
  private static function parseAsHRecipe($mf2, $item, $http) {
    $refs = [];
    $data = ['type' => 'recipe'];

    $plainProps = ['name','summary','published','duration','yield','nutrition'];
    self::collectSingleValues($plainProps, ['url'], $item, $data);

    // Instructions may carry HTML, so they go through the text/html extraction
    $steps = self::parseHTMLValue('instructions', $item);
    if($steps) {
      $data['instructions'] = $steps;
    }

    self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    return count($refs)
      ? ['data' => $data, 'refs' => $refs]
      : ['data' => $data];
  }
  /**
   * Convert an h-product item into a "product" response.
   *
   * @param array $mf2  The full parsed document (not read here)
   * @param array $item The item to convert
   * @param mixed $http HTTP client, passed through to the helpers
   * @return array ['data' => ...] plus 'refs' when nested objects were found
   */
  private static function parseAsHProduct($mf2, $item, $http) {
    $data = [
      'type' => 'product'
    ];
    // Must start as an array: it is passed by reference below and then given to count(),
    // which rejects null in PHP 8
    $refs = [];

    self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);

    $description = self::parseHTMLValue('description', $item);
    if($description) {
      $data['description'] = $description;
    }

    self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);

    self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  /**
   * Convert an h-event item into an "event" response.
   *
   * @param array $mf2  The full parsed document (not read here)
   * @param array $item The item to convert
   * @param mixed $http HTTP client, passed through to the helpers
   * @return array ['data' => ...] plus 'refs' when nested objects were found
   */
  private static function parseAsHEvent($mf2, $item, $http) {
    $data = [
      'type' => 'event'
    ];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    self::collectArrayURLValues(['photo','video','audio','syndication'], $item, $data, $refs, $http);

    // If there is a description, always return the plaintext description, and return HTML description if it's different.
    // parseHTMLValue() performs the same text/html extraction that used to be duplicated here
    // (including a strip_tags() result that was immediately overwritten).
    $description = self::parseHTMLValue('description', $item);
    // As before, a description whose text is empty is left out entirely
    if($description && $description['text']) {
      $data['description'] = $description;
    }

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  /**
   * Placeholder feed parser: returns a fixed, empty feed structure.
   *
   * Neither argument is read; the response always has an empty author card,
   * a "todo" note, and no entries.
   *
   * @param array $mf2  The full parsed document (unused)
   * @param mixed $http HTTP client (unused)
   * @return array ['data' => ..., 'entries' => []]
   */
  private static function parseAsHFeed($mf2, $http) {
    $emptyAuthor = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $feed = ['type' => 'feed'];
    $feed['author'] = $emptyAuthor;
    $feed['todo'] = 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1';

    $response = [];
    $response['data'] = $feed;
    $response['entries'] = [];
    return $response;
  }
  /**
   * Convert an h-card item into a "card" response with name, url and photo.
   *
   * @param array        $item      The h-card item
   * @param mixed        $http      HTTP client (not used here)
   * @param string|false $authorURL When given, the card's URL that normalizes to this
   *                                value is preferred over the first URL
   * @return array ['data' => card]; fields that could not be determined stay null
   */
  private static function parseAsHCard($item, $http, $authorURL=false) {
    $data = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $properties = ['url','name','photo'];
    foreach($properties as $p) {
      if($p == 'url' && $authorURL) {
        // The card may have no "url" property at all, and its values are not guaranteed to be strings
        $urls = (isset($item['properties']['url']) && is_array($item['properties']['url']))
          ? $item['properties']['url']
          : [];

        // If there is a matching author URL, use that one
        $found = false;
        foreach($urls as $url) {
          if(is_string($url) && self::isURL($url)) {
            $url = self::normalize_url($url);
            if($url == $authorURL) {
              $data['url'] = $url;
              $found = true;
              // Every match equals $authorURL, so the first one is enough
              break;
            }
          }
        }
        // Otherwise fall back to the first URL, which is used as written (not normalized)
        if(!$found && isset($urls[0]) && is_string($urls[0]) && self::isURL($urls[0])) {
          $data['url'] = $urls[0];
        }
      } else if(($v = self::getPlaintext($item, $p)) !== null) {
        // Make sure the URL property is actually a URL
        if($p == 'url' || $p == 'photo') {
          if(self::isURL($v))
            $data[$p] = $v;
        } else {
          $data[$p] = $v;
        }
      }
    }

    $response = [
      'data' => $data
    ];

    return $response;
  }
  /**
   * Determine the author of an item, following the authorship steps cited in
   * the inline comments (http://indiewebcamp.com/authorship).
   *
   * @param array $mf2  The full parsed document the item came from
   * @param array $item The item whose author is wanted
   * @param mixed $http HTTP client, used to fetch an author page when one is found
   * @return array|null A card array (type, name, url, photo), or null when no author was found
   */
  private static function findAuthor($mf2, $item, $http) {
    $author = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    // Author Discovery
    // http://indiewebcamp.com/authorship

    $authorPage = false;
    if(array_key_exists('author', $item['properties'])) {

      // Check if any of the values of the author property are an h-card
      foreach($item['properties']['author'] as $a) {
        if(self::isHCard($a)) {
          // 5.1 "if it has an h-card, use it, exit."
          return self::parseAsHCard($a, $http)['data'];
        } elseif(is_string($a)) {
          if(self::isURL($a)) {
            // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL"
            $authorPage = $a;
          } else {
            // 5.3 "otherwise use the author property as the author name, exit"
            // We can only set the name, no h-card or URL was found.
            // Use the value being examined, which is not necessarily the first author value.
            $author['name'] = $a;
            return $author;
          }
        } else {
          // This case is only hit when the author property is an mf2 object that is not an h-card.
          // Wrap the current value so getPlaintext() reads it rather than the first author value.
          $author['name'] = self::getPlaintext(['properties' => ['author' => [$a]]], 'author');
          return $author;
        }
      }

    }

    // 6. "if no author page was found" ... check for rel-author link
    if(!$authorPage) {
      if(isset($mf2['rels']) && isset($mf2['rels']['author']))
        $authorPage = $mf2['rels']['author'][0];
    }

    // 7. "if there is an author-page URL" ...
    if($authorPage) {

      // 7.1 "get the author-page from that URL and parse it for microformats2"
      $authorPageContents = self::getURL($authorPage, $http);

      if($authorPageContents) {
        // The rel-me links belong to the author page as a whole, so look them up once
        $relMeLinks = (isset($authorPageContents['rels']) && isset($authorPageContents['rels']['me'])) ? $authorPageContents['rels']['me'] : [];

        foreach($authorPageContents['items'] as $i) {
          if(self::isHCard($i)) {

            // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
            if(array_key_exists('url', $i['properties'])
              and in_array($authorPage, $i['properties']['url'])
              and array_key_exists('uid', $i['properties'])
              and in_array($authorPage, $i['properties']['uid'])
            ) {
              return self::parseAsHCard($i, $http, $authorPage)['data'];
            }

            // 7.3 "else if author-page has 1+ h-card with url property which matches the href of a rel-me link on the author-page"
            if(count($relMeLinks) > 0
              and array_key_exists('url', $i['properties'])
              and count(array_intersect($i['properties']['url'], $relMeLinks)) > 0
            ) {
              return self::parseAsHCard($i, $http, $authorPage)['data'];
            }

          }
        }
      }

      // 7.4 "if the h-entry's page has 1+ h-card with url == author-page URL, use first such h-card, exit."
      foreach($mf2['items'] as $i) {
        if(self::isHCard($i)) {

          if(array_key_exists('url', $i['properties'])
            and in_array($authorPage, $i['properties']['url'])
          ) {
            return self::parseAsHCard($i, $http)['data'];
          }

        }
      }

    }

    // Nothing above produced an author
    if(!$author['name'] && !$author['photo'] && !$author['url'])
      return null;

    return $author;
  }
  /**
   * Run HTML through HTMLPurifier with a fixed whitelist of elements.
   *
   * @param string $html Markup to clean
   * @return string The purified markup, with "&#xD;" replaced by a carriage return
   */
  private static function sanitizeHTML($html) {
    // Elements that are kept in the output
    $allowedTags = [
      'a', 'abbr', 'b', 'code', 'del', 'em', 'i', 'img', 'q', 'strike', 'strong', 'time',
      'blockquote', 'pre', 'p',
      'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
      'ul', 'li', 'ol'
    ];

    $settings = HTMLPurifier_Config::createDefault();
    $settings->set('Cache.DefinitionImpl', null);
    $settings->set('HTML.AllowedElements', $allowedTags);

    // Register <time> with a "datetime" attribute on the HTML definition
    // NOTE(review): the "true" argument presumably requests the editable (raw) definition — confirm against the HTMLPurifier docs
    $definition = $settings->getHTMLDefinition(true);
    $definition->addElement('time', 'Inline', 'Inline', 'Common', ['datetime' => 'Text']);

    // Override the allowed classes to only support Microformats2 classes
    $definition->manager->attrTypes->set('Class', new HTMLPurifier_AttrDef_HTML_Microformats2());

    $purifier = new HTMLPurifier($settings);
    $clean = $purifier->purify($html);

    return str_replace("&#xD;","\r",$clean);
  }
  /**
   * Report whether any key of the array is numeric according to is_numeric().
   *
   * @param array $arr Array to inspect
   * @return bool True as soon as one numeric key is found, false otherwise
   */
  private static function hasNumericKeys(array $arr) {
    foreach(array_keys($arr) as $candidate) {
      if(is_numeric($candidate)) {
        return true;
      }
    }
    return false;
  }
  /**
   * Report whether a value looks like a microformats2 object: an array
   * without numeric keys that has a non-empty "type" and a "properties" key.
   *
   * @param mixed $mf Value to test
   * @return bool
   */
  private static function isMicroformat($mf) {
    if(!is_array($mf) || self::hasNumericKeys($mf)) {
      return false;
    }
    return !empty($mf['type']) && isset($mf['properties']);
  }
  /**
   * Report whether a value is an array whose "type" list contains "h-card".
   *
   * @param mixed $mf Value to test
   * @return bool
   */
  private static function isHCard($mf) {
    if(!is_array($mf) || empty($mf['type'])) {
      return false;
    }
    $types = $mf['type'];
    return is_array($types) && in_array('h-card', $types);
  }
  /**
   * Report whether a value is a string that looks like an http(s) URL,
   * i.e. matches the pattern "http(s)://" + something + "." + something.
   *
   * @param mixed $string Value to test; anything that is not a string is rejected
   * @return bool
   */
  private static function isURL($string) {
    // Property values can be nested arrays; keep those away from preg_match()
    if(!is_string($string))
      return false;
    return preg_match('/^https?:\/\/.+\..+$/', $string) === 1;
  }
  /**
   * Given a microformats object and a property name, return the plaintext
   * value of the first entry of that property.
   *
   * e.g. {"properties":{"published":["foo"]}} results in "foo"
   *
   * @param array  $mf2      Object with a "properties" map
   * @param string $k        Property name
   * @param mixed  $fallback Returned when no plaintext value can be found
   * @return mixed The string value, the nested object's "value", or $fallback
   */
  private static function getPlaintext($mf2, $k, $fallback=null) {
    // Missing, empty, or non-list properties have nothing to offer
    if(empty($mf2['properties'][$k]) || !is_array($mf2['properties'][$k])) {
      return $fallback;
    }

    // $mf2['properties'][$k] will always be an array since the input was from the mf2 parser
    $first = $mf2['properties'][$k][0];

    if(is_string($first)) {
      return $first;
    }

    // A nested object contributes its "value" when it has one
    if(self::isMicroformat($first) && array_key_exists('value', $first)) {
      return $first['value'];
    }

    return $fallback;
  }
  /**
   * Fetch a URL with the given client and parse the body for microformats2.
   *
   * @param string $url  URL to fetch; a falsy value returns null without a request
   * @param mixed  $http Client exposing get($url), whose result is read via its 'error' and 'body' keys
   * @return mixed The result of \mf2\Parse(), or null when the request failed or had no body
   */
  private static function getURL($url, $http) {
    if(!$url) {
      return null;
    }

    // TODO: consider adding caching here
    $response = $http->get($url);

    $failed = $response['error'] || !$response['body'];
    return $failed ? null : \mf2\Parse($response['body'], $url);
  }
  /**
   * Normalize a URL for comparison: lowercase the host and use "/" for an empty path.
   *
   * Values that cannot be normalized (not a string, rejected by parse_url(),
   * or lacking a host, such as "mailto:" links) are returned unchanged.
   *
   * @param mixed $url URL to normalize
   * @return mixed The normalized URL string, or the input unchanged
   */
  private static function normalize_url($url) {
    if(!is_string($url))
      return $url;

    $parts = parse_url($url);
    // parse_url() returns false for seriously malformed URLs, and rebuilding a URL
    // without a host would insert a spurious "//" after the scheme
    if(!is_array($parts) || empty($parts['host']))
      return $url;

    if(empty($parts['path']))
      $parts['path'] = '/';
    $parts['host'] = strtolower($parts['host']);
    return self::build_url($parts);
  }
  /**
   * Reassemble a URL string from the component array produced by parse_url().
   *
   * @param array $parsed_url Components (scheme, host, port, user, pass, path, query, fragment); all optional
   * @return string The components joined back together with their delimiters
   */
  private static function build_url($parsed_url) {
    // Returns the component wrapped in its delimiter, or '' when the component is absent
    $piece = function($key, $before = '', $after = '') use ($parsed_url) {
      return isset($parsed_url[$key]) ? $before . $parsed_url[$key] . $after : '';
    };

    $user = $piece('user');
    $pass = $piece('pass', ':');
    // The "@" separator is only added when the credentials are truthy
    $credentials = ($user || $pass) ? "$user$pass@" : $user;

    return implode('', [
      $piece('scheme', '', '://'),
      $credentials,
      $piece('host'),
      $piece('port', ':'),
      $piece('path'),
      $piece('query', '?'),
      $piece('fragment', '#')
    ]);
  }
  625. }