You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

737 lines
25 KiB

8 years ago
8 years ago
8 years ago
  1. <?php
  2. namespace XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. use Parse;
  5. class Mf2 {
  // Picks the microformats2 object that represents the page at $url and hands it
  // to the matching parseAsH* method.
  //
  // $mf2  - parsed microformats2 array; 'items' is read unconditionally, 'rels' only if set
  // $url  - URL of the page; compared against the normalized "url" values of each item
  // $http - HTTP client, passed through untouched to the parseAsH* methods
  //
  // Selection order: a lone item, an item whose "url" matches $url, the first
  // non-h-card item when an h-card matches rel=author, a feed when there are
  // several h-entry items or the first item is an h-feed, and finally the first
  // item of any recognized type.
  //
  // Returns whatever the selected parseAsH* method returns, or false when there
  // are no items or nothing on the page has a recognized type.
  public static function parse($mf2, $url, $http) {
    if(count($mf2['items']) == 0)
      return false;

    // If there is only one item on the page, just use that
    if(count($mf2['items']) == 1) {
      $item = $mf2['items'][0];
      if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-entry it is the only item on the page");
        return self::parseAsHEntry($mf2, $item, $http);
      }
      if(in_array('h-event', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-event it is the only item on the page");
        return self::parseAsHEvent($mf2, $item, $http);
      }
      if(in_array('h-review', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-review it is the only item on the page");
        return self::parseAsHReview($mf2, $item, $http);
      }
      if(in_array('h-recipe', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-recipe it is the only item on the page");
        return self::parseAsHRecipe($mf2, $item, $http);
      }
      if(in_array('h-product', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-product it is the only item on the page");
        return self::parseAsHProduct($mf2, $item, $http);
      }
      if(in_array('h-feed', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-feed because it is the only item on the page");
        return self::parseAsHFeed($mf2, $http);
      }
      if(in_array('h-card', $item['type'])) {
        Parse::debug("mf2:0: Recognized $url as an h-card it is the only item on the page");
        return self::parseAsHCard($item, $http, $url);
      }
    }

    // Check the list of items on the page to see if one matches the URL of the page,
    // and treat as a permalink for that object if so. Otherwise, parse as a feed.
    foreach($mf2['items'] as $item) {
      if(array_key_exists('url', $item['properties'])) {
        $urls = $item['properties']['url'];
        // Array callable instead of the 'self::method' string form, which PHP 8.2 deprecates
        $urls = array_map([__CLASS__, 'normalize_url'], $urls);
        if(in_array($url, $urls)) {
          Parse::debug("mf2:1: Recognized $url as a permalink because an object on the page matched the URL of the request");
          if(in_array('h-card', $item['type'])) {
            return self::parseAsHCard($item, $http, $url);
          } elseif(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
            return self::parseAsHEntry($mf2, $item, $http);
          } elseif(in_array('h-event', $item['type'])) {
            return self::parseAsHEvent($mf2, $item, $http);
          } elseif(in_array('h-review', $item['type'])) {
            return self::parseAsHReview($mf2, $item, $http);
          } elseif(in_array('h-recipe', $item['type'])) {
            return self::parseAsHRecipe($mf2, $item, $http);
          } elseif(in_array('h-product', $item['type'])) {
            return self::parseAsHProduct($mf2, $item, $http);
          } else {
            Parse::debug('This object was not a recognized type.');
            return false;
          }
        }
      }
    }

    // Check for an h-card matching rel=author or the author URL of any h-* on the page,
    // and return the h-* object if so
    if(isset($mf2['rels']['author'])) {
      foreach($mf2['items'] as $card) {
        if(in_array('h-card', $card['type']) && array_key_exists('url', $card['properties'])) {
          $urls = $card['properties']['url'];
          $urls = array_map([__CLASS__, 'normalize_url'], $urls);
          if(count(array_intersect($urls, $mf2['rels']['author'])) > 0) {
            // There is an author h-card on this page
            // Now look for the first h-* object other than an h-card and use that as the object
            foreach($mf2['items'] as $item) {
              if(!in_array('h-card', $item['type'])) {
                if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
                  return self::parseAsHEntry($mf2, $item, $http);
                } elseif(in_array('h-event', $item['type'])) {
                  return self::parseAsHEvent($mf2, $item, $http);
                } elseif(in_array('h-review', $item['type'])) {
                  return self::parseAsHReview($mf2, $item, $http);
                } elseif(in_array('h-recipe', $item['type'])) {
                  return self::parseAsHRecipe($mf2, $item, $http);
                } elseif(in_array('h-product', $item['type'])) {
                  return self::parseAsHProduct($mf2, $item, $http);
                }
              }
            }
          }
        }
      }
    }

    // If there was more than one h-entry on the page, treat the whole page as a feed
    if(count($mf2['items']) > 1) {
      if(count(array_filter($mf2['items'], function($item){
        return in_array('h-entry', $item['type']);
      })) > 1) {
        Parse::debug("mf2:2: Recognized $url as an h-feed because there are more than one object on the page");
        return self::parseAsHFeed($mf2, $http);
      }
    }

    // If the first item is an h-feed, parse as a feed
    $first = $mf2['items'][0];
    if(in_array('h-feed', $first['type'])) {
      Parse::debug("mf2:3: Recognized $url as an h-feed because the first item is an h-feed");
      return self::parseAsHFeed($mf2, $http);
    }

    // Fallback case, but hopefully we have found something before this point
    foreach($mf2['items'] as $item) {
      // Otherwise check for a recognized h-entr* object
      if(in_array('h-entry', $item['type']) || in_array('h-cite', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-entry on the page");
        return self::parseAsHEntry($mf2, $item, $http);
      } elseif(in_array('h-event', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-event on the page");
        return self::parseAsHEvent($mf2, $item, $http);
      } elseif(in_array('h-review', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-review on the page");
        return self::parseAsHReview($mf2, $item, $http);
      } elseif(in_array('h-recipe', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-recipe on the page");
        // Recipes go through the recipe parser, like every other dispatch site in this method
        return self::parseAsHRecipe($mf2, $item, $http);
      } elseif(in_array('h-product', $item['type'])) {
        Parse::debug("mf2:6: $url is falling back to the first h-product on the page");
        return self::parseAsHProduct($mf2, $item, $http);
      }
    }

    Parse::debug("mf2:E: No object at $url was recognized");
    return false;
  }
  // Copies the first plaintext value of each listed property from $item into $data.
  //
  // $properties    - property names copied as-is when a plaintext value exists
  // $urlProperties - property names copied only when the value passes isURL()
  // $item          - microformats2 object to read from
  // $data          - output array, modified in place (keyed by property name)
  private static function collectSingleValues($properties, $urlProperties, $item, &$data) {
    foreach($properties as $name) {
      $value = self::getPlaintext($item, $name);
      if($value === null)
        continue;
      $data[$name] = $value;
    }

    foreach($urlProperties as $name) {
      $value = self::getPlaintext($item, $name);
      // Values that are present but not URLs are dropped silently
      if($value !== null && self::isURL($value)) {
        $data[$name] = $value;
      }
    }
  }
  // Reads the first value of $property from $item as text plus optional HTML.
  //
  // Returns null when the property is absent. Otherwise returns an array with a
  // 'text' key (false when the value is neither a string nor an array carrying a
  // 'value' key) and, when sanitized HTML exists and differs from the text, an
  // 'html' key.
  private static function parseHTMLValue($property, $item) {
    if(!array_key_exists($property, $item['properties']))
      return null;

    $first = $item['properties'][$property][0];
    $text = false;
    $html = false;

    if(is_string($first)) {
      // Plain string values are returned untouched (not trimmed)
      $text = $first;
    } elseif(is_array($first) && array_key_exists('value', $first)) {
      if(array_key_exists('html', $first)) {
        $html = trim(self::sanitizeHTML($first['html']));
        // The text comes from the parser's 'value', not from stripping the HTML
        $text = trim(str_replace("&#xD;","\r",$first['value']));
      } else {
        $text = trim($first['value']);
      }
    }

    $result = ['text' => $text];
    if($html && $text != $html) {
      $result['html'] = $html;
    }
    return $result;
  }
  // Collects every value of the listed properties into $data as arrays.
  // String values are kept as-is. Nested microformats objects contribute their
  // URL when they have one (and are parsed into $refs under that URL), otherwise
  // their 'value'.
  //
  // $properties - property names to collect
  // $item       - microformats2 object to read from
  // $data       - output array, modified in place
  // $refs       - output map of URL => parsed nested object, modified in place
  // $http       - HTTP client handed to parse() for nested objects
  private static function collectArrayValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $name) {
      if(!array_key_exists($name, $item['properties']))
        continue;

      foreach($item['properties'][$name] as $value) {
        $isString = is_string($value);
        // Anything that is neither a string nor a microformats object is ignored
        if(!$isString && !self::isMicroformat($value))
          continue;

        if(!array_key_exists($name, $data))
          $data[$name] = [];

        if($isString) {
          $data[$name][] = $value;
          continue;
        }

        $nestedURL = self::getPlaintext($value, 'url');
        if($nestedURL && self::isURL($nestedURL)) {
          $data[$name][] = $nestedURL;
          $parsed = self::parse(['items'=>[$value]], $nestedURL, $http);
          if($parsed) {
            $refs[$nestedURL] = $parsed['data'];
          }
        } else {
          $data[$name][] = $value['value'];
        }
      }
    }
  }
  // Collects the URL values of the listed properties into $data as arrays.
  // Strings are kept only when they pass isURL(). Nested microformats objects
  // are kept only when they have a URL, in which case the URL is stored and the
  // parsed object is added to $refs under that URL.
  //
  // $properties - property names to collect
  // $item       - microformats2 object to read from
  // $data       - output array, modified in place
  // $refs       - output map of URL => parsed nested object, modified in place
  // $http       - HTTP client handed to parse() for nested objects
  private static function collectArrayURLValues($properties, $item, &$data, &$refs, &$http) {
    foreach($properties as $name) {
      if(!array_key_exists($name, $item['properties']))
        continue;

      foreach($item['properties'][$name] as $value) {
        if(is_string($value)) {
          if(self::isURL($value)) {
            if(!array_key_exists($name, $data))
              $data[$name] = [];
            $data[$name][] = $value;
          }
          continue;
        }

        if(!self::isMicroformat($value))
          continue;

        $nestedURL = self::getPlaintext($value, 'url');
        if(!$nestedURL || !self::isURL($nestedURL))
          continue;

        if(!array_key_exists($name, $data))
          $data[$name] = [];
        $data[$name][] = $nestedURL;

        // parse the object and put the result in the "refs" object
        $parsed = self::parse(['items'=>[$value]], $nestedURL, $http);
        if($parsed) {
          $refs[$nestedURL] = $parsed['data'];
        }
      }
    }
  }
  // Sets $data['name'] and $data['content'] from $item.
  //
  // The name is dropped when, ignoring whitespace and a trailing ellipsis, it is
  // a prefix of the plaintext content. When content exists, its 'text' is always
  // written to $data['content'] and its 'html' is written when present.
  //
  // $item - microformats2 object to read from
  // $data - output array, modified in place
  private static function determineNameAndContent($item, &$data) {
    $name = self::getPlaintext($item, 'name');
    $content = self::parseHTMLValue('content', $item);

    // Only compare when both exist; passing a null name to preg_replace()/trim()
    // is deprecated as of PHP 8.1
    if($content && $name !== null) {
      // Trim ellipses from the name
      $name = preg_replace('/ ?(\.\.\.|…)$/', '', $name);

      // Remove all whitespace when checking equality
      $nameCompare = preg_replace('/\s/','',trim($name));
      $contentCompare = preg_replace('/\s/','',trim((string)$content['text']));

      // Check if the name is a prefix of the content
      if($contentCompare && $nameCompare && strpos($contentCompare, $nameCompare) === 0) {
        $name = null;
      }
    }

    if($name) {
      $data['name'] = $name;
    }

    // If there is content, always return the plaintext content, and return HTML content if it's different
    if($content) {
      $data['content']['text'] = $content['text'];
      if(array_key_exists('html', $content))
        $data['content']['html'] = $content['html'];
    }
  }
  // Builds the response for an h-entry (also used for h-cite by parse()).
  //
  // Returns ['data' => ...] with type "entry", plus a 'refs' key when nested
  // objects with URLs were found.
  private static function parseAsHEntry($mf2, $item, $http) {
    $data = ['type' => 'entry'];
    $refs = [];

    // Single plaintext values, and single values that must be URLs
    $plainProperties = ['published','summary','rsvp','swarm-coins'];
    $urlProperties = ['url'];
    self::collectSingleValues($plainProperties, $urlProperties, $item, $data);

    // Always arrays; may hold plaintext
    $listProperties = ['category','invitee'];
    self::collectArrayValues($listProperties, $item, $data, $refs, $http);

    // Always arrays of URLs; an h-* value with a URL contributes the URL and a "ref"
    $urlListProperties = ['photo','video','audio','syndication','in-reply-to','like-of','repost-of','bookmark-of'];
    self::collectArrayURLValues($urlListProperties, $item, $data, $refs, $http);

    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    $response = ['data' => $data];
    if(count($refs)) {
      $response['refs'] = $refs;
    }
    return $response;
  }
  // Builds the response for an h-review.
  //
  // Returns ['data' => ...] with type "review", plus a 'refs' key when nested
  // objects with URLs were found.
  private static function parseAsHReview($mf2, $item, $http) {
    $data = ['type' => 'review'];
    $refs = [];

    $plainProperties = ['summary','published','rating','best','worst'];
    self::collectSingleValues($plainProperties, ['url'], $item, $data);

    // Fallback for Mf1 "description" as content. The PHP parser does not properly map this to "content"
    $fallbackContent = self::parseHTMLValue('description', $item);
    if($fallbackContent) {
      $data['content'] = $fallbackContent;
    }

    self::collectArrayValues(['category'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['item'], $item, $data, $refs, $http);

    // Runs after the description fallback, so a real "content" property overwrites its keys
    self::determineNameAndContent($item, $data);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    $response = ['data' => $data];
    if(count($refs)) {
      $response['refs'] = $refs;
    }
    return $response;
  }
  // Builds the response for an h-recipe.
  //
  // Returns ['data' => ...] with type "recipe", plus a 'refs' key when nested
  // objects with URLs were found.
  private static function parseAsHRecipe($mf2, $item, $http) {
    $data = ['type' => 'recipe'];
    $refs = [];

    $plainProperties = ['name','summary','published','duration','yield','nutrition'];
    self::collectSingleValues($plainProperties, ['url'], $item, $data);

    // Instructions keep both their text and (when different) HTML form
    $steps = self::parseHTMLValue('instructions', $item);
    if($steps) {
      $data['instructions'] = $steps;
    }

    self::collectArrayValues(['category','ingredient'], $item, $data, $refs, $http);
    self::collectArrayURLValues(['photo'], $item, $data, $refs, $http);

    $author = self::findAuthor($mf2, $item, $http);
    if($author) {
      $data['author'] = $author;
    }

    $response = ['data' => $data];
    if(count($refs)) {
      $response['refs'] = $refs;
    }
    return $response;
  }
  // Builds the response for an h-product.
  //
  // $mf2 is accepted for signature parity with the other parseAsH* methods but
  // is not read here.
  //
  // Returns ['data' => ...] with type "product", plus a 'refs' key when nested
  // objects with URLs were found.
  private static function parseAsHProduct($mf2, $item, $http) {
    $data = [
      'type' => 'product'
    ];
    // Must start as an array: it is passed by reference below and then given to
    // count(), which rejects null in PHP 8
    $refs = [];

    self::collectSingleValues(['name','identifier','price'], ['url'], $item, $data);

    $description = self::parseHTMLValue('description', $item);
    if($description) {
      $data['description'] = $description;
    }

    self::collectArrayValues(['category','brand'], $item, $data, $refs, $http);

    self::collectArrayURLValues(['photo','video','audio'], $item, $data, $refs, $http);

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  // Builds the response for an h-event.
  //
  // $mf2 is accepted for signature parity with the other parseAsH* methods but
  // is not read here.
  //
  // Returns ['data' => ...] with type "event", plus a 'refs' key when nested
  // objects with URLs were found.
  private static function parseAsHEvent($mf2, $item, $http) {
    $data = [
      'type' => 'event'
    ];
    $refs = [];

    // Single plaintext and URL values
    self::collectSingleValues(['name','summary','published','start','end','duration'], ['url'], $item, $data);

    // These properties are always returned as arrays and may contain plaintext content
    self::collectArrayValues(['category','location','attendee'], $item, $data, $refs, $http);

    // These properties are always returned as arrays and always URLs
    // If the value is an h-* object with a URL, the URL is used and a "ref" is added as well
    self::collectArrayURLValues(['photo','video','audio','syndication'], $item, $data, $refs, $http);

    // If there is a description, always return the plaintext description, and return HTML description if it's different.
    // The description is only emitted when its plaintext is non-empty.
    $description = self::parseHTMLValue('description', $item);
    if($description && $description['text']) {
      $data['description'] = $description;
    }

    $response = [
      'data' => $data
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return $response;
  }
  // Placeholder feed response: feed parsing is not implemented, so neither
  // argument is read. The result always has an empty author card, a "todo"
  // note and an empty 'entries' list.
  private static function parseAsHFeed($mf2, $http) {
    $emptyAuthor = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $feed = ['type' => 'feed'];
    $feed['author'] = $emptyAuthor;
    $feed['todo'] = 'Not yet implemented. Please see https://github.com/aaronpk/XRay/issues/1';

    $response = ['data' => $feed];
    $response['entries'] = [];
    return $response;
  }
  // Builds the response for an h-card.
  //
  // $item      - the h-card microformats2 object
  // $http      - HTTP client (not read here)
  // $authorURL - optional URL the card is expected to match. When given, a card
  //              URL that normalizes to it is preferred, and it is used as the
  //              URL when the card has no usable one.
  //
  // Returns ['data' => ['type' => 'card', 'name' => ..., 'url' => ..., 'photo' => ...]].
  private static function parseAsHCard($item, $http, $authorURL=false) {
    $data = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $properties = ['url','name','photo'];
    foreach($properties as $p) {
      if($p == 'url' && $authorURL) {
        // The card may have no "url" property at all (e.g. a lone h-card on the page)
        $urls = (isset($item['properties']['url']) && is_array($item['properties']['url']))
          ? $item['properties']['url']
          : [];

        // If there is a matching author URL, use that one
        $found = false;
        foreach($urls as $url) {
          // Skip nested objects: only plain string values can be URLs
          if(is_string($url) && self::isURL($url)) {
            $url = self::normalize_url($url);
            if($url == $authorURL) {
              $data['url'] = $url;
              $found = true;
              break;
            }
          }
        }
        if(!$found && isset($urls[0]) && is_string($urls[0]) && self::isURL($urls[0])) {
          $data['url'] = $urls[0];
        }
      } else if(($v = self::getPlaintext($item, $p)) !== null) {
        // Make sure the URL property is actually a URL
        if($p == 'url' || $p == 'photo') {
          if(self::isURL($v))
            $data[$p] = $v;
        } else {
          $data[$p] = $v;
        }
      }
    }

    // If no URL property was found, use the $authorURL provided
    if(!$data['url'])
      $data['url'] = $authorURL;

    $response = [
      'data' => $data
    ];

    return $response;
  }
  // Author discovery for $item, following http://indiewebcamp.com/authorship
  // (the numbered comments below quote the steps of that algorithm).
  //
  // $mf2  - parsed microformats2 array of the page containing $item
  // $item - the object whose author is wanted
  // $http - HTTP client used to fetch the author page
  //
  // Returns an author card array (type/name/url/photo), or null when nothing
  // was found.
  private static function findAuthor($mf2, $item, $http) {
    $author = [
      'type' => 'card',
      'name' => null,
      'url' => null,
      'photo' => null
    ];

    $authorPage = false;

    if(array_key_exists('author', $item['properties'])) {
      foreach($item['properties']['author'] as $candidate) {
        // 5.1 "if it has an h-card, use it, exit."
        if(self::isHCard($candidate)) {
          return self::parseAsHCard($candidate, $http)['data'];
        }

        // 5.2 "otherwise if author property is an http(s) URL, let the author-page have that URL"
        if(is_string($candidate) && self::isURL($candidate)) {
          $authorPage = $candidate;
          continue;
        }

        // 5.3 "otherwise use the author property as the author name, exit"
        // Reached for non-URL strings and for mf2 objects that are not h-cards;
        // only the name can be set since no h-card or URL was found
        $author['name'] = self::getPlaintext($item, 'author');
        return $author;
      }
    }

    // 6. "if no author page was found" ... check for rel-author link
    if(!$authorPage && isset($mf2['rels']['author'])) {
      $authorPage = $mf2['rels']['author'][0];
    }

    // 7. "if there is an author-page URL" ...
    if($authorPage) {
      // 7.1 "get the author-page from that URL and parse it for microformats2"
      $authorPageContents = self::getURL($authorPage, $http);

      if($authorPageContents) {
        $relMeLinks = isset($authorPageContents['rels']['me']) ? $authorPageContents['rels']['me'] : [];

        foreach($authorPageContents['items'] as $candidate) {
          if(!self::isHCard($candidate))
            continue;

          $hasURL = array_key_exists('url', $candidate['properties']);

          // 7.2 "if author-page has 1+ h-card with url == uid == author-page's URL, then use first such h-card, exit."
          if($hasURL
            && in_array($authorPage, $candidate['properties']['url'])
            && array_key_exists('uid', $candidate['properties'])
            && in_array($authorPage, $candidate['properties']['uid'])
          ) {
            return self::parseAsHCard($candidate, $http, $authorPage)['data'];
          }

          // 7.3 "else if author-page has 1+ h-card with url property which matches the href of a rel-me link on the author-page"
          if(count($relMeLinks) > 0
            && $hasURL
            && count(array_intersect($candidate['properties']['url'], $relMeLinks)) > 0
          ) {
            return self::parseAsHCard($candidate, $http, $authorPage)['data'];
          }
        }
      }

      // 7.4 "if the h-entry's page has 1+ h-card with url == author-page URL, use first such h-card, exit."
      foreach($mf2['items'] as $candidate) {
        if(self::isHCard($candidate)
          && array_key_exists('url', $candidate['properties'])
          && in_array($authorPage, $candidate['properties']['url'])
        ) {
          return self::parseAsHCard($candidate, $http)['data'];
        }
      }
    }

    $isEmpty = !$author['name'] && !$author['photo'] && !$author['url'];
    return $isEmpty ? null : $author;
  }
  // Runs $html through HTMLPurifier with a fixed element whitelist and returns
  // the purified markup, with "&#xD;" entities turned back into carriage returns.
  private static function sanitizeHTML($html) {
    $allowedElements = [
      'a',
      'abbr',
      'b',
      'code',
      'del',
      'em',
      'i',
      'img',
      'q',
      'strike',
      'strong',
      'time',
      'blockquote',
      'pre',
      'p',
      'h1',
      'h2',
      'h3',
      'h4',
      'h5',
      'h6',
      'ul',
      'li',
      'ol'
    ];

    $config = HTMLPurifier_Config::createDefault();
    $config->set('Cache.DefinitionImpl', null);
    $config->set('HTML.AllowedElements', $allowedElements);

    // Register <time> with a "datetime" attribute on the raw definition
    $definition = $config->getHTMLDefinition(true);
    $timeAttributes = ['datetime' => 'Text'];
    $definition->addElement('time', 'Inline', 'Inline', 'Common', $timeAttributes);

    // Override the allowed classes to only support Microformats2 classes
    $classFilter = new HTMLPurifier_AttrDef_HTML_Microformats2();
    $definition->manager->attrTypes->set('Class', $classFilter);

    $purifier = new HTMLPurifier($config);
    $clean = $purifier->purify($html);

    return str_replace("&#xD;","\r",$clean);
  }
  // Returns true when at least one key of $arr is numeric, false otherwise
  // (including for an empty array).
  private static function hasNumericKeys(array $arr) {
    foreach(array_keys($arr) as $key) {
      if(is_numeric($key)) {
        return true;
      }
    }
    return false;
  }
  // Returns true when $mf looks like a microformats2 object: an array without
  // numeric keys that has a non-empty 'type' and a 'properties' entry.
  private static function isMicroformat($mf) {
    if(!is_array($mf) || self::hasNumericKeys($mf)) {
      return false;
    }
    return !empty($mf['type']) && isset($mf['properties']);
  }
  // Returns true when $mf is an array whose 'type' is a non-empty array
  // containing "h-card".
  private static function isHCard($mf) {
    if(!is_array($mf) || empty($mf['type']) || !is_array($mf['type'])) {
      return false;
    }
    return in_array('h-card', $mf['type']);
  }
  // Tests whether $string looks like an http(s) URL with a dot somewhere after
  // the scheme. Returns the preg_match() result (1 on match, 0 otherwise).
  // Non-string input returns 0 instead of being handed to preg_match(), which
  // would throw a TypeError for arrays in PHP 8.
  private static function isURL($string) {
    if(!is_string($string))
      return 0;
    return preg_match('/^https?:\/\/.+\..+$/', $string);
  }
  // Returns the plaintext form of the first value of property $k on the
  // microformats object $mf2, e.g.
  //   {"properties":{"published":["foo"]}} with $k = "published" gives "foo"
  // A nested microformats object yields its 'value'. $fallback is returned when
  // the property is missing or empty, or its first value has no plaintext form.
  private static function getPlaintext($mf2, $k, $fallback=null) {
    if(empty($mf2['properties'][$k]) || !is_array($mf2['properties'][$k])) {
      return $fallback;
    }

    // Only the first value of the property is considered
    $first = $mf2['properties'][$k][0];

    if(is_string($first)) {
      return $first;
    }

    if(self::isMicroformat($first) && array_key_exists('value', $first)) {
      return $first['value'];
    }

    return $fallback;
  }
  // Fetches $url with $http->get() and parses the response body as
  // microformats2. Returns null when $url is empty, the result reports an
  // error, or the body is empty.
  private static function getURL($url, $http) {
    if(!$url) return null;

    // TODO: consider adding caching here
    $response = $http->get($url);

    $failed = $response['error'] || !$response['body'];
    if($failed) {
      return null;
    }

    return \mf2\Parse($response['body'], $url);
  }
  // Normalizes $url for comparison: an empty path becomes "/" and the host is
  // lowercased. A URL that parse_url() cannot parse is returned unchanged.
  private static function normalize_url($url) {
    $parts = parse_url($url);
    // parse_url() returns false on seriously malformed URLs; there is nothing to rebuild
    if($parts === false)
      return $url;
    if(empty($parts['path']))
      $parts['path'] = '/';
    // Relative URLs have no host component
    if(isset($parts['host']))
      $parts['host'] = strtolower($parts['host']);
    return self::build_url($parts);
  }
  // Reassembles a URL string from an array of components as produced by
  // parse_url(). Components that are not set contribute nothing.
  private static function build_url($parsed_url) {
    $scheme = $host = $port = $user = $pass = $path = $query = $fragment = '';

    if(isset($parsed_url['scheme']))   $scheme   = $parsed_url['scheme'] . '://';
    if(isset($parsed_url['host']))     $host     = $parsed_url['host'];
    if(isset($parsed_url['port']))     $port     = ':' . $parsed_url['port'];
    if(isset($parsed_url['user']))     $user     = $parsed_url['user'];
    if(isset($parsed_url['pass']))     $pass     = ':' . $parsed_url['pass'];
    if(isset($parsed_url['path']))     $path     = $parsed_url['path'];
    if(isset($parsed_url['query']))    $query    = '?' . $parsed_url['query'];
    if(isset($parsed_url['fragment'])) $fragment = '#' . $parsed_url['fragment'];

    // The "@" separator is emitted only when there is a user or a password
    $credentialSuffix = ($user || $pass) ? $pass . '@' : '';

    return $scheme . $user . $credentialSuffix . $host . $port . $path . $query . $fragment;
  }
  632. }