You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

122 lines
3.1 KiB

  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use HTMLPurifier, HTMLPurifier_Config;
  4. use DOMDocument, DOMXPath;
  5. use p3k\XRay\Formats;
  6. use PicoFeed\Reader\Reader;
  7. use PicoFeed\PicoFeedException;
  /**
   * Format handler that runs an HTTP response body through PicoFeed and
   * converts every feed item into an "entry" array.
   */
  class XML extends Format {

    /**
     * Host check for this format.
     *
     * @param string $url
     * @return bool Always true.
     */
    public static function matches_host($url) {
      return true;
    }

    /**
     * URL check for this format.
     *
     * @param string $url
     * @return bool Always true.
     */
    public static function matches($url) {
      return true;
    }

    /**
     * Parses the response body as a feed.
     *
     * @param array $http_response Array with the keys 'body', 'url' and 'code'.
     * @return array Result array with the keys 'data', 'url', 'source-format'
     *               and 'code'. 'data' has type 'feed' plus an 'items' list when
     *               PicoFeed parsed the body, and type 'unknown' otherwise.
     */
    public static function parse($http_response) {
      $xml = $http_response['body'];
      $url = $http_response['url'];

      $result = [
        'data' => [
          'type' => 'unknown',
        ],
        'url' => $url,
        'source-format' => 'xml',
        'code' => $http_response['code'],
      ];

      try {
        $reader = new Reader();
        // NOTE(review): the third argument is left empty; presumably the
        // encoding hint - confirm against the PicoFeed Reader API.
        $parser = $reader->getParser($url, $xml, '');
        $feed = $parser->execute();

        $result['data']['type'] = 'feed';
        $result['data']['items'] = [];
        foreach($feed->getItems() as $item) {
          $result['data']['items'][] = self::_hEntryFromFeedItem($item, $feed);
        }
      } catch(PicoFeedException $e) {
        // Deliberately best-effort: a PicoFeed failure is not reported to the
        // caller, $result is returned in whatever state it reached.
      }

      return $result;
    }

    /**
     * Builds one entry array from a PicoFeed item.
     *
     * @param object $item Feed item as yielded by $feed->getItems().
     * @param object $feed Parsed feed; only its siteUrl property is read, as a
     *                     fallback for the author URL.
     * @return array Entry with 'type', 'author' and 'post-type' always set, and
     *               'uid', 'url', 'published', 'content', 'name', 'audio' and
     *               'photo' set only when the item provides them.
     */
    private static function _hEntryFromFeedItem($item, $feed) {
      $entry = [
        'type' => 'entry',
        'author' => [
          'name' => null,
          'url' => null,
          'photo' => null
        ]
      ];

      // Prefer the <guid> tag and fall back to the <id> tag.
      $guid = $item->getTag('guid');
      if(is_array($guid) && count($guid)) {
        $entry['uid'] = $guid[0];
      } else {
        $guid = $item->getTag('id');
        if(is_array($guid) && count($guid)) {
          $entry['uid'] = $guid[0];
        }
      }

      $url = $item->getUrl();
      if($url) {
        $entry['url'] = $url;
      }

      $published = $item->getPublishedDate();
      if($published) {
        $entry['published'] = $published->format('c');
      }

      $content = $item->getContent();
      if($content) {
        $entry['content'] = [
          'html' => self::sanitizeHTML($content),
          'text' => self::stripHTML($content)
        ];
      }

      // A title that merely repeats the URL is not treated as a name.
      $title = $item->getTitle();
      if($title && $title != $url) {
        $entry['name'] = $title;
        // Drop the name again when it is just the truncated start of the content.
        if(isset($entry['content'])
          && self::_isTruncatedPrefix($title, $entry['content']['text'])) {
          unset($entry['name']);
        }
      }

      $author = $item->getAuthor();
      if($author) {
        $entry['author']['name'] = $author;
      }

      $authorUrl = $item->getAuthorUrl();
      if($authorUrl) {
        $entry['author']['url'] = $authorUrl;
      } elseif($feed->siteUrl) {
        $entry['author']['url'] = $feed->siteUrl;
      }

      $enclosureType = $item->getEnclosureType();
      if($enclosureType) {
        // Only these enclosure MIME types are mapped to a property.
        $prop = false;
        switch($enclosureType) {
          case 'audio/mpeg':
            $prop = 'audio';
            break;
          case 'image/jpeg':
          case 'image/png':
          case 'image/gif':
            $prop = 'photo';
            break;
        }
        if($prop) {
          $entry[$prop] = [$item->getEnclosureUrl()];
        }
      }

      $entry['post-type'] = \p3k\XRay\PostType::discover($entry);

      return $entry;
    }

    /**
     * Tells whether $title ends in an ellipsis and the part before the
     * ellipsis is a byte-for-byte prefix of $text.
     *
     * substr() and strlen() count bytes, so the suffix is compared using the
     * byte length of each ellipsis form instead of a fixed "last character".
     *
     * @param string $title
     * @param string $text
     * @return bool
     */
    private static function _isTruncatedPrefix($title, $text) {
      $title = (string)$title;
      $text = (string)$text;

      // Three ASCII dots, or U+2026 HORIZONTAL ELLIPSIS encoded as UTF-8.
      foreach(['...', "\xE2\x80\xA6"] as $ellipsis) {
        $length = strlen($ellipsis);
        if(strlen($title) >= $length && substr($title, -$length) === $ellipsis) {
          $trimmedTitle = (string)substr($title, 0, -$length);
          // strncmp() compares raw bytes, avoiding the numeric-string
          // coercion that a loose == between two strings can trigger.
          return strncmp($text, $trimmedTitle, strlen($trimmedTitle)) === 0;
        }
      }

      return false;
    }
  }