You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

150 lines
4.8 KiB

  1. <?php
  2. namespace p3k\XRay;
  3. use p3k\XRay\Formats;
  /**
   * Discovers the feeds that are available at a URL.
   *
   * The URL is fetched through the injected HTTP client. The response is then
   * classified either directly (Atom, RSS, JSON Feed, Microformats JSON) or,
   * for any other document, by parsing it for rel=alternate links and for a
   * Microformats feed in the page itself.
   */
  class Feeds {

    /**
     * Sort order for discovered feed types; lower values are listed first.
     */
    private static $typeRank = ['microformats'=>0, 'jsonfeed'=>1, 'atom'=>2, 'rss'=>3];

    /**
     * HTTP client used for all requests. This class calls set_timeout(),
     * set_max_redirects() and get() on it.
     */
    private $http;

    /**
     * @param object $http HTTP client exposing set_timeout(), set_max_redirects() and get()
     */
    public function __construct($http) {
      $this->http = $http;
    }

    /**
     * Fetches $url and reports which feeds it offers.
     *
     * @param string $url  The http(s) URL to inspect.
     * @param array  $opts Optional settings. 'timeout' and 'max_redirects' are
     *                     applied to the HTTP client; the whole array is also
     *                     forwarded to Formats\HTML::parse() with 'expect' => 'feed'.
     *
     * @return array On success: ['url' => ..., 'code' => ..., 'feeds' => [...]] where
     *               each feed is ['url' => ..., 'type' => 'microformats'|'jsonfeed'|'atom'|'rss'],
     *               sorted by priority. On failure: ['error' => ..., 'error_description' => ...].
     */
    public function find($url, $opts=[]) {
      if(isset($opts['timeout']))
        $this->http->set_timeout($opts['timeout']);
      if(isset($opts['max_redirects']))
        $this->http->set_max_redirects($opts['max_redirects']);

      // Reject anything that is not an absolute http(s) URL before making a request
      $scheme = parse_url($url, PHP_URL_SCHEME);
      if(!in_array($scheme, ['http','https'], true)) {
        return [
          'error' => 'invalid_url',
          'error_description' => 'Only http and https URLs are supported'
        ];
      }

      $host = parse_url($url, PHP_URL_HOST);
      if(!$host) {
        return [
          'error' => 'invalid_url',
          'error_description' => 'The URL provided was not valid'
        ];
      }

      $url = normalize_url($url);

      $result = $this->http->get($url);

      if(isset($result['error']) && $result['error']) {
        return [
          'error' => $result['error'],
          // Guarded so a client that omits the description does not trigger a notice
          'error_description' => isset($result['error_description']) ? $result['error_description'] : null
        ];
      }

      $body = $result['body'];
      // Only the first bytes of the body are sniffed for a root <feed> / <rss> element
      $bodyStart = substr($body, 0, 50);
      $looksLikeJson = substr($body, 0, 1) === '{';

      $feeds = [];

      // First check the content type of the response
      $contentType = $this->contentType($result);

      if(strpos($contentType, 'application/atom+xml') !== false || strpos($bodyStart, '<feed ') !== false) {

        $feeds[] = [
          'url' => $result['url'],
          'type' => 'atom'
        ];

      } elseif(strpos($contentType, 'application/rss+xml') !== false || strpos($contentType, 'text/xml') !== false
        || strpos($contentType, 'application/xml') !== false || strpos($bodyStart, '<rss ') !== false) {

        $feeds[] = [
          'url' => $result['url'],
          'type' => 'rss'
        ];

      } elseif(strpos($contentType, 'application/json') !== false && $looksLikeJson) {

        $feeddata = json_decode($body, true);
        // Strict comparison: a loose == would let a numeric 0 match the version string before PHP 8
        if($feeddata && isset($feeddata['version']) && $feeddata['version'] === 'https://jsonfeed.org/version/1') {
          $feeds[] = [
            'url' => $result['url'],
            'type' => 'jsonfeed'
          ];
        }

      } elseif((strpos($contentType, 'application/mf2+json') !== false || strpos($contentType, 'application/microformats2+json') !== false) && $looksLikeJson) {

        $feeddata = json_decode($body, true);
        // Assume that the first element in the items array is the feed object.
        // The isset() covers a missing/empty items list as well as an item without a type list.
        if($feeddata && isset($feeddata['items'][0]['type'][0]) && $feeddata['items'][0]['type'][0] === 'h-feed') {
          $feeds[] = [
            'url' => $result['url'],
            'type' => 'microformats'
          ];
        }

      } else {
        // Some other document was returned, parse the HTML and look for rel alternates and Microformats

        $mf2 = \mf2\Parse($result['body'], $result['url']);
        $feeds = $this->alternateFeeds($mf2);

        // Check if the feed URL was a temporary redirect
        if($url !== $result['url']) {
          // p3k\http doesn't return the intermediate HTTP codes, so we have to fetch the input URL again without following redirects
          // NOTE(review): the client's redirect limit stays at 0 after this probe,
          // including for the HTML parse below - confirm whether it should be restored.
          $this->http->set_max_redirects(0);
          $check = $this->http->get($url);
          // On a 302 the originally requested URL is reported instead of the redirect target
          if(isset($check['code']) && (int)$check['code'] === 302)
            $result['url'] = $url;
        }

        $parsed = Formats\HTML::parse($this->http, $result, array_merge($opts, ['expect'=>'feed']));
        if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] === 'feed') {
          $feeds[] = [
            'url' => $result['url'],
            'type' => 'microformats'
          ];
        }
      }

      return [
        'url' => $result['url'],
        'code' => $result['code'],
        'feeds' => self::sortByPriority($feeds),
      ];
    }

    /**
     * Returns the Content-Type header of a response as a string. When the header
     * was received more than once, the last value is used. Returns '' if absent.
     */
    private function contentType($result) {
      if(!isset($result['headers']['Content-Type']))
        return '';

      $contentType = $result['headers']['Content-Type'];
      if(is_array($contentType))
        $contentType = $contentType[count($contentType)-1];

      return $contentType;
    }

    /**
     * Collects feeds advertised through rel="alternate" links in a parsed
     * Microformats document. A single link can yield several entries if its
     * type attribute mentions more than one of the recognised media types.
     */
    private function alternateFeeds($mf2) {
      $found = [];

      if(!isset($mf2['rel-urls']) || !is_array($mf2['rel-urls']))
        return $found;

      // Checked in this order so entries for one link keep the original sequence
      $mediaTypes = [
        'application/json' => 'jsonfeed',
        'application/atom+xml' => 'atom',
        'application/rss+xml' => 'rss',
      ];

      foreach($mf2['rel-urls'] as $rel=>$info) {
        if(!isset($info['rels']) || !in_array('alternate', $info['rels'], true))
          continue;
        if(!isset($info['type']))
          continue;

        foreach($mediaTypes as $mediaType=>$feedType) {
          if(strpos($info['type'], $mediaType) !== false) {
            $found[] = [
              'url' => $rel,
              'type' => $feedType
            ];
          }
        }
      }

      return $found;
    }

    /**
     * Sorts feeds by type priority (microformats, jsonfeed, atom, rss).
     *
     * The comparator returns an integer difference. Returning the boolean result
     * of ">" can never signal "less than" and is deprecated as of PHP 8.
     */
    private static function sortByPriority($feeds) {
      $rank = self::$typeRank;
      usort($feeds, function($a, $b) use($rank) {
        return $rank[$a['type']] - $rank[$b['type']];
      });
      return $feeds;
    }

  }