You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
3.1 KiB

  1. <?php
  2. namespace p3k\XRay;
  3. use p3k\XRay\Formats;
  4. class Feeds {
  5. private $http;
  6. public function __construct($http) {
  7. $this->http = $http;
  8. }
  9. public function find($url, $opts=[]) {
  10. if(isset($opts['timeout']))
  11. $this->http->set_timeout($opts['timeout']);
  12. if(isset($opts['max_redirects']))
  13. $this->http->set_max_redirects($opts['max_redirects']);
  14. $scheme = parse_url($url, PHP_URL_SCHEME);
  15. if(!in_array($scheme, ['http','https'])) {
  16. return [
  17. 'error' => 'invalid_url',
  18. 'error_description' => 'Only http and https URLs are supported'
  19. ];
  20. }
  21. $host = parse_url($url, PHP_URL_HOST);
  22. if(!$host) {
  23. return [
  24. 'error' => 'invalid_url',
  25. 'error_description' => 'The URL provided was not valid'
  26. ];
  27. }
  28. $url = normalize_url($url);
  29. $result = $this->http->get($url);
  30. $body = $result['body'];
  31. $feeds = [];
  32. // First check the content type of the response
  33. $contentType = isset($result['headers']['Content-Type']) ? $result['headers']['Content-Type'] : '';
  34. if(is_array($contentType))
  35. $contentType = $contentType[count($contentType)-1];
  36. if(strpos($contentType, 'application/atom+xml') !== false) {
  37. $feeds[] = [
  38. 'url' => $result['url'],
  39. 'type' => 'atom'
  40. ];
  41. } elseif(strpos($contentType, 'application/rss+xml') !== false) {
  42. $feeds[] = [
  43. 'url' => $result['url'],
  44. 'type' => 'rss'
  45. ];
  46. } elseif(strpos($contentType, 'application/json') !== false
  47. && substr($body, 0, 1) == '{' && strpos(substr($body, 0, 100), 'https://jsonfeed.org/version/1')) {
  48. $feeds[] = [
  49. 'url' => $result['url'],
  50. 'type' => 'jsonfeed'
  51. ];
  52. } else {
  53. // Some other document was returned, parse the HTML and look for rel alternates and Microformats
  54. $mf2 = \mf2\Parse($body, $result['url']);
  55. if(isset($mf2['alternates'])) {
  56. foreach($mf2['alternates'] as $alt) {
  57. if(strpos($alt['type'], 'application/json') !== false) {
  58. $feeds[] = [
  59. 'url' => $alt['url'],
  60. 'type' => 'jsonfeed'
  61. ];
  62. }
  63. if(strpos($alt['type'], 'application/atom+xml') !== false) {
  64. $feeds[] = [
  65. 'url' => $alt['url'],
  66. 'type' => 'atom'
  67. ];
  68. }
  69. if(strpos($alt['type'], 'application/rss+xml') !== false) {
  70. $feeds[] = [
  71. 'url' => $alt['url'],
  72. 'type' => 'rss'
  73. ];
  74. }
  75. }
  76. }
  77. $parsed = Formats\HTML::parse($this->http, $body, $result['url'], array_merge($opts, ['expect'=>'feed']));
  78. if($parsed && isset($parsed['data']['type']) && $parsed['data']['type'] == 'feed') {
  79. $feeds[] = [
  80. 'url' => $result['url'],
  81. 'type' => 'microformats'
  82. ];
  83. }
  84. }
  85. // Sort feeds by priority
  86. $rank = ['microformats'=>0,'jsonfeed'=>1,'atom'=>2,'rss'=>3];
  87. usort($feeds, function($a, $b) use($rank) {
  88. return $rank[$a['type']] > $rank[$b['type']];
  89. });
  90. return [
  91. 'url' => $result['url'],
  92. 'code' => $result['code'],
  93. 'feeds' => $feeds
  94. ];
  95. }
  96. }