You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

161 lines
4.2 KiB

  1. <?php
  2. namespace p3k\XRay;
  3. class Fetcher {
  4. private $http;
  5. public function __construct($http) {
  6. $this->http = $http;
  7. }
  8. public function fetch($url, $opts=[]) {
  9. if(isset($opts['timeout']))
  10. $this->http->set_timeout($opts['timeout']);
  11. if(isset($opts['max_redirects']))
  12. $this->http->set_max_redirects($opts['max_redirects']);
  13. // Attempt some basic URL validation
  14. $scheme = parse_url($url, PHP_URL_SCHEME);
  15. if(!in_array($scheme, ['http','https'])) {
  16. return [
  17. 'error_code' => 400,
  18. 'error' => 'invalid_url',
  19. 'error_description' => 'Only http and https URLs are supported'
  20. ];
  21. }
  22. $host = parse_url($url, PHP_URL_HOST);
  23. if(!$host) {
  24. return [
  25. 'error_code' => 400,
  26. 'error' => 'invalid_url',
  27. 'error_description' => 'The URL provided was not valid'
  28. ];
  29. }
  30. $url = normalize_url($url);
  31. $host = parse_url($url, PHP_URL_HOST);
  32. // Check if this is a Twitter URL and use the API
  33. if(Formats\Twitter::matches_host($url)) {
  34. return $this->_fetch_tweet($url, $opts);
  35. }
  36. // Transform the HTML GitHub URL into an GitHub API request and fetch the API response
  37. if(Formats\GitHub::matches_host($url)) {
  38. return $this->_fetch_github($url, $opts);
  39. }
  40. // All other URLs are fetched normally
  41. // Special-case appspot.com URLs to not follow redirects.
  42. // https://cloud.google.com/appengine/docs/php/urlfetch/
  43. if(!should_follow_redirects($url)) {
  44. $this->http->set_max_redirects(0);
  45. $this->http->set_transport(new \p3k\HTTP\Stream());
  46. } else {
  47. $this->http->set_transport(new \p3k\HTTP\Curl());
  48. }
  49. $headers = [];
  50. if(isset($opts['token']))
  51. $headers[] = 'Authorization: Bearer ' . $opts['token'];
  52. $result = $this->http->get($url, $headers);
  53. if($result['error']) {
  54. return [
  55. 'error' => $result['error'],
  56. 'error_description' => $result['error_description'],
  57. 'url' => $result['url'],
  58. 'code' => $result['code'],
  59. ];
  60. }
  61. if(trim($result['body']) == '') {
  62. if($result['code'] == 410) {
  63. // 410 Gone responses are valid and should not return an error
  64. return $this->respond($response, 200, [
  65. 'TODO' => [
  66. ],
  67. 'url' => $result['url'],
  68. 'code' => $result['code']
  69. ]);
  70. }
  71. return [
  72. 'error' => 'no_content',
  73. 'error_description' => 'We did not get a response body when fetching the URL',
  74. 'url' => $result['url'],
  75. 'code' => $result['code']
  76. ];
  77. }
  78. // Check for HTTP 401/403
  79. if($result['code'] == 401) {
  80. return [
  81. 'error' => 'unauthorized',
  82. 'error_description' => 'The URL returned "HTTP 401 Unauthorized"',
  83. 'url' => $result['url'],
  84. 'code' => $result['code']
  85. ];
  86. }
  87. if($result['code'] == 403) {
  88. return [
  89. 'error' => 'forbidden',
  90. 'error_description' => 'The URL returned "HTTP 403 Forbidden"',
  91. 'url' => $result['url'],
  92. 'code' => $result['code']
  93. ];
  94. }
  95. return [
  96. 'url' => $result['url'],
  97. 'body' => $result['body'],
  98. 'code' => $result['code'],
  99. ];
  100. }
  101. private function _fetch_tweet($url, $opts) {
  102. $fields = ['twitter_api_key','twitter_api_secret','twitter_access_token','twitter_access_token_secret'];
  103. $creds = [];
  104. foreach($fields as $f) {
  105. if(isset($opts[$f]))
  106. $creds[$f] = $opts[$f];
  107. }
  108. if(count($creds) < 4) {
  109. return [
  110. 'error_code' => 400,
  111. 'error' => 'missing_parameters',
  112. 'error_description' => 'All 4 Twitter credentials must be included in the request'
  113. ];
  114. }
  115. $tweet = Formats\Twitter::fetch($url, $creds);
  116. if(!$tweet) {
  117. return [
  118. 'error' => 'twitter_error',
  119. 'error_description' => $e->getMessage()
  120. ];
  121. }
  122. return [
  123. 'url' => $url,
  124. 'body' => $tweet,
  125. 'code' => 200,
  126. ];
  127. }
  128. private function _fetch_github($url, $opts) {
  129. $fields = ['github_access_token'];
  130. $creds = [];
  131. foreach($fields as $f) {
  132. if(isset($opts[$f]))
  133. $creds[$f] = $opts[$f];
  134. }
  135. return Formats\GitHub::fetch($this->http, $url, $creds);
  136. }
  137. }