You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

163 lines
4.2 KiB

  1. <?php
  2. namespace p3k\XRay;
  /**
   * Retrieves the raw content behind a URL and reports the outcome as a plain array.
   *
   * Successful results have the keys `url`, `body` and `code`. Failures have
   * `error` and `error_description`, plus either `error_code` (request rejected
   * before any fetch was attempted) or `url` and `code` (the fetch itself failed).
   */
  class Fetcher {

    /**
     * HTTP client supplied by the caller. This class calls set_timeout(),
     * set_max_redirects(), set_transport() and get() on it.
     */
    private $http;

    /**
     * @param object $http HTTP client used for every request made by this fetcher
     */
    public function __construct($http) {
      $this->http = $http;
    }

    /**
     * Fetches a URL, delegating Twitter and GitHub URLs to their API-based fetchers.
     *
     * Recognised $opts keys: `timeout`, `max_redirects`, `token` (sent as a Bearer
     * Authorization header), the four `twitter_*` credentials and `github_access_token`.
     *
     * @param string $url Absolute http or https URL
     * @param array|null|false $opts Options; any falsy value is treated as an empty array
     * @return array Result array as described on the class
     */
    public function fetch($url, $opts=[]) {
      if(!$opts) $opts = [];

      if(isset($opts['timeout']))
        $this->http->set_timeout($opts['timeout']);
      if(isset($opts['max_redirects']))
        $this->http->set_max_redirects($opts['max_redirects']);

      // Attempt some basic URL validation
      $scheme = parse_url($url, PHP_URL_SCHEME);
      if(!in_array($scheme, ['http','https'], true)) {
        return [
          'error_code' => 400,
          'error' => 'invalid_url',
          'error_description' => 'Only http and https URLs are supported'
        ];
      }

      $host = parse_url($url, PHP_URL_HOST);
      if(!$host) {
        return [
          'error_code' => 400,
          'error' => 'invalid_url',
          'error_description' => 'The URL provided was not valid'
        ];
      }

      $url = normalize_url($url);

      // Check if this is a Twitter URL and use the API
      if(Formats\Twitter::matches_host($url)) {
        return $this->_fetch_tweet($url, $opts);
      }

      // Transform the HTML GitHub URL into a GitHub API request and fetch the API response
      if(Formats\GitHub::matches_host($url)) {
        return $this->_fetch_github($url, $opts);
      }

      // All other URLs are fetched normally

      // Special-case appspot.com URLs to not follow redirects.
      // https://cloud.google.com/appengine/docs/php/urlfetch/
      if(!should_follow_redirects($url)) {
        $this->http->set_max_redirects(0);
        $this->http->set_transport(new \p3k\HTTP\Stream());
      } else {
        $this->http->set_transport(new \p3k\HTTP\Curl());
      }

      $headers = [];
      if(isset($opts['token']))
        $headers[] = 'Authorization: Bearer ' . $opts['token'];

      $result = $this->http->get($url, $headers);

      // !empty() rather than a bare index read, so a client response that omits
      // the 'error' key is treated as "no error" instead of raising a warning.
      if(!empty($result['error'])) {
        return [
          'error' => $result['error'],
          'error_description' => $result['error_description'],
          'url' => $result['url'],
          'code' => $result['code'],
        ];
      }

      if(trim((string)$result['body']) === '') {
        if($result['code'] == 410) {
          // 410 Gone responses are valid and should not return an error, so hand
          // back the ordinary success shape and let the caller see the 410 code.
          return [
            'url' => $result['url'],
            'body' => $result['body'],
            'code' => $result['code'],
          ];
        }

        return [
          'error' => 'no_content',
          'error_description' => 'We did not get a response body when fetching the URL',
          'url' => $result['url'],
          'code' => $result['code']
        ];
      }

      // Check for HTTP 401/403
      if($result['code'] == 401) {
        return [
          'error' => 'unauthorized',
          'error_description' => 'The URL returned "HTTP 401 Unauthorized"',
          'url' => $result['url'],
          'code' => $result['code']
        ];
      }
      if($result['code'] == 403) {
        return [
          'error' => 'forbidden',
          'error_description' => 'The URL returned "HTTP 403 Forbidden"',
          'url' => $result['url'],
          'code' => $result['code']
        ];
      }

      return [
        'url' => $result['url'],
        'body' => $result['body'],
        'code' => $result['code'],
      ];
    }

    /**
     * Fetches a tweet through Formats\Twitter::fetch() using credentials taken from $opts.
     *
     * @param string $url Tweet URL
     * @param array $opts Must contain all four `twitter_*` credential keys
     * @return array Success shape with the tweet as `body` and code 200, or an error array
     */
    private function _fetch_tweet($url, $opts) {
      $fields = ['twitter_api_key','twitter_api_secret','twitter_access_token','twitter_access_token_secret'];
      $creds = $this->_pick_options($opts, $fields);

      if(count($creds) < count($fields)) {
        return [
          'error_code' => 400,
          'error' => 'missing_parameters',
          'error_description' => 'All 4 Twitter credentials must be included in the request'
        ];
      }

      // Exceptions from the Twitter fetch are reported as a twitter_error result
      // carrying the exception message.
      try {
        $tweet = Formats\Twitter::fetch($url, $creds);
      } catch(\Exception $e) {
        return [
          'error' => 'twitter_error',
          'error_description' => $e->getMessage()
        ];
      }

      // A falsy result carries no message of its own, so use a fixed description.
      if(!$tweet) {
        return [
          'error' => 'twitter_error',
          'error_description' => 'The tweet could not be retrieved from the Twitter API'
        ];
      }

      return [
        'url' => $url,
        'body' => $tweet,
        'code' => 200,
      ];
    }

    /**
     * Fetches a GitHub URL through Formats\GitHub::fetch(), passing along the
     * `github_access_token` option when it is set.
     *
     * @param string $url GitHub URL
     * @param array $opts Fetch options
     * @return mixed Whatever Formats\GitHub::fetch() returns
     */
    private function _fetch_github($url, $opts) {
      $creds = $this->_pick_options($opts, ['github_access_token']);
      return Formats\GitHub::fetch($this->http, $url, $creds);
    }

    /**
     * Copies the listed keys out of $opts, skipping any that are unset or null.
     *
     * @param array $opts Source options
     * @param array $fields Keys to copy
     * @return array Subset of $opts restricted to $fields
     */
    private function _pick_options($opts, $fields) {
      $picked = [];
      foreach($fields as $f) {
        if(isset($opts[$f]))
          $picked[$f] = $opts[$f];
      }
      return $picked;
    }

  }