You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

279 lines
8.4 KiB

  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use DateTime, DateTimeZone;
  4. class Twitter extends Format {
  /**
   * Checks whether the host of a URL is one of the Twitter hosts handled by
   * this format.
   *
   * Host names are case-insensitive, so the parsed host is lowercased before
   * it is compared against the list of known hosts.
   *
   * @param string $url URL to inspect
   * @return bool true when the host is a known Twitter host; false otherwise,
   *              including when no host can be parsed out of $url
   */
  public static function matches_host($url) {
    $host = parse_url($url, PHP_URL_HOST);

    // parse_url() yields null/false when there is no (valid) host component
    if(!is_string($host)) {
      return false;
    }

    return in_array(
      strtolower($host),
      ['mobile.twitter.com','twitter.com','www.twitter.com','twtr.io'],
      true
    );
  }
  /**
   * Tests whether a URL looks like a Twitter status, profile or twtr.io short
   * URL and returns the regex captures.
   *
   * The accepted hosts mirror matches_host(): mobile.twitter.com,
   * www.twitter.com, twitter.com and twtr.io. Matching is case-insensitive.
   *
   * Captures in the returned array:
   *  - [0] the matched portion of the URL
   *  - [1] the numeric status ID when the path contains ".../status/<id>"
   *        (also "statuses", "statuss", "statuse"); an empty string when the
   *        second alternative matched instead
   *  - [2] the first path segment (letters, digits, underscore) when the URL
   *        is not a status URL, e.g. a screen name or a twtr.io short ID
   *
   * @param string $url URL to test
   * @return array|false the preg_match() captures, or false when the URL does not match
   */
  public static function matches($url) {
    $pattern = '/https?:\/\/(?:mobile\.twitter\.com|www\.twitter\.com|twitter\.com|twtr\.io)\/(?:[a-z0-9_\/!#]+statuse?s?\/([0-9]+)|([a-zA-Z0-9_]+))/i';

    if(preg_match($pattern, $url, $match)) {
      return $match;
    }

    return false;
  }
  /**
   * Fetches a single tweet from the Twitter API for the given URL.
   *
   * The tweet ID is taken from the URL via matches(). For twtr.io short links
   * the ID is NewBase60-encoded and is decoded with b60to10() first. Such
   * links have no "/status/" segment, so their ID arrives in capture group 2
   * rather than group 1.
   *
   * @param string $url   tweet URL (twitter.com, mobile.twitter.com or twtr.io)
   * @param array  $creds API credentials; reads the keys twitter_api_key,
   *                      twitter_api_secret, twitter_access_token and
   *                      twitter_access_token_secret
   * @return mixed whatever \Twitter::request() returns for statuses/show, or
   *               false when the URL does not identify a tweet or the request
   *               throws a \TwitterException
   */
  public static function fetch($url, $creds) {
    if(!($match = self::matches($url))) {
      return false;
    }

    $host = parse_url($url, PHP_URL_HOST);
    $is_short_link = is_string($host) && strtolower($host) === 'twtr.io';

    if($match[1] !== '') {
      // Regular ".../status/<id>" URL
      $tweet_id = $match[1];
    } elseif($is_short_link && isset($match[2]) && $match[2] !== '') {
      // twtr.io/<short id>: the ID is the first path segment (group 2)
      $tweet_id = $match[2];
    } else {
      // e.g. a profile URL: there is no tweet to look up
      return false;
    }

    if($is_short_link) {
      $tweet_id = self::b60to10($tweet_id);
    }

    $twitter = new \Twitter($creds['twitter_api_key'], $creds['twitter_api_secret'], $creds['twitter_access_token'], $creds['twitter_access_token_secret']);
    try {
      $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']);
    } catch(\TwitterException $e) {
      return false;
    }

    return $tweet;
  }
  /**
   * Converts a tweet into an XRay-style "entry" structure.
   *
   * The tweet is taken from $json when given (either a JSON string or an
   * already-decoded object); otherwise it is requested from the Twitter API
   * using $creds. Retweets and quoted tweets are parsed recursively and placed
   * in the "refs" map keyed by their twitter.com URL.
   *
   * @param string             $url      canonical URL of the tweet; used as the entry's "url"
   * @param string             $tweet_id tweet ID (NewBase60-encoded when $url is a twtr.io link)
   * @param array|null         $creds    API credentials (twitter_api_key, twitter_api_secret,
   *                                     twitter_access_token, twitter_access_token_secret);
   *                                     only read when $json is empty
   * @param string|object|null $json     tweet JSON string or decoded tweet object
   * @return array two-element list: [ ['data' => entry, 'refs' => map (only when non-empty)], tweet object ],
   *               or [false, false] when the tweet cannot be loaded
   */
  public static function parse($url, $tweet_id, $creds, $json=null) {
    $host = parse_url($url, PHP_URL_HOST);
    if($host === 'twtr.io') {
      $tweet_id = self::b60to10($tweet_id);
    }

    if($json) {
      $tweet = is_string($json) ? json_decode($json) : $json;
    } else {
      $twitter = new \Twitter($creds['twitter_api_key'], $creds['twitter_api_secret'], $creds['twitter_access_token'], $creds['twitter_access_token_secret']);
      try {
        $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']);
      } catch(\TwitterException $e) {
        return [false, false];
      }
    }

    if(!$tweet) {
      return [false, false];
    }

    $entry = [
      'type' => 'entry',
      'url' => $url,
      'author' => [
        'type' => 'card',
        'name' => null,
        'nickname' => null,
        'photo' => null,
        'url' => null
      ]
    ];
    $refs = [];

    if(property_exists($tweet, 'retweeted_status')) {
      // No content for retweets: the retweeted tweet becomes a ref instead
      $reposted = $tweet->retweeted_status;
      $repostOf = 'https://twitter.com/' . $reposted->user->screen_name . '/status/' . $reposted->id_str;
      $entry['repost-of'] = $repostOf;

      list($repostedEntry) = self::parse($repostOf, $reposted->id_str, null, $reposted);
      if(isset($repostedEntry['refs'])) {
        foreach($repostedEntry['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }
      $refs[$repostOf] = $repostedEntry['data'];
    } else {
      // Prefer the extended-mode "full_text"; fall back to "text" for tweets
      // that were not retrieved with tweet_mode=extended.
      if(property_exists($tweet, 'full_text')) {
        $text = $tweet->full_text;
      } elseif(property_exists($tweet, 'text')) {
        $text = $tweet->text;
      } else {
        $text = '';
      }

      // Only use the "display" segment of the text
      if(property_exists($tweet, 'display_text_range')
        && is_array($tweet->display_text_range)
        && count($tweet->display_text_range) >= 2) {
        $start = $tweet->display_text_range[0];
        $end = $tweet->display_text_range[1];
        $text = mb_substr($text, $start, $end - $start, 'UTF-8');
      }

      // Twitter escapes & as &amp; in the text
      $text = html_entity_decode($text);
      $text = self::expandTweetURLs($text, $tweet);
      $entry['content'] = ['text' => $text];
    }

    // Published date, shifted to the author's UTC offset when one is present
    $published = new DateTime($tweet->created_at);
    if(property_exists($tweet->user, 'utc_offset')) {
      // utc_offset is in seconds. Build a +HH:MM offset so that offsets that
      // are not whole hours (e.g. +05:30) are kept intact. A null offset is
      // treated as 0, i.e. UTC.
      $offset = (int)$tweet->user->utc_offset;
      $abs = abs($offset);
      $tz = new DateTimeZone(sprintf(
        '%s%02d:%02d',
        $offset < 0 ? '-' : '+',
        (int)floor($abs / 3600),
        (int)floor(($abs % 3600) / 60)
      ));
      $published->setTimezone($tz);
    }
    $entry['published'] = $published->format('c');

    // Hashtags
    if(property_exists($tweet, 'entities') && property_exists($tweet->entities, 'hashtags')) {
      if(count($tweet->entities->hashtags)) {
        $entry['category'] = [];
        foreach($tweet->entities->hashtags as $hashtag) {
          $entry['category'][] = $hashtag->text;
        }
      }
    }

    // Don't include the RT'd photo or video in the main object.
    // They get included in the reposted object instead.
    if(!property_exists($tweet, 'retweeted_status')) {
      // Photos and Videos
      if(property_exists($tweet, 'extended_entities') && property_exists($tweet->extended_entities, 'media')) {
        foreach($tweet->extended_entities->media as $media) {
          if($media->type == 'photo') {
            if(!array_key_exists('photo', $entry))
              $entry['photo'] = [];

            $entry['photo'][] = $media->media_url_https;
          } elseif($media->type == 'video') {
            if(!array_key_exists('video', $entry))
              $entry['video'] = [];

            // Find the highest bitrate video that is mp4
            $videos = array_filter($media->video_info->variants, function($v) {
              return property_exists($v, 'bitrate') && $v->content_type == 'video/mp4';
            });
            if(count($videos)) {
              // Sort descending by bitrate. The comparator has to return an
              // integer (<0, 0, >0), not a boolean.
              usort($videos, function($a, $b) {
                return $b->bitrate - $a->bitrate;
              });
              $entry['video'][] = $videos[0]->url;
            }
          }
        }
      }

      // Place (only city-level places are mapped)
      if(property_exists($tweet, 'place') && $tweet->place) {
        $place = $tweet->place;
        if($place->place_type == 'city') {
          $entry['location'] = $place->url;
          $refs[$place->url] = [
            'type' => 'adr',
            'name' => $place->full_name,
            'locality' => $place->name,
            'country-name' => $place->country,
          ];
        }
      }
    }

    // Quoted Status
    if(property_exists($tweet, 'quoted_status')) {
      $quoteOf = 'https://twitter.com/' . $tweet->quoted_status->user->screen_name . '/status/' . $tweet->quoted_status_id_str;
      list($quoted) = self::parse($quoteOf, $tweet->quoted_status_id_str, null, $tweet->quoted_status);
      if(isset($quoted['refs'])) {
        foreach($quoted['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }
      $refs[$quoteOf] = $quoted['data'];
    }

    // Replace the placeholder author with the real profile card when available
    if($author = self::_buildHCardFromTwitterProfile($tweet->user)) {
      $entry['author'] = $author;
    }

    $response = [
      'data' => $entry
    ];

    if(count($refs)) {
      $response['refs'] = $refs;
    }

    return [$response, $tweet];
  }
  /**
   * Builds an h-card style array from a Twitter user object.
   *
   * The card's URL is the profile's website when one is set, preferring the
   * expanded form from the profile's URL entities and falling back to the
   * shortened link; otherwise it is the user's twitter.com profile URL.
   *
   * @param object|null $profile Twitter user object; reads screen_name, name,
   *                             location, description, url, entities and
   *                             profile_image_url_https
   * @return array|false card with the keys type, nickname, location, bio,
   *                     name, url and photo, or false when $profile is empty
   */
  private static function _buildHCardFromTwitterProfile($profile) {
    if(!$profile) return false;

    $author = [
      'type' => 'card',
      'nickname' => $profile->screen_name,
      'location' => $profile->location,
      'bio' => self::expandTwitterObjectURLs($profile->description, $profile, 'description'),
      // Fall back to the screen name when no display name is set
      'name' => $profile->name ? $profile->name : $profile->screen_name,
    ];

    if($profile->url) {
      // Start from the (shortened) website link and upgrade it when the URL
      // entities are present. They are not guaranteed to exist just because
      // "url" is set, so every level is checked before it is dereferenced.
      $author['url'] = $profile->url;
      if(isset($profile->entities->url->urls[0])) {
        $link = $profile->entities->url->urls[0];
        if(!empty($link->expanded_url)) {
          $author['url'] = $link->expanded_url;
        } elseif(!empty($link->url)) {
          $author['url'] = $link->url;
        }
      }
    } else {
      $author['url'] = 'https://twitter.com/' . $profile->screen_name;
    }

    $author['photo'] = $profile->profile_image_url_https;

    return $author;
  }
  /**
   * Replaces the shortened links in a tweet's text with their expanded form.
   *
   * Each item of $object->entities->urls supplies a "url" (the text to look
   * for) and an "expanded_url" (its replacement). The text is returned
   * unchanged when the object carries no URL entities.
   *
   * @param string $text   text to rewrite
   * @param object $object tweet object that may carry entities->urls
   * @return string the text with every listed link expanded
   */
  private static function expandTweetURLs($text, $object) {
    $hasLinks = property_exists($object, 'entities')
      && property_exists($object->entities, 'urls');

    if(!$hasLinks) {
      return $text;
    }

    foreach($object->entities->urls as $link) {
      $text = str_replace($link->url, $link->expanded_url, $text);
    }

    return $text;
  }
  /**
   * Expands shortened links in a text field of a Twitter object, using the
   * URL entities stored under that field's name.
   *
   * The links are read from $object->entities->{$key}->urls; each one maps a
   * "url" to its "expanded_url". If any level of that path is missing, the
   * text is returned untouched.
   *
   * @param string $text   text to rewrite
   * @param object $object Twitter object that may carry an "entities" property
   * @param string $key    name of the entity group to use, e.g. "description"
   * @return string the text with every listed link expanded
   */
  private static function expandTwitterObjectURLs($text, $object, $key) {
    if(!property_exists($object, 'entities')) {
      return $text;
    }

    $entities = $object->entities;
    if(!property_exists($entities, $key)) {
      return $text;
    }

    $group = $entities->{$key};
    if(!property_exists($group, 'urls')) {
      return $text;
    }

    foreach($group->urls as $link) {
      $text = str_replace($link->url, $link->expanded_url, $text);
    }

    return $text;
  }
  /**
   * Converts a NewBase60 string to base 10, with error correction.
   * http://tantek.pbworks.com/NewBase60
   *
   * The digits are 0-9, A-H, J-N, P-Z, underscore, a-k and m-z (values 0-59).
   * Easily confused characters are corrected: capital I and lowercase l are
   * read as 1, capital O is read as 0. Any other character counts as 0.
   *
   * Declared static because it is only ever invoked as self::b60to10() from
   * static methods. The sum is accumulated with bcmath, so the result is not
   * limited by the platform integer size.
   *
   * @param string $s NewBase60-encoded number
   * @return string the decimal value as a numeric string ("0" for an empty input)
   */
  public static function b60to10($s)
  {
    // Position in this string == digit value
    $alphabet = '0123456789ABCDEFGHJKLMNPQRSTUVWXYZ_abcdefghijkmnopqrstuvwxyz';
    // Typo corrections for characters that are not part of the alphabet
    $corrections = ['I' => 1, 'l' => 1, 'O' => 0];

    $s = (string)$s;
    $length = strlen($s);
    $n = '0';

    for($i = 0; $i < $length; $i++) // iterate from first to last char of $s
    {
      $char = $s[$i];
      $digit = strpos($alphabet, $char);
      if($digit === false) {
        // Either a known typo or noise; noise is treated as 0
        $digit = isset($corrections[$char]) ? $corrections[$char] : 0;
      }
      $n = bcadd(bcmul('60', $n), (string)$digit);
    }

    return $n;
  }
  234. }