You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

265 lines
7.9 KiB

  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use DateTime, DateTimeZone;
  4. class Twitter extends Format {
  /**
   * Reports whether the URL's host is one of the Twitter hostnames this
   * format handles.
   *
   * @param string $url URL to inspect
   * @return bool true when the host is a known Twitter host
   */
  public static function matches_host($url) {
    $host = parse_url($url, PHP_URL_HOST);

    // parse_url() does not return a string when the URL has no usable host
    if(!is_string($host)) {
      return false;
    }

    // Hostnames are case-insensitive, and matches() already compares with /i,
    // so normalise the case here and compare strictly.
    return in_array(strtolower($host), ['mobile.twitter.com','twitter.com','www.twitter.com','twtr.io'], true);
  }
  /**
   * Tests whether the URL looks like a Twitter status or profile URL.
   *
   * @param string $url URL to test
   * @return array|false The preg_match() result on success, false otherwise.
   *   Index 1 holds the numeric status ID (empty string when the URL has no
   *   status ID); index 2 is only present in that case and holds the first
   *   path segment (a screen name, or the short code of a twtr.io link).
   */
  public static function matches($url) {
    // www.twitter.com is accepted so that this agrees with matches_host()
    $pattern = '/https?:\/\/(?:(?:mobile\.|www\.)?twitter\.com|twtr\.io)\/(?:[a-z0-9_\/!#]+statuse?s?\/([0-9]+)|([a-zA-Z0-9_]+))/i';

    if(preg_match($pattern, $url, $match))
      return $match;

    return false;
  }
  /**
   * Fetches a tweet from the Twitter API for the given URL.
   *
   * @param string $url Tweet URL (twitter.com status URL or twtr.io short link)
   * @param array $creds Credentials; reads the keys twitter_api_key,
   *   twitter_api_secret, twitter_access_token and twitter_access_token_secret
   * @return mixed The value returned by the API client's request() call, or
   *   false when the URL does not identify a tweet or the request throws a
   *   \TwitterException
   */
  public static function fetch($url, $creds) {
    if(!($match = self::matches($url))) {
      return false;
    }

    $tweet_id = $match[1];

    $host = parse_url($url, PHP_URL_HOST);
    if(is_string($host) && strtolower($host) == 'twtr.io') {
      // Short links carry the base60-encoded ID as their first path segment,
      // which matches() captures in group 2; group 1 is empty for these URLs.
      $encoded = (isset($match[2]) && $match[2] !== '') ? $match[2] : $match[1];
      $tweet_id = self::b60to10($encoded);
    }

    // Profile URLs match without a status ID, so there is nothing to look up
    if(!$tweet_id) {
      return false;
    }

    $twitter = new \Twitter($creds['twitter_api_key'], $creds['twitter_api_secret'], $creds['twitter_access_token'], $creds['twitter_access_token_secret']);
    try {
      $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']);
    } catch(\TwitterException $e) {
      return false;
    }

    return $tweet;
  }
  /**
   * Converts a tweet object from the Twitter API into an XRay entry.
   *
   * Retweets produce an entry with "repost-of" and no content of their own.
   * Retweeted and quoted tweets are parsed recursively and stored, together
   * with any refs they carry, under "refs" keyed by their twitter.com URL.
   *
   * @param string|object $json Tweet as a JSON string or an already-decoded object
   * @param string $url URL recorded as the entry's "url"
   * @return array ['data' => entry, 'original' => tweet object], or the result
   *   of self::_unknown() when $json is empty or cannot be decoded
   */
  public static function parse($json, $url) {
    if(is_string($json))
      $tweet = json_decode($json);
    else
      $tweet = $json;

    if(!$tweet) {
      return self::_unknown();
    }

    $entry = [
      'type' => 'entry',
      'url' => $url,
      'author' => [
        'type' => 'card',
        'name' => null,
        'nickname' => null,
        'photo' => null,
        'url' => null
      ]
    ];

    $refs = [];

    // Extended-mode tweets carry "full_text"; fall back to "text" when it is absent
    if(property_exists($tweet, 'full_text'))
      $text = $tweet->full_text;
    elseif(property_exists($tweet, 'text'))
      $text = $tweet->text;
    else
      $text = '';

    // Only use the "display" segment of the text, when a range is provided
    if(property_exists($tweet, 'display_text_range')
      && is_array($tweet->display_text_range)
      && count($tweet->display_text_range) >= 2) {
      $text = mb_substr($text,
        $tweet->display_text_range[0],
        $tweet->display_text_range[1]-$tweet->display_text_range[0],
        'UTF-8');
    }

    if(property_exists($tweet, 'retweeted_status')) {
      // No content for retweets

      $reposted = $tweet->retweeted_status;
      $repostOf = 'https://twitter.com/' . $reposted->user->screen_name . '/status/' . $reposted->id_str;
      $entry['repost-of'] = $repostOf;

      $repostedEntry = self::parse($reposted, $repostOf);
      if(isset($repostedEntry['data']['refs'])) {
        foreach($repostedEntry['data']['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }

      $refs[$repostOf] = $repostedEntry['data'];

    } else {
      // Twitter escapes & as &amp; in the text
      $text = html_entity_decode($text);

      $text = self::expandTweetURLs($text, $tweet);

      $entry['content'] = ['text' => $text];
    }

    // Published date
    $published = new DateTime($tweet->created_at);
    if(property_exists($tweet->user, 'utc_offset')) {
      // utc_offset is in seconds. Build a +HH:MM zone so that offsets which are
      // not a whole number of hours keep their minutes.
      $offset = (int)$tweet->user->utc_offset;
      $absolute = abs($offset);
      $tz = new DateTimeZone(sprintf('%s%02d:%02d',
        $offset < 0 ? '-' : '+',
        (int)floor($absolute / 3600),
        (int)floor(($absolute % 3600) / 60)));
      $published->setTimeZone($tz);
    }
    $entry['published'] = $published->format('c');

    // Hashtags
    if(property_exists($tweet, 'entities') && property_exists($tweet->entities, 'hashtags')) {
      if(count($tweet->entities->hashtags)) {
        $entry['category'] = [];
        foreach($tweet->entities->hashtags as $hashtag) {
          $entry['category'][] = $hashtag->text;
        }
      }
    }

    // Don't include the RT'd photo or video in the main object.
    // They get included in the reposted object instead.
    if(!property_exists($tweet, 'retweeted_status')) {
      // Photos and Videos
      if(property_exists($tweet, 'extended_entities') && property_exists($tweet->extended_entities, 'media')) {
        foreach($tweet->extended_entities->media as $media) {
          if($media->type == 'photo') {
            if(!array_key_exists('photo', $entry))
              $entry['photo'] = [];

            $entry['photo'][] = $media->media_url_https;

          } elseif($media->type == 'video') {
            if(!array_key_exists('video', $entry))
              $entry['video'] = [];

            // Find the highest bitrate video that is mp4
            $videos = $media->video_info->variants;
            $videos = array_filter($videos, function($v) {
              return property_exists($v, 'bitrate') && $v->content_type == 'video/mp4';
            });
            if(count($videos)) {
              // Sort descending by bitrate. The comparator has to return an
              // integer; a boolean cannot express "greater than".
              usort($videos, function($a, $b) {
                if($a->bitrate == $b->bitrate)
                  return 0;
                return $a->bitrate < $b->bitrate ? 1 : -1;
              });
              $entry['video'][] = $videos[0]->url;
            }
          }
        }
      }

      // Place
      if(property_exists($tweet, 'place') && $tweet->place) {
        $place = $tweet->place;
        if($place->place_type == 'city') {
          $entry['location'] = $place->url;
          $refs[$place->url] = [
            'type' => 'adr',
            'name' => $place->full_name,
            'locality' => $place->name,
            'country-name' => $place->country,
          ];
        }
      }
    }

    // Quoted Status
    if(property_exists($tweet, 'quoted_status')) {
      $quoteOf = 'https://twitter.com/' . $tweet->quoted_status->user->screen_name . '/status/' . $tweet->quoted_status_id_str;
      $quotedEntry = self::parse($tweet->quoted_status, $quoteOf);
      if(isset($quotedEntry['data']['refs'])) {
        foreach($quotedEntry['data']['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }
      $refs[$quoteOf] = $quotedEntry['data'];
    }

    if($author = self::_buildHCardFromTwitterProfile($tweet->user)) {
      $entry['author'] = $author;
    }

    if(count($refs)) {
      $entry['refs'] = $refs;
    }

    return [
      'data' => $entry,
      'original' => $tweet,
    ];
  }
  /**
   * Builds an h-card style author array from a Twitter user object.
   *
   * @param object $profile Twitter user object; reads screen_name, location,
   *   description, name, url, entities and profile_image_url_https
   * @return array|false Author card with the keys type, nickname, location,
   *   bio, name, url and photo, or false when $profile is empty
   */
  private static function _buildHCardFromTwitterProfile($profile) {
    if(!$profile) return false;

    $author = [
      'type' => 'card'
    ];

    $author['nickname'] = $profile->screen_name;
    $author['location'] = $profile->location;
    $author['bio'] = self::expandTwitterObjectURLs($profile->description, $profile, 'description');

    if($profile->name)
      $author['name'] = $profile->name;
    else
      $author['name'] = $profile->screen_name;

    if($profile->url) {
      // Prefer the expanded form of the profile link. The entity lookup is
      // guarded so a profile without URL entities still gets its raw url.
      if(isset($profile->entities->url->urls[0])) {
        $link = $profile->entities->url->urls[0];
        if(!empty($link->expanded_url))
          $author['url'] = $link->expanded_url;
        elseif(isset($link->url))
          $author['url'] = $link->url;
        else
          $author['url'] = $profile->url;
      } else {
        $author['url'] = $profile->url;
      }
    }
    else {
      $author['url'] = 'https://twitter.com/' . $profile->screen_name;
    }

    $author['photo'] = $profile->profile_image_url_https;

    return $author;
  }
  /**
   * Swaps every shortened link in $text for its expanded form, using the
   * url / expanded_url pairs listed under $object->entities->urls.
   *
   * @param string $text Text containing shortened links
   * @param object $object Object that may carry entities->urls
   * @return string The text with the listed links expanded; unchanged when
   *   the object has no URL entities
   */
  private static function expandTweetURLs($text, $object) {
    $hasUrlEntities = property_exists($object, 'entities')
      && property_exists($object->entities, 'urls');

    if(!$hasUrlEntities) {
      return $text;
    }

    $expanded = $text;
    foreach($object->entities->urls as $link) {
      $expanded = str_replace($link->url, $link->expanded_url, $expanded);
    }

    return $expanded;
  }
  /**
   * Swaps every shortened link in $text for its expanded form, using the
   * url / expanded_url pairs listed under $object->entities->{$key}->urls.
   *
   * @param string $text Text containing shortened links
   * @param object $object Object that may carry entities for the given key
   * @param string $key Name of the entities member to read (e.g. 'description')
   * @return string The text with the listed links expanded; unchanged when
   *   no URL entities exist for $key
   */
  private static function expandTwitterObjectURLs($text, $object, $key) {
    if(!property_exists($object, 'entities')) {
      return $text;
    }

    $entities = $object->entities;
    if(!property_exists($entities, $key) || !property_exists($entities->{$key}, 'urls')) {
      return $text;
    }

    $expanded = $text;
    foreach($entities->{$key}->urls as $link) {
      $expanded = str_replace($link->url, $link->expanded_url, $expanded);
    }

    return $expanded;
  }
  /**
   * Converts base 60 to base 10, with error checking
   * http://tantek.pbworks.com/NewBase60
   *
   * Declared static because fetch() invokes it as self::b60to10() from a
   * static context. Uses the bcmath functions so large values keep their
   * precision.
   *
   * @param string $s Base 60 encoded number
   * @return string|int Decimal value as a bcmath numeric string; int 0 when $s is empty
   */
  public static function b60to10($s)
  {
    $n = 0;
    $length = strlen($s);
    for($i = 0; $i < $length; $i++) // iterate from first to last char of $s
    {
      $c = ord($s[$i]); // ASCII code of the current char
      if ($c>=48 && $c<=57) { $digit = $c - 48; }          // 0-9
      else if ($c>=65 && $c<=72) { $digit = $c - 55; }     // A-H
      else if ($c==73 || $c==108) { $digit = 1; }          // typo capital I, lowercase l to 1
      else if ($c>=74 && $c<=78) { $digit = $c - 56; }     // J-N
      else if ($c==79) { $digit = 0; }                     // error correct typo capital O to 0
      else if ($c>=80 && $c<=90) { $digit = $c - 57; }     // P-Z
      else if ($c==95) { $digit = 34; }                    // underscore
      else if ($c>=97 && $c<=107) { $digit = $c - 62; }    // a-k
      else if ($c>=109 && $c<=122) { $digit = $c - 63; }   // m-z
      else { $digit = 0; }                                 // treat all other noise as 0
      $n = bcadd(bcmul('60', (string)$n), (string)$digit);
    }
    return $n;
  }
  222. }