You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

353 lines
10 KiB

6 years ago
6 years ago
6 years ago
6 years ago
  1. <?php
  2. namespace p3k\XRay\Formats;
  3. use DateTime, DateTimeZone;
  4. class Twitter extends Format {
  /**
   * Checks whether the URL's host is one of the Twitter hostnames handled by this format.
   *
   * Hostnames are case-insensitive, so the host is lowercased before it is compared.
   *
   * @param string $url
   * @return bool true for a known Twitter host, false otherwise (including when no host can be parsed)
   */
  public static function matches_host($url) {
    $host = parse_url($url, PHP_URL_HOST);

    // parse_url() yields null/false when the URL has no host or is malformed
    if(!is_string($host)) {
      return false;
    }

    return in_array(strtolower($host), ['mobile.twitter.com','twitter.com','www.twitter.com','twtr.io'], true);
  }
  /**
   * Tests whether the URL looks like a Twitter status or profile URL.
   *
   * The accepted hosts mirror the list in matches_host(), including www.twitter.com.
   *
   * @param string $url
   * @return array|false The preg_match() result on success: index 1 holds the numeric
   *   status ID for status URLs; for any other URL index 1 is an empty string and
   *   index 2 holds the first path segment. Returns false when the URL does not match.
   */
  public static function matches($url) {
    $pattern = '/https?:\/\/(?:mobile\.twitter\.com|www\.twitter\.com|twitter\.com|twtr\.io)\/(?:[a-z0-9_\/!#]+statuse?s?\/([0-9]+)|([a-zA-Z0-9_]+))/i';

    if(preg_match($pattern, $url, $match)) {
      return $match;
    }

    return false;
  }
  /**
   * Fetches a tweet from the Twitter API.
   *
   * @param string $url   Tweet URL (twitter.com status URL or twtr.io short URL)
   * @param array  $creds Must contain twitter_api_key, twitter_api_secret,
   *                      twitter_access_token and twitter_access_token_secret
   * @return array|false false when the URL is not a Twitter URL; an array with
   *   'error' and 'error_description' when no tweet can be retrieved; otherwise
   *   an array with 'url', 'body' (the API response) and 'code'.
   */
  public static function fetch($url, $creds) {
    if(!($match = self::matches($url))) {
      return false;
    }

    $tweet_id = isset($match[1]) ? $match[1] : '';

    $host = parse_url($url, PHP_URL_HOST);
    if(is_string($host) && strtolower($host) == 'twtr.io') {
      // Short IDs are base 60 and usually contain letters, so they are captured
      // by the second group of the pattern rather than the digits-only first one.
      $short_id = ($tweet_id !== '') ? $tweet_id : (isset($match[2]) ? $match[2] : '');
      $tweet_id = ($short_id !== '') ? self::b60to10($short_id) : '';
    }

    // Profile URLs match the pattern but carry no status ID; don't call the API without one
    if($tweet_id === '') {
      return [
        'error' => 'twitter_error',
        'error_description' => 'The URL does not contain a tweet ID'
      ];
    }

    $twitter = new \Twitter($creds['twitter_api_key'], $creds['twitter_api_secret'], $creds['twitter_access_token'], $creds['twitter_access_token_secret']);
    try {
      $tweet = $twitter->request('statuses/show/'.$tweet_id, 'GET', ['tweet_mode'=>'extended']);
    } catch(\TwitterException $e) {
      return [
        'error' => 'twitter_error',
        'error_description' => $e->getMessage()
      ];
    }

    return [
      'url' => $url,
      'body' => $tweet,
      'code' => 200,
    ];
  }
  /**
   * Converts a tweet object from the Twitter API into XRay's entry format.
   *
   * Retweets and quoted tweets are parsed recursively and placed in 'refs',
   * keyed by their canonical twitter.com URL.
   *
   * @param string|object $json Tweet as a JSON string or an already-decoded object
   * @param string        $url  URL to report as the entry's URL
   * @return array Array with 'data', 'original' and 'source-format' keys, or the
   *   result of _unknown() when the input can't be decoded to an object
   */
  public static function parse($json, $url) {
    if(is_string($json))
      $tweet = json_decode($json);
    else
      $tweet = $json;

    // Everything below reads object properties, so reject anything else up front
    if(!is_object($tweet)) {
      return self::_unknown();
    }

    $user = isset($tweet->user) ? $tweet->user : null;

    $entry = array(
      'type' => 'entry',
      'url' => $url,
      'author' => [
        'type' => 'card',
        'name' => null,
        'nickname' => null,
        'photo' => null,
        'url' => null
      ]
    );
    $refs = [];

    if(property_exists($tweet, 'retweeted_status')) {
      // No content for retweets
      $reposted = $tweet->retweeted_status;
      $repostOf = 'https://twitter.com/' . $reposted->user->screen_name . '/status/' . $reposted->id_str;
      $entry['repost-of'] = $repostOf;

      $repostedEntry = self::parse($reposted, $repostOf);
      if(isset($repostedEntry['data']['refs'])) {
        foreach($repostedEntry['data']['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }
      $refs[$repostOf] = $repostedEntry['data'];
    } else {
      $entry['content'] = self::expandTweetContent($tweet);
    }

    // Published date
    $published = new DateTime($tweet->created_at);
    if(is_object($user) && property_exists($user, 'utc_offset')) {
      // utc_offset is in seconds. Build a full +HH:MM offset so that
      // half-hour and 45-minute zones are not truncated to whole hours.
      $offset = (int)$user->utc_offset;
      $minutes = (int)(abs($offset) / 60);
      $tz = new DateTimeZone(sprintf('%s%02d:%02d', $offset < 0 ? '-' : '+', (int)($minutes / 60), $minutes % 60));
      $published->setTimeZone($tz);
    }
    $entry['published'] = $published->format('c');

    // Hashtags
    if(property_exists($tweet, 'entities') && property_exists($tweet->entities, 'hashtags')) {
      if(count($tweet->entities->hashtags)) {
        $entry['category'] = [];
        foreach($tweet->entities->hashtags as $hashtag) {
          $entry['category'][] = $hashtag->text;
        }
      }
    }

    // In-Reply-To
    if(property_exists($tweet, 'in_reply_to_status_id_str') && $tweet->in_reply_to_status_id_str) {
      $entry['in-reply-to'] = [
        'https://twitter.com/'.$tweet->in_reply_to_screen_name.'/status/'.$tweet->in_reply_to_status_id_str
      ];
    }

    // Don't include the RT'd photo or video in the main object.
    // They get included in the reposted object instead.
    if(!property_exists($tweet, 'retweeted_status')) {
      // Photos and Videos
      if(property_exists($tweet, 'extended_entities') && property_exists($tweet->extended_entities, 'media')) {
        foreach($tweet->extended_entities->media as $media) {
          self::extractMedia($media, $entry);
        }
      }

      // Photos from Streaming API Tweets
      if(property_exists($tweet, 'extended_tweet')) {
        if(property_exists($tweet->extended_tweet, 'entities') && property_exists($tweet->extended_tweet->entities, 'media')) {
          foreach($tweet->extended_tweet->entities->media as $media) {
            self::extractMedia($media, $entry);
          }
        }
      }

      // Place
      if(property_exists($tweet, 'place') && $tweet->place) {
        $place = $tweet->place;
        if($place->place_type == 'city') {
          $entry['location'] = $place->url;
          $refs[$place->url] = [
            'type' => 'adr',
            'name' => $place->full_name,
            'locality' => $place->name,
            'country-name' => $place->country,
          ];
        }
      }
    }

    // Quoted Status
    if(property_exists($tweet, 'quoted_status')) {
      $quoteOf = 'https://twitter.com/' . $tweet->quoted_status->user->screen_name . '/status/' . $tweet->quoted_status_id_str;
      $quotedEntry = self::parse($tweet->quoted_status, $quoteOf);
      if(isset($quotedEntry['data']['refs'])) {
        foreach($quotedEntry['data']['refs'] as $k=>$v) {
          $refs[$k] = $v;
        }
      }
      $refs[$quoteOf] = $quotedEntry['data'];
      $entry['quotation-of'] = $quoteOf;
    }

    // Keep the placeholder author card when no profile is available
    if($author = self::_buildHCardFromTwitterProfile($user)) {
      $entry['author'] = $author;
    }

    if(count($refs)) {
      $entry['refs'] = $refs;
    }

    $entry['post-type'] = \p3k\XRay\PostType::discover($entry);

    return [
      'data' => $entry,
      'original' => $tweet,
      'source-format' => 'twitter',
    ];
  }
  /**
   * Adds the photo and video URLs of one Twitter media object to the entry.
   *
   * Photos are appended to $entry['photo']. Videos and animated GIFs add their
   * thumbnail to $entry['photo'] and the highest-bitrate MP4 variant, if there
   * is one, to $entry['video']. Other media types are ignored.
   *
   * @param object $media  Media object from the tweet's entities
   * @param array  &$entry Entry being built; modified in place
   * @return void
   */
  private static function extractMedia($media, &$entry) {
    if($media->type == 'photo') {
      if(!array_key_exists('photo', $entry))
        $entry['photo'] = [];

      $entry['photo'][] = $media->media_url_https;

    } elseif($media->type == 'video' || $media->type == 'animated_gif') {
      if(!array_key_exists('photo', $entry))
        $entry['photo'] = [];

      if(!array_key_exists('video', $entry))
        $entry['video'] = [];

      // Include the thumbnail
      $entry['photo'][] = $media->media_url_https;

      // The variants list may be absent; treat that as "no video available"
      $variants = [];
      if(isset($media->video_info->variants) && is_array($media->video_info->variants)) {
        $variants = $media->video_info->variants;
      }

      // Find the highest bitrate video that is mp4
      $videos = array_filter($variants, function($v) {
        return property_exists($v, 'bitrate') && $v->content_type == 'video/mp4';
      });

      if(count($videos)) {
        // Sort descending by bitrate. The comparator must return an integer
        // (negative, zero or positive), not a boolean.
        usort($videos, function($a, $b) {
          if($a->bitrate == $b->bitrate) {
            return 0;
          }
          return ($a->bitrate > $b->bitrate) ? -1 : 1;
        });
        $entry['video'][] = $videos[0]->url;
      }
    }
  }
  /**
   * Builds an author card from a Twitter user object.
   *
   * The card URL is the profile's website when one is set, preferring the
   * expanded form from the profile's URL entity, and otherwise the
   * twitter.com profile URL.
   *
   * @param object|null $profile Twitter user object
   * @return array|false Author card, or false when no profile was given
   */
  private static function _buildHCardFromTwitterProfile($profile) {
    if(!$profile) return false;

    $author = [
      'type' => 'card'
    ];

    $author['nickname'] = $profile->screen_name;
    $author['location'] = $profile->location;
    $author['bio'] = self::expandTwitterObjectURLs($profile->description, $profile, 'description');

    if($profile->name)
      $author['name'] = $profile->name;
    else
      $author['name'] = $profile->screen_name;

    if($profile->url) {
      // Start from the raw profile URL and upgrade it only when the URL entity
      // is actually present; the entity is not guaranteed to exist.
      $author['url'] = $profile->url;
      if(property_exists($profile, 'entities') && isset($profile->entities->url->urls[0])) {
        $link = $profile->entities->url->urls[0];
        if(!empty($link->expanded_url))
          $author['url'] = $link->expanded_url;
        elseif(!empty($link->url))
          $author['url'] = $link->url;
      }
    }
    else {
      $author['url'] = 'https://twitter.com/' . $profile->screen_name;
    }

    $author['photo'] = $profile->profile_image_url_https;

    return $author;
  }
  /**
   * Builds the text and HTML content of a tweet.
   *
   * The plain text has Twitter's entity escaping removed and t.co links
   * replaced by their expanded URLs. The HTML version is built from the
   * HTML-escaped text, with line breaks, linked @-mentions and linked URLs.
   *
   * @param object $tweet Tweet object from the REST or streaming API
   * @return array Array with a 'text' key, plus an 'html' key when the HTML
   *   carries markup beyond the escaped plain text
   */
  private static function expandTweetContent($tweet) {
    $entities = new \StdClass;

    if(property_exists($tweet, 'truncated') && $tweet->truncated) {
      if(property_exists($tweet, 'extended_tweet')) {
        $text = $tweet->extended_tweet->full_text;

        $text = mb_substr($text,
          $tweet->extended_tweet->display_text_range[0],
          $tweet->extended_tweet->display_text_range[1]-$tweet->extended_tweet->display_text_range[0],
          'UTF-8');

        if(property_exists($tweet->extended_tweet, 'entities')) {
          $entities = $tweet->extended_tweet->entities;
        }
      } else {
        $text = $tweet->text;

        if(property_exists($tweet, 'entities')) {
          $entities = $tweet->entities;
        }
      }
    } else {
      if(property_exists($tweet, 'full_text')) {
        // Only use the "display" segment of the text
        $text = mb_substr($tweet->full_text,
          $tweet->display_text_range[0],
          $tweet->display_text_range[1]-$tweet->display_text_range[0],
          'UTF-8');
      } else {
        $text = $tweet->text;
      }

      if(property_exists($tweet, 'entities')) {
        $entities = $tweet->entities;
      }
    }

    // A null "entities" value would break the property_exists() checks below
    if(!is_object($entities)) {
      $entities = new \StdClass;
    }

    // Escaping for element content and for double-quoted attribute values
    $escapeText = function($s) {
      return htmlspecialchars((string)$s, ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };
    $escapeAttr = function($s) {
      return htmlspecialchars((string)$s, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    // Twitter escapes & as &amp; in the text
    $text = html_entity_decode($text);

    // The decoded text is untrusted, so escape it before adding any markup
    $html = str_replace("\n", "<br>\n", $escapeText($text));

    if(property_exists($entities, 'user_mentions')) {
      $linked = [];
      foreach($entities->user_mentions as $user) {
        $name = $user->screen_name;

        // A user mentioned twice appears twice in the entities; linking again
        // would wrap the anchor that is already there in a second anchor.
        $seenKey = strtolower($name);
        if(isset($linked[$seenKey])) {
          continue;
        }
        $linked[$seenKey] = true;

        // The lookahead stops "@foo" from matching the start of "@foobar"
        $pattern = '/@'.preg_quote($name, '/').'(?![A-Za-z0-9_])/i';
        $result = preg_replace_callback($pattern, function($m) use($name, $escapeAttr) {
          return '<a href="https://twitter.com/'.$escapeAttr($name).'">'.$m[0].'</a>';
        }, $html);
        if($result !== null) {
          $html = $result;
        }
      }
    }

    if(property_exists($entities, 'urls')) {
      $expandedUrls = [];
      foreach($entities->urls as $url) {
        if(isset($expandedUrls[$url->url])) {
          continue;
        }
        $expandedUrls[$url->url] = true;

        $expanded = !empty($url->expanded_url) ? $url->expanded_url : $url->url;

        $text = str_replace($url->url, $expanded, $text);
        // The HTML already holds the escaped form of the short URL
        $html = str_replace($escapeText($url->url), '<a href="'.$escapeAttr($expanded).'">'.$escapeText($expanded).'</a>', $html);
      }
    }

    $content = [
      'text' => $text,
    ];

    // Only include HTML when it adds something beyond the escaped plain text
    if($html !== $escapeText($text))
      $content['html'] = $html;

    return $content;
  }
  /**
   * Replaces shortened URLs in a piece of text with their expanded form,
   * using the URL entities Twitter attaches to the object for the given field.
   *
   * @param string $text   Text that may contain shortened URLs
   * @param object $object Twitter object that may carry an "entities" property
   * @param string $key    Name of the entities sub-object to read, e.g. 'description'
   * @return string The text, unchanged when the object has no URL entities for $key
   */
  private static function expandTwitterObjectURLs($text, $object, $key) {
    // Nothing to expand unless the object carries entities at all
    if(!property_exists($object, 'entities')) {
      return $text;
    }

    $entities = $object->entities;
    if(!property_exists($entities, $key) || !property_exists($entities->{$key}, 'urls')) {
      return $text;
    }

    $expanded = $text;
    foreach($entities->{$key}->urls as $link) {
      $expanded = str_replace($link->url, $link->expanded_url, $expanded);
    }

    return $expanded;
  }
  /**
   * Converts base 60 to base 10, with error checking
   * http://tantek.pbworks.com/NewBase60
   *
   * Declared static because it is called as self::b60to10() from the static
   * fetch() method. Arbitrary-precision (bcmath) arithmetic is used for the
   * accumulated value, so it is returned as a string of digits.
   *
   * @param string $s NewBase60 string; characters outside the alphabet count as 0
   * @return string Base 10 value as a numeric string ('0' for an empty input)
   */
  public static function b60to10($s)
  {
    $s = (string)$s;
    $n = '0';
    $length = strlen($s);
    for($i = 0; $i < $length; $i++) // iterate from first to last char of $s
    {
      $c = ord($s[$i]); // put current ASCII of char into $c
      if ($c>=48 && $c<=57) { $c = $c - 48; } // 0-9
      else if ($c>=65 && $c<=72) { $c = $c - 55; } // A-H
      else if ($c==73 || $c==108) { $c = 1; } // typo capital I, lowercase l to 1
      else if ($c>=74 && $c<=78) { $c = $c - 56; } // J-N
      else if ($c==79) { $c = 0; } // error correct typo capital O to 0
      else if ($c>=80 && $c<=90) { $c = $c - 57; } // P-Z
      else if ($c==95) { $c = 34; } // underscore
      else if ($c>=97 && $c<=107) { $c = $c - 62; } // a-k
      else if ($c>=109 && $c<=122) { $c = $c - 63; } // m-z
      else { $c = 0; } // treat all other noise as 0
      // bcmath functions take numeric strings
      $n = bcadd(bcmul('60', $n), (string)$c);
    }
    return $n;
  }
  295. }