You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

232 lines
6.1 KiB

5 years ago
  1. <?php
  2. namespace XRay\Formats;
  3. use DOMDocument, DOMXPath;
  4. use DateTime, DateTimeZone;
  5. use Parse;
  /**
   * Builds an h-entry style array from the HTML of an Instagram photo page.
   *
   * The page data is read from the `window._sharedData = {...};` JSON blob that
   * is embedded in a <script> tag. Author, person-tag and venue details are
   * fetched through the HTTP client handed to parse().
   */
  class Instagram {

    /**
     * Parse an Instagram photo page.
     *
     * @param string $html HTML of the photo page
     * @param string $url  URL of the photo page; copied into the entry as 'url'
     * @param object $http HTTP client exposing get($url). The result is read as
     *                     an array with 'error' and 'body' keys.
     * @return array|false false when no photo data can be extracted from $html.
     *                     Otherwise a two element array:
     *                     [0] => ['data' => entry, 'refs' => cards keyed by URL
     *                            (only present when there are any)]
     *                     [1] => ['photo' => raw photo data,
     *                             'profiles' => raw profiles that were fetched,
     *                             'locations' => venue cards that were fetched]
     */
    public static function parse($html, $url, $http) {
      $photoData = self::_extractPhotoDataFromPhotoPage($html);
      if(!$photoData || !is_array($photoData))
        return false;

      // Start building the h-entry
      $entry = array(
        'type' => 'entry',
        'url' => $url,
        'author' => [
          'type' => 'card',
          'name' => null,
          'photo' => null,
          'url' => null
        ]
      );

      $profiles = [];
      $refs = [];

      // Fetch profile info for the owner of the photo. The placeholder author
      // above is kept when the owner is unknown or the profile can't be loaded.
      if(isset($photoData['owner']['username'])) {
        $profile = self::_getInstagramProfile($photoData['owner']['username'], $http);
        if($profile) {
          $entry['author'] = self::_buildHCardFromInstagramProfile($profile);
          $profiles[] = $profile;
        }
      }

      // Content and hashtags
      if(isset($photoData['caption']) && is_string($photoData['caption'])) {
        if(preg_match_all('/#([a-z0-9_-]+)/i', $photoData['caption'], $matches)) {
          $entry['category'] = $matches[1];
        }

        $entry['content'] = [
          'text' => $photoData['caption']
        ];
      }

      // Include the photo/video media URLs
      // (Always return arrays)
      if(isset($photoData['display_src'])) {
        $entry['photo'] = [$photoData['display_src']];
      }

      if(!empty($photoData['is_video']) && isset($photoData['video_url'])) {
        $entry['video'] = [$photoData['video_url']];
      }

      // Find person tags and fetch user profiles
      if(!empty($photoData['usertags']['nodes']) && is_array($photoData['usertags']['nodes'])) {
        if(!isset($entry['category'])) $entry['category'] = [];

        foreach($photoData['usertags']['nodes'] as $tag) {
          if(!isset($tag['user']['username']))
            continue;

          $profile = self::_getInstagramProfile($tag['user']['username'], $http);
          if($profile) {
            $card = self::_buildHCardFromInstagramProfile($profile);
            $entry['category'][] = $card['url'];
            $refs[$card['url']] = $card;
            $profiles[] = $profile;
          }
        }
      }

      // Published date. createFromFormat() returns false for anything it can't
      // parse, so only hand it a numeric timestamp and check the result below.
      $published = false;
      if(isset($photoData['date']) && is_numeric($photoData['date'])) {
        $published = DateTime::createFromFormat('U', (string)(int)$photoData['date']);
      }

      // Include venue data
      $locations = [];
      if(isset($photoData['location']['id'])) {
        $location = self::_getInstagramLocation($photoData['location']['id'], $http);
        if($location) {
          $entry['location'] = [$location['url']];
          $refs[$location['url']] = $location;
          $locations[] = $location;

          // Look up timezone. Test the coordinates with is_numeric() rather than
          // truthiness so that a latitude of 0 still counts as a coordinate.
          if($published && is_numeric($location['latitude']) && is_numeric($location['longitude'])) {
            $tz = \p3k\Timezone::timezone_for_location($location['latitude'], $location['longitude']);
            if($tz) {
              $published->setTimeZone(new DateTimeZone($tz));
            }
          }
        }
      }

      if($published) {
        $entry['published'] = $published->format('c');
      }

      $response = [
        'data' => $entry
      ];

      if(count($refs)) {
        $response['refs'] = $refs;
      }

      return [$response, [
        'photo' => $photoData,
        'profiles' => $profiles,
        'locations' => $locations
      ]];
    }

    /**
     * Build an h-card array from a decoded Instagram user profile.
     *
     * name:  'full_name', falling back to 'username' when it is empty.
     * url:   'external_url', falling back to the Instagram profile URL.
     * photo: 'profile_pic_url_hd', falling back to 'profile_pic_url'.
     *
     * @param array $profile decoded profile, as returned by _getInstagramProfile()
     * @return array|false the card, or false when $profile is empty
     */
    private static function _buildHCardFromInstagramProfile($profile) {
      if(!$profile) return false;

      $username = isset($profile['username']) ? $profile['username'] : null;

      $author = [
        'type' => 'card'
      ];

      if(!empty($profile['full_name']))
        $author['name'] = $profile['full_name'];
      else
        $author['name'] = $username;

      if(!empty($profile['external_url']))
        $author['url'] = $profile['external_url'];
      else
        $author['url'] = 'https://www.instagram.com/' . $username;

      if(isset($profile['profile_pic_url_hd']))
        $author['photo'] = $profile['profile_pic_url_hd'];
      elseif(isset($profile['profile_pic_url']))
        $author['photo'] = $profile['profile_pic_url'];
      else
        $author['photo'] = null;

      return $author;
    }

    /**
     * Fetch the JSON profile of an Instagram user.
     *
     * @param string $username
     * @param object $http HTTP client exposing get($url)
     * @return array|null the 'user' member of the decoded response, or null when
     *                    the request reported an error or the body is not a JSON
     *                    object with a 'user' key
     */
    private static function _getInstagramProfile($username, $http) {
      $response = $http->get('https://www.instagram.com/'.$username.'/?__a=1');

      if(!is_array($response) || !empty($response['error']) || empty($response['body']))
        return null;

      // json_decode() signals failure by returning null, no need to silence it
      $profile = json_decode($response['body'], true);
      if(is_array($profile) && array_key_exists('user', $profile))
        return $profile['user'];

      return null;
    }

    /**
     * Fetch an Instagram location page and turn it into a venue card.
     *
     * @param int|string $id Instagram location ID
     * @param object $http HTTP client exposing get($url)
     * @return array|null card with 'type', 'name', 'url', 'latitude' and
     *                    'longitude' keys, or null when the page has no body or
     *                    contains no location data
     */
    private static function _getInstagramLocation($id, $http) {
      $igURL = 'https://www.instagram.com/explore/locations/'.$id.'/';
      $response = $http->get($igURL);

      if(!is_array($response) || empty($response['body']))
        return null;

      $data = self::_extractVenueDataFromVenuePage($response['body']);
      if(!$data)
        return null;

      return [
        'type' => 'card',
        'name' => isset($data['name']) ? $data['name'] : null,
        'url' => $igURL,
        'latitude' => isset($data['lat']) ? $data['lat'] : null,
        'longitude' => isset($data['lng']) ? $data['lng'] : null,
      ];
    }

    /**
     * Return entry_data.PostPage[0].media from a photo page.
     *
     * @param string $html
     * @return mixed|null the media data, or null when it is not present
     */
    private static function _extractPhotoDataFromPhotoPage($html) {
      return self::_extractEntryData($html, 'PostPage', 'media');
    }

    /**
     * Return entry_data.LocationsPage[0].location from a venue page, without
     * its 'media' and 'top_posts' members.
     *
     * @param string $html
     * @return array|null the location data, or null when it is not present
     */
    private static function _extractVenueDataFromVenuePage($html) {
      $location = self::_extractEntryData($html, 'LocationsPage', 'location');
      if(!is_array($location))
        return null;

      # we don't need these and they're huge, so drop them now
      unset($location['media']);
      unset($location['top_posts']);

      return $location;
    }

    /**
     * Return entry_data[$page][0][$key] from the shared data embedded in $html.
     *
     * @param string $html
     * @param string $page name of the page entry, e.g. 'PostPage'
     * @param string $key  member of the first page entry to return
     * @return mixed|null null when any part of the path is missing
     */
    private static function _extractEntryData($html, $page, $key) {
      $data = self::_extractIGData($html);
      if(!is_array($data) || !isset($data['entry_data'][$page][0]))
        return null;

      $first = $data['entry_data'][$page][0];
      if(!is_array($first) || !array_key_exists($key, $first))
        return null;

      return $first[$key];
    }

    /**
     * Find and decode the `window._sharedData = {...};` JSON in a page.
     *
     * When several <script> tags match, the last one that decodes to an array
     * is returned.
     *
     * @param string $html
     * @return array|null decoded data, or null when $html is empty, can't be
     *                    loaded, or holds no decodable shared data
     */
    private static function _extractIGData($html) {
      // DOMDocument::loadHTML() rejects an empty string, so bail out early
      if(!is_string($html) || trim($html) === '')
        return null;

      // Collect libxml parse errors internally instead of emitting warnings,
      // and restore the previous setting afterwards
      $previous = libxml_use_internal_errors(true);
      $doc = new DOMDocument();
      $loaded = $doc->loadHTML($html);
      libxml_clear_errors();
      libxml_use_internal_errors($previous);

      if(!$loaded)
        return null;

      $xpath = new DOMXPath($doc);

      $data = null;
      foreach($xpath->query('//script') as $script) {
        if(preg_match('/window\._sharedData = ({.+});/', $script->textContent, $match)) {
          $decoded = json_decode($match[1], true);
          // Don't let a blob that fails to decode wipe out an earlier good one
          if(is_array($decoded))
            $data = $decoded;
        }
      }

      return $data;
    }

  }