|
|
@ -1,11 +1,19 @@ |
|
|
|
<?php |
|
|
|
namespace p3k\XRay\Formats; |
|
|
|
|
|
|
|
const BASE_URL = 'https://www.instagram.com/'; |
|
|
|
const QUERY_MEDIA = BASE_URL.'graphql/query/?query_hash=42323d64886122307be10013ad2dcc44&variables=%s'; |
|
|
|
const QUERY_MEDIA_VARS = '{"id":"%s","first":%d,"after":"%s"}'; |
|
|
|
|
|
|
|
use DOMDocument, DOMXPath; |
|
|
|
use DateTime, DateTimeZone; |
|
|
|
|
|
|
|
class Instagram extends Format { |
|
|
|
|
|
|
|
private static $gis; |
|
|
|
|
|
|
|
private static $extra_photos = 20; |
|
|
|
|
|
|
|
public static function matches_host($url) { |
|
|
|
$host = parse_url($url, PHP_URL_HOST); |
|
|
|
return in_array($host, ['www.instagram.com','instagram.com']); |
|
|
@ -16,6 +24,9 @@ class Instagram extends Format { |
|
|
|
} |
|
|
|
|
|
|
|
public static function parse($http, $html, $url, $opts=[]) { |
|
|
|
if(isset($opts['length'])) { |
|
|
|
self::$extra_photos = intval($opts['length'])-12; |
|
|
|
} |
|
|
|
if(preg_match('#instagram.com/([^/]+)/$#', $url)) { |
|
|
|
if(isset($opts['expect']) && $opts['expect'] == 'feed') |
|
|
|
return self::parseFeed($http, $html, $url); |
|
|
@ -39,6 +50,34 @@ class Instagram extends Format { |
|
|
|
]; |
|
|
|
} |
|
|
|
|
|
|
|
private static function _getIntstagramGIS($params) { |
|
|
|
$data = self::$gis.":".$params; |
|
|
|
return md5($data); |
|
|
|
} |
|
|
|
|
|
|
|
private static function _getMorePhotos($http,$html,$url,$profileData) { |
|
|
|
$params = sprintf( |
|
|
|
QUERY_MEDIA_VARS, |
|
|
|
$profileData['id'], |
|
|
|
self::$extra_photos, |
|
|
|
$profileData['edge_owner_to_timeline_media']['page_info']['end_cursor'] |
|
|
|
); |
|
|
|
|
|
|
|
$url = sprintf(QUERY_MEDIA,$params); |
|
|
|
$headers = []; |
|
|
|
$headers[] = 'x-instagram-gis: ' . self::_getIntstagramGIS($params); |
|
|
|
$headers[] = 'x-requested-with: XMLHttpRequest'; |
|
|
|
|
|
|
|
$resp = $http->get($url,$headers); |
|
|
|
|
|
|
|
if(!$resp['error']) { |
|
|
|
$data = json_decode($resp['body'],true); |
|
|
|
$photos = $data['data']['user']['edge_owner_to_timeline_media']['edges']; |
|
|
|
return $photos; |
|
|
|
} |
|
|
|
return null; |
|
|
|
} |
|
|
|
|
|
|
|
private static function parseFeed($http, $html, $url) { |
|
|
|
$profileData = self::_parseProfileFromHTML($html); |
|
|
|
if(!$profileData) |
|
|
@ -47,9 +86,13 @@ class Instagram extends Format { |
|
|
|
$photos = $profileData['edge_owner_to_timeline_media']['edges']; |
|
|
|
$items = []; |
|
|
|
|
|
|
|
$morePhotos = self::_getMorePhotos($http,$html,$url,$profileData); |
|
|
|
|
|
|
|
$photos = array_merge($photos,$morePhotos); |
|
|
|
|
|
|
|
foreach($photos as $photoData) { |
|
|
|
$item = self::parsePhotoFromData($http, $photoData['node'], |
|
|
|
'https://www.instagram.com/p/'.$photoData['node']['shortcode'].'/', $profileData); |
|
|
|
BASE_URL.'p/'.$photoData['node']['shortcode'].'/', $profileData); |
|
|
|
// Note: Not all the photo info is available in the initial JSON.
|
|
|
|
// Things like video mp4 URLs and person tags and locations are missing.
|
|
|
|
// Consumers of the feed will need to fetch the photo permalink in order to get all missing information.
|
|
|
@ -249,7 +292,7 @@ class Instagram extends Format { |
|
|
|
if(isset($profile['external_url']) && $profile['external_url']) |
|
|
|
$author['url'] = $profile['external_url']; |
|
|
|
else |
|
|
|
$author['url'] = 'https://www.instagram.com/' . $profile['username']; |
|
|
|
$author['url'] = BASE_URL . $profile['username']; |
|
|
|
|
|
|
|
if(isset($profile['profile_pic_url_hd'])) |
|
|
|
$author['photo'] = $profile['profile_pic_url_hd']; |
|
|
@ -263,7 +306,7 @@ class Instagram extends Format { |
|
|
|
} |
|
|
|
|
|
|
|
private static function _getInstagramProfile($username, $http) { |
|
|
|
$response = $http->get('https://www.instagram.com/'.$username.'/'); |
|
|
|
$response = $http->get(BASE_URL.$username.'/'); |
|
|
|
|
|
|
|
if(!$response['error']) |
|
|
|
return self::_parseProfileFromHTML($response['body']); |
|
|
@ -273,6 +316,9 @@ class Instagram extends Format { |
|
|
|
|
|
|
|
private static function _parseProfileFromHTML($html) { |
|
|
|
$data = self::_extractIGData($html); |
|
|
|
if(isset($data['rhx_gis'])) { |
|
|
|
self::$gis = $data['rhx_gis']; |
|
|
|
} |
|
|
|
if(isset($data['entry_data']['ProfilePage'][0])) { |
|
|
|
$profile = $data['entry_data']['ProfilePage'][0]; |
|
|
|
if($profile && isset($profile['graphql']['user'])) { |
|
|
@ -284,7 +330,7 @@ class Instagram extends Format { |
|
|
|
} |
|
|
|
|
|
|
|
private static function _getInstagramLocation($id, $http) { |
|
|
|
$igURL = 'https://www.instagram.com/explore/locations/'.$id.'/'; |
|
|
|
$igURL = BASE_URL.'explore/locations/'.$id.'/'; |
|
|
|
$response = $http->get($igURL); |
|
|
|
if($response['body']) { |
|
|
|
$data = self::_extractVenueDataFromVenuePage($response['body']); |
|
|
|