Browse Source

Fetch more than just the first 12 photos from Instagram

pull/72/head
Ruxton 6 years ago
parent
commit
98b039e2dd
1 changed files with 39 additions and 4 deletions
  1. +39
    -4
      lib/XRay/Formats/Instagram.php

+ 39
- 4
lib/XRay/Formats/Instagram.php View File

@ -1,11 +1,17 @@
<?php <?php
namespace p3k\XRay\Formats; namespace p3k\XRay\Formats;
// Root of the Instagram website; used as the prefix for the Instagram URLs built in this file.
const BASE_URL = 'https://www.instagram.com/';
// sprintf() template for the GraphQL query endpoint. The single %s receives the
// "variables" query-string value. NOTE(review): the hard-coded query_hash presumably
// selects the profile timeline media query — confirm it is still valid.
const QUERY_MEDIA = BASE_URL.'graphql/query/?query_hash=42323d64886122307be10013ad2dcc44&variables=%s';
// sprintf() template for the JSON "variables" payload: first %s is the profile id,
// second %s is the pagination cursor to continue after; "first" is fixed at 20.
const QUERY_MEDIA_VARS = '{"id":"%s","first":20,"after":"%s"}';
use DOMDocument, DOMXPath; use DOMDocument, DOMXPath;
use DateTime, DateTimeZone; use DateTime, DateTimeZone;
class Instagram extends Format { class Instagram extends Format {
private static $gis;
public static function matches_host($url) { public static function matches_host($url) {
$host = parse_url($url, PHP_URL_HOST); $host = parse_url($url, PHP_URL_HOST);
return in_array($host, ['www.instagram.com','instagram.com']); return in_array($host, ['www.instagram.com','instagram.com']);
@ -38,6 +44,28 @@ class Instagram extends Format {
]; ];
} }
/**
 * Build the signature sent in the x-instagram-gis request header.
 *
 * The signature is the MD5 hex digest of the stored gis value (self::$gis),
 * a colon, and the query variables string.
 *
 * @param string $params Query variables string to sign
 * @return string MD5 hex digest
 */
private static function _getIntstagramGIS($params) {
  return md5(sprintf('%s:%s', self::$gis, $params));
}
/**
 * Fetch the page of timeline photos that follows the ones already present in $profileData.
 *
 * Builds the GraphQL media query from the profile id and the timeline's end cursor,
 * signs it with the x-instagram-gis header and returns the decoded photo edges.
 *
 * @param object $http        HTTP client; get($url, $headers) is expected to return an array with 'error' and 'body' keys
 * @param string $html        Profile page HTML (not used by this method)
 * @param string $url         Profile URL (not used by this method; the query URL is built locally)
 * @param array  $profileData Profile data holding 'id' and ['edge_owner_to_timeline_media']['page_info']['end_cursor']
 * @return array Photo edges from the response. Empty when the id or cursor is missing,
 *               the request fails, or the response does not have the expected structure,
 *               so the result can always be passed to array_merge().
 */
private static function _getMorePhotos($http,$html,$url,$profileData) {
  // Nothing to request without a profile id and a cursor to continue from.
  // NOTE(review): an empty cursor presumably means there is no further page — confirm.
  if(empty($profileData['id'])
    || empty($profileData['edge_owner_to_timeline_media']['page_info']['end_cursor']))
    return [];

  $params = sprintf(
    QUERY_MEDIA_VARS,
    $profileData['id'],
    $profileData['edge_owner_to_timeline_media']['page_info']['end_cursor']
  );

  // The signature is computed over the raw JSON exactly as before; only the copy
  // placed in the query string is percent-encoded so the URL stays valid.
  $queryURL = sprintf(QUERY_MEDIA, urlencode($params));

  $headers = [
    'x-instagram-gis: ' . self::_getIntstagramGIS($params),
    'x-requested-with: XMLHttpRequest',
  ];

  $resp = $http->get($queryURL, $headers);
  if(!empty($resp['error']) || empty($resp['body']))
    return [];

  // json_decode() returns null on malformed input, so verify the full path before using it.
  $data = json_decode($resp['body'], true);
  if(!isset($data['data']['user']['edge_owner_to_timeline_media']['edges'])
    || !is_array($data['data']['user']['edge_owner_to_timeline_media']['edges']))
    return [];

  return $data['data']['user']['edge_owner_to_timeline_media']['edges'];
}
private static function parseFeed($http, $html, $url) { private static function parseFeed($http, $html, $url) {
$profileData = self::_parseProfileFromHTML($html); $profileData = self::_parseProfileFromHTML($html);
if(!$profileData) if(!$profileData)
@ -46,9 +74,13 @@ class Instagram extends Format {
$photos = $profileData['edge_owner_to_timeline_media']['edges']; $photos = $profileData['edge_owner_to_timeline_media']['edges'];
$items = []; $items = [];
$morePhotos = self::_getMorePhotos($http,$html,$url,$profileData);
$photos = array_merge($photos,$morePhotos);
foreach($photos as $photoData) { foreach($photos as $photoData) {
$item = self::parsePhotoFromData($http, $photoData['node'], $item = self::parsePhotoFromData($http, $photoData['node'],
'https://www.instagram.com/p/'.$photoData['node']['shortcode'].'/', $profileData);
BASE_URL.'p/'.$photoData['node']['shortcode'].'/', $profileData);
// Note: Not all the photo info is available in the initial JSON. // Note: Not all the photo info is available in the initial JSON.
// Things like video mp4 URLs and person tags and locations are missing. // Things like video mp4 URLs and person tags and locations are missing.
// Consumers of the feed will need to fetch the photo permalink in order to get all missing information. // Consumers of the feed will need to fetch the photo permalink in order to get all missing information.
@ -223,7 +255,7 @@ class Instagram extends Format {
if(isset($profile['external_url']) && $profile['external_url']) if(isset($profile['external_url']) && $profile['external_url'])
$author['url'] = $profile['external_url']; $author['url'] = $profile['external_url'];
else else
$author['url'] = 'https://www.instagram.com/' . $profile['username'];
$author['url'] = BASE_URL . $profile['username'];
if(isset($profile['profile_pic_url_hd'])) if(isset($profile['profile_pic_url_hd']))
$author['photo'] = $profile['profile_pic_url_hd']; $author['photo'] = $profile['profile_pic_url_hd'];
@ -237,7 +269,7 @@ class Instagram extends Format {
} }
private static function _getInstagramProfile($username, $http) { private static function _getInstagramProfile($username, $http) {
$response = $http->get('https://www.instagram.com/'.$username.'/');
$response = $http->get(BASE_URL.$username.'/');
if(!$response['error']) if(!$response['error'])
return self::_parseProfileFromHTML($response['body']); return self::_parseProfileFromHTML($response['body']);
@ -247,6 +279,9 @@ class Instagram extends Format {
private static function _parseProfileFromHTML($html) { private static function _parseProfileFromHTML($html) {
$data = self::_extractIGData($html); $data = self::_extractIGData($html);
if(isset($data['rhx_gis'])) {
self::$gis = $data['rhx_gis'];
}
if(isset($data['entry_data']['ProfilePage'][0])) { if(isset($data['entry_data']['ProfilePage'][0])) {
$profile = $data['entry_data']['ProfilePage'][0]; $profile = $data['entry_data']['ProfilePage'][0];
if($profile && isset($profile['graphql']['user'])) { if($profile && isset($profile['graphql']['user'])) {
@ -258,7 +293,7 @@ class Instagram extends Format {
} }
private static function _getInstagramLocation($id, $http) { private static function _getInstagramLocation($id, $http) {
$igURL = 'https://www.instagram.com/explore/locations/'.$id.'/';
$igURL = BASE_URL.'explore/locations/'.$id.'/';
$response = $http->get($igURL); $response = $http->get($igURL);
if($response['body']) { if($response['body']) {
$data = self::_extractVenueDataFromVenuePage($response['body']); $data = self::_extractVenueDataFromVenuePage($response['body']);

Loading…
Cancel
Save