Browse Source

parse instagram user info from HTML instead of secret JSON API

adds script to refresh the downloaded instagram data for the tests as well
pull/72/head
Aaron Parecki 6 years ago
parent
commit
6f39655c8a
No known key found for this signature in database GPG Key ID: 276C2817346D6056
23 changed files with 2952 additions and 1873 deletions
  1. +8
    -5
      lib/XRay/Formats/Instagram.php
  2. +18
    -18
      tests/InstagramTest.php
  3. +194
    -199
      tests/data/www.instagram.com/BGDpqNoiMJ0
  4. +197
    -0
      tests/data/www.instagram.com/aaronpk_
  5. +0
    -18
      tests/data/www.instagram.com/aaronpk_?__a=1
  6. +182
    -187
      tests/data/www.instagram.com/explore_locations_109284789535230_
  7. +182
    -187
      tests/data/www.instagram.com/explore_locations_359000003_
  8. +197
    -0
      tests/data/www.instagram.com/indiewebcat_
  9. +0
    -18
      tests/data/www.instagram.com/indiewebcat_?__a=1
  10. +197
    -0
      tests/data/www.instagram.com/kmikeym_
  11. +0
    -18
      tests/data/www.instagram.com/kmikeym_?__a=1
  12. +197
    -0
      tests/data/www.instagram.com/microformats_
  13. +0
    -18
      tests/data/www.instagram.com/microformats_?__a=1
  14. +204
    -0
      tests/data/www.instagram.com/p_BGDpqNoiMJ0_
  15. +192
    -197
      tests/data/www.instagram.com/p_BN3Z5salSys_
  16. +192
    -197
      tests/data/www.instagram.com/p_BNfqVfVlmkj_
  17. +192
    -197
      tests/data/www.instagram.com/p_BO5rYVElvJq_
  18. +197
    -202
      tests/data/www.instagram.com/p_BO_RN8AFZSx_
  19. +192
    -197
      tests/data/www.instagram.com/p_BZWmUB_DVtp_
  20. +192
    -197
      tests/data/www.instagram.com/p_BZWmpecjBwN_
  21. +197
    -0
      tests/data/www.instagram.com/pk_spam_
  22. +0
    -18
      tests/data/www.instagram.com/pk_spam_?__a=1
  23. +22
    -0
      tests/download-instagram-data.sh

+ 8
- 5
lib/XRay/Formats/Instagram.php View File

@ -193,13 +193,16 @@ class Instagram extends Format {
}
private static function _getInstagramProfile($username, $http) {
$response = $http->get('https://www.instagram.com/'.$username.'/?__a=1');
$response = $http->get('https://www.instagram.com/'.$username.'/');
if(!$response['error']) {
$profile = @json_decode($response['body'], true);
if($profile && isset($profile['graphql']['user'])) {
$user = $profile['graphql']['user'];
return $user;
$data = self::_extractIGData($response['body']);
if(isset($data['entry_data']['ProfilePage'][0])) {
$profile = $data['entry_data']['ProfilePage'][0];
if($profile && isset($profile['graphql']['user'])) {
$user = $profile['graphql']['user'];
return $user;
}
}
}
return null;

+ 18
- 18
tests/InstagramTest.php View File

@ -20,7 +20,7 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
public function testInstagramPhoto() {
// Original URL: https://www.instagram.com/p/BO5rYVElvJq/
$url = 'http://www.instagram.com/photo.html';
$url = 'https://www.instagram.com/p/BO5rYVElvJq/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -36,7 +36,7 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/214e719b6026ef54e0545f2ed70d4c83/5B56795F/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg'], $data['data']['photo']);
$this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']);
$this->assertEquals('Aaron Parecki', $data['data']['author']['name']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/661acbd22070fd8aa9863b2a6cec9a0c/5B3FC98E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/0dc6166cbd4ec6782453d36cd07fec06/5B67568E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
}
public function testBGDpqNoiMJ0() {
@ -53,7 +53,7 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
'type' => 'card',
'name' => 'pk_spam',
'url' => 'https://aaronparecki.com/',
'photo' => 'https://scontent-sof1-1.cdninstagram.com/vp/f17e1275a70fc32e93cbf434ddc32bcd/5B6CCC7A/t51.2885-19/11906329_960233084022564_1448528159_a.jpg'
'photo' => 'https://instagram.fhel2-1.fna.fbcdn.net/vp/f17e1275a70fc32e93cbf434ddc32bcd/5B6CCC7A/t51.2885-19/11906329_960233084022564_1448528159_a.jpg'
], $data['data']['author']);
$this->assertSame([
@ -71,14 +71,14 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
'type' => 'card',
'name' => 'Burnside 26',
'url' => 'https://www.instagram.com/explore/locations/359000003/',
'latitude' => 45.52322,
'longitude' => -122.63885
'latitude' => 45.5228640678,
'longitude' => -122.6389405085
], $data['data']['refs']['https://www.instagram.com/explore/locations/359000003/']);
}
public function testInstagramVideo() {
// Original URL: https://www.instagram.com/p/BO_RN8AFZSx/
$url = 'http://www.instagram.com/video.html';
$url = 'https://www.instagram.com/p/BO_RN8AFZSx/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -89,17 +89,17 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertContains('100daysofmusic', $data['data']['category']);
$this->assertEquals('Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $data['data']['content']['text']);
$this->assertEquals(1, count($data['data']['photo']));
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/4a53ff9447418192586344a101de1c37/5ABFEC5F/t51.2885-15/e15/15624670_548881701986735_8264383763249627136_n.jpg'], $data['data']['photo']);
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/32890db04701c4ab4fa7da05a6e9de93/5ADB9BDF/t51.2885-15/e15/15624670_548881701986735_8264383763249627136_n.jpg'], $data['data']['photo']);
$this->assertEquals(1, count($data['data']['video']));
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/c6647a32e561438cc42a8ffabc5e308c/5ABFB41E/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4'], $data['data']['video']);
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/46c7118509146b978fb7bfc497eeb16f/5ADB639E/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4'], $data['data']['video']);
$this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']);
$this->assertEquals('Aaron Parecki', $data['data']['author']['name']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/661acbd22070fd8aa9863b2a6cec9a0c/5B3FC98E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/0dc6166cbd4ec6782453d36cd07fec06/5B67568E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
}
public function testInstagramPhotoWithPersonTag() {
// Original URL: https://www.instagram.com/p/BNfqVfVlmkj/
$url = 'http://www.instagram.com/photo_with_person_tag.html';
$url = 'https://www.instagram.com/p/BNfqVfVlmkj/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -109,12 +109,12 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(2, count($data['data']['category']));
$this->assertContains('http://www.kmikeym.com/', $data['data']['category']);
$this->assertArrayHasKey('http://www.kmikeym.com/', $data['data']['refs']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://www.kmikeym.com/','photo'=>'https://instagram.fsea1-1.fna.fbcdn.net/vp/b9d8b25c8e7003f05212c1425fc6c422/5B3F45FC/t51.2885-19/s320x320/20634957_814691788710973_2275383796935163904_a.jpg'], $data['data']['refs']['http://www.kmikeym.com/']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://www.kmikeym.com/','photo'=>'https://instagram.fsea1-1.fna.fbcdn.net/vp/dea521b3000a53d2d9a6845f5b066256/5B66D2FC/t51.2885-19/s320x320/20634957_814691788710973_2275383796935163904_a.jpg'], $data['data']['refs']['http://www.kmikeym.com/']);
}
public function testInstagramPhotoWithVenue() {
// Original URL: https://www.instagram.com/p/BN3Z5salSys/
$url = 'http://www.instagram.com/photo_with_venue.html';
$url = 'https://www.instagram.com/p/BN3Z5salSys/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -134,7 +134,7 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
public function testTwoPhotos() {
// Original URL: https://www.instagram.com/p/BZWmUB_DVtp/
$url = 'http://www.instagram.com/two_photos.html';
$url = 'https://www.instagram.com/p/BZWmUB_DVtp/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -142,15 +142,15 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$data = json_decode($body, true);
$this->assertEquals(2, count($data['data']['photo']));
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/01add5f543d1206a940ce388d82b345d/5B343A38/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/80750e21e987cbde01a4aa0b73ee2654/5B37514D/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/406101ff9601ab78147e121b65ce3eea/5B5BC738/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/03ddc8c03c8708439dae29663b8c2305/5B5EDE4D/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg', $data['data']['photo'][1]);
$this->assertArrayNotHasKey('video', $data['data']);
$this->assertEquals(2, count($data['data']['category']));
}
public function testMixPhotosAndVideos() {
// Original URL: https://www.instagram.com/p/BZWmpecjBwN/
$url = 'http://www.instagram.com/photos_and_video.html';
$url = 'https://www.instagram.com/p/BZWmpecjBwN/';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
@ -159,8 +159,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertEquals(3, count($data['data']['photo']));
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/b0f6cd9dc4d5c3371efe9f412a0d7f0b/5B6BC5B8/t51.2885-15/e35/21878922_686481254874005_8468823712617988096_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/872cf78dbd1e717e2867bd9cbb4a8a87/5ABF8D49/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/db0ed2c09dcd3a5c46279deaaca30cbf/5B410A88/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg', $data['data']['photo'][2]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/f8939cca504f97931fd4768b77d2c152/5ADB3CC9/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/254c313bdcac37c19da5e10be8222a88/5B689788/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg', $data['data']['photo'][2]);
$this->assertArrayNotHasKey('video', $data['data']);
$this->assertEquals(2, count($data['data']['category']));
}

+ 194
- 199
tests/data/www.instagram.com/BGDpqNoiMJ0
File diff suppressed because it is too large
View File


+ 197
- 0
tests/data/www.instagram.com/aaronpk_
File diff suppressed because it is too large
View File


+ 0
- 18
tests/data/www.instagram.com/aaronpk_?__a=1
File diff suppressed because it is too large
View File


+ 182
- 187
tests/data/www.instagram.com/explore_locations_109284789535230_
File diff suppressed because it is too large
View File


+ 182
- 187
tests/data/www.instagram.com/explore_locations_359000003_
File diff suppressed because it is too large
View File


+ 197
- 0
tests/data/www.instagram.com/indiewebcat_
File diff suppressed because it is too large
View File


+ 0
- 18
tests/data/www.instagram.com/indiewebcat_?__a=1
File diff suppressed because it is too large
View File


+ 197
- 0
tests/data/www.instagram.com/kmikeym_
File diff suppressed because it is too large
View File


+ 0
- 18
tests/data/www.instagram.com/kmikeym_?__a=1
File diff suppressed because it is too large
View File


+ 197
- 0
tests/data/www.instagram.com/microformats_
File diff suppressed because it is too large
View File


+ 0
- 18
tests/data/www.instagram.com/microformats_?__a=1 View File

@ -1,18 +0,0 @@
HTTP/1.1 200 OK
Content-Type: application/json
X-Frame-Options: SAMEORIGIN
Cache-Control: private, no-cache, no-store, must-revalidate
Pragma: no-cache
Expires: Sat, 01 Jan 2000 00:00:00 GMT
Vary: Cookie, Accept-Language, Accept-Encoding
Content-Language: en
Date: Thu, 29 Mar 2018 21:23:25 GMT
Strict-Transport-Security: max-age=86400
Set-Cookie: rur=FTW; Path=/
Set-Cookie: csrftoken=RMAMTeM95T4fCvhBf6rk7ElOC45vQsSH; expires=Thu, 28-Mar-2019 21:23:25 GMT; Max-Age=31449600; Path=/; Secure
Set-Cookie: mid=Wr1ZTQAEAAGt2gR6vff8Xy2AnQXh; expires=Wed, 24-Mar-2038 21:23:25 GMT; Max-Age=630720000; Path=/
Set-Cookie: urlgen="{\"time\": 1522358605}:1f1f13:VfiT5TvOwO3eYdChPIYDY5cMurw"; Path=/
Connection: keep-alive
Content-Length: 3198
{"logging_page_id":"profilePage_8911340","show_suggested_profiles":false,"graphql":{"user":{"biography":"http://twitter.com/microformats","blocked_by_viewer":false,"country_block":false,"external_url":"http://microformats.org/","external_url_linkshimmed":"https://l.instagram.com/?u=http%3A%2F%2Fmicroformats.org%2F&e=ATNSyuVyoSGmqcX8L2UPscB9dEpLiFpjfl0qZmhTqKbEoQ51yRITZcdDLQYgc3ociCmGUESp","edge_followed_by":{"count":296},"followed_by_viewer":false,"edge_follow":{"count":66},"follows_viewer":false,"full_name":"","has_blocked_viewer":false,"has_requested_viewer":false,"id":"8911340","is_private":false,"is_verified":false,"mutual_followers":null,"profile_pic_url":"https://instagram.fsea1-1.fna.fbcdn.net/vp/7d923fc0609bc0450e52edb206073897/5B390889/t51.2885-19/11849954_706937769411730_315174835_a.jpg","profile_pic_url_hd":"https://instagram.fsea1-1.fna.fbcdn.net/vp/7d923fc0609bc0450e52edb206073897/5B390889/t51.2885-19/11849954_706937769411730_315174835_a.jpg","requested_by_viewer":false,"username":"microformats","connected_fb_page":null,"edge_owner_to_timeline_media":{"count":1,"page_info":{"has_next_page":false,"end_cursor":"AQDctKBXRxTRKQRh4h3nM83DDZ9RCy6P-LuQ5FgmTThleahLdyny1etpi1OfDngf6oU"},"edges":[{"node":{"__typename":"GraphImage","id":"207279526","edge_media_to_caption":{"edges":[{"node":{"text":"Brighton #microformats 
meetup"}}]},"shortcode":"MWtWm","edge_media_to_comment":{"count":4},"comments_disabled":false,"taken_at_timestamp":1315334603,"dimensions":{"height":612,"width":612},"display_url":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","edge_liked_by":{"count":17},"edge_media_preview_like":{"count":17},"gating_info":null,"media_preview":null,"owner":{"id":"8911340"},"thumbnail_src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","thumbnail_resources":[{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/fce8304485a6b51ca035adc7ed34c3c9/5B401152/t51.2885-15/s150x150/e15/11184669_651043525029552_626633635_n.jpg","config_width":150,"config_height":150},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/111e119854c22b0946e9f6f8a8d37186/5B512ED7/t51.2885-15/s240x240/e15/11184669_651043525029552_626633635_n.jpg","config_width":240,"config_height":240},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/28277cac208fc522a2c452777f9f6280/5B3504B0/t51.2885-15/s320x320/e15/11184669_651043525029552_626633635_n.jpg","config_width":320,"config_height":320},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/9a3081cfc5b7fe28890774b7fff31ab8/5B52071C/t51.2885-15/s480x480/e15/11184669_651043525029552_626633635_n.jpg","config_width":480,"config_height":480},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","config_width":640,"config_height":640}],"is_video":false}}]},"edge_saved_media":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_media_collections":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]}}}}

+ 204
- 0
tests/data/www.instagram.com/p_BGDpqNoiMJ0_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BN3Z5salSys_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BNfqVfVlmkj_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BO5rYVElvJq_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BO_RN8AFZSx_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BZWmUB_DVtp_
File diff suppressed because it is too large
View File


tests/data/www.instagram.com/p_BZWmpecjBwN_
File diff suppressed because it is too large
View File


+ 197
- 0
tests/data/www.instagram.com/pk_spam_
File diff suppressed because it is too large
View File


+ 0
- 18
tests/data/www.instagram.com/pk_spam_?__a=1
File diff suppressed because it is too large
View File


+ 22
- 0
tests/download-instagram-data.sh View File

@ -0,0 +1,22 @@
#!/bin/bash
# Re-downloads the Instagram pages that are stored as test fixtures.
#
# Every URL in the list below is fetched with curl (response headers included,
# via -i) and written to data/www.instagram.com/<name>, where <name> is the URL
# with the "https://www.instagram.com/" prefix removed and each remaining "/"
# replaced by "_". For example:
#   https://www.instagram.com/p/BO5rYVElvJq/  ->  p_BO5rYVElvJq_

# The data/ directory sits next to this script, so resolve paths from the
# script's own location rather than from wherever the caller happens to be.
cd "$(dirname "$0")" || exit 1

urls=(
  'https://www.instagram.com/aaronpk/'
  'https://www.instagram.com/p/BO5rYVElvJq/'
  'https://www.instagram.com/p/BGDpqNoiMJ0/'
  'https://www.instagram.com/p/BO_RN8AFZSx/'
  'https://www.instagram.com/p/BNfqVfVlmkj/'
  'https://www.instagram.com/p/BN3Z5salSys/'
  'https://www.instagram.com/p/BZWmUB_DVtp/'
  'https://www.instagram.com/p/BZWmpecjBwN/'
  'https://www.instagram.com/explore/locations/109284789535230/'
  'https://www.instagram.com/explore/locations/359000003/'
)

# Quote every expansion so a URL containing glob characters or whitespace
# (e.g. a "?" query string) is passed through as a single, literal word.
for url in "${urls[@]}"; do
  # Derive the fixture file name: drop the site prefix, then flatten slashes.
  fn=$(printf '%s\n' "$url" | sed -e 's#https://www.instagram.com/##' -e 's#/#_#g')
  echo "$url > $fn"
  curl -i -s "$url" > "data/www.instagram.com/$fn"
  # unix2dos rewrites the saved file with CRLF line endings.
  # NOTE(review): presumably the fixture loader expects CRLF-delimited HTTP
  # headers - confirm against the test HTTP client.
  unix2dos "data/www.instagram.com/$fn"
done

Loading…
Cancel
Save