Browse Source

Updates for Instagram parsing

Instagram seems to have rolled out the `graphql` key everywhere now.
pull/72/head
Aaron Parecki 6 years ago
parent
commit
c70b29479a
No known key found for this signature in database GPG Key ID: 276C2817346D6056
16 changed files with 1978 additions and 1256 deletions
  1. +13
    -15
      lib/XRay/Formats/Instagram.php
  2. +50
    -13
      tests/InstagramTest.php
  3. +209
    -0
      tests/data/www.instagram.com/BGDpqNoiMJ0
  4. +14
    -3
      tests/data/www.instagram.com/aaronpk_?__a=1
  5. +196
    -258
      tests/data/www.instagram.com/explore_locations_109284789535230_
  6. +199
    -0
      tests/data/www.instagram.com/explore_locations_359000003_
  7. +18
    -0
      tests/data/www.instagram.com/indiewebcat_?__a=1
  8. +14
    -3
      tests/data/www.instagram.com/kmikeym_?__a=1
  9. +14
    -3
      tests/data/www.instagram.com/microformats_?__a=1
  10. +205
    -154
      tests/data/www.instagram.com/photo.html
  11. +205
    -154
      tests/data/www.instagram.com/photo_with_person_tag.html
  12. +205
    -155
      tests/data/www.instagram.com/photo_with_venue.html
  13. +206
    -168
      tests/data/www.instagram.com/photos_and_video.html
  14. +14
    -3
      tests/data/www.instagram.com/pk_spam_?__a=1
  15. +206
    -168
      tests/data/www.instagram.com/two_photos.html
  16. +210
    -159
      tests/data/www.instagram.com/video.html

+ 13
- 15
lib/XRay/Formats/Instagram.php View File

@ -68,7 +68,7 @@ class Instagram extends Format {
} }
$refs = []; $refs = [];
// Include the photo/video media URLs // Include the photo/video media URLs
// (Always return arrays, even for single images) // (Always return arrays, even for single images)
if(array_key_exists('edge_sidecar_to_children', $photoData)) { if(array_key_exists('edge_sidecar_to_children', $photoData)) {
@ -197,8 +197,8 @@ class Instagram extends Format {
if(!$response['error']) { if(!$response['error']) {
$profile = @json_decode($response['body'], true); $profile = @json_decode($response['body'], true);
if($profile && array_key_exists('user', $profile)) {
$user = $profile['user'];
if($profile && isset($profile['graphql']['user'])) {
$user = $profile['graphql']['user'];
return $user; return $user;
} }
} }
@ -245,18 +245,16 @@ class Instagram extends Format {
private static function _extractVenueDataFromVenuePage($html) { private static function _extractVenueDataFromVenuePage($html) {
$data = self::_extractIGData($html); $data = self::_extractIGData($html);
if($data && is_array($data) && array_key_exists('entry_data', $data)) {
if(isset($data['entry_data']['LocationsPage'])) {
$data = $data['entry_data']['LocationsPage'];
if(isset($data[0]['location'])) {
$location = $data[0]['location'];
# we don't need these and they're huge, so drop them now
unset($location['media']);
unset($location['top_posts']);
return $location;
}
if($data && isset($data['entry_data']['LocationsPage'])) {
$data = $data['entry_data']['LocationsPage'];
if(isset($data[0]['graphql']['location'])) {
$location = $data[0]['graphql']['location'];
# we don't need these and they're huge, so drop them now
unset($location['media']);
unset($location['top_posts']);
return $location;
} }
} }

+ 50
- 13
tests/InstagramTest.php View File

@ -33,10 +33,47 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertContains('2017', $data['data']['category']); $this->assertContains('2017', $data['data']['category']);
$this->assertEquals('Kind of crazy to see the whole year laid out like this. #planning #2017', $data['data']['content']['text']); $this->assertEquals('Kind of crazy to see the whole year laid out like this. #planning #2017', $data['data']['content']['text']);
$this->assertEquals(1, count($data['data']['photo'])); $this->assertEquals(1, count($data['data']['photo']));
$this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg'], $data['data']['photo']);
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/214e719b6026ef54e0545f2ed70d4c83/5B56795F/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg'], $data['data']['photo']);
$this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']); $this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']);
$this->assertEquals('Aaron Parecki', $data['data']['author']['name']); $this->assertEquals('Aaron Parecki', $data['data']['author']['name']);
$this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/661acbd22070fd8aa9863b2a6cec9a0c/5B3FC98E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
}
public function testBGDpqNoiMJ0() {
// https://www.instagram.com/p/BGDpqNoiMJ0/
$url = 'http://www.instagram.com/BGDpqNoiMJ0';
$response = $this->parse(['url' => $url]);
$body = $response->getContent();
$this->assertEquals(200, $response->getStatusCode());
$data = json_decode($body, true);
$this->assertEquals('entry', $data['data']['type']);
$this->assertSame([
'type' => 'card',
'name' => 'pk_spam',
'url' => 'https://aaronparecki.com/',
'photo' => 'https://scontent-sof1-1.cdninstagram.com/vp/f17e1275a70fc32e93cbf434ddc32bcd/5B6CCC7A/t51.2885-19/11906329_960233084022564_1448528159_a.jpg'
], $data['data']['author']);
$this->assertSame([
'muffins',
'https://indiewebcat.com/'
], $data['data']['category']);
$this->assertEquals('Meow #muffins', $data['data']['content']['text']);
$this->assertSame(['https://instagram.fsea1-1.fna.fbcdn.net/vp/9433ea494a8b055bebabf70fd81cfa32/5B51F092/t51.2885-15/e35/13266755_877794672348882_1908663476_n.jpg'], $data['data']['photo']);
$this->assertEquals('2016-05-30T20:46:22-07:00', $data['data']['published']);
$this->assertEquals('https://www.instagram.com/explore/locations/359000003/', $data['data']['location'][0]);
$this->assertSame([
'type' => 'card',
'name' => 'Burnside 26',
'url' => 'https://www.instagram.com/explore/locations/359000003/',
'latitude' => 45.52322,
'longitude' => -122.63885
], $data['data']['refs']['https://www.instagram.com/explore/locations/359000003/']);
} }
public function testInstagramVideo() { public function testInstagramVideo() {
@ -52,12 +89,12 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$this->assertContains('100daysofmusic', $data['data']['category']); $this->assertContains('100daysofmusic', $data['data']['category']);
$this->assertEquals('Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $data['data']['content']['text']); $this->assertEquals('Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $data['data']['content']['text']);
$this->assertEquals(1, count($data['data']['photo'])); $this->assertEquals(1, count($data['data']['photo']));
$this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-15/s640x640/e15/15624670_548881701986735_8264383763249627136_n.jpg'], $data['data']['photo']);
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/4a53ff9447418192586344a101de1c37/5ABFEC5F/t51.2885-15/e15/15624670_548881701986735_8264383763249627136_n.jpg'], $data['data']['photo']);
$this->assertEquals(1, count($data['data']['video'])); $this->assertEquals(1, count($data['data']['video']));
$this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4'], $data['data']['video']);
$this->assertEquals(['https://instagram.fsea1-1.fna.fbcdn.net/vp/c6647a32e561438cc42a8ffabc5e308c/5ABFB41E/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4'], $data['data']['video']);
$this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']); $this->assertEquals('https://aaronparecki.com/', $data['data']['author']['url']);
$this->assertEquals('Aaron Parecki', $data['data']['author']['name']); $this->assertEquals('Aaron Parecki', $data['data']['author']['name']);
$this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/661acbd22070fd8aa9863b2a6cec9a0c/5B3FC98E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']);
} }
public function testInstagramPhotoWithPersonTag() { public function testInstagramPhotoWithPersonTag() {
@ -70,9 +107,9 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals(2, count($data['data']['category'])); $this->assertEquals(2, count($data['data']['category']));
$this->assertContains('http://tinyletter.com/kmikeym', $data['data']['category']);
$this->assertArrayHasKey('http://tinyletter.com/kmikeym', $data['data']['refs']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://tinyletter.com/kmikeym','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['data']['refs']['http://tinyletter.com/kmikeym']);
$this->assertContains('http://www.kmikeym.com/', $data['data']['category']);
$this->assertArrayHasKey('http://www.kmikeym.com/', $data['data']['refs']);
$this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'http://www.kmikeym.com/','photo'=>'https://instagram.fsea1-1.fna.fbcdn.net/vp/b9d8b25c8e7003f05212c1425fc6c422/5B3F45FC/t51.2885-19/s320x320/20634957_814691788710973_2275383796935163904_a.jpg'], $data['data']['refs']['http://www.kmikeym.com/']);
} }
public function testInstagramPhotoWithVenue() { public function testInstagramPhotoWithVenue() {
@ -105,8 +142,8 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals(2, count($data['data']['photo'])); $this->assertEquals(2, count($data['data']['photo']));
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/01add5f543d1206a940ce388d82b345d/5B343A38/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/80750e21e987cbde01a4aa0b73ee2654/5B37514D/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg', $data['data']['photo'][1]);
$this->assertArrayNotHasKey('video', $data['data']); $this->assertArrayNotHasKey('video', $data['data']);
$this->assertEquals(2, count($data['data']['category'])); $this->assertEquals(2, count($data['data']['category']));
} }
@ -121,9 +158,9 @@ class InstagramTest extends PHPUnit_Framework_TestCase {
$data = json_decode($body, true); $data = json_decode($body, true);
$this->assertEquals(3, count($data['data']['photo'])); $this->assertEquals(3, count($data['data']['photo']));
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e35/21878922_686481254874005_8468823712617988096_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg', $data['data']['photo'][2]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/b0f6cd9dc4d5c3371efe9f412a0d7f0b/5B6BC5B8/t51.2885-15/e35/21878922_686481254874005_8468823712617988096_n.jpg', $data['data']['photo'][0]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/872cf78dbd1e717e2867bd9cbb4a8a87/5ABF8D49/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg', $data['data']['photo'][1]);
$this->assertEquals('https://instagram.fsea1-1.fna.fbcdn.net/vp/db0ed2c09dcd3a5c46279deaaca30cbf/5B410A88/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg', $data['data']['photo'][2]);
$this->assertArrayNotHasKey('video', $data['data']); $this->assertArrayNotHasKey('video', $data['data']);
$this->assertEquals(2, count($data['data']['category'])); $this->assertEquals(2, count($data['data']['category']));
} }

+ 209
- 0
tests/data/www.instagram.com/BGDpqNoiMJ0
File diff suppressed because it is too large
View File


+ 14
- 3
tests/data/www.instagram.com/aaronpk_?__a=1
File diff suppressed because it is too large
View File


+ 196
- 258
tests/data/www.instagram.com/explore_locations_109284789535230_
File diff suppressed because it is too large
View File


+ 199
- 0
tests/data/www.instagram.com/explore_locations_359000003_
File diff suppressed because it is too large
View File


+ 18
- 0
tests/data/www.instagram.com/indiewebcat_?__a=1
File diff suppressed because it is too large
View File


+ 14
- 3
tests/data/www.instagram.com/kmikeym_?__a=1
File diff suppressed because it is too large
View File


+ 14
- 3
tests/data/www.instagram.com/microformats_?__a=1 View File

@ -1,7 +1,18 @@
HTTP/1.1 200 OK HTTP/1.1 200 OK
Server: Apache
Date: Wed, 09 Dec 2015 03:29:14 GMT
Content-Type: application/json Content-Type: application/json
X-Frame-Options: SAMEORIGIN
Cache-Control: private, no-cache, no-store, must-revalidate
Pragma: no-cache
Expires: Sat, 01 Jan 2000 00:00:00 GMT
Vary: Cookie, Accept-Language, Accept-Encoding
Content-Language: en
Date: Thu, 29 Mar 2018 21:23:25 GMT
Strict-Transport-Security: max-age=86400
Set-Cookie: rur=FTW; Path=/
Set-Cookie: csrftoken=RMAMTeM95T4fCvhBf6rk7ElOC45vQsSH; expires=Thu, 28-Mar-2019 21:23:25 GMT; Max-Age=31449600; Path=/; Secure
Set-Cookie: mid=Wr1ZTQAEAAGt2gR6vff8Xy2AnQXh; expires=Wed, 24-Mar-2038 21:23:25 GMT; Max-Age=630720000; Path=/
Set-Cookie: urlgen="{\"time\": 1522358605}:1f1f13:VfiT5TvOwO3eYdChPIYDY5cMurw"; Path=/
Connection: keep-alive Connection: keep-alive
Content-Length: 3198
{"user": {"biography": "http://twitter.com/microformats", "blocked_by_viewer": false, "country_block": false, "external_url": "http://microformats.org/", "external_url_linkshimmed": "http://l.instagram.com/?u=http%3A%2F%2Fmicroformats.org%2F\u0026e=ATN_CrtZkKLdaFphortpLsVRly6f_2zeEV78c4j2uxUlPFrxFrE0no5f4Z5PjcpLJsZ0IQ", "followed_by": {"count": 303}, "followed_by_viewer": false, "follows": {"count": 66}, "follows_viewer": true, "full_name": null, "has_blocked_viewer": false, "has_requested_viewer": false, "id": "8911340", "is_private": false, "is_verified": false, "profile_pic_url": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-19/11849954_706937769411730_315174835_a.jpg", "profile_pic_url_hd": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-19/11849954_706937769411730_315174835_a.jpg", "requested_by_viewer": false, "username": "microformats", "connected_fb_page": null, "media": {"nodes": [{"__typename": "GraphImage", "id": "207279526", "comments_disabled": false, "dimensions": {"height": 612, "width": 612}, "gating_info": null, "media_preview": null, "owner": {"id": "8911340"}, "thumbnail_src": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg", "thumbnail_resources": [{"src": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/s150x150/e15/11184669_651043525029552_626633635_n.jpg", "config_width": 150, "config_height": 150}, {"src": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/s240x240/e15/11184669_651043525029552_626633635_n.jpg", "config_width": 240, "config_height": 240}, {"src": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/s320x320/e15/11184669_651043525029552_626633635_n.jpg", "config_width": 320, "config_height": 320}, {"src": "https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/s480x480/e15/11184669_651043525029552_626633635_n.jpg", "config_width": 480, "config_height": 480}], "is_video": false, "code": "MWtWm", "date": 1315334603, "display_src": 
"https://instagram.fsea1-1.fna.fbcdn.net/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg", "caption": "Brighton #microformats meetup", "comments": {"count": 4}, "likes": {"count": 17}}], "count": 1, "page_info": {"has_next_page": false, "end_cursor": "AQDxmbcFVBEKns9y1r3onb4coy-unYo21exQcDSdubHFRIrPL3MhxQpRmCFFazPVzKE"}}, "saved_media": {"nodes": [], "count": 0, "page_info": {"has_next_page": false, "end_cursor": null}}}, "logging_page_id": "profilePage_8911340"}
{"logging_page_id":"profilePage_8911340","show_suggested_profiles":false,"graphql":{"user":{"biography":"http://twitter.com/microformats","blocked_by_viewer":false,"country_block":false,"external_url":"http://microformats.org/","external_url_linkshimmed":"https://l.instagram.com/?u=http%3A%2F%2Fmicroformats.org%2F&e=ATNSyuVyoSGmqcX8L2UPscB9dEpLiFpjfl0qZmhTqKbEoQ51yRITZcdDLQYgc3ociCmGUESp","edge_followed_by":{"count":296},"followed_by_viewer":false,"edge_follow":{"count":66},"follows_viewer":false,"full_name":"","has_blocked_viewer":false,"has_requested_viewer":false,"id":"8911340","is_private":false,"is_verified":false,"mutual_followers":null,"profile_pic_url":"https://instagram.fsea1-1.fna.fbcdn.net/vp/7d923fc0609bc0450e52edb206073897/5B390889/t51.2885-19/11849954_706937769411730_315174835_a.jpg","profile_pic_url_hd":"https://instagram.fsea1-1.fna.fbcdn.net/vp/7d923fc0609bc0450e52edb206073897/5B390889/t51.2885-19/11849954_706937769411730_315174835_a.jpg","requested_by_viewer":false,"username":"microformats","connected_fb_page":null,"edge_owner_to_timeline_media":{"count":1,"page_info":{"has_next_page":false,"end_cursor":"AQDctKBXRxTRKQRh4h3nM83DDZ9RCy6P-LuQ5FgmTThleahLdyny1etpi1OfDngf6oU"},"edges":[{"node":{"__typename":"GraphImage","id":"207279526","edge_media_to_caption":{"edges":[{"node":{"text":"Brighton #microformats 
meetup"}}]},"shortcode":"MWtWm","edge_media_to_comment":{"count":4},"comments_disabled":false,"taken_at_timestamp":1315334603,"dimensions":{"height":612,"width":612},"display_url":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","edge_liked_by":{"count":17},"edge_media_preview_like":{"count":17},"gating_info":null,"media_preview":null,"owner":{"id":"8911340"},"thumbnail_src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","thumbnail_resources":[{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/fce8304485a6b51ca035adc7ed34c3c9/5B401152/t51.2885-15/s150x150/e15/11184669_651043525029552_626633635_n.jpg","config_width":150,"config_height":150},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/111e119854c22b0946e9f6f8a8d37186/5B512ED7/t51.2885-15/s240x240/e15/11184669_651043525029552_626633635_n.jpg","config_width":240,"config_height":240},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/28277cac208fc522a2c452777f9f6280/5B3504B0/t51.2885-15/s320x320/e15/11184669_651043525029552_626633635_n.jpg","config_width":320,"config_height":320},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/9a3081cfc5b7fe28890774b7fff31ab8/5B52071C/t51.2885-15/s480x480/e15/11184669_651043525029552_626633635_n.jpg","config_width":480,"config_height":480},{"src":"https://instagram.fsea1-1.fna.fbcdn.net/vp/8fbe5c38cbd0ad700eb0dfcf8ae0725a/5B707BCE/t51.2885-15/e15/11184669_651043525029552_626633635_n.jpg","config_width":640,"config_height":640}],"is_video":false}}]},"edge_saved_media":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]},"edge_media_collections":{"count":0,"page_info":{"has_next_page":false,"end_cursor":null},"edges":[]}}}}

+ 205
- 154
tests/data/www.instagram.com/photo.html
File diff suppressed because it is too large
View File


+ 205
- 154
tests/data/www.instagram.com/photo_with_person_tag.html
File diff suppressed because it is too large
View File


+ 205
- 155
tests/data/www.instagram.com/photo_with_venue.html
File diff suppressed because it is too large
View File


+ 206
- 168
tests/data/www.instagram.com/photos_and_video.html
File diff suppressed because it is too large
View File


+ 14
- 3
tests/data/www.instagram.com/pk_spam_?__a=1
File diff suppressed because it is too large
View File


+ 206
- 168
tests/data/www.instagram.com/two_photos.html
File diff suppressed because it is too large
View File


+ 210
- 159
tests/data/www.instagram.com/video.html
File diff suppressed because it is too large
View File


Loading…
Cancel
Save