From 773252559d5f49929df70f992406ff3daf755e96 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Mon, 9 Jan 2017 08:54:13 -0800 Subject: [PATCH] parse instagram photos and videos --- composer.json | 1 + composer.lock | 14 +- controllers/Parse.php | 12 + lib/Formats/Instagram.php | 216 ++++++++++++++ lib/HTTPTest.php | 6 + tests/ParseTest.php | 71 +++++ tests/data/www.instagram.com/aaronpk_?__a=1 | 7 + .../explore_locations_109284789535230_ | 261 +++++++++++++++++ tests/data/www.instagram.com/kmikeym_?__a=1 | 7 + tests/data/www.instagram.com/photo.html | 271 +++++++++++++++++ .../photo_with_person_tag.html | 271 +++++++++++++++++ .../www.instagram.com/photo_with_venue.html | 271 +++++++++++++++++ tests/data/www.instagram.com/video.html | 276 ++++++++++++++++++ 13 files changed, 1677 insertions(+), 7 deletions(-) create mode 100644 lib/Formats/Instagram.php create mode 100644 tests/data/www.instagram.com/aaronpk_?__a=1 create mode 100644 tests/data/www.instagram.com/explore_locations_109284789535230_ create mode 100644 tests/data/www.instagram.com/kmikeym_?__a=1 create mode 100644 tests/data/www.instagram.com/photo.html create mode 100644 tests/data/www.instagram.com/photo_with_person_tag.html create mode 100644 tests/data/www.instagram.com/photo_with_venue.html create mode 100644 tests/data/www.instagram.com/video.html diff --git a/composer.json b/composer.json index b1296a5..14cd2d7 100644 --- a/composer.json +++ b/composer.json @@ -17,6 +17,7 @@ "lib/HTTPStream.php", "lib/HTTP.php", "lib/Formats/Mf2.php", + "lib/Formats/Instagram.php", "lib/Formats/HTMLPurifier_AttrDef_HTML_Microformats2.php" ] }, diff --git a/composer.lock b/composer.lock index a4b3a4c..711aa68 100644 --- a/composer.lock +++ b/composer.lock @@ -4,21 +4,21 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "2b5910cbc964bd8545d6a4737d319e5a", - "content-hash": "9dd49de07b7077eb937199147a258b4b", 
+ "hash": "b55b9f1fabddb79bf5e8b0daf6b4f88e", + "content-hash": "a791e61b2f956830a8b7d9b0a2493148", "packages": [ { "name": "ezyang/htmlpurifier", - "version": "v4.7.0", + "version": "v4.8.0", "source": { "type": "git", "url": "https://github.com/ezyang/htmlpurifier.git", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40" + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2" }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/ae1828d955112356f7677c465f94f7deb7d27a40", - "reference": "ae1828d955112356f7677c465f94f7deb7d27a40", + "url": "https://api.github.com/repos/ezyang/htmlpurifier/zipball/d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", + "reference": "d0c392f77d2f2a3dcf7fcb79e2a1e2b8804e75b2", "shasum": "" }, "require": { @@ -49,7 +49,7 @@ "keywords": [ "html" ], - "time": "2015-08-05 01:03:42" + "time": "2016-07-16 12:58:58" }, { "name": "indieweb/link-rel-parser", diff --git a/controllers/Parse.php b/controllers/Parse.php index ee8691a..7e03100 100644 --- a/controllers/Parse.php +++ b/controllers/Parse.php @@ -143,6 +143,16 @@ class Parse { } + // Check for known services + $host = parse_url($result['url'], PHP_URL_HOST); + if(in_array($host, ['www.instagram.com','instagram.com'])) { + list($data, $parsed) = Formats\Instagram::parse($result['body'], $result['url'], $this->http); + if($request->get('include_original')) + $data['original'] = $parsed; + return $this->respond($response, 200, $data); + } + + // attempt to parse the page as HTML $doc = new DOMDocument(); @$doc->loadHTML(self::toHtmlEntities($result['body'])); @@ -215,6 +225,8 @@ class Parse { 'found_fragment' => $foundFragment ]; } + if($request->get('include_original')) + $data['original'] = $html; return $this->respond($response, 200, $data); } } diff --git a/lib/Formats/Instagram.php b/lib/Formats/Instagram.php new file mode 100644 index 0000000..6ce0b04 --- /dev/null +++ b/lib/Formats/Instagram.php @@ -0,0 +1,216 @@ + 'entry', + 'url' => $url, + 
'author' => [ + 'type' => 'card', + 'name' => null, + 'photo' => null, + 'url' => null + ] + ); + + // Fetch profile info for this user + $username = $photoData['owner']['username']; + $profile = self::_getInstagramProfile($username, $http); + if($profile) { + $entry['author'] = self::_buildHCardFromInstagramProfile($profile); + } + + // Content and hashtags + if(isset($photoData['caption'])) { + if(preg_match_all('/#([a-z0-9_-]+)/i', $photoData['caption'], $matches)) { + $entry['category'] = []; + foreach($matches[1] as $match) { + $entry['category'][] = $match; + } + } + + $entry['content'] = [ + 'text' => $photoData['caption'] + ]; + } + + // Include the photo/video media URLs + // (Always return arrays) + $entry['photo'] = [$photoData['display_src']]; + + if(array_key_exists('is_video', $photoData) && $photoData['is_video']) { + $entry['video'] = [$photoData['video_url']]; + } + + $refs = []; + $profiles = []; + + // Find person tags and fetch user profiles + if(array_key_exists('usertags', $photoData) && $photoData['usertags']['nodes']) { + if(!isset($entry['category'])) $entry['category'] = []; + + foreach($photoData['usertags']['nodes'] as $tag) { + $profile = self::_getInstagramProfile($tag['user']['username'], $http); + if($profile) { + $card = self::_buildHCardFromInstagramProfile($profile); + $entry['category'][] = $card['url']; + $refs[$card['url']] = $card; + $profiles[] = $profile; + } + } + } + + // Include venue data + $locations = []; + if($photoData['location']) { + $location = self::_getInstagramLocation($photoData['location']['id'], $http); + if($location) { + $entry['location'] = [$location['url']]; + $refs[$location['url']] = $location; + $locations[] = $location; + } + } + + $response = [ + 'data' => $entry + ]; + + if(count($refs)) { + $response['refs'] = $refs; + } + + return [$response, [ + 'photo' => $photoData, + 'profiles' => $profiles, + 'locations' => $locations + ]]; + } + + private static function 
_buildHCardFromInstagramProfile($profile) { + if(!$profile) return false; + + $author = [ + 'type' => 'card' + ]; + + if($profile['full_name']) + $author['name'] = $profile['full_name']; + else + $author['name'] = $profile['username']; + + if(isset($profile['external_url']) && $profile['external_url']) + $author['url'] = $profile['external_url']; + else + $author['url'] = 'https://www.instagram.com/' . $profile['username']; /* no external URL set: link to the Instagram profile page */ + + if(isset($profile['profile_pic_url_hd'])) + $author['photo'] = $profile['profile_pic_url_hd']; + else + $author['photo'] = $profile['profile_pic_url']; + + return $author; + } + + private static function _getInstagramProfile($username, $http) { + $response = $http->get('https://www.instagram.com/'.$username.'/?__a=1'); + + if(!$response['error']) { + $profile = @json_decode($response['body'], true); + if($profile && array_key_exists('user', $profile)) { + $user = $profile['user']; + return $user; + } + } + return null; + } + + private static function _getInstagramLocation($id, $http) { + $igURL = 'https://www.instagram.com/explore/locations/'.$id.'/'; + $response = $http->get($igURL); + if($response['body']) { + $data = self::_extractVenueDataFromVenuePage($response['body']); + if($data) { + return [ + 'type' => 'card', + 'name' => $data['name'], + 'url' => $igURL, + 'latitude' => $data['lat'], + 'longitude' => $data['lng'], + ]; + } + } + return null; + } + + private static function _extractPhotoDataFromPhotoPage($html) { + $data = self::_extractIGData($html); + + if($data && is_array($data) && array_key_exists('entry_data', $data)) { + if(is_array($data['entry_data']) && array_key_exists('PostPage', $data['entry_data'])) { + $post = $data['entry_data']['PostPage']; + if(is_array($post) && array_key_exists(0, $post) && array_key_exists('media', $post[0])) { + $media = $post[0]['media']; + + return $media; + } + } + } + + return null; + } + + private static function _extractVenueDataFromVenuePage($html) { + $data = self::_extractIGData($html); + +
if($data && is_array($data) && array_key_exists('entry_data', $data)) { + if(is_array($data['entry_data']) && array_key_exists('LocationsPage', $data['entry_data'])) { + $data = $data['entry_data']['LocationsPage']; + if(is_array($data) && array_key_exists(0, $data) && array_key_exists('location', $data[0])) { + $location = $data[0]['location']; + + # we don't need these and they're huge, so drop them now + unset($location['media']); + unset($location['top_posts']); + + return $location; + } + } + } + + return null; + } + + private static function _extractIGData($html) { + $doc = new DOMDocument(); + @$doc->loadHTML($html); + + if(!$doc) { + return null; + } + + $xpath = new DOMXPath($doc); + + $data = null; + + foreach($xpath->query('//script') as $script) { + if(preg_match('/window\._sharedData = ({.+});/', $script->textContent, $match)) { + $data = json_decode($match[1], true); + } + } + + return $data; + } + +} diff --git a/lib/HTTPTest.php b/lib/HTTPTest.php index ca3880e..966c722 100644 --- a/lib/HTTPTest.php +++ b/lib/HTTPTest.php @@ -32,6 +32,12 @@ class HTTPTest extends HTTPCurl { } private function _read_file($url) { + $parts = parse_url($url); + if($parts['path']) { + $parts['path'] = '/'.str_replace('/','_',substr($parts['path'],1)); + $url = \build_url($parts); + } + $filename = $this->_testDataPath.preg_replace('/https?:\/\//', '', $url); if(!file_exists($filename)) { $filename = $this->_testDataPath.'404.response.txt'; diff --git a/tests/ParseTest.php b/tests/ParseTest.php index 5475d61..35f722a 100644 --- a/tests/ParseTest.php +++ b/tests/ParseTest.php @@ -332,6 +332,7 @@ class ParseTest extends PHPUnit_Framework_TestCase { $body = $response->getContent(); $this->assertEquals(200, $response->getStatusCode()); $data = json_decode($body, true); + $this->assertEquals('event', $data['data']['type']); $this->assertEquals('Homebrew Website Club', $data['data']['name']); $this->assertEquals($url, $data['data']['url']); @@ -382,4 +383,74 @@ class ParseTest 
extends PHPUnit_Framework_TestCase { $this->assertFalse($data['info']['found_fragment']); } + public function testInstagramPhoto() { + $url = 'http://www.instagram.com/photo.html'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $this->assertContains('planning', $data['data']['category']); + $this->assertContains('2017', $data['data']['category']); + $this->assertEquals('Kind of crazy to see the whole year laid out like this. #planning #2017', $data['data']['content']['text']); + $this->assertEquals(1, count($data['data']['photo'])); + $this->assertEquals(['https://scontent.cdninstagram.com/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg?ig_cache_key=MTQyMTM1Nzk0NTMwNTEwMDkwNg%3D%3D.2'], $data['data']['photo']); + $this->assertEquals('http://aaronparecki.com/', $data['data']['author']['url']); + $this->assertEquals('Aaron Parecki', $data['data']['author']['name']); + $this->assertEquals('https://scontent.cdninstagram.com/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']); + } + + public function testInstagramVideo() { + $url = 'http://www.instagram.com/video.html'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals('entry', $data['data']['type']); + $this->assertContains('100daysofmusic', $data['data']['category']); + $this->assertEquals('Day 18. 
Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $data['data']['content']['text']); + $this->assertEquals(1, count($data['data']['photo'])); + $this->assertEquals(['https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15624670_548881701986735_8264383763249627136_n.jpg?ig_cache_key=MTQyMjkzMTczMTg0MjE3NjE3Nw%3D%3D.2'], $data['data']['photo']); + $this->assertEquals(1, count($data['data']['video'])); + $this->assertEquals(['https://scontent.cdninstagram.com/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4'], $data['data']['video']); + $this->assertEquals('http://aaronparecki.com/', $data['data']['author']['url']); + $this->assertEquals('Aaron Parecki', $data['data']['author']['name']); + $this->assertEquals('https://scontent.cdninstagram.com/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg', $data['data']['author']['photo']); + } + + public function testInstagramPhotoWithPersonTag() { + $url = 'http://www.instagram.com/photo_with_person_tag.html'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals(2, count($data['data']['category'])); + $this->assertContains('https://kmikeym.com/', $data['data']['category']); + $this->assertArrayHasKey('https://kmikeym.com/', $data['refs']); + $this->assertEquals(['type'=>'card','name'=>'Mike Merrill','url'=>'https://kmikeym.com/','photo'=>'https://scontent.cdninstagram.com/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg'], $data['refs']['https://kmikeym.com/']); + } + + public function testInstagramPhotoWithVenue() { + $url = 'http://www.instagram.com/photo_with_venue.html'; + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals(1, 
count($data['data']['location'])); + $this->assertContains('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['location']); + $this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['refs']); + $venue = $data['refs']['https://www.instagram.com/explore/locations/109284789535230/']; + $this->assertEquals('XOXO Outpost', $venue['name']); + $this->assertEquals('45.5261002', $venue['latitude']); + $this->assertEquals('-122.6558081', $venue['longitude']); + } + } diff --git a/tests/data/www.instagram.com/aaronpk_?__a=1 b/tests/data/www.instagram.com/aaronpk_?__a=1 new file mode 100644 index 0000000..791e6fd --- /dev/null +++ b/tests/data/www.instagram.com/aaronpk_?__a=1 @@ -0,0 +1,7 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: application/json +Connection: keep-alive + +{"user": {"followed_by": {"count": 373}, "profile_pic_url_hd": "https://scontent.cdninstagram.com/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg", "is_private": false, "username": "aaronpk", "id": "1500881", "external_url": "http://aaronparecki.com/", "follows": {"count": 196}, "followed_by_viewer": false, "is_verified": false, "has_blocked_viewer": false, "profile_pic_url": "https://scontent.cdninstagram.com/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg", "biography": null, "external_url_linkshimmed": "http://l.instagram.com/?e=ATMouxm2Ftqd9PWctyaNWmAwjUOMz6oHuHXrMT5aP5dgmfHQrSemKTru1yOFmow\u0026u=http%3A%2F%2Faaronparecki.com%2F", "media": {"page_info": {"has_previous_page": false, "start_cursor": "1423583254649553942", "has_next_page": true, "end_cursor": "1416445372818376597"}, "nodes": [{"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 19. Something different. 
#100daysofmusic #100dayproject", "id": "1423583254649553942", "likes": {"count": 2}, "comments": {"count": 0}, "comments_disabled": false, "code": "BPBlW15lMQW", "is_video": true, "video_views": 18, "date": 1483924370, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15803744_1772803536373955_8904948470108913664_n.jpg?ig_cache_key=MTQyMzU4MzI1NDY0OTU1Mzk0Mg%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15803744_1772803536373955_8904948470108913664_n.jpg?ig_cache_key=MTQyMzU4MzI1NDY0OTU1Mzk0Mg%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18", "id": "1422931731842176177", "likes": {"count": 2}, "comments": {"count": 0}, "comments_disabled": false, "code": "BO_RN8AFZSx", "is_video": true, "video_views": 32, "date": 1483846702, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15624670_548881701986735_8264383763249627136_n.jpg?ig_cache_key=MTQyMjkzMTczMTg0MjE3NjE3Nw%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15624670_548881701986735_8264383763249627136_n.jpg?ig_cache_key=MTQyMjkzMTczMTg0MjE3NjE3Nw%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 17 #100daysofmusic #100daysproject #the100dayproject", "id": "1422248445385971330", "likes": {"count": 3}, "comments": {"count": 0}, "comments_disabled": false, "code": "BO812z5lRaC", "is_video": true, "video_views": 32, "date": 1483765248, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15877501_1163795280401949_6778196169685204992_n.jpg?ig_cache_key=MTQyMjI0ODQ0NTM4NTk3MTMzMA%3D%3D.2.c", "display_src": 
"https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15877501_1163795280401949_6778196169685204992_n.jpg?ig_cache_key=MTQyMjI0ODQ0NTM4NTk3MTMzMA%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 16: Woodbury. I sampled an ad from an old radio program, and set it to video from the TV version of the show. #100daysofmusic", "id": "1421488994613945367", "likes": {"count": 2}, "comments": {"count": 0}, "comments_disabled": false, "code": "BO6JLWQF2QX", "is_video": true, "video_views": 39, "date": 1483674714, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15877366_168619313618864_1261670673542021120_n.jpg?ig_cache_key=MTQyMTQ4ODk5NDYxMzk0NTM2Nw%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15877366_168619313618864_1261670673542021120_n.jpg?ig_cache_key=MTQyMTQ4ODk5NDYxMzk0NTM2Nw%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 1080, "height": 809}, "caption": "Kind of crazy to see the whole year laid out like this. #planning #2017", "id": "1421357945305100906", "likes": {"count": 13}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/c135.0.809.809/15803256_1832278043695907_4846092951052353536_n.jpg?ig_cache_key=MTQyMTM1Nzk0NTMwNTEwMDkwNg%3D%3D.2.c", "comments_disabled": false, "code": "BO5rYVElvJq", "is_video": false, "date": 1483659092, "comments": {"count": 2}, "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg?ig_cache_key=MTQyMTM1Nzk0NTMwNTEwMDkwNg%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 15. 
#100daysofmusic #100daysproject #the100dayproject", "id": "1420764733800503247", "likes": {"count": 3}, "comments": {"count": 0}, "comments_disabled": false, "code": "BO3kf9zlNfP", "is_video": true, "video_views": 22, "date": 1483588376, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15876755_221940651592214_6077781888125108224_n.jpg?ig_cache_key=MTQyMDc2NDczMzgwMDUwMzI0Nw%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15876755_221940651592214_6077781888125108224_n.jpg?ig_cache_key=MTQyMDc2NDczMzgwMDUwMzI0Nw%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 14: Slow-Scan TV #100daysofmusic #100daysproject \n#hamradio", "id": "1420121264157459769", "likes": {"count": 2}, "comments": {"count": 0}, "comments_disabled": false, "code": "BO1SMQAFYU5", "is_video": true, "video_views": 46, "date": 1483511668, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15876662_566485723547483_1182812333798326272_n.jpg?ig_cache_key=MTQyMDEyMTI2NDE1NzQ1OTc2OQ%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15876662_566485723547483_1182812333798326272_n.jpg?ig_cache_key=MTQyMDEyMTI2NDE1NzQ1OTc2OQ%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 13. 
Daft Punk vs The Office #100daysofmusic #100daysproject #the100dayproject", "id": "1419363203654358999", "likes": {"count": 3}, "comments": {"count": 2}, "comments_disabled": false, "code": "BOyl1BJFBfX", "is_video": true, "video_views": 54, "date": 1483421300, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15876038_1025262177578792_9151050615860756480_n.jpg?ig_cache_key=MTQxOTM2MzIwMzY1NDM1ODk5OQ%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15876038_1025262177578792_9151050615860756480_n.jpg?ig_cache_key=MTQxOTM2MzIwMzY1NDM1ODk5OQ%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 12. This started out as a totally unrelated riff, and by the time I was done adding to it, the original riff didn't make any sense so I deleted it. There are actually 4 different kinds of trumpet tracks in this in order to achieve this semi-realistic result. I was on the fence about adding a faint string section in the background and decided against it in the end. I enjoyed playing with the drum section in this one, using silence to emphasize the trumpet solos. #100daysofmusic #100daysproject #the100dayproject", "id": "1418523047083090611", "likes": {"count": 1}, "comments": {"count": 0}, "comments_disabled": false, "code": "BOvmzIXF6Kz", "is_video": true, "video_views": 31, "date": 1483321146, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15624009_706856202813838_3219228496890953728_n.jpg?ig_cache_key=MTQxODUyMzA0NzA4MzA5MDYxMQ%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15624009_706856202813838_3219228496890953728_n.jpg?ig_cache_key=MTQxODUyMzA0NzA4MzA5MDYxMQ%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 11. I'm definitely less happy with this one than any others so far, but hey that's what this project is about. 
Putting stuff out there and not worrying too much about it. #100daysofmusic #100daysproject #the100dayproject", "id": "1417868433870509102", "likes": {"count": 1}, "comments": {"count": 0}, "comments_disabled": false, "code": "BOtR9QTF1Qu", "is_video": true, "video_views": 18, "date": 1483243110, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15803065_1838848903022558_5714755476852310016_n.jpg?ig_cache_key=MTQxNzg2ODQzMzg3MDUwOTEwMg%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15803065_1838848903022558_5714755476852310016_n.jpg?ig_cache_key=MTQxNzg2ODQzMzg3MDUwOTEwMg%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 10. Mozart is probably rolling in his grave. I took the beginning of Lacrimosa, changed it to 4/4, and added drums and some other non orchestral instruments. This was my first time playing with the East/West Choir \"Word Builder\" which is a tool where you can type phonetically what you want the choir to sing and it puts it together. This rendition is by no means perfect, but I could get it a lot closer with more time, tuning the precise timings of moving from each consonant to vowel and back. The instrumentals I added are inspired by E.S. Posthumus. 
#100daysofmusic #100daysproject #the100dayproject", "id": "1417184091011571733", "likes": {"count": 3}, "comments": {"count": 0}, "comments_disabled": false, "code": "BOq2WwWFGAV", "is_video": true, "video_views": 31, "date": 1483161530, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15801826_365862793770860_7523962569996894208_n.jpg?ig_cache_key=MTQxNzE4NDA5MTAxMTU3MTczMw%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15801826_365862793770860_7523962569996894208_n.jpg?ig_cache_key=MTQxNzE4NDA5MTAxMTU3MTczMw%3D%3D.2"}, {"owner": {"id": "1500881"}, "dimensions": {"width": 640, "height": 360}, "caption": "Day 9. I don't really know what I was going for here. I had this tune in my head on the bike ride home from downtown this evening. This one only took an hour or so to jot down, since I wasn't going for any particular sound. The animation kind of sums it up. #100daysofmusic #100daysproject #the100dayproject", "id": "1416445372818376597", "likes": {"count": 3}, "comments": {"count": 1}, "comments_disabled": false, "code": "BOoOY_alceV", "is_video": true, "video_views": 42, "date": 1483073468, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/14723712_924037024393881_4717633081979174912_n.jpg?ig_cache_key=MTQxNjQ0NTM3MjgxODM3NjU5Nw%3D%3D.2.c", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/14723712_924037024393881_4717633081979174912_n.jpg?ig_cache_key=MTQxNjQ0NTM3MjgxODM3NjU5Nw%3D%3D.2"}], "count": 1365}, "has_requested_viewer": false, "requested_by_viewer": false, "follows_viewer": false, "full_name": "Aaron Parecki", "country_block": null, "connected_fb_page": null, "blocked_by_viewer": false}} \ No newline at end of file diff --git a/tests/data/www.instagram.com/explore_locations_109284789535230_ b/tests/data/www.instagram.com/explore_locations_109284789535230_ new file mode 100644 index 0000000..18459c7 --- /dev/null +++ 
b/tests/data/www.instagram.com/explore_locations_109284789535230_ @@ -0,0 +1,261 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + + + + + + + +XOXO Outpost • Instagram photos and videos + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/www.instagram.com/kmikeym_?__a=1 b/tests/data/www.instagram.com/kmikeym_?__a=1 new file mode 100644 index 0000000..3e47791 --- /dev/null +++ b/tests/data/www.instagram.com/kmikeym_?__a=1 @@ -0,0 +1,7 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: application/json +Connection: keep-alive + +{"user": {"country_block": null, "followed_by_viewer": false, "is_private": false, "id": "1538723", "full_name": "Mike Merrill", "has_blocked_viewer": false, "is_verified": false, "profile_pic_url_hd": "https://scontent.cdninstagram.com/t51.2885-19/s320x320/12627953_686238411518831_1544976311_a.jpg", "biography": "Financier, Piano Teacher for Dogs, Lover of Markets, and World's Oldest Millennial\u2122", "external_url_linkshimmed": "https://l.instagram.com/?u=https%3A%2F%2Fkmikeym.com%2F\u0026e=ATMP32vRo0XElDpQPhuDxhJDcrSiPxVmPsKywXU5V1NwSia00YcATMLe85NbpoY", "username": "kmikeym", "requested_by_viewer": false, "profile_pic_url": "https://scontent.cdninstagram.com/t51.2885-19/s150x150/12627953_686238411518831_1544976311_a.jpg", "has_requested_viewer": false, "connected_fb_page": null, "follows_viewer": false, "follows": {"count": 558}, "blocked_by_viewer": false, "followed_by": {"count": 6585}, "external_url": "https://kmikeym.com/", "media": {"nodes": [{"dimensions": {"height": 750, "width": 750}, "id": "1423575630100745913", "comments_disabled": false, "comments": {"count": 1}, 
"thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15801830_1831056737110057_2546244586607476736_n.jpg?ig_cache_key=MTQyMzU3NTYzMDEwMDc0NTkxMw%3D%3D.2", "caption": "Every night the family gathers to learn a new song.", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s750x750/sh0.08/e35/15801830_1831056737110057_2546244586607476736_n.jpg?ig_cache_key=MTQyMzU3NTYzMDEwMDc0NTkxMw%3D%3D.2", "date": 1483923461, "likes": {"count": 65}, "is_video": false, "code": "BPBjn4_Ds65", "owner": {"id": "1538723"}}, {"dimensions": {"height": 810, "width": 1080}, "id": "1416384956754503647", "comments_disabled": false, "comments": {"count": 2}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/c135.0.810.810/15625437_162684017546974_1545879177893249024_n.jpg?ig_cache_key=MTQxNjM4NDk1Njc1NDUwMzY0Nw%3D%3D.2.c", "caption": "The \u2600\ufe0f struck my Lean Canvas. THIS MEANS SOMETHING. #god #bigmanupstairs #meaning #leancanvas", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15625437_162684017546974_1545879177893249024_n.jpg?ig_cache_key=MTQxNjM4NDk1Njc1NDUwMzY0Nw%3D%3D.2", "date": 1483066266, "likes": {"count": 42}, "is_video": false, "code": "BOoAp0kjyff", "owner": {"id": "1538723"}}, {"dimensions": {"height": 360, "width": 640}, "likes": {"count": 52}, "id": "1411177753353256727", "comments_disabled": false, "comments": {"count": 0}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/e15/c157.0.405.405/15535512_384984601842479_5090512938502979584_n.jpg?ig_cache_key=MTQxMTE3Nzc1MzM1MzI1NjcyNw%3D%3D.2.c", "caption": "Where did it go?", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15535512_384984601842479_5090512938502979584_n.jpg?ig_cache_key=MTQxMTE3Nzc1MzM1MzI1NjcyNw%3D%3D.2", "date": 1482445519, "video_views": 129, "is_video": true, "code": "BOVgrCfjf8X", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": 
"1406756387552006844", "comments_disabled": false, "comments": {"count": 1}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15538977_1818220835083291_1856515105643036672_n.jpg?ig_cache_key=MTQwNjc1NjM4NzU1MjAwNjg0NA%3D%3D.2", "caption": "Facilitating some deep business thinking with @junebotanicals. Great way to spend an afternoon.", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15538977_1818220835083291_1856515105643036672_n.jpg?ig_cache_key=MTQwNjc1NjM4NzU1MjAwNjg0NA%3D%3D.2", "date": 1481918451, "likes": {"count": 45}, "is_video": false, "code": "BOFzXstjJK8", "owner": {"id": "1538723"}}, {"dimensions": {"height": 640, "width": 640}, "likes": {"count": 64}, "id": "1402046623085512418", "comments_disabled": false, "comments": {"count": 4}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15275470_1661287440835263_5839118329377193984_n.jpg?ig_cache_key=MTQwMjA0NjYyMzA4NTUxMjQxOA%3D%3D.2", "caption": "The ice storm has met its match.", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/e15/15275470_1661287440835263_5839118329377193984_n.jpg?ig_cache_key=MTQwMjA0NjYyMzA4NTUxMjQxOA%3D%3D.2", "date": 1481357003, "video_views": 246, "is_video": true, "code": "BN1EfmvjEbi", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1401027634444199838", "comments_disabled": false, "comments": {"count": 4}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15259081_1814485698828626_7222143260918022144_n.jpg?ig_cache_key=MTQwMTAyNzYzNDQ0NDE5OTgzOA%3D%3D.2", "caption": "Something is wrong with my palm tree... 
\ud83c\udf34\ud83e\udd14", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15259081_1814485698828626_7222143260918022144_n.jpg?ig_cache_key=MTQwMTAyNzYzNDQ0NDE5OTgzOA%3D%3D.2", "date": 1481235530, "likes": {"count": 46}, "is_video": false, "code": "BNxczXnjWee", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1400183189096281784", "comments_disabled": false, "comments": {"count": 5}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15276657_1299530646786408_4726681401250807808_n.jpg?ig_cache_key=MTQwMDE4MzE4OTA5NjI4MTc4NA%3D%3D.2", "caption": "Sometimes it's wet and miserable in the morning and I wish we had a cat. Today is a good day to have a dog. #frosty", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15276657_1299530646786408_4726681401250807808_n.jpg?ig_cache_key=MTQwMDE4MzE4OTA5NjI4MTc4NA%3D%3D.2", "date": 1481134864, "likes": {"count": 40}, "is_video": false, "code": "BNuczEmjra4", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1399385836311409878", "comments_disabled": false, "comments": {"count": 1}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15276667_1463520143677014_7084147293137403904_n.jpg?ig_cache_key=MTM5OTM4NTgzNjMxMTQwOTg3OA%3D%3D.2", "caption": "Had an idea for a software product... used what I learned from startup training camp to do a lean canvas and uncertainty and impact chart. 
My hard question is, how many people want to \"go public\"?", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15276667_1463520143677014_7084147293137403904_n.jpg?ig_cache_key=MTM5OTM4NTgzNjMxMTQwOTg3OA%3D%3D.2", "date": 1481039813, "likes": {"count": 50}, "is_video": false, "code": "BNrngD9DfTW", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1398995694476082518", "comments_disabled": false, "comments": {"count": 0}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15275555_653932024779565_6403470125764182016_n.jpg?ig_cache_key=MTM5ODk5NTY5NDQ3NjA4MjUxOA%3D%3D.2", "caption": "Pretty much everything I've ever learned from @marcusestes is on display in this image. #VR #pcm", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15275555_653932024779565_6403470125764182016_n.jpg?ig_cache_key=MTM5ODk5NTY5NDQ3NjA4MjUxOA%3D%3D.2", "date": 1480993304, "likes": {"count": 67}, "is_video": false, "code": "BNqOywDjD1W", "owner": {"id": "1538723"}}, {"dimensions": {"height": 810, "width": 1080}, "id": "1397274879221250742", "comments_disabled": false, "comments": {"count": 7}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/c135.0.810.810/15099451_1162637233820065_3188070089706438656_n.jpg?ig_cache_key=MTM5NzI3NDg3OTIyMTI1MDc0Mg%3D%3D.2.c", "caption": "My cardboard cutout is a permanent resident of the K5M Head Office reminding the entire neighborhood to get back to work. 
#work #officelife #kmikeym", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15099451_1162637233820065_3188070089706438656_n.jpg?ig_cache_key=MTM5NzI3NDg3OTIyMTI1MDc0Mg%3D%3D.2", "date": 1480788167, "likes": {"count": 97}, "is_video": false, "code": "BNkHhl9jzq2", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1395917317292915182", "comments_disabled": false, "comments": {"count": 0}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15275516_673921266119256_8518261595103559680_n.jpg?ig_cache_key=MTM5NTkxNzMxNzI5MjkxNTE4Mg%3D%3D.2", "caption": "The successful implementation of shareholder question 99. I have been interviewed by the documentary crew of The New Corporation. #pgtips #interview #thenewcorporation", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15275516_673921266119256_8518261595103559680_n.jpg?ig_cache_key=MTM5NTkxNzMxNzI5MjkxNTE4Mg%3D%3D.2", "date": 1480626333, "likes": {"count": 33}, "is_video": false, "code": "BNfS2d5D3nu", "owner": {"id": "1538723"}}, {"dimensions": {"height": 1080, "width": 1080}, "id": "1394513901555383560", "comments_disabled": false, "comments": {"count": 4}, "thumbnail_src": "https://scontent.cdninstagram.com/t51.2885-15/s640x640/sh0.08/e35/15047046_346283595729057_9167489646135869440_n.jpg?ig_cache_key=MTM5NDUxMzkwMTU1NTM4MzU2MA%3D%3D.2", "caption": "Thought provoking pre-Christmas gift. 
#sandwich", "display_src": "https://scontent.cdninstagram.com/t51.2885-15/e35/15047046_346283595729057_9167489646135869440_n.jpg?ig_cache_key=MTM5NDUxMzkwMTU1NTM4MzU2MA%3D%3D.2", "date": 1480459033, "likes": {"count": 60}, "is_video": false, "code": "BNaTwFIjX0I", "owner": {"id": "1538723"}}], "page_info": {"end_cursor": "1394513901555383560", "has_previous_page": false, "start_cursor": "1423575630100745913", "has_next_page": true}, "count": 1436}}} \ No newline at end of file diff --git a/tests/data/www.instagram.com/photo.html b/tests/data/www.instagram.com/photo.html new file mode 100644 index 0000000..40e6441 --- /dev/null +++ b/tests/data/www.instagram.com/photo.html @@ -0,0 +1,271 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + + + + + + + +Instagram + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/www.instagram.com/photo_with_person_tag.html b/tests/data/www.instagram.com/photo_with_person_tag.html new file mode 100644 index 0000000..1d6db51 --- /dev/null +++ b/tests/data/www.instagram.com/photo_with_person_tag.html @@ -0,0 +1,271 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + + + + + + + +Instagram + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/www.instagram.com/photo_with_venue.html b/tests/data/www.instagram.com/photo_with_venue.html new file mode 100644 
index 0000000..d0f7527 --- /dev/null +++ b/tests/data/www.instagram.com/photo_with_venue.html @@ -0,0 +1,271 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + + + + + + + +Instagram + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/data/www.instagram.com/video.html b/tests/data/www.instagram.com/video.html new file mode 100644 index 0000000..e30f990 --- /dev/null +++ b/tests/data/www.instagram.com/video.html @@ -0,0 +1,276 @@ +HTTP/1.1 200 OK +Server: Apache +Date: Wed, 09 Dec 2015 03:29:14 GMT +Content-Type: text/html; charset=utf-8 +Connection: keep-alive + + + + + + + + + + +Instagram + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file