diff --git a/README.md b/README.md index 0df9ed6..31f3064 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ XRay parses structured content from a URL. XRay will parse content in the following formats. First the URL is checked against known services: -* Instagram * Twitter * GitHub * XKCD @@ -420,7 +419,7 @@ Other properties are returned in the response at the same level as the `data` pr * `mf2+json` * `feed+json` * `xml` - * `instagram`/`github`/`xkcd` + * `github`/`xkcd` #### Feeds diff --git a/lib/XRay/Fetcher.php b/lib/XRay/Fetcher.php index 5d7877a..09a5dcd 100644 --- a/lib/XRay/Fetcher.php +++ b/lib/XRay/Fetcher.php @@ -53,11 +53,6 @@ class Fetcher { return Formats\Hackernews::fetch($this->http, $url, $opts); } - // Check if this is an Instagram URL and enable passing a session cookie - if(Formats\Instagram::matches($url)) { - return Formats\Instagram::fetch($this->http, $url, $opts); - } - // All other URLs are fetched normally // Special-case appspot.com URLs to not follow redirects. diff --git a/lib/XRay/Formats/Instagram.php b/lib/XRay/Formats/Instagram.php deleted file mode 100644 index 93d4880..0000000 --- a/lib/XRay/Formats/Instagram.php +++ /dev/null @@ -1,406 +0,0 @@ -get($url, $headers); - - // Check for errors such as getting redirected to the login page or getting rate limiited - /* - // TODO - if(false) { - return [ - 'error' => 'rate_limited', - 'error_description' => 'Instagram has rate limited this client. Please try again later.', - 'url' => $result['url'], - 'code' => $result['code'], - ]; - } - - if(false) { - return [ - 'error' => 'unauthorized', - 'error_description' => 'Instagram redirected to the login page. Either this user is private, or the client has been rate limited.', - 'url' => $result['url'], - 'code' => $result['code'], - ]; - } - */ - - return $result; - } - - public static function parse($http, $http_response, $opts=[]) { - $html = $http_response['body']; - $url = $http_response['url']; - - if(preg_match('#instagram.com/([^/]+)/$#', $url)) { - if(isset($opts['expect']) && $opts['expect'] == 'feed') - return self::parseFeed($http, $html, $url); - else - return self::parseProfile($http, $html, $url); - } else { - return self::parsePhoto($http, $html, $url); - } - } - - private static function parseProfile($http, $html, $url) { - $profileData = self::_parseProfileFromHTML($html); - if(!$profileData) - return self::_unknown(); - - $card = self::_buildHCardFromInstagramProfile($profileData); - - return [ - 'data' => $card, - 'source-format' => 'instagram', - ]; - } - - private static function parseFeed($http, $html, $url) { - $profileData = self::_parseProfileFromHTML($html); - if(!$profileData) - return self::_unknown(); - - $photos = $profileData['edge_owner_to_timeline_media']['edges']; - $items = []; - - foreach($photos as $photoData) { - $item = self::parsePhotoFromData($http, $photoData['node'], - 'https://www.instagram.com/p/'.$photoData['node']['shortcode'].'/', $profileData); - // Note: Not all the photo info is available in the initial JSON. - // Things like video mp4 URLs and person tags and locations are missing. - // Consumers of the feed will need to fetch the photo permalink in order to get all missing information. - // if($photoData['is_video']) - // $item['data']['video'] = true; - $items[] = $item['data']; - } - - return [ - 'data' => [ - 'type' => 'feed', - 'items' => $items, - ], - 'source-format' => 'instagram', - ]; - } - - private static function parsePhoto($http, $html, $url, $profile=false) { - $photoData = self::_extractPhotoDataFromPhotoPage($html); - return self::parsePhotoFromData($http, $photoData, $url, $profile); - } - - private static function altTextIsPlaceholder($text) { - return $text == 'No photo description available.'; - } - - private static function parsePhotoFromData($http, $photoData, $url, $profile=false) { - - if(!$photoData) - return self::_unknown(); - - // Start building the h-entry - $entry = array( - 'type' => 'entry', - 'url' => $url, - 'author' => [ - 'type' => 'card', - 'name' => null, - 'photo' => null, - 'url' => null - ] - ); - - $profiles = []; - - if(!$profile) { - if(isset($photoData['owner'])) { - // Get profile info from the page - $entry['author'] = self::_buildHCardFromInstagramProfile($photoData['owner']); - } - // 2019-10-13 disabling this fetch because profile fetches are severely rate limited now - // // Fetch profile info for this user - // $username = $photoData['owner']['username']; - // $profile = self::_getInstagramProfile($username, $http); - // if($profile) { - // $entry['author'] = self::_buildHCardFromInstagramProfile($profile); - // $profiles[] = $profile; - // } - } else { - $entry['author'] = self::_buildHCardFromInstagramProfile($profile); - $profiles[] = $profile; - } - - // Content and hashtags - $caption = false; - - - if(isset($photoData['caption'])) { - $caption = $photoData['caption']; - } elseif(isset($photoData['edge_media_to_caption']['edges'][0]['node']['text'])) { - $caption = $photoData['edge_media_to_caption']['edges'][0]['node']['text']; - } - - if($caption) { - if(preg_match_all('/#([a-z0-9_-]+)/i', $caption, $matches)) { - $entry['category'] = []; - foreach($matches[1] as $match) { - $entry['category'][] = $match; - } - } - - $entry['content'] = [ - 'text' => $caption - ]; - } - - $refs = []; - $meta = []; - - // Include the photo/video media URLs - // (Always return arrays, even for single images) - if(array_key_exists('edge_sidecar_to_children', $photoData)) { - // Multi-post - // For now, we will only pull photos from multi-posts, and skip videos. - // https://github.com/aaronpk/XRay/issues/84 - - $entry['photo'] = []; - foreach($photoData['edge_sidecar_to_children']['edges'] as $edge) { - $entry['photo'][] = $edge['node']['display_url']; - // Don't need to pull person-tags from here because the main parent object already has them. - if(isset($edge['node']['accessibility_caption']) && $edge['node']['accessibility_caption'] && !self::altTextIsPlaceholder($edge['node']['accessibility_caption'])) { - $meta[$edge['node']['display_url']] = [ - 'alt' => $edge['node']['accessibility_caption'] - ]; - } - } - - } else { - // Single photo or video - - if(array_key_exists('display_src', $photoData)) - $entry['photo'] = [$photoData['display_src']]; - elseif(array_key_exists('display_url', $photoData)) - $entry['photo'] = [$photoData['display_url']]; - - if(isset($photoData['accessibility_caption']) && $photoData['accessibility_caption'] && !self::altTextIsPlaceholder($photoData['accessibility_caption'])) { - $meta[$entry['photo'][0]] = [ - 'alt' => $photoData['accessibility_caption'] - ]; - } - - if(isset($photoData['is_video']) && $photoData['is_video'] && isset($photoData['video_url'])) { - $entry['video'] = [$photoData['video_url']]; - } - } - - // Find person tags and fetch user profiles - if(isset($photoData['edge_media_to_tagged_user']['edges'])) { - if(!isset($entry['category'])) $entry['category'] = []; - - foreach($photoData['edge_media_to_tagged_user']['edges'] as $edge) { - $profile = self::_getInstagramProfile($edge['node']['user']['username'], $http); - if($profile) { - $card = self::_buildHCardFromInstagramProfile($profile); - $entry['category'][] = $card['url']; - $refs[$card['url']] = $card; - $profiles[] = $profile; - } - } - } - - // Published date - if(isset($photoData['taken_at_timestamp'])) - $published = DateTime::createFromFormat('U', $photoData['taken_at_timestamp']); - elseif(isset($photoData['date'])) - $published = DateTime::createFromFormat('U', $photoData['date']); - - // Include venue data - $locations = []; - if(isset($photoData['location'])) { - $location = self::_getInstagramLocation($photoData['location']['id'], $http); - if($location) { - $entry['location'] = [$location['url']]; - $refs[$location['url']] = $location; - $locations[] = $location; - - // Look up timezone - if($location['latitude']) { - $tz = \p3k\Timezone::timezone_for_location($location['latitude'], $location['longitude']); - if($tz) { - $published->setTimeZone(new DateTimeZone($tz)); - } - } - } - } - - $entry['published'] = $published->format('c'); - - if(count($refs)) { - $entry['refs'] = $refs; - } - - if(count($meta)) { - $entry['meta'] = $meta; - } - - $entry['post-type'] = \p3k\XRay\PostType::discover($entry); - - return [ - 'data' => $entry, - 'original' => json_encode([ - 'photo' => $photoData, - 'profiles' => $profiles, - 'locations' => $locations - ]), - 'source-format' => 'instagram', - ]; - } - - private static function _buildHCardFromInstagramProfile($profile) { - if(!$profile) return false; - - $author = [ - 'type' => 'card' - ]; - - if($profile['full_name']) - $author['name'] = $profile['full_name']; - else - $author['name'] = $profile['username']; - - $author['nickname'] = $profile['username']; - - $author['url'] = 'https://www.instagram.com/' . $profile['username'] . '/'; - - if(isset($profile['profile_pic_url_hd'])) - $author['photo'] = $profile['profile_pic_url_hd']; - elseif(isset($profile['profile_pic_url'])) - $author['photo'] = $profile['profile_pic_url']; - - if(isset($profile['biography'])) - $author['note'] = $profile['biography']; - - return $author; - } - - private static function _getInstagramProfile($username, $http) { - $response = $http->get('https://www.instagram.com/'.$username.'/'); - - if(!$response['error']) - return self::_parseProfileFromHTML($response['body']); - - return null; - } - - private static function _parseProfileFromHTML($html) { - $data = self::_extractIGData($html); - if(isset($data['entry_data']['ProfilePage'][0])) { - $profile = $data['entry_data']['ProfilePage'][0]; - if($profile && isset($profile['graphql']['user'])) { - $user = $profile['graphql']['user']; - return $user; - } - } - return null; - } - - private static function _getInstagramLocation($id, $http) { - $igURL = 'https://www.instagram.com/explore/locations/'.$id.'/'; - $response = $http->get($igURL); - if($response['body']) { - $data = self::_extractVenueDataFromVenuePage($response['body']); - if($data) { - return [ - 'type' => 'card', - 'name' => $data['name'], - 'url' => $igURL, - 'latitude' => $data['lat'], - 'longitude' => $data['lng'], - ]; - } - } - return null; - } - - private static function _extractPhotoDataFromPhotoPage($html) { - $data = self::_extractIGData($html); - - if($data && is_array($data) && array_key_exists('entry_data', $data)) { - if(is_array($data['entry_data']) && array_key_exists('PostPage', $data['entry_data'])) { - $post = $data['entry_data']['PostPage']; - if(isset($post[0]['graphql']['shortcode_media'])) { - return $post[0]['graphql']['shortcode_media']; - } elseif(isset($post[0]['graphql']['media'])) { - return $post[0]['graphql']['media']; - } elseif(isset($post[0]['media'])) { - return $post[0]['media']; - } - } - } - - return null; - } - - private static function _extractVenueDataFromVenuePage($html) { - $data = self::_extractIGData($html); - - if($data && isset($data['entry_data']['LocationsPage'])) { - $data = $data['entry_data']['LocationsPage']; - if(isset($data[0]['graphql']['location'])) { - $location = $data[0]['graphql']['location']; - - # we don't need these and they're huge, so drop them now - unset($location['media']); - unset($location['top_posts']); - - return $location; - } - } - - return null; - } - - private static function _extractIGData($html) { - $doc = new DOMDocument(); - @$doc->loadHTML($html); - - if(!$doc) { - return null; - } - - $xpath = new DOMXPath($doc); - - $data = null; - - foreach($xpath->query('//script') as $script) { - if(preg_match('/window\._sharedData = ({.+});/', $script->textContent, $match)) { - $data = json_decode($match[1], true); - } - } - - return $data; - } - -} diff --git a/lib/XRay/Parser.php b/lib/XRay/Parser.php index 45b4eb9..f7b405d 100644 --- a/lib/XRay/Parser.php +++ b/lib/XRay/Parser.php @@ -79,10 +79,6 @@ class Parser { // Check if the URL matches a special parser $url = $http_response['url']; - if(Formats\Instagram::matches($url)) { - return Formats\Instagram::parse($this->http, $http_response, $opts); - } - if(Formats\GitHub::matches($url)) { return Formats\GitHub::parse($http_response); } diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php deleted file mode 100644 index e45701c..0000000 --- a/tests/InstagramTest.php +++ /dev/null @@ -1,324 +0,0 @@ -client = new Parse(); - $this->client->http = new p3k\HTTP\Test(dirname(__FILE__).'/data/'); - $this->client->mc = null; - } - - private function parse($params) - { - $request = new Request($params); - $response = new Response(); - return $this->client->parse($request, $response); - } - - public function testInstagramPhoto() - { - // Original URL: https://www.instagram.com/p/BO5rYVElvJq/ - $url = 'https://www.instagram.com/p/BO5rYVElvJq/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - $this->assertEquals('entry', $data['data']['type']); - $this->assertEquals('photo', $data['data']['post-type']); - $this->assertEquals('2017-01-05T23:31:32+00:00', $data['data']['published']); - $this->assertContains('planning', $data['data']['category']); - $this->assertContains('2017', $data['data']['category']); - $this->assertEquals('Kind of crazy to see the whole year laid out like this. #planning #2017', $data['data']['content']['text']); - $this->assertEquals(1, count($data['data']['photo'])); - $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/af9471f885e6197478d71807a7cbf297/5CBA6E5F/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $data['data']['photo']); - $this->assertEquals('https://www.instagram.com/aaronpk/', $data['data']['author']['url']); - $this->assertEquals('Aaron Parecki', $data['data']['author']['name']); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/a2909937316893f18760f1077ca88fa1/5CBB520A/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['author']['photo']); - $this->assertArrayNotHasKey('meta', $data['data']); // make sure this does not include alt text (autogenerated placeholder from instagram) - } - - public function testBGDpqNoiMJ0() - { - // https://www.instagram.com/p/BGDpqNoiMJ0/ - $url = 'http://www.instagram.com/BGDpqNoiMJ0'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('entry', $data['data']['type']); - $this->assertEquals('photo', $data['data']['post-type']); - $this->assertSame( - [ - 'type' => 'card', - 'name' => 'pk_spam', - 'nickname' => 'pk_spam', - 'url' => 'https://www.instagram.com/pk_spam/', - 'photo' => 'https://scontent-frx5-1.cdninstagram.com/vp/f17e1275a70fc32e93cbf434ddc32bcd/5B6CCC7A/t51.2885-19/11906329_960233084022564_1448528159_a.jpg', - ], $data['data']['author'] - ); - - $this->assertSame( - [ - 'muffins', - 'https://www.instagram.com/indiewebcat/' - ], $data['data']['category'] - ); - - $this->assertEquals('Meow #muffins', $data['data']['content']['text']); - $this->assertSame(['https://instagram.fsea1-1.fna.fbcdn.net/vp/9433ea494a8b055bebabf70fd81cfa32/5B51F092/t51.2885-15/e35/13266755_877794672348882_1908663476_n.jpg'], $data['data']['photo']); - $this->assertEquals('2016-05-30T20:46:22-07:00', $data['data']['published']); - - $this->assertEquals('https://www.instagram.com/explore/locations/359000003/', $data['data']['location'][0]); - - $this->assertSame( - [ - 'type' => 'card', - 'name' => 'Burnside 26', - 'url' => 'https://www.instagram.com/explore/locations/359000003/', - 'latitude' => 45.52322, - 'longitude' => -122.63885 - ], $data['data']['refs']['https://www.instagram.com/explore/locations/359000003/'] - ); - } - - public function testInstagramVideo() - { - // Original URL: https://www.instagram.com/p/BO_RN8AFZSx/ - $url = 'https://www.instagram.com/p/BO_RN8AFZSx/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('entry', $data['data']['type']); - $this->assertEquals('video', $data['data']['post-type']); - $this->assertContains('100daysofmusic', $data['data']['category']); - $this->assertEquals('Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $data['data']['content']['text']); - $this->assertEquals(1, count($data['data']['photo'])); - $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a77f8672f977413d2eb5239cd6d5c4cf/5C3A4ADF/t51.2885-15/e15/15624670_548881701986735_8264383763249627136_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $data['data']['photo']); - $this->assertEquals(1, count($data['data']['video'])); - $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/90ed8fe576cba16e258c0f4cfc05299a/5C3A129E/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $data['data']['video']); - $this->assertEquals('https://www.instagram.com/aaronpk/', $data['data']['author']['url']); - $this->assertEquals('Aaron Parecki', $data['data']['author']['name']); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/a2909937316893f18760f1077ca88fa1/5CBB520A/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['author']['photo']); - } - - public function testInstagramPhotoWithPersonTag() - { - // Original URL: https://www.instagram.com/p/BNfqVfVlmkj/ - $url = 'https://www.instagram.com/p/BNfqVfVlmkj/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals(2, count($data['data']['category'])); - $this->assertEquals(['type'=>'card','name'=>'KmikeyM™️','nickname'=>'kmikeym','url'=>'https://www.instagram.com/kmikeym/','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/vp/ea5b988b616dbcc778b3013bf2426d70/5CCAC7FC/t51.2885-19/s320x320/20634957_814691788710973_2275383796935163904_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net','note'=>"The world’s first publicly traded person.\n•\nAcct in collaboration with @norbertoinc\n•\nBecome a shareholder today!\n•"], $data['data']['refs']['https://www.instagram.com/kmikeym/']); - $this->assertContains('https://www.instagram.com/kmikeym/', $data['data']['category']); - $this->assertArrayHasKey('https://www.instagram.com/kmikeym/', $data['data']['refs']); - } - - public function testInstagramPhotoWithVenue() - { - // Original URL: https://www.instagram.com/p/BN3Z5salSys/ - $url = 'https://www.instagram.com/p/BN3Z5salSys/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals(1, count($data['data']['location'])); - $this->assertContains('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['location']); - $this->assertArrayHasKey('https://www.instagram.com/explore/locations/109284789535230/', $data['data']['refs']); - $venue = $data['data']['refs']['https://www.instagram.com/explore/locations/109284789535230/']; - $this->assertEquals('XOXO Outpost', $venue['name']); - $this->assertEquals('45.5261002', $venue['latitude']); - $this->assertEquals('-122.6558081', $venue['longitude']); - // Setting a venue should set the timezone - $this->assertEquals('2016-12-10T21:48:56-08:00', $data['data']['published']); - } - - public function testTwoPhotos() - { - // Original URL: https://www.instagram.com/p/BZWmUB_DVtp/ - $url = 'https://www.instagram.com/p/BZWmUB_DVtp/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals(2, count($data['data']['photo'])); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/6b09c3d5490ee3efb55849858a9ec014/5CBFBC38/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['photo'][0]); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/8b1b2e6efa86a4856ec37a60f0fa77f5/5CC2D34D/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['photo'][1]); - $this->assertArrayNotHasKey('video', $data['data']); - $this->assertEquals(2, count($data['data']['category'])); - $this->assertArrayNotHasKey('meta', $data['data']); - } - - public function testMixPhotosAndVideos() - { - // Original URL: https://www.instagram.com/p/BZWmpecjBwN/ - $url = 'https://www.instagram.com/p/BZWmpecjBwN/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('photo', $data['data']['post-type']); // we discard videos in this case right now - $this->assertEquals(3, count($data['data']['photo'])); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/ee1a28763918069f3e54dad35be24ad8/5CCFBAB8/t51.2885-15/e35/21878922_686481254874005_8468823712617988096_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['photo'][0]); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/ddc0ebe969bb1f9e6bf8adada0892c90/5C39EBC9/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['photo'][1]); - $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/bfe032af795427443ea448840df1c3a4/5CCC8C88/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $data['data']['photo'][2]); - $this->assertArrayNotHasKey('video', $data['data']); - $this->assertEquals(2, count($data['data']['category'])); - } - - public function testInstagramProfile() - { - $url = 'https://www.instagram.com/aaronpk/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertSame( - [ - 'type' => 'card', - 'name' => 'Aaron Parecki', - 'nickname' => 'aaronpk', - 'url' => 'https://www.instagram.com/aaronpk/', - 'photo' => 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/45aee453740a714bf408f8947f89da8e/5CCB4B8E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', - 'note' => '🔒 oauth.net 🎥 backpedal.tv 🎙 streampdx.com 📡 w7apk.com' - ], $data['data'] - ); - } - - public function testInstagramProfileWithBio() - { - $url = 'https://www.instagram.com/pk_spam/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertSame( - [ - 'type' => 'card', - 'name' => 'pk_spam', - 'nickname' => 'pk_spam', - 'url' => 'https://www.instagram.com/pk_spam/', - 'photo' => 'https://scontent-frx5-1.cdninstagram.com/vp/74112f515c64726429c69fedcb927c2d/5CB64CF1/t51.2885-19/44884218_345707102882519_2446069589734326272_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com', - 'note' => 'My website is https://aaronparecki.com.dev/ and http://aaronpk.micro.blog/about/ and https://tiny.xyz.dev/' - ], $data['data'] - ); - } - - public function testInstagramProfileFeed() - { - $url = 'https://www.instagram.com/pk_spam/'; - $response = $this->parse(['url' => $url, 'expect' => 'feed']); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('feed', $data['data']['type']); - $this->assertEquals(12, count($data['data']['items'])); - $this->assertEquals('https://www.instagram.com/p/BsdlOmLh_IX/', $data['data']['items'][0]['url']); - $this->assertEquals('https://www.instagram.com/p/BGFdtAViMJy/', $data['data']['items'][11]['url']); - } - - public function testInstagramPhotoWithAltText() - { - $url = 'https://www.instagram.com/p/BsdjKytBZyx/'; - - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('Pink text on a white background that says "Photo with alt text"', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a7e61adf3d84f07863ffdb99f0fdcc86/5CD9B7F3/t51.2885-15/e35/47692478_2276538359047529_8318084305806697090_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); - } - - public function testInstagramMultiPhotoWithAltText() - { - $url = 'https://www.instagram.com/p/BsdlOmLh_IX/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('A large pink "1" in a circle with a small green "2" behind it', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/90bf019b7396d7bc2b1ee02170902a2e/5CCC9B87/t51.2885-15/e35/47692921_321791688431421_3314633848293773579_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); - $this->assertEquals('A large green "2" in a circle with a small pink "1" behind it', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a6c93d8fcd5ad0e3b60f2ac0695eb34e/5CC3898E/t51.2885-15/e35/49663055_349750985612151_2949260446582336214_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); - } - - public function testInstagramPhotoAutogeneratedAltText() - { - $url = 'https://www.instagram.com/p/Bq8U12UAcdq/'; - $response = $this->parse(['url' => $url]); - - $body = $response->getContent(); - $this->assertEquals(200, $response->getStatusCode()); - $data = json_decode($body, true); - - $this->assertEquals(200, $data['code']); - $this->assertEquals('instagram', $data['source-format']); - - $this->assertEquals('Image may contain: one or more people and hat', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/7f8954f33de897c0c57656b798637f4c/5CC3DF9F/t51.2885-15/e35/45605085_1989380037822519_4707213851165118070_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); - } - -} diff --git a/tests/data/www.instagram.com/BGDpqNoiMJ0 b/tests/data/www.instagram.com/BGDpqNoiMJ0 deleted file mode 100644 index 7adfb13..0000000 --- a/tests/data/www.instagram.com/BGDpqNoiMJ0 +++ /dev/null @@ -1,204 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html -X-Frame-Options: SAMEORIGIN -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -Vary: Cookie, Accept-Language, Accept-Encoding -Content-Language: en -Date: Thu, 19 Apr 2018 16:23:40 GMT -Strict-Transport-Security: max-age=86400 -Set-Cookie: rur=FRC; Path=/ -Set-Cookie: csrftoken=DbxkxtTYJxTaGsTkmceKre2mJ0lVHZ4O; expires=Thu, 18-Apr-2019 16:23:40 GMT; Max-Age=31449600; Path=/; Secure -Set-Cookie: mid=WtjCjAAEAAEYeWicZ2tRKT2vUAcw; expires=Wed, 14-Apr-2038 16:23:40 GMT; Max-Age=630720000; Path=/ -Set-Cookie: urlgen="{\"time\": 1524155020}:1f9CLU:Jpd7aGlPjH5xvvgWcaZXyzwsJRs"; Path=/ -Connection: keep-alive -Content-Length: 23132 - - - - - - - -@pk_spam on Instagram: “Meow #muffins” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/aaronpk_ b/tests/data/www.instagram.com/aaronpk_ deleted file mode 100644 index 6ace579..0000000 --- a/tests/data/www.instagram.com/aaronpk_ +++ /dev/null @@ -1,312 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:09 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfET:KEbgZ2M9MXE_BmpTUiUwXmc1C7Y"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRTQAEAAHmtCUUKXPD6xfL2KKN; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:09 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:09 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=IOQ3tuosAyMxJWIuczdurkNhhbHWDDg3; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:09 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 63710 - - - - - - - - -Aaron Parecki (@aaronpk) • Instagram photos and videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/explore_locations_109284789535230_ b/tests/data/www.instagram.com/explore_locations_109284789535230_ deleted file mode 100644 index 97cad04..0000000 --- a/tests/data/www.instagram.com/explore_locations_109284789535230_ +++ /dev/null @@ -1,303 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:38 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEw:Zrho3kCblfqpRBnPW1wRP55ED8s"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRaQAEAAHIfRJ0BM8F4thMSHKn; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:38 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:38 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=onHtyASna5Co5LS1n34AE6E4h1Hm0VbS; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:38 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 105764 - - - - - - - - -XOXO Outpost on Instagram • Photos and Videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/explore_locations_359000003_ b/tests/data/www.instagram.com/explore_locations_359000003_ deleted file mode 100644 index 52187d5..0000000 --- a/tests/data/www.instagram.com/explore_locations_359000003_ +++ /dev/null @@ -1,303 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:41 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRbAAEAAE1bMywvOoaEiuOxgSl; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:41 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEz:3wKu1OslQooDHQVB9l48-QQmmn4"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:41 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=nSPG6yUChjQdK9r8pBu1ekOeNhlP2lVA; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:41 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 104576 - - - - - - - - -Burnside 26 on Instagram • Photos and Videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/indiewebcat_ b/tests/data/www.instagram.com/indiewebcat_ deleted file mode 100644 index 4df61ef..0000000 --- a/tests/data/www.instagram.com/indiewebcat_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:11 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRTwAEAAHginklHSspUTKzWhjT; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:11 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEV:Rt89ZIdO939cibtdOrb9M7fLvOk"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:11 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=2VcQeMPKG6waJpQisIrMeQrxgDVDjW6P; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:11 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 66623 - - - - - - - - -Dora (@indiewebcat) • Instagram photos and videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/kmikeym_ b/tests/data/www.instagram.com/kmikeym_ deleted file mode 100644 index 3b9f29b..0000000 --- a/tests/data/www.instagram.com/kmikeym_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:14 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRUgAEAAF7oFTO68Wf7_ZrYhPp; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:14 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEY:JoMs879m8nOA_JvffoEBMxn9qvc"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:14 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=SEPGSA8YCB9ckMmvPVLDjqbj0G3Ssxjf; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:14 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 56963 - - - - - - - - -KmikeyM™️ (@kmikeym) • Instagram photos and videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/microformats_ b/tests/data/www.instagram.com/microformats_ deleted file mode 100644 index ef70b27..0000000 --- a/tests/data/www.instagram.com/microformats_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:16 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRVAAEAAFYa87b9FVDxn7T54n8; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:16 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEa:Sk6tRDTp7FAM6grsCt11qngeP-I"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:16 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=ezVJs93GiaGzIlBw6ke2dTzrGZJMORx6; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:16 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 33979 - - - - - - - - -@microformats • Instagram photos and videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BGDpqNoiMJ0_ b/tests/data/www.instagram.com/p_BGDpqNoiMJ0_ deleted file mode 100644 index 8284952..0000000 --- a/tests/data/www.instagram.com/p_BGDpqNoiMJ0_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:23 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRWwAEAAEtad-PZiH-LfIuttoC; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:23 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEh:FSM6XcxaV1AwOzgNFxJxnYVeoM4"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:23 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=z3NYvbLvqlx0nJ3ZyBsE3b6mQIDbOUlu; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:23 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 34928 - - - - - - - - -@pk_spam on Instagram: “Meow #muffins” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BN3Z5salSys_ b/tests/data/www.instagram.com/p_BN3Z5salSys_ deleted file mode 100644 index 34f0c10..0000000 --- a/tests/data/www.instagram.com/p_BN3Z5salSys_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:30 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRYgAEAAEgKff_6RtCHX0fIcfG; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:30 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEo:SUAJwpCs99oHHTky_5f_6lXNKzw"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:30 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=gBEa8QEBGkSeaNGH4fGKnSEFF1yZeGpD; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:30 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 36306 - - - - - - - - -Aaron Parecki on Instagram: “Super thrilled about the launch of our podcast studio in an Airstream! It's been a fun day of a dozen people recording podcast episodes for…” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BNfqVfVlmkj_ b/tests/data/www.instagram.com/p_BNfqVfVlmkj_ deleted file mode 100644 index e8d40d4..0000000 --- a/tests/data/www.instagram.com/p_BNfqVfVlmkj_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:28 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRYAAEAAHxlVcLKJ2sbtQzFCZX; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:28 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEm:COLhSHYz_bKqkZsIC9w7OF6OYjs"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:28 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=jn2PWTjhc7r9d6XvQMrdDdeXuLdCxgg3; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:28 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 33749 - - - - - - - - -Aaron Parecki on Instagram: “Streaming the #kmikeym shareholder meeting!” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BO5rYVElvJq_ b/tests/data/www.instagram.com/p_BO5rYVElvJq_ deleted file mode 100644 index 12003cb..0000000 --- a/tests/data/www.instagram.com/p_BO5rYVElvJq_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:21 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEf:3FfukxjqM2IKU2wWTrWL03dPYUg"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRWQAEAAFgl_yvYPGRIAXLsocG; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:21 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:21 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=qPuCyvERpxY9wVbwJP8ndmVE0u7qjqZE; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:21 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 34750 - - - - - - - - -Aaron Parecki on Instagram: “Kind of crazy to see the whole year laid out like this. #planning #2017” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BO_RN8AFZSx_ b/tests/data/www.instagram.com/p_BO_RN8AFZSx_ deleted file mode 100644 index 5e2dbfd..0000000 --- a/tests/data/www.instagram.com/p_BO_RN8AFZSx_ +++ /dev/null @@ -1,321 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:26 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEk:CMpnbQJqlwVL8ZC_DNO4hay-4wE"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRXQAEAAGI0Dwi4MSFsX7ZlcQb; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:26 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:26 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=Y2wGO5etEx24WF165IPhKsRNWlVpYbbL; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:26 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 35288 - - - - - - - - -Aaron Parecki on Instagram: “Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BZWmUB_DVtp_ b/tests/data/www.instagram.com/p_BZWmUB_DVtp_ deleted file mode 100644 index 859bd81..0000000 --- a/tests/data/www.instagram.com/p_BZWmUB_DVtp_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:33 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEr:7FA5vdwr0XWX5B9nfyC8-BEXCP4"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRZQAEAAEdXp4u8v_oLKl5Fu0J; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:33 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:33 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=1tbwWnNlz4fyLZ7h6jPplWJ2vlpPDshm; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:33 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 38539 - - - - - - - - -@pk_spam on Instagram: “Two photos, the first tagged with one person and the second tagged with two people” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BZWmpecjBwN_ b/tests/data/www.instagram.com/p_BZWmpecjBwN_ deleted file mode 100644 index ad8c3e9..0000000 --- a/tests/data/www.instagram.com/p_BZWmpecjBwN_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:35 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEt:UjYdmkV-_el6KPg7Cs6-kI2cBDM"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRZwAEAAEw2JOOWcyaCaYTUyXi; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:35 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:35 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=QMRPKFV88uqB7dCMZtfy9IHFxWJMG6g5; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:35 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 40394 - - - - - - - - -@pk_spam on Instagram: “Two photos and one video, and some people are tagged in the photos” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_Bq8U12UAcdq_ b/tests/data/www.instagram.com/p_Bq8U12UAcdq_ deleted file mode 100644 index 315318b..0000000 --- a/tests/data/www.instagram.com/p_Bq8U12UAcdq_ +++ /dev/null @@ -1,312 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:59:50 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfYU:eKaA3HGZXpJpYX969QrXgbecnUg"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeWJgAEAAHA4fKyESvVBDv4JZ3i; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:59:50 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:59:50 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=9pFPcSYEu3Fc2hSMz4qJzhR6jImNc3ol; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:59:50 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 32756 - - - - - - - - -Ryan B on Instagram: “🤔” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BsdjKytBZyx_ b/tests/data/www.instagram.com/p_BsdjKytBZyx_ deleted file mode 100644 index 94aa65b..0000000 --- a/tests/data/www.instagram.com/p_BsdjKytBZyx_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:44 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: mid=XDeRcAAEAAFXCkGoC_qfrvCdRyFn; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:44 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfF2:CTXqJSA1V33HDwK6tJ-lhYSJQyI"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:44 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=tOzYD3M0zNc2Pfn4mrmcLmBO69WWUwZ0; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:44 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 32922 - - - - - - - - -@pk_spam on Instagram: “This photo has alt text” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/p_BsdlOmLh_IX_ b/tests/data/www.instagram.com/p_BsdlOmLh_IX_ deleted file mode 100644 index dd64ce0..0000000 --- a/tests/data/www.instagram.com/p_BsdlOmLh_IX_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:46 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfF4:DaNIwTxtPN2sXTcJ3Gj1J2XK8OU"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRcgAEAAERmKP3pKqZ02n4zJvn; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:46 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:46 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=BhnSMXtLPNIypS1PoAD57VWTnLH1t4CA; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:46 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 36322 - - - - - - - - -@pk_spam on Instagram: “This post has two photos with alt text” - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/data/www.instagram.com/pk_spam_ b/tests/data/www.instagram.com/pk_spam_ deleted file mode 100644 index 1171f8b..0000000 --- a/tests/data/www.instagram.com/pk_spam_ +++ /dev/null @@ -1,316 +0,0 @@ -HTTP/1.1 200 OK -Content-Type: text/html; charset=utf-8 -Vary: Accept-Language, Cookie, Accept-Encoding -Content-Language: en -Date: Thu, 10 Jan 2019 18:39:19 GMT -Strict-Transport-Security: max-age=3600 -Cache-Control: private, no-cache, no-store, must-revalidate -Pragma: no-cache -Expires: Sat, 01 Jan 2000 00:00:00 GMT -X-Frame-Options: SAMEORIGIN -content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests -X-Content-Type-Options: nosniff -X-XSS-Protection: 0 -Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghfEd:_3iFGSS6jc4J7NZAI5kjGipgvgU"; Domain=.instagram.com; HttpOnly; Path=/; Secure -Set-Cookie: mid=XDeRVgAEAAHu_1vK6bJWo60YJPFD; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:19 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:39:19 GMT; Max-Age=315360000; Path=/; Secure -Set-Cookie: csrftoken=BYZ3MAAFYWZhLCYnNSWHNvHZsb8sklnh; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:39:19 GMT; Max-Age=31449600; Path=/; Secure -Connection: keep-alive -Content-Length: 61661 - - - - - - - - -@pk_spam • Instagram photos and videos - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/tests/download-instagram-data.sh b/tests/download-instagram-data.sh deleted file mode 100755 index ae3b9ed..0000000 --- a/tests/download-instagram-data.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -urls=( - 'https://www.instagram.com/aaronpk/' - 'https://www.instagram.com/indiewebcat/' - 'https://www.instagram.com/kmikeym/' - 'https://www.instagram.com/microformats/' - 'https://www.instagram.com/pk_spam/' - 'https://www.instagram.com/p/BO5rYVElvJq/' - 'https://www.instagram.com/p/BGDpqNoiMJ0/' - 'https://www.instagram.com/p/BO_RN8AFZSx/' - 'https://www.instagram.com/p/BNfqVfVlmkj/' - 'https://www.instagram.com/p/BN3Z5salSys/' - 'https://www.instagram.com/p/BZWmUB_DVtp/' - 'https://www.instagram.com/p/BZWmpecjBwN/' - 'https://www.instagram.com/explore/locations/109284789535230/' - 'https://www.instagram.com/explore/locations/359000003/' - 'https://www.instagram.com/p/BsdjKytBZyx/' - 'https://www.instagram.com/p/BsdlOmLh_IX/' - 'https://www.instagram.com/p/Bq8U12UAcdq/' -) - -for url in ${urls[@]}; do - fn=$(echo $url | sed 's#https://www.instagram.com/##' | sed 's#/#_#g') - echo "$url > $fn" - curl -i -s $url > data/www.instagram.com/$fn - unix2dos data/www.instagram.com/$fn - sleep 2 -done -