Browse Source

remove instagram

pull/108/head v1.12.0
Aaron Parecki 3 years ago
parent
commit
3e63e841bf
24 changed files with 1 addition and 6318 deletions
  1. +1
    -2
      README.md
  2. +0
    -5
      lib/XRay/Fetcher.php
  3. +0
    -406
      lib/XRay/Formats/Instagram.php
  4. +0
    -4
      lib/XRay/Parser.php
  5. +0
    -324
      tests/InstagramTest.php
  6. +0
    -204
      tests/data/www.instagram.com/BGDpqNoiMJ0
  7. +0
    -312
      tests/data/www.instagram.com/aaronpk_
  8. +0
    -303
      tests/data/www.instagram.com/explore_locations_109284789535230_
  9. +0
    -303
      tests/data/www.instagram.com/explore_locations_359000003_
  10. +0
    -316
      tests/data/www.instagram.com/indiewebcat_
  11. +0
    -316
      tests/data/www.instagram.com/kmikeym_
  12. +0
    -316
      tests/data/www.instagram.com/microformats_
  13. +0
    -316
      tests/data/www.instagram.com/p_BGDpqNoiMJ0_
  14. +0
    -316
      tests/data/www.instagram.com/p_BN3Z5salSys_
  15. +0
    -316
      tests/data/www.instagram.com/p_BNfqVfVlmkj_
  16. +0
    -316
      tests/data/www.instagram.com/p_BO5rYVElvJq_
  17. +0
    -321
      tests/data/www.instagram.com/p_BO_RN8AFZSx_
  18. +0
    -316
      tests/data/www.instagram.com/p_BZWmUB_DVtp_
  19. +0
    -316
      tests/data/www.instagram.com/p_BZWmpecjBwN_
  20. +0
    -312
      tests/data/www.instagram.com/p_Bq8U12UAcdq_
  21. +0
    -316
      tests/data/www.instagram.com/p_BsdjKytBZyx_
  22. +0
    -316
      tests/data/www.instagram.com/p_BsdlOmLh_IX_
  23. +0
    -316
      tests/data/www.instagram.com/pk_spam_
  24. +0
    -30
      tests/download-instagram-data.sh

+ 1
- 2
README.md View File

@ -8,7 +8,6 @@ XRay parses structured content from a URL.
XRay will parse content in the following formats. First the URL is checked against known services:
* Instagram
* Twitter
* GitHub
* XKCD
@ -420,7 +419,7 @@ Other properties are returned in the response at the same level as the `data` pr
* `mf2+json`
* `feed+json`
* `xml`
* `instagram`/`github`/`xkcd`
* `github`/`xkcd`
#### Feeds

+ 0
- 5
lib/XRay/Fetcher.php View File

@ -53,11 +53,6 @@ class Fetcher {
return Formats\Hackernews::fetch($this->http, $url, $opts);
}
// Check if this is an Instagram URL and enable passing a session cookie
if(Formats\Instagram::matches($url)) {
return Formats\Instagram::fetch($this->http, $url, $opts);
}
// All other URLs are fetched normally
// Special-case appspot.com URLs to not follow redirects.

+ 0
- 406
lib/XRay/Formats/Instagram.php View File

@ -1,406 +0,0 @@
<?php
namespace p3k\XRay\Formats;
use DOMDocument, DOMXPath;
use DateTime, DateTimeZone;
class Instagram extends Format {
/**
 * Report whether a URL points at an Instagram host.
 *
 * Host names are case-insensitive, so the host is lowercased before the
 * comparison (previously "WWW.INSTAGRAM.COM" was rejected), and the
 * comparison is strict so a missing host can never match.
 *
 * @param string $url URL to inspect.
 * @return bool True when the host is instagram.com or www.instagram.com.
 */
public static function matches_host($url) {
  $host = parse_url($url, PHP_URL_HOST);
  // parse_url() yields null/false when the URL has no host or is malformed
  if(!is_string($host))
    return false;
  return in_array(strtolower($host), ['www.instagram.com', 'instagram.com'], true);
}
/**
 * Decide whether this format handles the given URL.
 *
 * Currently this is purely a host check delegated to matches_host().
 *
 * @param string $url URL to inspect.
 * @return bool
 */
public static function matches($url) {
  $isInstagramHost = self::matches_host($url);
  return $isInstagramHost;
}
/**
 * Fetch an Instagram URL, optionally sending a session cookie.
 *
 * @param object $http Client exposing get($url, $headers).
 * @param string $url  URL to fetch.
 * @param array  $opts When `instagram_session` is set and truthy it is sent
 *                     as the `sessionid` cookie.
 * @return mixed False when the URL is not an Instagram URL, otherwise
 *               whatever $http->get() returns.
 */
public static function fetch($http, $url, $opts=[]) {
  if(self::matches($url)) {
    $sessionId = isset($opts['instagram_session']) ? $opts['instagram_session'] : false;
    $requestHeaders = $sessionId ? ['Cookie: sessionid='.$sessionId] : [];

    // TODO: detect errors such as being redirected to the login page or
    // getting rate limited, and return an error array (keys: error,
    // error_description, url, code) using 'rate_limited' / 'unauthorized'
    // instead of the raw response. Not implemented yet.
    return $http->get($url, $requestHeaders);
  }
  return false;
}
/**
 * Dispatch an HTTP response to the profile, feed or photo parser.
 *
 * URLs ending in a single path segment after "instagram.com/" are treated as
 * profile pages (or feeds when $opts['expect'] is 'feed'); everything else is
 * parsed as a photo page.
 *
 * @param object $http          HTTP client handed on to the parsers.
 * @param array  $http_response Must contain `body` and `url`.
 * @param array  $opts          Optional `expect` hint.
 * @return array Parser result.
 */
public static function parse($http, $http_response, $opts=[]) {
  $body = $http_response['body'];
  $pageUrl = $http_response['url'];

  if(!preg_match('#instagram.com/([^/]+)/$#', $pageUrl))
    return self::parsePhoto($http, $body, $pageUrl);

  $wantsFeed = isset($opts['expect']) && $opts['expect'] == 'feed';
  return $wantsFeed
    ? self::parseFeed($http, $body, $pageUrl)
    : self::parseProfile($http, $body, $pageUrl);
}
/**
 * Turn a profile page into an h-card result.
 *
 * @param object $http Unused here; kept for a uniform parser signature.
 * @param string $html Profile page HTML.
 * @param string $url  Profile URL (unused).
 * @return array `data` + `source-format`, or the result of self::_unknown()
 *               (defined outside this view) when no profile data is found.
 */
private static function parseProfile($http, $html, $url) {
  $user = self::_parseProfileFromHTML($html);
  if($user) {
    return [
      'data' => self::_buildHCardFromInstagramProfile($user),
      'source-format' => 'instagram',
    ];
  }
  return self::_unknown();
}
/**
 * Turn a profile page into a feed of the entries embedded in its JSON.
 *
 * A profile without a timeline edge list now yields an empty feed instead of
 * iterating over null, and nodes without a shortcode are skipped rather than
 * producing a bogus ".../p//" permalink.
 *
 * @param object $http HTTP client handed on to parsePhotoFromData().
 * @param string $html Profile page HTML.
 * @param string $url  Profile URL (unused).
 * @return array `data` (type `feed` with `items`) + `source-format`, or the
 *               result of self::_unknown() when no profile data is found.
 */
private static function parseFeed($http, $html, $url) {
  $profileData = self::_parseProfileFromHTML($html);
  if(!$profileData)
    return self::_unknown();

  $photos = [];
  if(isset($profileData['edge_owner_to_timeline_media']['edges'])
    && is_array($profileData['edge_owner_to_timeline_media']['edges'])) {
    $photos = $profileData['edge_owner_to_timeline_media']['edges'];
  }

  $items = [];
  foreach($photos as $photoData) {
    if(!isset($photoData['node']['shortcode']))
      continue;

    $item = self::parsePhotoFromData($http, $photoData['node'],
      'https://www.instagram.com/p/'.$photoData['node']['shortcode'].'/', $profileData);
    // Note: Not all the photo info is available in the initial JSON.
    // Things like video mp4 URLs and person tags and locations are missing.
    // Consumers of the feed will need to fetch the photo permalink in order to get all missing information.
    if(isset($item['data']))
      $items[] = $item['data'];
  }

  return [
    'data' => [
      'type' => 'feed',
      'items' => $items,
    ],
    'source-format' => 'instagram',
  ];
}
/**
 * Parse a photo permalink page.
 *
 * Extracts the post JSON from the HTML and hands it to parsePhotoFromData().
 *
 * @param object      $http    HTTP client handed on to parsePhotoFromData().
 * @param string      $html    Photo page HTML.
 * @param string      $url     Permalink of the post.
 * @param array|false $profile Optional already-known author profile.
 * @return array Parser result.
 */
private static function parsePhoto($http, $html, $url, $profile=false) {
  return self::parsePhotoFromData(
    $http,
    self::_extractPhotoDataFromPhotoPage($html),
    $url,
    $profile
  );
}
/**
 * Tell whether an accessibility caption is the generic placeholder text
 * rather than a real description.
 *
 * @param mixed $text Caption to test.
 * @return bool
 */
private static function altTextIsPlaceholder($text) {
  $placeholder = 'No photo description available.';
  return $text == $placeholder;
}
/**
 * Build an h-entry style array from Instagram's decoded JSON for one post.
 *
 * Robustness fixes over the previous version:
 *  - a post without `taken_at_timestamp`/`date` no longer causes a fatal
 *    error from calling format() on an undefined variable; `published` is
 *    simply omitted;
 *  - alt text is only attached when a photo URL was actually found;
 *  - a `location` without an `id` is ignored;
 *  - a latitude of 0 no longer skips the timezone lookup.
 *
 * @param object      $http      HTTP client; used to fetch tagged profiles and venues.
 * @param array|null  $photoData Decoded post data.
 * @param string      $url       Permalink stored as the entry URL.
 * @param array|false $profile   Already-known author profile, or false to
 *                               fall back to $photoData['owner'].
 * @return array Keys `data`, `original` (JSON string) and `source-format`,
 *               or the result of self::_unknown() when $photoData is empty.
 */
private static function parsePhotoFromData($http, $photoData, $url, $profile=false) {
  if(!$photoData)
    return self::_unknown();

  // Start building the h-entry
  $entry = array(
    'type' => 'entry',
    'url' => $url,
    'author' => [
      'type' => 'card',
      'name' => null,
      'photo' => null,
      'url' => null
    ]
  );

  $profiles = [];

  if(!$profile) {
    if(isset($photoData['owner'])) {
      // Get profile info from the page
      $entry['author'] = self::_buildHCardFromInstagramProfile($photoData['owner']);
    }
    // 2019-10-13: the extra fetch of the author's full profile was disabled
    // here because profile fetches are severely rate limited now.
  } else {
    $entry['author'] = self::_buildHCardFromInstagramProfile($profile);
    $profiles[] = $profile;
  }

  // Content and hashtags
  $caption = false;
  if(isset($photoData['caption'])) {
    $caption = $photoData['caption'];
  } elseif(isset($photoData['edge_media_to_caption']['edges'][0]['node']['text'])) {
    $caption = $photoData['edge_media_to_caption']['edges'][0]['node']['text'];
  }

  if($caption) {
    if(preg_match_all('/#([a-z0-9_-]+)/i', $caption, $matches)) {
      // $matches[1] is the list of hashtag names without the leading '#'
      $entry['category'] = $matches[1];
    }

    $entry['content'] = [
      'text' => $caption
    ];
  }

  $refs = [];
  $meta = [];

  // Include the photo/video media URLs
  // (Always return arrays, even for single images)
  if(array_key_exists('edge_sidecar_to_children', $photoData)) {
    // Multi-post
    // For now, we will only pull photos from multi-posts, and skip videos.
    // https://github.com/aaronpk/XRay/issues/84
    $entry['photo'] = [];
    foreach($photoData['edge_sidecar_to_children']['edges'] as $edge) {
      $entry['photo'][] = $edge['node']['display_url'];
      // Don't need to pull person-tags from here because the main parent object already has them.
      if(isset($edge['node']['accessibility_caption']) && $edge['node']['accessibility_caption'] && !self::altTextIsPlaceholder($edge['node']['accessibility_caption'])) {
        $meta[$edge['node']['display_url']] = [
          'alt' => $edge['node']['accessibility_caption']
        ];
      }
    }
  } else {
    // Single photo or video
    if(array_key_exists('display_src', $photoData))
      $entry['photo'] = [$photoData['display_src']];
    elseif(array_key_exists('display_url', $photoData))
      $entry['photo'] = [$photoData['display_url']];

    // Only key alt text by the photo URL when a photo URL actually exists
    if(isset($entry['photo'][0]) && isset($photoData['accessibility_caption']) && $photoData['accessibility_caption'] && !self::altTextIsPlaceholder($photoData['accessibility_caption'])) {
      $meta[$entry['photo'][0]] = [
        'alt' => $photoData['accessibility_caption']
      ];
    }

    if(isset($photoData['is_video']) && $photoData['is_video'] && isset($photoData['video_url'])) {
      $entry['video'] = [$photoData['video_url']];
    }
  }

  // Find person tags and fetch user profiles
  if(isset($photoData['edge_media_to_tagged_user']['edges'])) {
    if(!isset($entry['category'])) $entry['category'] = [];

    foreach($photoData['edge_media_to_tagged_user']['edges'] as $edge) {
      $taggedProfile = self::_getInstagramProfile($edge['node']['user']['username'], $http);
      if($taggedProfile) {
        $card = self::_buildHCardFromInstagramProfile($taggedProfile);
        $entry['category'][] = $card['url'];
        $refs[$card['url']] = $card;
        $profiles[] = $taggedProfile;
      }
    }
  }

  // Published date (stays null when the data carries no usable timestamp)
  $published = null;
  if(isset($photoData['taken_at_timestamp']))
    $published = DateTime::createFromFormat('U', $photoData['taken_at_timestamp']);
  elseif(isset($photoData['date']))
    $published = DateTime::createFromFormat('U', $photoData['date']);
  // createFromFormat() returns false on unparseable input
  if(!$published)
    $published = null;

  // Include venue data
  $locations = [];
  if(isset($photoData['location']['id'])) {
    $location = self::_getInstagramLocation($photoData['location']['id'], $http);
    if($location) {
      $entry['location'] = [$location['url']];
      $refs[$location['url']] = $location;
      $locations[] = $location;

      // Look up timezone; is_numeric() keeps a latitude/longitude of 0 valid
      if($published && is_numeric($location['latitude']) && is_numeric($location['longitude'])) {
        $tz = \p3k\Timezone::timezone_for_location($location['latitude'], $location['longitude']);
        if($tz) {
          $published->setTimeZone(new DateTimeZone($tz));
        }
      }
    }
  }

  if($published) {
    $entry['published'] = $published->format('c');
  }

  if(count($refs)) {
    $entry['refs'] = $refs;
  }

  if(count($meta)) {
    $entry['meta'] = $meta;
  }

  $entry['post-type'] = \p3k\XRay\PostType::discover($entry);

  return [
    'data' => $entry,
    'original' => json_encode([
      'photo' => $photoData,
      'profiles' => $profiles,
      'locations' => $locations
    ]),
    'source-format' => 'instagram',
  ];
}
/**
 * Convert an Instagram user record into an h-card array.
 *
 * `full_name` is now checked for existence before it is read, so user records
 * that carry only a username no longer raise an undefined-index notice; they
 * fall back to the username exactly as an empty full name did before.
 *
 * @param array|null|false $profile Instagram user data; must contain `username`.
 * @return array|false The card (type, name, nickname, url and optionally
 *                     photo/note), or false when $profile is empty.
 */
private static function _buildHCardFromInstagramProfile($profile) {
  if(!$profile) return false;

  $author = [
    'type' => 'card'
  ];

  if(!empty($profile['full_name']))
    $author['name'] = $profile['full_name'];
  else
    $author['name'] = $profile['username'];

  $author['nickname'] = $profile['username'];
  $author['url'] = 'https://www.instagram.com/' . $profile['username'] . '/';

  // Prefer the HD avatar when both sizes are available
  if(isset($profile['profile_pic_url_hd']))
    $author['photo'] = $profile['profile_pic_url_hd'];
  elseif(isset($profile['profile_pic_url']))
    $author['photo'] = $profile['profile_pic_url'];

  if(isset($profile['biography']))
    $author['note'] = $profile['biography'];

  return $author;
}
/**
 * Fetch a user's profile page and extract the user record from it.
 *
 * @param string $username Instagram username.
 * @param object $http     Client exposing get($url).
 * @return array|null User data, or null on a fetch error or when the page
 *                    holds no profile data.
 */
private static function _getInstagramProfile($username, $http) {
  $profileUrl = 'https://www.instagram.com/'.$username.'/';
  $response = $http->get($profileUrl);
  if($response['error'])
    return null;
  return self::_parseProfileFromHTML($response['body']);
}
/**
 * Pull the user record out of a profile page's embedded JSON.
 *
 * @param string $html Profile page HTML.
 * @return array|null The `graphql.user` structure of the first ProfilePage
 *                    entry, or null when it is absent.
 */
private static function _parseProfileFromHTML($html) {
  $shared = self::_extractIGData($html);
  if(!isset($shared['entry_data']['ProfilePage'][0]))
    return null;

  $page = $shared['entry_data']['ProfilePage'][0];
  if($page && isset($page['graphql']['user']))
    return $page['graphql']['user'];

  return null;
}
/**
 * Fetch a location page and describe the venue as an h-card.
 *
 * @param string|int $id   Instagram location id.
 * @param object     $http Client exposing get($url).
 * @return array|null Card with type, name, url, latitude and longitude, or
 *                    null when the page is empty or has no venue data.
 */
private static function _getInstagramLocation($id, $http) {
  $igURL = 'https://www.instagram.com/explore/locations/'.$id.'/';
  $response = $http->get($igURL);
  if(!$response['body'])
    return null;

  $venue = self::_extractVenueDataFromVenuePage($response['body']);
  if(!$venue)
    return null;

  return [
    'type' => 'card',
    'name' => $venue['name'],
    'url' => $igURL,
    'latitude' => $venue['lat'],
    'longitude' => $venue['lng'],
  ];
}
/**
 * Pull the post record out of a photo page's embedded JSON.
 *
 * Three layouts of the first PostPage entry are tried in order:
 * `graphql.shortcode_media`, `graphql.media`, then `media`.
 *
 * @param string $html Photo page HTML.
 * @return array|null The post data, or null when none of the layouts match.
 */
private static function _extractPhotoDataFromPhotoPage($html) {
  $shared = self::_extractIGData($html);
  if(!$shared || !is_array($shared) || !array_key_exists('entry_data', $shared))
    return null;

  $entryData = $shared['entry_data'];
  if(!is_array($entryData) || !array_key_exists('PostPage', $entryData))
    return null;

  $pages = $entryData['PostPage'];
  if(isset($pages[0]['graphql']['shortcode_media']))
    return $pages[0]['graphql']['shortcode_media'];
  if(isset($pages[0]['graphql']['media']))
    return $pages[0]['graphql']['media'];
  if(isset($pages[0]['media']))
    return $pages[0]['media'];

  return null;
}
/**
 * Pull the location record out of a venue page's embedded JSON.
 *
 * @param string $html Location page HTML.
 * @return array|null The `graphql.location` structure of the first
 *                    LocationsPage entry without its `media` and `top_posts`
 *                    members, or null when it is absent.
 */
private static function _extractVenueDataFromVenuePage($html) {
  $shared = self::_extractIGData($html);
  if(!$shared || !isset($shared['entry_data']['LocationsPage']))
    return null;

  $pages = $shared['entry_data']['LocationsPage'];
  if(!isset($pages[0]['graphql']['location']))
    return null;

  $venue = $pages[0]['graphql']['location'];
  # we don't need these and they're huge, so drop them now
  unset($venue['media'], $venue['top_posts']);
  return $venue;
}
/**
 * Extract the `window._sharedData` JSON blob from an Instagram HTML page.
 *
 * The previous version tested `!$doc`, which can never be true for an object,
 * and ignored the return value of loadHTML(). It also passed empty bodies
 * straight to loadHTML(), which throws a ValueError on PHP 8. Both cases now
 * return null.
 *
 * @param string $html Page HTML.
 * @return array|null Decoded JSON of the last matching <script> element, or
 *                    null when the HTML is empty/unparseable, nothing
 *                    matches, or the JSON cannot be decoded.
 */
private static function _extractIGData($html) {
  // Nothing to parse; loadHTML() also rejects an empty source on PHP 8
  if(!is_string($html) || $html === '')
    return null;

  $doc = new DOMDocument();
  // @ silences the libxml warnings emitted for real-world malformed markup
  if(!@$doc->loadHTML($html))
    return null;

  $xpath = new DOMXPath($doc);

  $data = null;

  // Deliberately no break: as before, the last matching script wins
  foreach($xpath->query('//script') as $script) {
    if(preg_match('/window\._sharedData = ({.+});/', $script->textContent, $match)) {
      $data = json_decode($match[1], true);
    }
  }

  return $data;
}
}

+ 0
- 4
lib/XRay/Parser.php View File

@ -79,10 +79,6 @@ class Parser {
// Check if the URL matches a special parser
$url = $http_response['url'];
if(Formats\Instagram::matches($url)) {
return Formats\Instagram::parse($this->http, $http_response, $opts);
}
if(Formats\GitHub::matches($url)) {
return Formats\GitHub::parse($http_response);
}

+ 0
- 324
tests/InstagramTest.php View File

@ -1,324 +0,0 @@
<?php
use Symfony\Component\HttpFoundation\Request;
use Symfony\Component\HttpFoundation\Response;
class InstagramTest extends PHPUnit\Framework\TestCase
{
private $http;
// Build a Parse instance whose HTTP client reads responses from the
// data/ directory next to this file.
public function setUp(): void
{
  $client = new Parse();
  $client->http = new p3k\HTTP\Test(__DIR__.'/data/');
  $client->mc = null;
  $this->client = $client;
}
// Run the given request parameters through the client's parse() action and
// return the response it produces.
private function parse($params)
{
  return $this->client->parse(new Request($params), new Response());
}
// A single-photo post: checks type, date, hashtags, caption, photo and author.
public function testInstagramPhoto()
{
  // Original URL: https://www.instagram.com/p/BO5rYVElvJq/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BO5rYVElvJq/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $this->assertEquals('entry', $entry['type']);
  $this->assertEquals('photo', $entry['post-type']);
  $this->assertEquals('2017-01-05T23:31:32+00:00', $entry['published']);
  $this->assertContains('planning', $entry['category']);
  $this->assertContains('2017', $entry['category']);
  $this->assertEquals('Kind of crazy to see the whole year laid out like this. #planning #2017', $entry['content']['text']);
  $this->assertCount(1, $entry['photo']);
  $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/af9471f885e6197478d71807a7cbf297/5CBA6E5F/t51.2885-15/e35/15803256_1832278043695907_4846092951052353536_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $entry['photo']);

  $author = $entry['author'];
  $this->assertEquals('https://www.instagram.com/aaronpk/', $author['url']);
  $this->assertEquals('Aaron Parecki', $author['name']);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/a2909937316893f18760f1077ca88fa1/5CBB520A/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $author['photo']);

  // make sure this does not include alt text (autogenerated placeholder from instagram)
  $this->assertArrayNotHasKey('meta', $entry);
}
// A photo with a hashtag, a person tag and a venue: checks author card,
// categories, photo, localized published date and the venue ref.
public function testBGDpqNoiMJ0()
{
  // https://www.instagram.com/p/BGDpqNoiMJ0/
  $response = $this->parse(['url' => 'http://www.instagram.com/BGDpqNoiMJ0']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $this->assertEquals('entry', $entry['type']);
  $this->assertEquals('photo', $entry['post-type']);

  $expectedAuthor = [
    'type' => 'card',
    'name' => 'pk_spam',
    'nickname' => 'pk_spam',
    'url' => 'https://www.instagram.com/pk_spam/',
    'photo' => 'https://scontent-frx5-1.cdninstagram.com/vp/f17e1275a70fc32e93cbf434ddc32bcd/5B6CCC7A/t51.2885-19/11906329_960233084022564_1448528159_a.jpg',
  ];
  $this->assertSame($expectedAuthor, $entry['author']);

  $expectedCategories = [
    'muffins',
    'https://www.instagram.com/indiewebcat/'
  ];
  $this->assertSame($expectedCategories, $entry['category']);

  $this->assertEquals('Meow #muffins', $entry['content']['text']);
  $this->assertSame(['https://instagram.fsea1-1.fna.fbcdn.net/vp/9433ea494a8b055bebabf70fd81cfa32/5B51F092/t51.2885-15/e35/13266755_877794672348882_1908663476_n.jpg'], $entry['photo']);
  $this->assertEquals('2016-05-30T20:46:22-07:00', $entry['published']);

  $venueUrl = 'https://www.instagram.com/explore/locations/359000003/';
  $this->assertEquals($venueUrl, $entry['location'][0]);

  $expectedVenue = [
    'type' => 'card',
    'name' => 'Burnside 26',
    'url' => 'https://www.instagram.com/explore/locations/359000003/',
    'latitude' => 45.52322,
    'longitude' => -122.63885
  ];
  $this->assertSame($expectedVenue, $entry['refs'][$venueUrl]);
}
// A video post: checks post type, hashtags, caption, poster photo, video URL
// and author.
public function testInstagramVideo()
{
  // Original URL: https://www.instagram.com/p/BO_RN8AFZSx/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BO_RN8AFZSx/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $this->assertEquals('entry', $entry['type']);
  $this->assertEquals('video', $entry['post-type']);
  $this->assertContains('100daysofmusic', $entry['category']);
  $this->assertEquals('Day 18. Maple and Spruce #100daysofmusic #100daysproject #the100dayproject https://aaronparecki.com/2017/01/07/14/day18', $entry['content']['text']);
  $this->assertCount(1, $entry['photo']);
  $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a77f8672f977413d2eb5239cd6d5c4cf/5C3A4ADF/t51.2885-15/e15/15624670_548881701986735_8264383763249627136_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $entry['photo']);
  $this->assertCount(1, $entry['video']);
  $this->assertEquals(['https://instagram.fsjc1-3.fna.fbcdn.net/vp/90ed8fe576cba16e258c0f4cfc05299a/5C3A129E/t50.2886-16/15921147_1074837002642259_2269307616507199488_n.mp4?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net'], $entry['video']);

  $author = $entry['author'];
  $this->assertEquals('https://www.instagram.com/aaronpk/', $author['url']);
  $this->assertEquals('Aaron Parecki', $author['name']);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/a2909937316893f18760f1077ca88fa1/5CBB520A/t51.2885-19/s150x150/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $author['photo']);
}
// A photo with a tagged person: the tagged profile must appear both as a
// category URL and as a card under refs.
public function testInstagramPhotoWithPersonTag()
{
  // Original URL: https://www.instagram.com/p/BNfqVfVlmkj/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BNfqVfVlmkj/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $taggedUrl = 'https://www.instagram.com/kmikeym/';

  $this->assertCount(2, $entry['category']);
  $this->assertEquals(['type'=>'card','name'=>'KmikeyM™️','nickname'=>'kmikeym','url'=>'https://www.instagram.com/kmikeym/','photo'=>'https://instagram.fsjc1-3.fna.fbcdn.net/vp/ea5b988b616dbcc778b3013bf2426d70/5CCAC7FC/t51.2885-19/s320x320/20634957_814691788710973_2275383796935163904_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net','note'=>"The world’s first publicly traded person.\n\nAcct in collaboration with @norbertoinc\n\nBecome a shareholder today!\n"], $entry['refs'][$taggedUrl]);
  $this->assertContains($taggedUrl, $entry['category']);
  $this->assertArrayHasKey($taggedUrl, $entry['refs']);
}
// A photo with a venue: checks the location URL, the venue card under refs
// and that the published date is expressed in the venue's timezone.
public function testInstagramPhotoWithVenue()
{
  // Original URL: https://www.instagram.com/p/BN3Z5salSys/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BN3Z5salSys/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $venueUrl = 'https://www.instagram.com/explore/locations/109284789535230/';

  $this->assertCount(1, $entry['location']);
  $this->assertContains($venueUrl, $entry['location']);
  $this->assertArrayHasKey($venueUrl, $entry['refs']);

  $venue = $entry['refs'][$venueUrl];
  $this->assertEquals('XOXO Outpost', $venue['name']);
  $this->assertEquals('45.5261002', $venue['latitude']);
  $this->assertEquals('-122.6558081', $venue['longitude']);

  // Setting a venue should set the timezone
  $this->assertEquals('2016-12-10T21:48:56-08:00', $entry['published']);
}
// A two-photo post: both photos are returned in order, with no video and no
// alt-text meta.
public function testTwoPhotos()
{
  // Original URL: https://www.instagram.com/p/BZWmUB_DVtp/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BZWmUB_DVtp/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  $this->assertCount(2, $entry['photo']);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/6b09c3d5490ee3efb55849858a9ec014/5CBFBC38/t51.2885-15/e35/21827424_134752690591737_8093088291252862976_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $entry['photo'][0]);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/8b1b2e6efa86a4856ec37a60f0fa77f5/5CC2D34D/t51.2885-15/e35/21909774_347707439021016_5237540582556958720_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $entry['photo'][1]);
  $this->assertArrayNotHasKey('video', $entry);
  $this->assertCount(2, $entry['category']);
  $this->assertArrayNotHasKey('meta', $entry);
}
// A post mixing photos and videos: only the three photos are returned and
// the post is typed as a photo.
public function testMixPhotosAndVideos()
{
  // Original URL: https://www.instagram.com/p/BZWmpecjBwN/
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BZWmpecjBwN/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $entry = $parsed['data'];
  // we discard videos in this case right now
  $this->assertEquals('photo', $entry['post-type']);
  $this->assertCount(3, $entry['photo']);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/ee1a28763918069f3e54dad35be24ad8/5CCFBAB8/t51.2885-15/e35/21878922_686481254874005_8468823712617988096_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $entry['photo'][0]);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/ddc0ebe969bb1f9e6bf8adada0892c90/5C39EBC9/t51.2885-15/e15/21910026_1507234999368159_6974261907783942144_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $entry['photo'][1]);
  $this->assertEquals('https://instagram.fsjc1-3.fna.fbcdn.net/vp/bfe032af795427443ea448840df1c3a4/5CCC8C88/t51.2885-15/e35/21878800_273567963151023_7672178549897297920_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net', $entry['photo'][2]);
  $this->assertArrayNotHasKey('video', $entry);
  $this->assertCount(2, $entry['category']);
}
// A profile URL yields exactly the expected h-card.
public function testInstagramProfile()
{
  $response = $this->parse(['url' => 'https://www.instagram.com/aaronpk/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $expectedCard = [
    'type' => 'card',
    'name' => 'Aaron Parecki',
    'nickname' => 'aaronpk',
    'url' => 'https://www.instagram.com/aaronpk/',
    'photo' => 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/45aee453740a714bf408f8947f89da8e/5CCB4B8E/t51.2885-19/s320x320/14240576_268350536897085_1129715662_a.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net',
    'note' => '🔒 oauth.net 🎥 backpedal.tv 🎙 streampdx.com 📡 w7apk.com'
  ];
  $this->assertSame($expectedCard, $parsed['data']);
}
// A profile whose biography contains URLs is returned verbatim in `note`.
public function testInstagramProfileWithBio()
{
  $response = $this->parse(['url' => 'https://www.instagram.com/pk_spam/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $expectedCard = [
    'type' => 'card',
    'name' => 'pk_spam',
    'nickname' => 'pk_spam',
    'url' => 'https://www.instagram.com/pk_spam/',
    'photo' => 'https://scontent-frx5-1.cdninstagram.com/vp/74112f515c64726429c69fedcb927c2d/5CB64CF1/t51.2885-19/44884218_345707102882519_2446069589734326272_n.jpg?_nc_ht=scontent-frx5-1.cdninstagram.com',
    'note' => 'My website is https://aaronparecki.com.dev/ and http://aaronpk.micro.blog/about/ and https://tiny.xyz.dev/'
  ];
  $this->assertSame($expectedCard, $parsed['data']);
}
// With expect=feed a profile URL yields a feed of 12 items; the first and
// last permalinks are checked.
public function testInstagramProfileFeed()
{
  $response = $this->parse([
    'url' => 'https://www.instagram.com/pk_spam/',
    'expect' => 'feed',
  ]);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $feed = $parsed['data'];
  $this->assertEquals('feed', $feed['type']);
  $this->assertCount(12, $feed['items']);
  $this->assertEquals('https://www.instagram.com/p/BsdlOmLh_IX/', $feed['items'][0]['url']);
  $this->assertEquals('https://www.instagram.com/p/BGFdtAViMJy/', $feed['items'][11]['url']);
}
// A photo with author-written alt text exposes it under meta[photo-url].alt.
public function testInstagramPhotoWithAltText()
{
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BsdjKytBZyx/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $photoUrl = 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/a7e61adf3d84f07863ffdb99f0fdcc86/5CD9B7F3/t51.2885-15/e35/47692478_2276538359047529_8318084305806697090_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net';
  $this->assertEquals('Pink text on a white background that says "Photo with alt text"', $parsed['data']['meta'][$photoUrl]['alt']);
}
// A multi-photo post exposes per-photo alt text keyed by each photo URL.
public function testInstagramMultiPhotoWithAltText()
{
  $response = $this->parse(['url' => 'https://www.instagram.com/p/BsdlOmLh_IX/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $firstPhoto = 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/90bf019b7396d7bc2b1ee02170902a2e/5CCC9B87/t51.2885-15/e35/47692921_321791688431421_3314633848293773579_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net';
  $secondPhoto = 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/a6c93d8fcd5ad0e3b60f2ac0695eb34e/5CC3898E/t51.2885-15/e35/49663055_349750985612151_2949260446582336214_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net';

  $this->assertEquals('A large pink "1" in a circle with a small green "2" behind it', $parsed['data']['meta'][$firstPhoto]['alt']);
  $this->assertEquals('A large green "2" in a circle with a small pink "1" behind it', $parsed['data']['meta'][$secondPhoto]['alt']);
}
// Instagram's auto-generated "Image may contain" caption is kept as alt text.
public function testInstagramPhotoAutogeneratedAltText()
{
  $response = $this->parse(['url' => 'https://www.instagram.com/p/Bq8U12UAcdq/']);

  $this->assertEquals(200, $response->getStatusCode());
  $parsed = json_decode($response->getContent(), true);

  $this->assertEquals(200, $parsed['code']);
  $this->assertEquals('instagram', $parsed['source-format']);

  $photoUrl = 'https://instagram.fsjc1-3.fna.fbcdn.net/vp/7f8954f33de897c0c57656b798637f4c/5CC3DF9F/t51.2885-15/e35/45605085_1989380037822519_4707213851165118070_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net';
  $this->assertEquals('Image may contain: one or more people and hat', $parsed['data']['meta'][$photoUrl]['alt']);
}
}

+ 0
- 204
tests/data/www.instagram.com/BGDpqNoiMJ0
File diff suppressed because it is too large
View File


+ 0
- 312
tests/data/www.instagram.com/aaronpk_
File diff suppressed because it is too large
View File


+ 0
- 303
tests/data/www.instagram.com/explore_locations_109284789535230_
File diff suppressed because it is too large
View File


+ 0
- 303
tests/data/www.instagram.com/explore_locations_359000003_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/indiewebcat_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/kmikeym_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/microformats_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BGDpqNoiMJ0_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BN3Z5salSys_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BNfqVfVlmkj_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BO5rYVElvJq_
File diff suppressed because it is too large
View File


+ 0
- 321
tests/data/www.instagram.com/p_BO_RN8AFZSx_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BZWmUB_DVtp_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BZWmpecjBwN_
File diff suppressed because it is too large
View File


+ 0
- 312
tests/data/www.instagram.com/p_Bq8U12UAcdq_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BsdjKytBZyx_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/p_BsdlOmLh_IX_
File diff suppressed because it is too large
View File


+ 0
- 316
tests/data/www.instagram.com/pk_spam_
File diff suppressed because it is too large
View File


+ 0
- 30
tests/download-instagram-data.sh View File

@ -1,30 +0,0 @@
#!/bin/bash
# Download the Instagram pages listed below (response headers included) into
# data/www.instagram.com/, one file per URL, converted to CRLF line endings.
# Output paths are relative to the current working directory.

urls=(
'https://www.instagram.com/aaronpk/'
'https://www.instagram.com/indiewebcat/'
'https://www.instagram.com/kmikeym/'
'https://www.instagram.com/microformats/'
'https://www.instagram.com/pk_spam/'
'https://www.instagram.com/p/BO5rYVElvJq/'
'https://www.instagram.com/p/BGDpqNoiMJ0/'
'https://www.instagram.com/p/BO_RN8AFZSx/'
'https://www.instagram.com/p/BNfqVfVlmkj/'
'https://www.instagram.com/p/BN3Z5salSys/'
'https://www.instagram.com/p/BZWmUB_DVtp/'
'https://www.instagram.com/p/BZWmpecjBwN/'
'https://www.instagram.com/explore/locations/109284789535230/'
'https://www.instagram.com/explore/locations/359000003/'
'https://www.instagram.com/p/BsdjKytBZyx/'
'https://www.instagram.com/p/BsdlOmLh_IX/'
'https://www.instagram.com/p/Bq8U12UAcdq/'
)

# All expansions are quoted so a URL is never word-split or glob-expanded
for url in "${urls[@]}"; do
  # Strip the site prefix and turn slashes into underscores,
  # e.g. https://www.instagram.com/p/ABC/ -> p_ABC_
  fn=$(echo "$url" | sed 's#https://www.instagram.com/##' | sed 's#/#_#g')
  echo "$url > $fn"
  curl -i -s "$url" > "data/www.instagram.com/$fn"
  unix2dos "data/www.instagram.com/$fn"
  # Pause between requests
  sleep 2
done

Loading…
Cancel
Save