From 156fd62678d765743971b9997023ea8023492f07 Mon Sep 17 00:00:00 2001 From: Aaron Parecki Date: Thu, 10 Jan 2019 10:37:57 -0800 Subject: [PATCH] Include alt text from Instagram posts This adds a new property, "meta", which includes alt text and has room to include additional properties later. closes #85 --- lib/XRay/Formats/Instagram.php | 17 ++ tests/InstagramTest.php | 31 ++ tests/data/www.instagram.com/p_BsdjKytBZyx_ | 316 ++++++++++++++++++++ tests/data/www.instagram.com/p_BsdlOmLh_IX_ | 316 ++++++++++++++++++++ tests/download-instagram-data.sh | 2 + 5 files changed, 682 insertions(+) create mode 100644 tests/data/www.instagram.com/p_BsdjKytBZyx_ create mode 100644 tests/data/www.instagram.com/p_BsdlOmLh_IX_ diff --git a/lib/XRay/Formats/Instagram.php b/lib/XRay/Formats/Instagram.php index 274490b..1bfb843 100644 --- a/lib/XRay/Formats/Instagram.php +++ b/lib/XRay/Formats/Instagram.php @@ -128,17 +128,24 @@ class Instagram extends Format { } $refs = []; + $meta = []; // Include the photo/video media URLs // (Always return arrays, even for single images) if(array_key_exists('edge_sidecar_to_children', $photoData)) { // Multi-post // For now, we will only pull photos from multi-posts, and skip videos. + // https://github.com/aaronpk/XRay/issues/84 $entry['photo'] = []; foreach($photoData['edge_sidecar_to_children']['edges'] as $edge) { $entry['photo'][] = $edge['node']['display_url']; // Don't need to pull person-tags from here because the main parent object already has them. + if(isset($edge['node']['accessibility_caption'])) { + $meta[$edge['node']['display_url']] = [ + 'alt' => $edge['node']['accessibility_caption'] + ]; + } } } else { @@ -149,6 +156,12 @@ class Instagram extends Format { elseif(array_key_exists('display_url', $photoData)) $entry['photo'] = [$photoData['display_url']]; + if(isset($photoData['accessibility_caption']) && $photoData['accessibility_caption']) { + $meta[$entry['photo'][0]] = [ + 'alt' => $photoData['accessibility_caption'] + ]; + } + if(isset($photoData['is_video']) && $photoData['is_video'] && isset($photoData['video_url'])) { $entry['video'] = [$photoData['video_url']]; } @@ -200,6 +213,10 @@ class Instagram extends Format { $entry['refs'] = $refs; } + if(count($meta)) { + $entry['meta'] = $meta; + } + $entry['post-type'] = \p3k\XRay\PostType::discover($entry); return [ diff --git a/tests/InstagramTest.php b/tests/InstagramTest.php index 4d2af9c..bcfd513 100644 --- a/tests/InstagramTest.php +++ b/tests/InstagramTest.php @@ -247,4 +247,35 @@ class InstagramTest extends PHPUnit_Framework_TestCase { $this->assertEquals('https://www.instagram.com/p/BGC8l_ZCMKb/', $data['data']['items'][11]['url']); } + public function testInstagramPhotoWithAltText() { + $url = 'https://www.instagram.com/p/BsdjKytBZyx/'; + + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals(200, $data['code']); + $this->assertEquals('instagram', $data['source-format']); + + $this->assertEquals('Pink text on a white background that says "Photo with alt text"', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a7e61adf3d84f07863ffdb99f0fdcc86/5CD9B7F3/t51.2885-15/e35/47692478_2276538359047529_8318084305806697090_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); + } + + public function testInstagramMultiPhotoWithAltText() { + $url = 'https://www.instagram.com/p/BsdlOmLh_IX/'; + + $response = $this->parse(['url' => $url]); + + $body = $response->getContent(); + $this->assertEquals(200, $response->getStatusCode()); + $data = json_decode($body, true); + + $this->assertEquals(200, $data['code']); + $this->assertEquals('instagram', $data['source-format']); + + $this->assertEquals('A large pink "1" in a circle with a small green "2" behind it', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/90bf019b7396d7bc2b1ee02170902a2e/5CCC9B87/t51.2885-15/e35/47692921_321791688431421_3314633848293773579_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); + $this->assertEquals('A large green "2" in a circle with a small pink "1" behind it', $data['data']['meta']['https://instagram.fsjc1-3.fna.fbcdn.net/vp/a6c93d8fcd5ad0e3b60f2ac0695eb34e/5CC3898E/t51.2885-15/e35/49663055_349750985612151_2949260446582336214_n.jpg?_nc_ht=instagram.fsjc1-3.fna.fbcdn.net']['alt']); + } + } diff --git a/tests/data/www.instagram.com/p_BsdjKytBZyx_ b/tests/data/www.instagram.com/p_BsdjKytBZyx_ new file mode 100644 index 0000000..56cfabe --- /dev/null +++ b/tests/data/www.instagram.com/p_BsdjKytBZyx_ @@ -0,0 +1,316 @@ +HTTP/1.1 200 OK +Content-Type: text/html; charset=utf-8 +Vary: Accept-Language, Cookie, Accept-Encoding +Content-Language: en +Date: Thu, 10 Jan 2019 17:08:15 GMT +Strict-Transport-Security: max-age=3600 +Cache-Control: private, no-cache, no-store, must-revalidate +Pragma: no-cache +Expires: Sat, 01 Jan 2000 00:00:00 GMT +X-Frame-Options: SAMEORIGIN +content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests +X-Content-Type-Options: nosniff +X-XSS-Protection: 0 +Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1ghdoV:im6UK2FX8I1TlHIiI_iwXYjDRKU"; Domain=.instagram.com; HttpOnly; Path=/; Secure +Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure +Set-Cookie: mid=XDd7_wAEAAH4otGp9D4ALT_YzT6J; Domain=.instagram.com; expires=Sun, 07-Jan-2029 17:08:15 GMT; Max-Age=315360000; Path=/; Secure +Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 17:08:15 GMT; Max-Age=315360000; Path=/; Secure +Set-Cookie: csrftoken=nrcJcaKDOujTgj4650qHTzdT1eEYD3oD; Domain=.instagram.com; expires=Thu, 09-Jan-2020 17:08:15 GMT; Max-Age=31449600; Path=/; Secure +Connection: keep-alive +Content-Length: 32865 + + + + + + + + +@pk_spam on Instagram: “This photo has alt text” + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/data/www.instagram.com/p_BsdlOmLh_IX_ b/tests/data/www.instagram.com/p_BsdlOmLh_IX_ new file mode 100644 index 0000000..a80cd0d --- /dev/null +++ b/tests/data/www.instagram.com/p_BsdlOmLh_IX_ @@ -0,0 +1,316 @@ +HTTP/1.1 200 OK +Content-Type: text/html; charset=utf-8 +Vary: Accept-Language, Cookie, Accept-Encoding +Content-Language: en +Date: Thu, 10 Jan 2019 18:11:48 GMT +Strict-Transport-Security: max-age=3600 +Cache-Control: private, no-cache, no-store, must-revalidate +Pragma: no-cache +Expires: Sat, 01 Jan 2000 00:00:00 GMT +X-Frame-Options: SAMEORIGIN +content-security-policy: report-uri https://www.instagram.com/security/csp_report/; default-src 'self' https://www.instagram.com; img-src https: data: blob:; font-src https: data:; media-src 'self' blob: https://www.instagram.com https://*.cdninstagram.com https://*.fbcdn.net; manifest-src 'self' https://www.instagram.com; script-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://*.cdninstagram.com wss://www.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net 'unsafe-inline' 'unsafe-eval' blob:; style-src 'self' https://*.www.instagram.com https://www.instagram.com 'unsafe-inline'; connect-src 'self' https://instagram.com https://www.instagram.com https://*.www.instagram.com https://graph.instagram.com https://*.graph.instagram.com https://*.cdninstagram.com https://api.instagram.com wss://www.instagram.com wss://edge-chat.instagram.com https://*.facebook.com https://*.fbcdn.net https://*.facebook.net chrome-extension://boadgeojelhgndaghljhdicfkmllpafd; worker-src 'self' https://www.instagram.com; frame-src 'self' https://instagram.com https://www.instagram.com https://staticxx.facebook.com https://www.facebook.com https://web.facebook.com https://connect.facebook.net https://m.facebook.com; object-src 'none'; upgrade-insecure-requests +X-Content-Type-Options: nosniff +X-XSS-Protection: 0 +Set-Cookie: urlgen="{\"108.161.19.190\": 54154}:1gheo0:HTp4zLA7oQExYE5gEPmQHl9b_YE"; Domain=.instagram.com; HttpOnly; Path=/; Secure +Set-Cookie: mid=XDeK5AAEAAEJlizJo249TbqNAGlf; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:11:48 GMT; Max-Age=315360000; Path=/; Secure +Set-Cookie: rur=PRN; Domain=.instagram.com; HttpOnly; Path=/; Secure +Set-Cookie: mcd=3; Domain=.instagram.com; expires=Sun, 07-Jan-2029 18:11:48 GMT; Max-Age=315360000; Path=/; Secure +Set-Cookie: csrftoken=xsUzwhNqgtwmqGk24MQRlBJG8xzIl3AS; Domain=.instagram.com; expires=Thu, 09-Jan-2020 18:11:48 GMT; Max-Age=31449600; Path=/; Secure +Connection: keep-alive +Content-Length: 36257 + + + + + + + + +@pk_spam on Instagram: “This post has two photos with alt text” + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/download-instagram-data.sh b/tests/download-instagram-data.sh index c761ad7..506df0a 100755 --- a/tests/download-instagram-data.sh +++ b/tests/download-instagram-data.sh @@ -11,6 +11,8 @@ urls=( 'https://www.instagram.com/p/BZWmpecjBwN/' 'https://www.instagram.com/explore/locations/109284789535230/' 'https://www.instagram.com/explore/locations/359000003/' + 'https://www.instagram.com/p/BsdjKytBZyx/' + 'https://www.instagram.com/p/BsdlOmLh_IX/' ) for url in ${urls[@]}; do