Skip to content

Commit d4618e8

Browse files
committed
[InstagramEmbedBridge] Add a new Instagram Bridge based on the embed url
This bridge is a simplified version of InstagramBridge that is based on the embed page of a profile. It only supports the Username mode and is limited to the last 6 media items. At least for now, it does not need any cookies and works from a server IP without limitation.
1 parent 3ff2ef9 commit d4618e8

File tree

1 file changed

+224
-0
lines changed

1 file changed

+224
-0
lines changed

bridges/InstagramEmbedBridge.php

Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
<?php
2+
3+
class InstagramEmbedBridge extends BridgeAbstract
4+
{
5+
// Bridge metadata.
const MAINTAINER = 'sysadminstory';
const NAME = 'Instagram Embed Bridge';
// Base URL; the profile, post and embed URLs used by this bridge are built by appending to it.
const URI = 'https://www.instagram.com/';
const DESCRIPTION = 'Returns the newest Instagram from a specific username using the Instagram Embed page';
9+
10+
// Input parameters of the bridge, grouped by context.
const PARAMETERS = [
    // 'Username' context: the account whose embed page is read.
    'Username' => [
        'u' => [
            'name' => 'username',
            'exampleValue' => 'aesoprockwins',
            'required' => true,
        ],
    ],
    // Options available in every context.
    'global' => [
        // Restricts the feed to one kind of post; 'all' keeps everything.
        'media_type' => [
            'name' => 'Media type',
            'type' => 'list',
            'required' => false,
            'values' => [
                'All' => 'all',
                'Video' => 'video',
                'Picture' => 'picture',
                'Multiple' => 'multiple',
            ],
            'defaultValue' => 'all',
        ],
        // When checked, items use the media's display URL instead of the post's "media?size=l" URL.
        'direct_links' => [
            'name' => 'Use direct media links',
            'type' => 'checkbox',
        ],
    ],
];
38+
39+
// Sample profile URLs mapped to the parameters detectParameters() is expected to return for them.
const TEST_DETECT_PARAMETERS = [
    'https://www.instagram.com/metaverse' => [
        'context' => 'Username',
        'u' => 'metaverse',
    ],
    'https://instagram.com/metaverse' => [
        'context' => 'Username',
        'u' => 'metaverse',
    ],
    'http://www.instagram.com/metaverse' => [
        'context' => 'Username',
        'u' => 'metaverse',
    ],
];
44+
45+
/**
 * Fetches the embed page of the requested profile and appends one feed item
 * per listed media to $this->items.
 *
 * The embed page carries its data as an escaped JSON string ("contextJSON")
 * inside an inline script; that string is extracted, unescaped and decoded,
 * and the media list is read from context->graphql_media.
 *
 * @throws \Exception when the inline script or the embedded JSON cannot be found
 */
public function collectData()
{
    $username = $this->getInput('u');
    $directLink = (bool) $this->getInput('direct_links');
    $mediaType = $this->getInput('media_type');

    // Get the HTML code of the profile embed page, and extract the JSON of it.
    // The username is encoded the same way getURI() encodes it.
    $html = getSimpleHTMLDOMCached(self::URI . urlencode($username) . '/embed/');
    $body = $html->find('body', 0);
    $script = $body ? $body->find('script', 3) : null;
    if (!$script) {
        throw new \Exception('Unable to find the embed data script for ' . $username);
    }

    $regex = '#"contextJSON":"(.*)"}\]\],\["NavigationMetrics"#m';
    if (preg_match($regex, $script->innertext, $matches) !== 1) {
        throw new \Exception('Unable to extract the embedded JSON for ' . $username);
    }

    // The captured value is a string literal taken from the script, so it is unescaped before decoding
    $jsonData = Json::decode(stripcslashes($matches[1]), false);
    $medias = isset($jsonData->context->graphql_media) ? $jsonData->context->graphql_media : [];

    // Caption decoration: turn @mentions and #hashtags into links
    $pattern = ['/\@([\w\.]+)/', '/#([\w\.]+)/'];
    $replace = [
        '<a href="https://www.instagram.com/$1">@$1</a>',
        '<a href="https://www.instagram.com/explore/tags/$1">#$1</a>',
    ];

    foreach ($medias as $graphqlMedia) {
        $media = $graphqlMedia->shortcode_media;
        if (!$this->isWantedMediaType($media, $mediaType)) {
            continue;
        }

        $item = [];
        $item['uri'] = self::URI . 'p/' . $media->shortcode . '/';

        if (isset($media->owner->username)) {
            $item['author'] = $media->owner->username;
        }

        $textContent = $this->getTextContent($media);

        // NOTE(review): this used to be ($media->is_video ? '' : '') . $textContent, which
        // prepends an empty string in both cases. A video prefix may have been intended - confirm.
        $item['title'] = $textContent;

        // Keep only the first wrapped line (at most 120 characters) of long captions
        $titleLinePos = strpos(wordwrap($item['title'], 120), "\n");
        if ($titleLinePos !== false) {
            $item['title'] = substr($item['title'], 0, $titleLinePos) . '...';
        }

        if ($directLink) {
            $mediaURI = $media->display_url;
        } else {
            $mediaURI = self::URI . 'p/' . $media->shortcode . '/media?size=l';
        }

        switch ($media->__typename) {
            case 'GraphSidecar':
                $data = $this->getInstagramSidecarData($item['uri'], $item['title'], $media, $textContent);
                $item['content'] = $data[0];
                $item['enclosures'] = $data[1];
                break;
            case 'GraphImage':
                // The title comes from the caption, so it is escaped before going into the alt attribute
                $item['content'] = '<a href="' . htmlentities($item['uri']) . '" target="_blank">';
                $item['content'] .= '<img src="' . htmlentities($mediaURI) . '" alt="' . htmlentities($item['title']) . '" />';
                $item['content'] .= '</a><br><br>' . nl2br(preg_replace($pattern, $replace, htmlentities($textContent)));
                $item['enclosures'] = [$mediaURI];
                break;
            case 'GraphVideo':
                $data = $this->getInstagramVideoData($item['uri'], $mediaURI, $media, $textContent);
                $item['content'] = $data[0];
                // Without direct links the enclosure is the post's media URL, not the video file
                $item['enclosures'] = $directLink ? $data[1] : [$mediaURI];
                $item['thumbnail'] = $mediaURI;
                break;
            default:
                // Unknown media types still produce an item, just without content
                break;
        }
        $item['timestamp'] = $media->taken_at_timestamp;

        $this->items[] = $item;
    }
}

// Tells whether a media passes the 'media_type' filter; 'all' and unrecognised values keep everything.
private function isWantedMediaType($media, $mediaType)
{
    switch ($mediaType) {
        case 'video':
            return $media->__typename === 'GraphVideo' && $media->is_video;
        case 'picture':
            return $media->__typename === 'GraphImage';
        case 'multiple':
            return $media->__typename === 'GraphSidecar';
        default:
            return true;
    }
}
140+
141+
142+
/**
 * Feed name: "<username> - Instagram Embed Bridge" when the 'u' input is set,
 * otherwise whatever the parent class returns.
 */
public function getName()
{
    $username = $this->getInput('u');
    if (is_null($username)) {
        return parent::getName();
    }

    return $username . ' - Instagram Embed Bridge';
}
150+
151+
/**
 * Feed URI: the profile URL of the 'u' input (URL-encoded) when it is set,
 * otherwise whatever the parent class returns.
 */
public function getURI()
{
    $username = $this->getInput('u');

    return is_null($username)
        ? parent::getURI()
        : self::URI . urlencode($username) . '/';
}
158+
159+
/**
 * Returns the trimmed caption of a media, or '(no text)' when it has none.
 *
 * The caption is read from the first entry of edge_media_to_caption->edges.
 * A media without that structure (or with an empty edge list) yields the
 * placeholder instead of raising an error.
 *
 * @param object $media Decoded media object
 * @return string
 */
protected function getTextContent($media)
{
    if (isset($media->edge_media_to_caption->edges[0]->node->text)) {
        return trim($media->edge_media_to_caption->edges[0]->node->text);
    }

    return '(no text)';
}
168+
169+
/**
 * Builds the HTML content and the enclosure list of a video post.
 *
 * URLs are escaped before being written into attributes, and the poster image
 * is set on the <video> element (the attribute has no effect on <source>).
 *
 * @param string $uri         Post URL (not used here; kept for signature compatibility)
 * @param string $mediaURI    Image URL used as poster and as fallback <img>
 * @param object $mediaInfo   Decoded media object; its video_url is read
 * @param string $textContent Caption, appended escaped below the video
 * @return array [string $content, string[] $enclosures]
 */
protected function getInstagramVideoData($uri, $mediaURI, $mediaInfo, $textContent)
{
    $videoUrl = htmlentities($mediaInfo->video_url);
    $posterUrl = htmlentities($mediaURI);

    $content = '<video controls poster="' . $posterUrl . '">';
    $content .= '<source src="' . $videoUrl . '" type="video/mp4">';
    // Fallback image, placed inside the <video> element
    $content .= '<img src="' . $posterUrl . '" alt="">';
    $content .= '</video><br>';
    $content .= '<br>' . nl2br(htmlentities($textContent));

    // The enclosure keeps the raw (unescaped) URL
    return [$content, [$mediaInfo->video_url]];
}
180+
181+
/**
 * Builds the HTML content and the enclosure list of a sidecar post
 * (a post which has multiple media).
 *
 * Each child of edge_sidecar_to_children is rendered once, as a <video> when
 * it is a video and as a linked <img> otherwise. URLs and the post title are
 * escaped before being written into attributes.
 *
 * @param string $uri         Post URL (not used here; kept for signature compatibility)
 * @param string $postTitle   Title used as alt text of the images
 * @param object $mediaInfo   Decoded media object
 * @param string $textContent Caption, appended escaped below the media
 * @return array [string $content, string[] $enclosures]
 */
protected function getInstagramSidecarData($uri, $postTitle, $mediaInfo, $textContent)
{
    $enclosures = [];
    $content = '';
    $altText = htmlentities($postTitle);

    foreach ($mediaInfo->edge_sidecar_to_children->edges as $edge) {
        $child = $edge->node;
        $mediaUrl = $child->is_video ? $child->video_url : $child->display_url;

        // Skip a URL that was already added
        if (in_array($mediaUrl, $enclosures, true)) {
            continue;
        }
        // The enclosure keeps the raw URL; only the markup gets the escaped one
        $enclosures[] = $mediaUrl;
        $safeUrl = htmlentities($mediaUrl);

        if ($child->is_video) {
            $content .= '<video controls><source src="' . $safeUrl . '" type="video/mp4"></video><br>';
        } else {
            $content .= '<a href="' . $safeUrl . '" target="_blank">';
            $content .= '<img src="' . $safeUrl . '" alt="' . $altText . '" />';
            $content .= '</a><br>';
        }
    }
    $content .= '<br>' . nl2br(htmlentities($textContent));

    return [$content, $enclosures];
}
208+
209+
/**
 * Detects the bridge parameters from an Instagram profile URL.
 *
 * The first path segment is taken as the username. It stops at '/', '?', '#'
 * or a newline, so a query string or fragment never ends up in the username.
 * URLs whose first segment is a known non-profile section (posts, reels,
 * explore pages, ...) are not treated as profiles.
 *
 * @param string $url
 * @return array|null ['context' => 'Username', 'u' => <username>] or null when the URL is not a profile URL
 */
public function detectParameters($url)
{
    // By username
    $regex = '/^(https?:\/\/)?(www\.)?instagram\.com\/([^\/?#\n]+)/';
    if (preg_match($regex, $url, $matches) !== 1) {
        return null;
    }

    $username = urldecode($matches[3]);

    // First path segments that lead to something other than a profile page
    $nonProfileSegments = ['p', 'reel', 'reels', 'tv', 'explore', 'stories', 'accounts'];
    if (in_array(strtolower($username), $nonProfileSegments, true)) {
        return null;
    }

    return [
        'context' => 'Username',
        'u' => $username,
    ];
}
224+
}

0 commit comments

Comments
 (0)