96 lines
3.2 KiB
Python
96 lines
3.2 KiB
Python
import requests
|
|
import os
|
|
import time
|
|
import json
|
|
from tqdm import tqdm
|
|
import sys
|
|
|
|
# HTTP headers sent with every request to the Instagram web API.
header = {
    # The IG App ID can be looked up in the browser.
    "x-ig-app-id": "936619743392459",
    "user-agent": (
        "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/107.0.0.0 Safari/537.36"
    ),
}
|
|
|
|
def _itemLinks(username: str, item: dict):
    """Return the ``(url, filename)`` pairs for one feed item.

    Returns ``None`` when the item's first image file already exists under
    ``images/<username>/``, which the caller treats as the signal to stop.
    """
    code = item['code']

    if 'carousel_media' in item:
        # Carousel images are named "<code>_<index>"; index 0 stands in for
        # the whole post when checking what is already on disk.
        if os.path.isfile("images/{}/{}_0".format(username, code)):
            return None
        return [
            # NOTE(review): candidate 0 is presumably the preferred
            # rendition -- confirm against the API response.
            (image['image_versions2']['candidates'][0]['url'], "{}_{}".format(code, index))
            for index, image in enumerate(item['carousel_media'])
        ]

    if os.path.isfile("images/{}/{}".format(username, code)):
        return None
    return [(item['image_versions2']['candidates'][0]['url'], code)]


def getLinks(username: str):
    """Collect the image links of ``username`` that are not downloaded yet.

    The user's feed is requested page by page (15 items per request) until
    either the feed is exhausted or an item whose file already exists in
    ``images/<username>/`` is found.

    Args:
        username: Instagram account name.

    Returns:
        A list of ``(url, filename)`` tuples. ``filename`` is the post code,
        or ``"<code>_<index>"`` for each image of a carousel post.

    Raises:
        requests.HTTPError: if any API request answers with an error status.
        requests.RequestException: on connection problems or timeouts.
        KeyError: if a response lacks an expected field.
    """
    print("Get images' links...'")

    # The profile endpoint reports the total media count, which sizes the
    # progress bar. Passing the name via ``params`` URL-encodes it.
    response = requests.get(
        "https://www.instagram.com/api/v1/users/web_profile_info/",
        params={"username": username},
        headers=header,
        timeout=30,
    )
    response.raise_for_status()
    total = response.json()['data']['user']['edge_owner_to_timeline_media']['count']

    url = "https://www.instagram.com/api/v1/feed/user/{}/username/".format(username)
    # The first request has no "max_id"; later ones carry the "next_max_id"
    # returned with the previous page.
    params = {"count": 15}
    links = []
    reachedExisting = False

    # The context manager closes the bar before anything else is printed.
    with tqdm(total=total) as progress:
        while True:
            response = requests.get(url, params=params, headers=header, timeout=30)
            response.raise_for_status()
            data = response.json()

            for item in data['items']:
                found = _itemLinks(username, item)
                if found is None:
                    # This item is already on disk: stop here and return only
                    # the links collected so far.
                    reachedExisting = True
                    break
                links.extend(found)

            progress.update(data['num_results'])

            if reachedExisting or not data.get('more_available'):
                break

            nextMaxId = data.get('next_max_id')
            if nextMaxId is None:
                # Without a cursor there is no way to request another page.
                break
            params['max_id'] = nextMaxId

            # Short pause between page requests.
            time.sleep(0.2)

    if reachedExisting:
        print("Only need to get {} images".format(len(links)))

    return links
|
|
|
|
def checkDir(username: str):
    """Ensure the download directory ``images/<username>`` exists.

    Creates ``images`` and the per-user subdirectory when they are missing.
    Calling it again for an existing directory does nothing.

    Args:
        username: name of the per-user subdirectory under ``images``.
    """
    # makedirs(exist_ok=True) creates missing parents in one call and avoids
    # the check-then-create race of isdir() followed by mkdir().
    os.makedirs("images/{}".format(username), exist_ok=True)
|
|
|
|
def downloadImages(username, links: list):
    """Download every image in ``links`` into ``images/<username>/``.

    Args:
        username: name of the subdirectory under ``images`` to write into;
            the directory must already exist.
        links: list of ``(url, filename)`` tuples.

    Raises:
        requests.HTTPError: if a download answers with an error status.
        requests.RequestException: on connection problems or timeouts.
    """
    print("download images...")

    # The context manager closes the progress bar even if a download fails.
    with tqdm(total=len(links)) as progress:
        for url, name in links:
            response = requests.get(url, timeout=30)
            # Fail before opening the file so an HTTP error body is never
            # saved to disk as if it were an image.
            response.raise_for_status()

            with open('images/{}/{}'.format(username, name), 'wb') as fp:
                fp.write(response.content)

            progress.update(1)
            # Pause between downloads.
            time.sleep(0.5)
|
|
|
|
if __name__ == "__main__":
    # Exactly one command-line argument (the account name) is expected.
    if len(sys.argv) == 2:
        username = sys.argv[1]
        # Prepare the output directory, then collect and fetch the images.
        checkDir(username)
        links = getLinks(username)
        downloadImages(username, links)
    else:
        print("Usage: python main.py [username]")
|