feat: main functions
This commit is contained in:
commit
0501f46803
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
images/*
|
||||
95
main.py
Normal file
95
main.py
Normal file
@ -0,0 +1,95 @@
|
||||
import requests
|
||||
import os
|
||||
import time
|
||||
import json
|
||||
from tqdm import tqdm
|
||||
import sys
|
||||
|
||||
header = {
|
||||
# 'x-ig-app-id': 'You should find IG App ID on the browser',
|
||||
'x-ig-app-id': '936619743392459',
|
||||
'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'
|
||||
}
|
||||
|
||||
def getLinks(username: str):
|
||||
print("Get images' links...'")
|
||||
|
||||
# find this user's total media counts
|
||||
# progress bar
|
||||
response = requests.get("https://www.instagram.com/api/v1/users/web_profile_info/?username="+username, headers=header)
|
||||
progress = tqdm(total=json.loads(response.text)['data']['user']['edge_owner_to_timeline_media']['count']) # media counts
|
||||
|
||||
url = "https://www.instagram.com/api/v1/feed/user/{}/username/?count=15".format(username)
|
||||
links = []
|
||||
|
||||
# get pictures' links
|
||||
load_count = 0
|
||||
flag = True
|
||||
while flag:
|
||||
# We should use "next_max_id" to requests the next group of medias
|
||||
# But we will not have "next_max_id" in the first requst
|
||||
if load_count == 0:
|
||||
response = requests.get(url, headers=header)
|
||||
url += "&max_id={}"
|
||||
else:
|
||||
response = requests.get(url.format(data['next_max_id']), headers=header)
|
||||
|
||||
# get links from responded JSON
|
||||
data = json.loads(response.text)
|
||||
for item in data['items']:
|
||||
if 'carousel_media' in item:
|
||||
code = item['code']
|
||||
|
||||
# if the picture exist, break this loop and return links that have images haven't been downloaded
|
||||
if os.path.isfile("images/{}/{}_0".format(username, code)):
|
||||
flag = False
|
||||
break
|
||||
for index, image in enumerate(item['carousel_media']):
|
||||
link = image['image_versions2']['candidates'][0]['url']
|
||||
links.append((link, "{}_{}".format(code, index)))
|
||||
else:
|
||||
link = item['image_versions2']['candidates'][0]['url']
|
||||
code = item['code']
|
||||
if os.path.isfile("images/{}/{}".format(username, code)):
|
||||
flag = False
|
||||
break
|
||||
links.append((link, code))
|
||||
|
||||
# update progress bar
|
||||
progress.update(data['num_results'])
|
||||
|
||||
# check whether need to break
|
||||
if data['more_available'] == False:
|
||||
break
|
||||
|
||||
load_count += 1
|
||||
time.sleep(0.2)
|
||||
|
||||
if not flag:
|
||||
print("Only need to get {} images".format(len(links)))
|
||||
|
||||
return links
|
||||
|
||||
def checkDir(username: str):
|
||||
if not os.path.isdir('images'):
|
||||
os.mkdir('images')
|
||||
if not os.path.isdir('images/'+username):
|
||||
os.mkdir('images/'+username)
|
||||
|
||||
def downloadImages(username, links: list):
|
||||
print("download images...")
|
||||
progress = tqdm(total=len(links))
|
||||
for link in links:
|
||||
response = requests.get(link[0])
|
||||
with open('images/{}/{}'.format(username, link[1]), 'wb') as fp:
|
||||
fp.write(response.content)
|
||||
progress.update(1)
|
||||
time.sleep(0.5)
|
||||
|
||||
if __name__ == "__main__":
|
||||
if len(sys.argv) != 2:
|
||||
print("Usage: python main.py [username]")
|
||||
else:
|
||||
username = sys.argv[1]
|
||||
checkDir(username)
|
||||
downloadImages( username, getLinks(username) )
|
||||
Loading…
Reference in New Issue
Block a user