feat: main functions

This commit is contained in:
Ting-Jun Wang 2022-12-08 01:21:52 +08:00
commit 0501f46803
Signed by: snsd0805
GPG Key ID: 8DB0D22BC1217D33
2 changed files with 96 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@@ -0,0 +1 @@
images/*

95
main.py Normal file
View File

@@ -0,0 +1,95 @@
import requests
import os
import time
import json
from tqdm import tqdm
import sys
# Request headers passed to the Instagram API calls in this file.
# Original author's note: the IG App ID can be found in the browser.
header = {
    'x-ig-app-id': '936619743392459',
    'user-agent': (
        'Mozilla/5.0 (X11; Linux x86_64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/107.0.0.0 Safari/537.36'
    ),
}
def getLinks(username: str, timeout: float = 30.0):
    """Collect links for a user's images that are not on disk yet.

    Walks the user's feed page by page (15 posts per request) and takes the
    first image candidate of every post. Scanning stops as soon as a post
    whose file already exists under ``images/<username>/`` is met, or when
    the API reports that no more pages are available.

    Args:
        username: Instagram account name; also the sub-directory of
            ``images/`` that is checked for already-downloaded files.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of ``(url, filename)`` tuples. A carousel post contributes one
        entry per image named ``<code>_<index>``; any other post contributes
        a single entry named ``<code>``.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    print("Get images' links...'")

    # The profile endpoint reports the total media count, which is only used
    # to size the progress bar.
    response = requests.get(
        "https://www.instagram.com/api/v1/users/web_profile_info/",
        params={"username": username},
        headers=header,
        timeout=timeout,
    )
    response.raise_for_status()
    profile = json.loads(response.text)
    total = profile['data']['user']['edge_owner_to_timeline_media']['count']

    feed_url = "https://www.instagram.com/api/v1/feed/user/{}/username/?count=15".format(username)
    links = []
    next_max_id = None       # the first request carries no "max_id"
    found_existing = False   # set once an already-downloaded post is met

    # The context manager closes the bar so later output is not garbled.
    with tqdm(total=total) as progress:
        while not found_existing:
            # "next_max_id" from the previous page selects the next group of
            # medias; the first request has none.
            page_url = feed_url
            if next_max_id is not None:
                page_url += "&max_id={}".format(next_max_id)
            response = requests.get(page_url, headers=header, timeout=timeout)
            response.raise_for_status()
            data = json.loads(response.text)

            for item in data['items']:
                code = item['code']
                if 'carousel_media' in item:
                    # Only the first carousel image is checked to decide
                    # whether the whole post was downloaded before.
                    if os.path.isfile("images/{}/{}_0".format(username, code)):
                        found_existing = True
                        break
                    for index, image in enumerate(item['carousel_media']):
                        link = image['image_versions2']['candidates'][0]['url']
                        links.append((link, "{}_{}".format(code, index)))
                else:
                    if os.path.isfile("images/{}/{}".format(username, code)):
                        found_existing = True
                        break
                    link = item['image_versions2']['candidates'][0]['url']
                    links.append((link, code))

            progress.update(data['num_results'])

            if not data.get('more_available'):
                break
            next_max_id = data.get('next_max_id')
            if next_max_id is None:
                # Without a cursor the next request would repeat this page.
                break
            time.sleep(0.2)  # short pause between page requests

    if found_existing:
        print("Only need to get {} images".format(len(links)))
    return links
def checkDir(username: str):
    """Ensure the ``images/<username>`` directory exists.

    Creates ``images`` and the per-user sub-directory when missing; does
    nothing when they are already present.

    Args:
        username: Name of the sub-directory to create under ``images``.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(os.path.join('images', username), exist_ok=True)
def downloadImages(username, links: list, timeout: float = 30.0):
    """Download every link into ``images/<username>/``.

    Args:
        username: Sub-directory of ``images`` that receives the files; it
            must already exist.
        links: Sequence of ``(url, filename)`` pairs.
        timeout: Seconds to wait for each HTTP request before giving up.

    Raises:
        requests.HTTPError: If a download answers with an error status; no
            file is written for that link.
    """
    print("download images...")
    # The context manager closes the bar even if a download fails.
    with tqdm(total=len(links)) as progress:
        for url, filename in links:
            response = requests.get(url, timeout=timeout)
            # Never save an error page as an image: getLinks treats any
            # existing file as "already downloaded", so a bad file would
            # never be fetched again.
            response.raise_for_status()
            with open('images/{}/{}'.format(username, filename), 'wb') as fp:
                fp.write(response.content)
            progress.update(1)
            time.sleep(0.5)  # short pause between downloads
# Script entry point: expects exactly one argument, the account name.
if __name__ == "__main__":
    if len(sys.argv) == 2:
        username = sys.argv[1]
        # Make sure the target directory exists before anything is fetched.
        checkDir(username)
        pending = getLinks(username)
        downloadImages(username, pending)
    else:
        print("Usage: python main.py [username]")