# InstaDownloader/main.py

import requests
import os
import time
import json
from tqdm import tqdm
import sys

# Headers attached to the Instagram web API requests issued in getLinks().
# The 'x-ig-app-id' value can be replaced with the IG App ID you find in your
# own browser if this one stops working.
_IG_APP_ID = '936619743392459'
_USER_AGENT = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36'

header = dict([
    ('x-ig-app-id', _IG_APP_ID),
    ('user-agent', _USER_AGENT),
])

def getLinks(username: str, timeout: float = 30):
    """Collect the image URLs of a user's posts that are not yet on disk.

    The user's feed is fetched page by page (15 items per request). For every
    post the first candidate URL of each image is recorded. Paging stops when
    the API reports no more items, or as soon as a post is found whose first
    image already exists under ``images/<username>/``.

    Args:
        username: Instagram username whose feed is read.
        timeout: Seconds passed as ``timeout`` to every ``requests.get`` call
            so a stalled connection cannot hang the script forever.

    Returns:
        A list of ``(url, filename)`` tuples. ``filename`` is the post code,
        or ``"<code>_<index>"`` for each image of a carousel post.

    Raises:
        requests.HTTPError: If Instagram answers with an error status.
    """
    print("Get images' links...'")

    # Look up the user's total media count; it is only used to size the
    # progress bar.
    profile = requests.get(
        "https://www.instagram.com/api/v1/users/web_profile_info/",
        params={'username': username},
        headers=header,
        timeout=timeout,
    )
    profile.raise_for_status()
    total = json.loads(profile.text)['data']['user']['edge_owner_to_timeline_media']['count']

    url = "https://www.instagram.com/api/v1/feed/user/{}/username/".format(username)
    links = []
    found_existing = False

    # The first request carries no "max_id"; every later request passes the
    # "next_max_id" returned by the previous page.
    next_max_id = None

    # Using the bar as a context manager closes it even if a request fails.
    with tqdm(total=total) as progress:
        while True:
            # Query values go through `params` so they are URL-encoded and
            # never interpreted as str.format() placeholders.
            params = {'count': 15}
            if next_max_id is not None:
                params['max_id'] = next_max_id
            response = requests.get(url, params=params, headers=header, timeout=timeout)
            response.raise_for_status()
            data = json.loads(response.text)

            for item in data['items']:
                code = item['code']
                if 'carousel_media' in item:
                    # An existing first carousel image means this post (and
                    # everything after it in the feed) was handled by an
                    # earlier run, so stop collecting.
                    if os.path.isfile("images/{}/{}_0".format(username, code)):
                        found_existing = True
                        break
                    for index, image in enumerate(item['carousel_media']):
                        link = image['image_versions2']['candidates'][0]['url']
                        links.append((link, "{}_{}".format(code, index)))
                else:
                    if os.path.isfile("images/{}/{}".format(username, code)):
                        found_existing = True
                        break
                    link = item['image_versions2']['candidates'][0]['url']
                    links.append((link, code))

            progress.update(data['num_results'])

            # Stop when an already-downloaded post was reached, when the API
            # reports nothing more, or when it gave no cursor for a next page.
            next_max_id = data.get('next_max_id')
            if found_existing or not data.get('more_available') or next_max_id is None:
                break

            # Small pause between page requests.
            time.sleep(0.2)

    if found_existing:
        print("Only need to get {} images".format(len(links)))

    return links

def checkDir(username: str):
    """Ensure the download directory ``images/<username>`` exists.

    Creates ``images`` and the per-user subdirectory when they are missing;
    does nothing when they are already present.

    Args:
        username: Name of the per-user subdirectory under ``images``.
    """
    # makedirs creates the parent 'images' directory as needed, and
    # exist_ok=True removes the check-then-create race of isdir() + mkdir().
    os.makedirs('images/' + username, exist_ok=True)

def downloadImages(username, links: list, timeout: float = 30):
    """Download every link into ``images/<username>/``.

    Args:
        username: Name of the subdirectory under ``images`` to write into.
            The directory must already exist.
        links: Sequence of ``(url, filename)`` pairs; each URL is fetched and
            its body is written to ``images/<username>/<filename>``.
        timeout: Seconds passed as ``timeout`` to every ``requests.get`` call
            so a stalled connection cannot hang the script forever.
    """
    print("download images...")

    # Using the bar as a context manager closes it even if a download raises.
    with tqdm(total=len(links)) as progress:
        for link in links:
            url, filename = link[0], link[1]
            response = requests.get(url, timeout=timeout)
            if response.ok:
                with open('images/{}/{}'.format(username, filename), 'wb') as fp:
                    fp.write(response.content)
            else:
                # Do not save an HTTP error body as if it were an image;
                # tqdm.write prints without corrupting the progress bar.
                tqdm.write("Skip {}: HTTP {}".format(filename, response.status_code))
            progress.update(1)

            # Pause between downloads.
            time.sleep(0.5)

if __name__ == "__main__":
    # Exactly one command-line argument is expected: the Instagram username.
    if len(sys.argv) != 2:
        # Report misuse on stderr and exit non-zero so a calling shell or
        # script can detect that nothing was downloaded.
        print("Usage: python main.py [username]", file=sys.stderr)
        sys.exit(1)

    username = sys.argv[1]
    # The target directory must exist before getLinks() checks for already
    # downloaded files and before downloadImages() writes into it.
    checkDir(username)
    downloadImages(username, getLinks(username))