简体   繁体   中英

How to save photos using the Instagram API and Python

I'm using the Instagram API to obtain photos taken at a particular location using the Python 3 code below:

import urllib.request

# Fetch the raw JSON search result for media near the given coordinates.
# The token lives in its own variable so the URL string literal is well-formed
# (the original closed the string before the token, which is a syntax error).
access_token = "ACCESS TOKEN"  # Replace with your own Instagram API access token
wp = urllib.request.urlopen("https://api.instagram.com/v1/media/search?lat=48.858844&lng=2.294351&access_token=" + access_token)
pw = wp.read()
print(pw)

This allows me to retrieve all the photos. I wanted to know how I can save these on my computer.

An additional question I have is, is there any limit to the number of images returned by running the above? Thanks!

Eventually came up with this. In case anybody needs it, here you go:

#This Python Script will download 10,000 images from a specified location. 
# 10k images takes approx 15-20 minutes, approx 700 MB.


import urllib, json, requests
import time, csv

# Python 2/3 compatible names for the two urllib helpers used below.
try:
    from urllib.request import urlopen, urlretrieve  # Python 3
except ImportError:
    from urllib import urlopen, urlretrieve  # Python 2

print("time.time(): %f " % time.time())  # Current epoch time (Unix timestamp)
print(time.asctime(time.localtime(time.time())))  # Current time in human-readable format

# lat='48.858844'  # Latitude of the center search coordinate. If used, lng is required.
# lng='2.294351'   # Longitude of the center search coordinate. If used, lat is required.

# Brooklyn Brewery
lat = '40.721645'
lng = '-73.957258'

distance = '5000'  # Default is 1 km (distance=1000); max distance is 5 km.
access_token = '<YOUR TOKEN HERE>'  # Access token to use the API

# The default time span is set to 5 days. The time span must not exceed 7 days.
# min_timestamp: a Unix timestamp. All media returned will be taken later than this timestamp.
# max_timestamp: a Unix timestamp. All media returned will be taken earlier than this timestamp.

# Settings for the verification dataset of images:
# lat, lng = 40.721645, -73.957258; distance = 5000; default time span (5 days)

max_images = 10000  # Stop once this many images have been saved
images = set()  # IDs of images already saved, used to skip duplicates
total_count = 0  # Number of images saved so far
timestamp_last_image = None  # created_time of the oldest item seen; None until the first page is read

# Example request URLs (paste into a JSON viewer such as
# http://www.jsoneditoronline.org/ to explore the response structure):
#   https://api.instagram.com/v1/media/search?lat=48.858844&lng=2.294351&distance=5000&access_token=<YOUR TOKEN HERE>
base_url = ('https://api.instagram.com/v1/media/search?lat=' + lat
            + '&lng=' + lng
            + '&distance=' + distance
            + '&access_token=' + access_token
            + '&count=100')

# Images are returned in reverse order, i.e. most recent to least recent.
# A max of 100 images are returned per request; to get the next set, the
# created_time of the last (least recent) item is sent as max_timestamp.
# Consecutive pages can overlap, so already-saved image IDs are skipped.
# Every saved image URL is also written to instaUrlFile.txt so the list can be
# passed directly to another service (e.g. the Face++ API).
with open("instaUrlFile.txt", "w") as instaUrlFile:
    while total_count < max_images:
        url = base_url
        if timestamp_last_image is not None:
            url += '&max_timestamp=' + timestamp_last_image

        response = urlopen(url)
        try:
            data = json.loads(response.read().decode("utf-8"))
        finally:
            response.close()

        items = data.get("data") or []
        if not items:
            # Nothing (more) to download, or the API returned an error payload.
            break

        new_in_batch = 0
        for img in items:
            if total_count >= max_images:
                break
            if img['id'] in images:
                continue
            images.add(img['id'])
            total_count += 1
            new_in_batch += 1

            image_url = img["images"]["standard_resolution"]["url"]
            # Download the image by retrieving it from its URL.
            urlretrieve(image_url, "C://Instagram/" + str(total_count) + ".jpg")
            instaUrlFile.write(image_url + "\n")
            print("IMAGE WITH name " + str(total_count)
                  + ".jpg was just saved with created time "
                  + str(img["created_time"]))

        if new_in_batch == 0:
            # The whole page was duplicates; asking again with the same
            # max_timestamp would loop forever.
            break

        # Continue from the least recent item of this page.
        timestamp_last_image = str(items[-1]["created_time"])

Here is code that saves all the images. I can't test it because I don't have an Instagram token.

import urllib, json


# Python 2/3 compatible names for the two urllib helpers used below.
try:
    from urllib.request import urlopen, urlretrieve  # Python 3
except ImportError:
    from urllib import urlopen, urlretrieve  # Python 2

access_token = "ACCESS TOKEN"  # Put your ACCESS TOKEN here

# Search for media near the given coordinates. The token is inserted as-is;
# wrapping it in quotes would make the quotes part of the token value.
search_results = urlopen(
    "https://api.instagram.com/v1/media/search"
    "?lat=48.858844&lng=2.294351&access_token=%s" % access_token
)
try:
    # Parse the response body; json.loads needs the text, not the response object.
    instagram_answer = json.loads(search_results.read().decode("utf-8"))
finally:
    search_results.close()

for row in instagram_answer['data']:
    if row['type'] == "image":  # Skip non-image media
        # Save under the media ID in the current directory, not under the URL.
        filename = row['id'] + ".jpg"
        url = row['images']['standard_resolution']['url']
        urlretrieve(url, filename)  # Save the image

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM