I've tried to make a short mass-downloader script in Python to store lists of images locally.
It works perfectly fine for http image URLs, but fails to download any image with an https URL. The lines of code in question are:
import urllib.request
# Download `url` and store it locally as `filename`. According to the report
# that follows, this call raises "HTTP Error 403: Forbidden" for the https
# image URLs that were tried.
urllib.request.urlretrieve(url, filename)
For instance, https://cdn.discordapp.com/attachments/299398003486097412/303580387786096641/FB_IMG_1490534565948.jpg results in HTTP Error 403: Forbidden, as does any other https image.
This leaves me with two questions:
https urls if they are basically just files?
Here's the stack trace:
Traceback (most recent call last):
File "img_down.py", line 52, in <module>
main()
File "img_down.py", line 38, in main
save_img(d, l)
File "img_down.py", line 49, in save_img
stream = read_img(url)
File "img_down.py", line 42, in read_img
with urllib.request.urlopen(url) as response:
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 223, in urlopen
return opener.open(url, data, timeout)
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 532, in open
response = meth(req, response)
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 642, in http_response
'http', request, response, code, msg, hdrs)
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 570, in error
return self._call_chain(*args)
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 504, in _call_chain
result = func(*args)
File "D:\Users\Jan\AppData\Local\Programs\Python\Python36-32\lib\urllib\request.py", line 650, in http_error_default
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 403: Forbidden
Hope this helps.
import requests

# Download before touching the disk, and fail loudly on an HTTP error status
# (such as 403): otherwise the server's error page - or an empty file - would
# be saved under the image's name.
response = requests.get('https://url/to/image.jpg')
response.raise_for_status()
with open('FB_IMG_1490534565948.jpg', 'wb') as f:
    f.write(response.content)
May help you...
I made this script but never finished it (the final intention was to have it run automatically every day).
But so as not to be the kind of person who postpones answers, here's the piece of code you're interested in:
def downloadimg(self):
    """Save today's image into IMGFOLDER under a date-prefixed name.

    The file is named ``<YYYYMMDD>_<last segment of the image URL>``.
    """
    from datetime import datetime

    source_url = self.getdailyimg()
    date_prefix = datetime.today().strftime('%Y%m%d')
    target_name = date_prefix + '_' + source_url.rsplit('/', 1)[-1]
    with open(IMGFOLDER + target_name, 'wb') as out:
        out.write(self.readimg(source_url))
Hope it helps you out!
Edited
PS: using python3
Full script
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
# Folder that receives the downloaded images: "images/" inside the current
# working directory. The trailing slash matters because file names are
# appended to this string directly.
IMGFOLDER = os.getcwd() + '/images/'
class BingImage(object):
    """Download Bing's image of the day into ``IMGFOLDER``.

    Creating an instance immediately attempts the download. On success
    ``LASTIMG`` holds the path of the saved file; if anything goes wrong it
    stays ``None`` so the caller can fall back to another image.
    """

    BINGURL = 'http://www.bing.com/'
    JSONURL = 'HPImageArchive.aspx?format=js&idx=0&n=1&mkt=pt-BR'
    # Some servers answer urllib's default User-Agent with "403 Forbidden",
    # so every request identifies itself as a browser instead.
    USERAGENT = 'Mozilla/5.0'
    LASTIMG = None

    def __init__(self):
        super(BingImage, self).__init__()
        try:
            self.downloadimg()
        except Exception:
            # Deliberately best-effort: a failed download just leaves LASTIMG
            # as None. Unlike a bare ``except`` this still lets
            # KeyboardInterrupt and SystemExit propagate.
            pass

    def getdailyimg(self):
        """Return the absolute URL of the current Bing image."""
        import json

        rawjson = self.readimg(self.BINGURL + self.JSONURL).decode('utf-8')
        parsedjson = json.loads(rawjson)
        # The first character of the 'url' field is dropped (it is expected
        # to be a slash) because BINGURL already ends with '/'.
        return self.BINGURL + parsedjson['images'][0]['url'][1:]

    def downloadimg(self):
        """Fetch the current image and store it as ``<YYYYMMDD>_<name>``.

        Sets ``LASTIMG`` to the path of the written file.
        """
        import datetime

        imgurl = self.getdailyimg()
        imgfilename = datetime.datetime.today().strftime('%Y%m%d') + '_' + imgurl.split('/')[-1]
        # Download before opening the target so a failed request cannot leave
        # an empty file behind for GetRandomImg-style fallbacks to pick up.
        imgdata = self.readimg(imgurl)
        # The folder does not exist on a first run; create it on demand.
        self.checkfolder()
        with open(IMGFOLDER + imgfilename, 'wb') as f:
            f.write(imgdata)
        self.LASTIMG = IMGFOLDER + imgfilename

    def checkfolder(self):
        """Create the image folder (and missing parents) if necessary."""
        os.makedirs(os.path.dirname(IMGFOLDER), exist_ok=True)

    def readimg(self, url):
        """Return the raw bytes served at *url*.

        Raises whatever ``urllib.request.urlopen`` raises (for example
        ``urllib.error.HTTPError`` for an error status).
        """
        from urllib.request import Request, urlopen

        request = Request(url, headers={'User-Agent': self.USERAGENT})
        with urlopen(request) as response:
            return response.read()
def DefineBackground(src):
    """Set the GNOME desktop background to the image file at *src*.

    Does nothing on platforms other than Linux. The call is best-effort: a
    missing ``gsettings`` binary or a failing command is ignored.
    """
    import platform
    import subprocess

    if platform.system() != 'Linux':
        return
    # Pass the arguments as a list instead of a shell string, so paths with
    # spaces, '&' or other shell metacharacters reach gsettings unchanged and
    # cannot be interpreted as additional commands.
    command = [
        'gsettings', 'set', 'org.gnome.desktop.background', 'picture-uri',
        'file://' + src,
    ]
    try:
        subprocess.run(command, check=False)
    except OSError:
        # gsettings is not installed or not executable; nothing to set.
        pass
def GetRandomImg():
    """Pick one of the files stored directly in IMGFOLDER at random.

    Returns the chosen file name prefixed with IMGFOLDER. Sub-folders are
    not searched.
    """
    import random

    # Only the first entry produced by os.walk is used: it lists IMGFOLDER
    # itself. A missing folder yields no entries, hence the empty default.
    _, _, candidates = next(os.walk(IMGFOLDER), (None, None, []))
    return IMGFOLDER + random.choice(candidates)
if __name__ == '__main__':
    # Try to fetch today's image from Bing; LASTIMG stays falsy on failure.
    bing = BingImage()
    # Prefer the fresh download, otherwise reuse a random earlier image.
    wallpaper = bing.LASTIMG if bing.LASTIMG else GetRandomImg()
    DefineBackground(wallpaper)
    print('Background defined')
Here is a more recent answer to that question. I have used OpenCV to store the images, together with urllib's request module. It also handles batch operations and can be reused as common code.
import numpy as np
from urllib.request import urlopen
import cv2
import os
current_path = os.getcwd()
# Create the output folder up front. An OSError (typically "already exists")
# is ignored on purpose; anything else - including Ctrl-C - is no longer
# swallowed the way a bare ``except`` would.
try:
    os.mkdir(current_path + "\\Downloaded\\")
except OSError:
    pass
def downloadImage(url):
    """Download the image at *url* into the ``Downloaded`` folder.

    The image is decoded and re-encoded with OpenCV as a 3-channel colour
    image, and named after the last path segment of the URL. Any failure is
    printed instead of raised so that a batch of URLs keeps going.
    """
    import posixpath
    from urllib.parse import urlsplit
    from urllib.request import Request

    try:
        print("Downloading %s" % (url))
        # Use only the URL path for the name: a query string would end up in
        # the file name and hide the extension imwrite uses to pick a format.
        image_name = posixpath.basename(urlsplit(str(url)).path)
        # Some servers reject urllib's default User-Agent with HTTP 403.
        request = Request(str(url), headers={'User-Agent': 'Mozilla/5.0'})
        with urlopen(request) as resp:
            payload = resp.read()
        image = np.asarray(bytearray(payload), dtype="uint8")
        image = cv2.imdecode(image, cv2.IMREAD_COLOR)
        if image is None:
            # imdecode signals undecodable data by returning None.
            raise ValueError("Could not decode an image from %s" % (url))
        # Build the path portably and make sure the folder exists, so the
        # function also works when the start-up mkdir did not succeed.
        target_dir = os.path.join(current_path, "Downloaded")
        os.makedirs(target_dir, exist_ok=True)
        target = os.path.join(target_dir, image_name)
        if not cv2.imwrite(target, image):
            raise OSError("Could not write %s" % (target))
    except Exception as error:
        print(error)
if __name__ == '__main__':
    # Batch of image URLs to fetch; each one is downloaded in turn.
    urls = [
        "https://www.google.com/logos/doodles/2019/st-georges-day-2019-6234830302871552.20-2x.png",
    ]
    for image_url in urls:
        downloadImage(image_url)
Referring to the linked post: you may still face an “HTTP Error 403: Forbidden” error even after using the solution from @Thiago Cardoso, because the server does not know where the request is coming from. Some websites verify the User-Agent in order to block abnormal visits, so you should send the User-Agent of a (fake) browser with your request.
So I modified the readimg method as follows:
def readimg(self, img_url):
    """Fetch *img_url* with a browser-like User-Agent and return the raw bytes."""
    import urllib.request

    # Sent so the request carries a browser's User-Agent instead of urllib's default.
    browser_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.3'}
    request = urllib.request.Request(url=img_url, headers=browser_headers)
    with urllib.request.urlopen(request) as reply:
        payload = reply.read()
    return payload
You need to set a User-Agent. This is probably a server security feature that blocks unknown user agents.
If you set a known browser User-Agent, it will work.
def download_img(img_url, img_name):
    """Download *img_url* and write its bytes to the file *img_name*.

    The request carries a browser-like User-Agent. Raises whatever
    ``urllib.request.urlopen`` raises (e.g. ``urllib.error.HTTPError``) or
    ``OSError`` if the file cannot be written.
    """
    from urllib.request import Request, urlopen

    request = Request(img_url, headers={'User-Agent': 'Mozilla/5.0'})
    # Read the whole body first and close the connection deterministically;
    # the target file is only created once the download has succeeded.
    with urlopen(request) as response:
        payload = response.read()
    with open(img_name, "wb") as f:
        f.write(payload)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.