![](/img/trans.png)
[英]How can I asynchronously download images from a list of URLs in Python?
[英]How can I download images from URLs and skip those images that don't exist in Python?
我的所有鏈接都處於工作狀態,我在瀏覽器中進行了測試,下載圖像時仍然出現以下錯誤。
獲取時出錯:“ http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_1.jpeg ”
獲取時出錯:“ http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_2.jpeg ”
獲取時出錯:“ http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_3.jpeg ”
獲取時出錯:“ http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_4.jpeg ”
import urllib.request
from urllib.error import URLError # the docs say this is the base error you need to catch
import time
import datetime,time
from PIL import Image
# Script: build today's e-paper page URLs (pages 1-4) and try to save each one
# under D:/IMAGES/.
# NOTE(review): indentation was lost in this listing; the loop/try nesting is
# implied by the `continue` statement in the except branch.
start_time = time.time()
today=time.strftime("%Y%m%d")
# Each chained assignment below also rebinds `today`, so after these four
# lines `today` holds the "%b" value rather than the YYYYMMDD stamp.
m=today=time.strftime("%m")
d=today=time.strftime("%d")
Y=today=time.strftime("%Y")
A=today=time.strftime("%b")
for i in range(1,5):
issue_id1=str(i)
# URL layout: .../<Y>/<m>/<d>/<Y><m><d>_<i>.jpeg
url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/"+str(Y) +"/"+str(m)+"/"+str(d)+"/"+str(Y+m+d)+"_"+str(i)+".jpeg"
try:
# The bare URL string is opened directly; no custom headers are supplied.
s = urllib.request.urlopen(url)
contents = s.read()
except URLError:
# On failure, report the URL and move on to the next page.
print('an error occurred while fetching: "{}"'.format(url))
continue
# NOTE(review): neither `s` nor `file` is closed explicitly.
file = open("D:/IMAGES/"+issue_id1+".jpeg", "wb")
file.write(contents)
現在我可以使用以下代碼下載圖像。
但如果未找到 URL,它仍會下載來源/擴展名/格式未知的圖像。 (404 未找到)
import requests
import urllib.request
from urllib.error import URLError # the docs say this is the base error you need to catch
import time
import datetime,time
from PIL import Image
# Script: same download loop as before, but fetching pages 1-9 with
# requests.get instead of urllib.
# NOTE(review): indentation was lost in this listing; the loop/try nesting is
# implied by the `continue` statement in the except branch.
start_time = time.time()
today=time.strftime("%Y%m%d")
# Each chained assignment below also rebinds `today`, so it ends up holding
# the "%b" value rather than the YYYYMMDD stamp.
m=today=time.strftime("%m")
d=today=time.strftime("%d")
Y=today=time.strftime("%Y")
A=today=time.strftime("%b")
for i in range(1,10):
issue_id1=str(i)
try:
# URL layout: .../<Y>/<m>/<d>/<Y><m><d>_<i>.jpeg
url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/"+str(Y) +"/"+str(m)+"/"+str(d)+"/"+str(Y+m+d)+"_"+str(i)+".jpeg"
myfile=requests.get(url)
# NOTE(review): the handler catches urllib's URLError although the fetch above
# is done by requests — presumably it never fires for HTTP failures; confirm.
except URLError:
print('an error occurred while fetching: "{}"'.format(url))
continue
# The response body is written without inspecting the response status, which
# matches the reported behaviour of 404 responses still being saved as files.
open("D:/IMAGES/"+issue_id1+".jpeg", "wb").write(myfile.content)
您獲取圖像的主機似乎不喜歡 urllib 附帶的默認標頭。
此調整后的版本似乎可以正確獲取您的圖像:
import urllib.request
from urllib.error import URLError # the docs say this is the base error you need to catch
import time
import datetime,time
from PIL import Image
# Download today's e-paper pages (issues 1-4) with urllib, sending a
# browser-like User-Agent because the host rejects urllib's default header.
# Pages that cannot be fetched are reported and skipped.
start_time = time.time()

# Read the clock once so the date parts cannot straddle midnight, and keep
# `today` as the full YYYYMMDD stamp instead of overwriting it.
now = time.localtime()
today = time.strftime("%Y%m%d", now)
m = time.strftime("%m", now)
d = time.strftime("%d", now)
Y = time.strftime("%Y", now)
A = time.strftime("%b", now)

# URLs of the pages that were actually downloaded.
fetched_images = []
for i in range(1, 5):
    issue_id1 = str(i)
    url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/" + Y + "/" + m + "/" + d + "/" + today + "_" + issue_id1 + ".jpeg"
    # First build the request, and adjust the headers to something else.
    req = urllib.request.Request(
        url,
        headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'},
    )
    try:
        # Secondly fetch the image; the context manager closes the connection.
        with urllib.request.urlopen(req) as s:
            contents = s.read()
    except URLError:
        # HTTPError (e.g. 404 Not Found) is a URLError subclass, so missing
        # pages end up here and are skipped.
        print(url)
        print('an error occurred while fetching: "{}"'.format(url))
        continue
    # Record the URL only once its content has been read completely.
    fetched_images.append(url)
    # `with` guarantees the image file is flushed and closed.
    with open("D:/IMAGES/" + issue_id1 + ".jpeg", "wb") as file:
        file.write(contents)
為了澄清:首先使用調整后的標頭構建您的請求(req),
然后再通過 urllib.request.urlopen(req) 打開該 url 。
另一種方法是使用 requests 庫。 在您的情況下,它實際上是開箱即用的。 在運行之前,您需要先安裝 requests 包: pip install requests
import requests
import datetime,time
# Download today's e-paper pages (issues 1-4) with requests, sending a
# browser-like User-Agent. Pages that are missing (HTTP 4xx/5xx) or cannot be
# fetched or saved are reported and skipped instead of being written to disk.
start_time = time.time()

# Read the clock once so the date parts cannot straddle midnight, and keep
# `today` as the full YYYYMMDD stamp instead of overwriting it.
now = time.localtime()
today = time.strftime("%Y%m%d", now)
month = time.strftime("%m", now)
day = time.strftime("%d", now)
year = time.strftime("%Y", now)

url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/{year}/{month}/{day}/{year}{month}{day}_{issue_id}.jpeg"
path = "D:/IMAGES/{issue_id}.jpeg"
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}

# URLs of the pages that were actually downloaded and saved.
fetched_images = []
for issue_id in range(1, 5):
    # Let's create the url for the given issue. This is done outside the try
    # block so the error message below can always refer to it.
    issue_url = url.format(
        year=year,
        month=month,
        day=day,
        issue_id=issue_id)
    try:
        # GET the url content; the timeout keeps a stalled host from hanging
        # the script forever.
        req = requests.get(issue_url, headers=headers, timeout=30)
        # requests does not raise on 404 by itself; without this check the
        # error page would be saved as a ".jpeg".
        req.raise_for_status()
        # Save to file if successful and close the file when done.
        with open(path.format(issue_id=issue_id), 'wb') as f:
            f.write(req.content)
    except (requests.RequestException, OSError) as e:
        # If something went wrong, just print the url and the error.
        print('Failed to fetch {url} with error {e}'.format(
            url=issue_url, e=e))
        continue
    # Add the image to the list only after it has been saved.
    fetched_images.append(issue_url)
我已經執行了下面的代碼。
import requests
import urllib.request
import datetime,time
# Script: follow-up attempt that formats the per-issue URL (issues 1-14) but
# builds a urllib Request object and then treats it like a requests response.
# NOTE(review): indentation was lost in this listing.
start_time = time.time()
today=time.strftime("%Y%m%d")
# Each chained assignment below also rebinds `today`, so it ends up holding
# the "%Y" value rather than the YYYYMMDD stamp.
month=today=time.strftime("%m")
day=today=time.strftime("%d")
year=today=time.strftime("%Y")
url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/{year}/{month}/{day}/{year}{month}{day}_{issue_id}.jpeg"
path = "D:/IMAGES/{issue_id}.jpeg"
fetched_images = []
for issue_id in range(1, 15):
try:
# Let's create the url for the given issue.
issue_url = url.format(
year=year,
month=month,
day=day,
issue_id=issue_id)
# GET the url content
# NOTE(review): this passes the unformatted template `url`, not `issue_url`,
# and only constructs a Request object — no call in this script sends it.
req = urllib.request.Request(url,
headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'})
# Add the image to your list
# NOTE(review): appends the template `url` as well, before anything is saved.
fetched_images.append(url)
# Save to file if successful and close the file when done.
with open(path.format(issue_id=issue_id), 'wb') as f:
# `req` is a urllib Request, which has no `content` attribute; this is the
# AttributeError reported in the output quoted after this listing.
f.write(req.content)
except Exception as e:
# If something went wrong, just print the url and the error.
print('Failed to fetch {url} with error {e}'.format(
url=issue_url, e=e))
現在得到以下錯誤:
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_1.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_2.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_3.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_4.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_5.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_6.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_7.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_8.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_9.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_10.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_11.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_12.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_13.jpeg with error 'Request' object has no attribute 'content'
Failed to fetch http://epaperlokmat.in/eNewspaper/News/LOK/MULK/2020/04/29/20200429_14.jpeg with error 'Request' object has no attribute 'content'
這是您的第二個示例的代碼。
import requests
import datetime,time
# Script: second follow-up attempt (issues 1-14); as listed it does not parse.
# NOTE(review): indentation was lost in this listing, and urllib.request is
# not among the imports shown for this snippet.
start_time = time.time()
today=time.strftime("%Y%m%d")
# Each chained assignment below also rebinds `today`, so it ends up holding
# the "%Y" value rather than the YYYYMMDD stamp.
month=today=time.strftime("%m")
day=today=time.strftime("%d")
year=today=time.strftime("%Y")
url = "http://epaperlokmat.in/eNewspaper/News/LOK/MULK/{year}/{month}/{day}/{year}{month}{day}_{issue_id}.jpeg"
path = "D:/IMAGES/{issue_id}.jpeg"
fetched_images = []
for issue_id in range(1, 15):
try:
# Let's create the url for the given issue.
issue_url = url.format(
year=year,
month=month,
day=day,
issue_id=issue_id)
# GET the url content
# NOTE(review): the `Request(` call opened on the next line is never closed
# with `)`, so the parser reports a SyntaxError at the following statement
# (`fetched_images.append(url)`), as quoted after this listing.
req = urllib.request.Request(url,
headers={'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'}
# Add the image to your list
fetched_images.append(url)
# Save to file if successful and close the file when done.
with open(path.format(issue_id=issue_id), 'wb') as f:
# NOTE(review): `req` is built with urllib.request.Request, yet `.content` is
# read from it here.
f.write(req.content)
except Exception as e:
# If something went wrong, just print the url and the error.
print('Failed to fetch {url} with error {e}'.format(
url=issue_url, e=e))
給出錯誤
File "d:/test2.py", line 29
fetched_images.append(url)
^
SyntaxError: invalid syntax
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.