I want to download photos from an Iranian website. When I run the code in Google Colab, I get a timeout error and a URLError.
import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup
def make_soup(url, timeout=30):
    """Fetch *url* and return its HTML parsed as a BeautifulSoup tree.

    Sends a browser-like User-Agent header (many sites reject the default
    ``Python-urllib`` agent) and applies a socket timeout so an unreachable
    or blocking host fails fast instead of hanging for minutes.

    :param url: absolute URL of the page to fetch
    :param timeout: seconds to wait for the connection (default 30)
    :return: ``BeautifulSoup`` parse tree of the response body
    :raises urllib.error.URLError: on DNS failure or refused connection
    :raises TimeoutError: when the host does not answer within *timeout*

    NOTE(review): the original timeout traceback suggests the host may be
    unreachable from Colab's network entirely (geo-blocking) — a header
    and timeout make failures fast and explicit but cannot bypass that.
    """
    req = urllib.request.Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    # Context-manage the response so the socket is closed even on parse errors.
    with urllib.request.urlopen(req, timeout=timeout) as thepage:
        return BeautifulSoup(thepage.read(), "html.parser")
# Crawl the listing page and save every image that carries real alt text,
# using the alt text as the file name.
BASE_URL = "https://www.banikhodro.com/car/pride/"

i = 1  # counts images skipped because they had no usable alt text
soup = make_soup(BASE_URL)
for img in soup.find_all('img'):
    temp = img.get('src')
    if not temp:
        # <img> with no src attribute: nothing to download
        # (the original did temp[0] and crashed with TypeError on None).
        continue
    # urljoin correctly resolves site-absolute ("/x.jpg"), relative and
    # already-absolute srcs; the original concatenated the full page URL
    # with a site-absolute path, producing ".../car/pride//path" URLs.
    image = urljoin(BASE_URL, temp)
    nametemp = img.get('alt')
    # A missing alt attribute is None, not "" — the original's
    # len(str(None)) == 0 test never fired, so alt-less images were all
    # written over each other as "None.jpeg".
    if not nametemp:
        i = i + 1
        continue
    filename = str(nametemp)
    # Context manager: the file is closed even if the image fetch raises.
    with open(filename + ".jpeg", 'wb') as imagefile:
        imagefile.write(urllib.request.urlopen(image).read())
TimeoutError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/urllib3/connection.py in _new_conn(self)
158 conn = connection.create_connection(
--> 159 (self._dns_host, self.port), self.timeout, **extra_kw)
160
15 frames
TimeoutError: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
NewConnectionError Traceback (most recent call last)
NewConnectionError: <urllib3.connection.VerifiedHTTPSConnection object at 0x7f079e4cdcf8>: Failed to establish a new connection: [Errno 110] Connection timed out
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
MaxRetryError: HTTPSConnectionPool(host='www.banikhodro.com', port=443): Max retries exceeded with url: /car/pride/ (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x7f079e4cdcf8>: Failed to establish a new connection: [Errno 110] Connection timed out',))
During handling of the above exception, another exception occurred:
ConnectionError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
514 raise SSLError(e, request=request)
515
--> 516 raise ConnectionError(e, request=request)
517
518 except ClosedPoolError as e:
ConnectionError: HTTPSConnectionPool(host='www.banikhodro.com', port=443): Max retries exceeded with url: /car/pride/ (Caused by NewConnectionError('<urllib3.connection.VerifiedHTTPSConnection object at 0x7f079e4cdcf8>: Failed to establish a new connection: [Errno 110] Connection timed out',))
It raises a timeout error and a connection error. These errors occur in Google Colab when I use this Iranian website to download the images. Thanks in advance to anyone who can answer my question.
One way of doing this would be:
import requests
from bs4 import BeautifulSoup
# Download the listing page and pull out the photo spans that wrap
# each car advert's thumbnail.
page = requests.get("https://www.banikhodro.com/car/pride/").content
soup = BeautifulSoup(page, "html5lib").find_all("span", {"class": "photo"})

# Advert photos have "Adv" in their src; everything else on the page is a
# generic placeholder image we do not want.
images = []
for span in soup:
    src = span.find("img")["src"]
    if "Adv" in src:
        images.append("https://www.banikhodro.com" + src)

# Save each advert image under the last component of its URL.
for image in images:
    print(f"Fetching {image}")
    filename = image.rsplit("/")[-1]
    with open(filename, "wb") as img:
        img.write(requests.get(image).content)
This fetches all non-generic images of car offers to your local folder.
183093_1-m.jpg
183098_1-m.jpg
183194_1-m.jpg
183208_1-m.jpg
183209_1-m.jpg
183272_1-m.jpg
183279_1-m.jpg
183286_1-m.jpg
183384_1-m.jpg
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Download the listing page and save every image it references.
BASE = "https://www.banikhodro.com/car/pride/"
page = requests.get(BASE).content
soup = BeautifulSoup(page, "html5lib")
# Only keep <img> tags that actually have a src (img["src"] would raise
# KeyError on src-less tags). urljoin resolves site-absolute ("/x.jpg"),
# relative and already-absolute srcs alike, whereas blindly prefixing the
# domain corrupts srcs that are already full URLs.
images = [
    urljoin(BASE, img["src"]) for img in soup.find_all("img")
    if img.get("src")
    # filter further by class or id inside find_all() as needed
]
for image in images:
    print(f"Fetching {image}")
    name = image.split("/")[-1]
    if not name:
        continue  # URL ended with "/" — no usable file name
    with open(name, "wb") as img:
        img.write(requests.get(image).content)
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.