[英]How to solve 'RecursionError: maximum recursion depth exceeded' with Eventlet and Requests in Python
I am trying to implement the Amazon Web Scraper mentioned here. However, I get the output shown below.
The output repeats until it stops with
RecursionError: maximum recursion depth exceeded
I have already tried downgrading eventlet to version 0.17.4 as mentioned here. Also, the
requests
module is getting patched, as you can see in helpers.py.
helpers.py
import os
import random
from datetime import datetime
from urllib.parse import urlparse
import eventlet
requests = eventlet.import_patched('requests.__init__')
time = eventlet.import_patched('time')
import redis
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
import settings
num_requests = 0
redis = redis.StrictRedis(host=settings.redis_host, port=settings.redis_port, db=settings.redis_db)
def make_request(url, return_soup=True):
# global request building and response handling
url = format_url(url)
if "picassoRedirect" in url:
return None # skip the redirect URLs
global num_requests
if num_requests >= settings.max_requests:
raise Exception("Reached the max number of requests: {}".format(settings.max_requests))
proxies = get_proxy()
try:
r = requests.get(url, headers=settings.headers, proxies=proxies)
except RequestException as e:
log("WARNING: Request for {} failed, trying again.".format(url))
num_requests += 1
if r.status_code != 200:
os.system('say "Got non-200 Response"')
log("WARNING: Got a {} status code for URL: {}".format(r.status_code, url))
return None
if return_soup:
return BeautifulSoup(r.text), r.text
return r
def format_url(url):
# make sure URLs aren't relative, and strip unnecssary query args
u = urlparse(url)
scheme = u.scheme or "https"
host = u.netloc or "www.amazon.de"
path = u.path
if not u.query:
query = ""
else:
query = "?"
for piece in u.query.split("&"):
k, v = piece.split("=")
if k in settings.allowed_params:
query += "{k}={v}&".format(**locals())
query = query[:-1]
return "{scheme}://{host}{path}{query}".format(**locals())
def log(msg):
# global logging function
if settings.log_stdout:
try:
print("{}: {}".format(datetime.now(), msg))
except UnicodeEncodeError:
pass # squash logging errors in case of non-ascii text
def get_proxy():
# choose a proxy server to use for this request, if we need one
if not settings.proxies or len(settings.proxies) == 0:
return None
proxy = random.choice(settings.proxies)
proxy_url = "socks5://{user}:{passwd}@{ip}:{port}/".format(
user=settings.proxy_user,
passwd=settings.proxy_pass,
ip=proxy,
port=settings.proxy_port,
)
return {
"http": proxy_url,
"https": proxy_url
}
if __name__ == '__main__':
# test proxy server IP masking
r = make_request('https://api.ipify.org?format=json', return_soup=False)
print(r.text)
Output
Traceback (most recent call last):
File "helpers.py", line 112, in <module>
r = make_request('https://api.ipify.org?format=json', return_soup=False)
File "helpers.py", line 36, in make_request
r = requests.get(url, headers=settings.headers, proxies=proxies)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 76, in get
return request('get', url, params=params, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/api.py", line 61, in request
return session.request(method=method, url=url, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 530, in request
resp = self.send(prep, **send_kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/sessions.py", line 643, in send
r = adapter.send(request, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/requests/adapters.py", line 449, in send
timeout=timeout
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 672, in urlopen
chunked=chunked,
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 376, in _make_request
self._validate_conn(conn)
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connectionpool.py", line 994, in _validate_conn
conn.connect()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/connection.py", line 300, in connect
conn = self._new_conn()
File "/home/ec2-user/env/lib64/python3.7/site-packages/urllib3/contrib/socks.py", line 99, in _new_conn
**extra_kw
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 199, in create_connection
sock.connect((remote_host, remote_port))
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 47, in wrapper
return function(*args, **kwargs)
File "/home/ec2-user/env/lib64/python3.7/site-packages/socks.py", line 774, in connect
super(socksocket, self).settimeout(self._timeout)
File "/home/ec2-user/env/lib64/python3.7/site-packages/eventlet/greenio/base.py", line 395, in settimeout
self.setblocking(True)
What might be the problem here?

It turned out that removing eventlet.monkey_patch()
and import eventlet
solved the problem.
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.