[英]What is the easiest way to get the url a link redirects to?
我有许多重定向到其他网址的网址,我想找到它们重定向到的链接的地址。 例如,
https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown
重定向到
https://www.richmondfed.org/press_room/speeches/thomas_i_barkin/2019/barkin_speech_20190404
我尝试像下面这样使用 requests 库:
import requests
url = "https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown"
def get_redirected_url(url: str) -> str:
    """Return the final URL reached after following redirects from ``url``."""
    # requests.get follows redirects by default, so the response's .url is
    # the address of the last hop rather than the one originally requested.
    r = requests.get(url)
    return r.url
get_redirected_url(url)
但是运行时出现了以下错误。知道可能是哪里出了问题吗?
---------------------------------------------------------------------------
Error Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/urllib3/contrib/pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
487 try:
--> 488 cnx.do_handshake()
489 except OpenSSL.SSL.WantReadError:
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/SSL.py in do_handshake(self)
1933
-> 1934 :param addr: A remove address
1935 :return: What the socket's connect_ex method returns
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/SSL.py in _raise_ssl_error(self, ssl, result)
1670
-> 1671 def set_tlsext_host_name(self, name):
1672 """
/opt/anaconda3/lib/python3.8/site-packages/OpenSSL/_util.py in exception_from_error_queue(exception_type)
53 text(lib.ERR_reason_error_string(error)),
---> 54 )
55 )
Error: [('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')]
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
669 # Make the request on the httplib connection object.
--> 670 httplib_response = self._make_request(
671 conn,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in _make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
380 try:
--> 381 self._validate_conn(conn)
382 except (SocketTimeout, BaseSSLError) as e:
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in _validate_conn(self, conn)
977 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
--> 978 conn.connect()
979
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connection.py in connect(self)
361
--> 362 self.sock = ssl_wrap_socket(
363 sock=conn,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/ssl_.py in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data)
385 if HAS_SNI and server_hostname is not None:
--> 386 return context.wrap_socket(sock, server_hostname=server_hostname)
387
/opt/anaconda3/lib/python3.8/site-packages/urllib3/contrib/pyopenssl.py in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
493 except OpenSSL.SSL.Error as e:
--> 494 raise ssl.SSLError("bad handshake: %r" % e)
495 break
SSLError: ("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])",)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
/opt/anaconda3/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
438 if not chunked:
--> 439 resp = conn.urlopen(
440 method=request.method,
/opt/anaconda3/lib/python3.8/site-packages/urllib3/connectionpool.py in urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
725
--> 726 retries = retries.increment(
727 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
/opt/anaconda3/lib/python3.8/site-packages/urllib3/util/retry.py in increment(self, method, url, response, error, _pool, _stacktrace)
445 if new_retry.is_exhausted():
--> 446 raise MaxRetryError(_pool, url, error or ResponseError(cause))
447
MaxRetryError: HTTPSConnectionPool(host='stlouisfed.org', port=443): Max retries exceeded with url: /fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
<ipython-input-126-5a66b7870cb5> in <module>
----> 1 get_redirected_url(url)
<ipython-input-125-d2cc1a52d8fa> in get_redirected_url(url)
1 def get_redirected_url(url: str) -> str:
2 # url = urljoin("https://stlouisfed.org/", url)
----> 3 r = requests.get(url, timeout=10)
4 return r.url
5
/opt/anaconda3/lib/python3.8/site-packages/requests/api.py in get(url, params, **kwargs)
73
74 kwargs.setdefault('allow_redirects', True)
---> 75 return request('get', url, params=params, **kwargs)
76
77
/opt/anaconda3/lib/python3.8/site-packages/requests/api.py in request(method, url, **kwargs)
58 # cases, and look like a memory leak in others.
59 with sessions.Session() as session:
---> 60 return session.request(method=method, url=url, **kwargs)
61
62
/opt/anaconda3/lib/python3.8/site-packages/requests/sessions.py in request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
531 }
532 send_kwargs.update(settings)
--> 533 resp = self.send(prep, **send_kwargs)
534
535 return resp
/opt/anaconda3/lib/python3.8/site-packages/requests/sessions.py in send(self, request, **kwargs)
644
645 # Send the request
--> 646 r = adapter.send(request, **kwargs)
647
648 # Total elapsed time of the request (approximately)
/opt/anaconda3/lib/python3.8/site-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
512 if isinstance(e.reason, _SSLError):
513 # This branch is for urllib3 v1.22 and later.
--> 514 raise SSLError(e, request=request)
515
516 raise ConnectionError(e, request=request)
SSLError: HTTPSConnectionPool(host='stlouisfed.org', port=443): Max retries exceeded with url: /fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown (Caused by SSLError(SSLError("bad handshake: Error([('SSL routines', 'tls_process_server_certificate', 'certificate verify failed')])")))
我使用以下代码重现了您的问题:
import requests
url = "https://stlouisfed.org/fomcspeak/thomas-barkin/2019/04/apr-speech-a-practitioners-perspective-on-the-productivity-slowdown"
def get_redirected_url(url):
    """Return the final URL reached after following redirects from ``url``."""
    # Certificate verification is left at its default here on purpose: this
    # snippet exists to reproduce the SSL error from the question.
    r = requests.get(url)
    return r.url
get_redirected_url(url)
根本原因是服务器 stlouisfed.org 没有提供正确的证书链,缺少 L1K 中间证书。您可以通过查看 https://www.ssllabs.com/ssltest/analyze.html?d=https://stlouisfed.org/ 的检测结果来确认。
为了在您这边(客户端)解决或绕过这个问题,我找到了 2 个选项:
使用任一选项,都可以返回重定向的 URL。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.