簡體   English   中英

使用BeautifulSoup時出現 IOError: [Errno socket error]

[英]IOError: [Errno socket error] using BeautifulSoup

我正在嘗試在 Python 2.7 中使用 BeautifulSoup 從美國人口普查局網站獲取數據。這是我使用的代碼:

import urllib
from bs4 import BeautifulSoup

url = "https://www.census.gov/quickfacts/table/PST045215/01"
html = urllib.urlopen(url).read()
soup = BeautifulSoup(html)

但是,這是我得到的錯誤:

IOError                                   Traceback (most recent call last)
<ipython-input-5-47941f5ea96a> in <module>()
     59 
     60 url = "https://www.census.gov/quickfacts/table/PST045215/01"
---> 61 html = urllib.urlopen(url).read()
     62 soup = BeautifulSoup(html)
     63 

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in urlopen(url, data, proxies, context)
     85         opener = _urlopener
     86     if data is None:
---> 87         return opener.open(url)
     88     else:
     89         return opener.open(url, data)

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in open(self, fullurl, data)
    211         try:
    212             if data is None:
--> 213                 return getattr(self, name)(url)
    214             else:
    215                 return getattr(self, name)(url, data)

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.pyc in open_https(self, url, data)
    441             if realhost: h.putheader('Host', realhost)
    442             for args in self.addheaders: h.putheader(*args)
--> 443             h.endheaders(data)
    444             errcode, errmsg, headers = h.getreply()
    445             fp = h.getfile()

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in endheaders(self, message_body)
   1051         else:
   1052             raise CannotSendHeader()
-> 1053         self._send_output(message_body)
   1054 
   1055     def request(self, method, url, body=None, headers={}):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in _send_output(self, message_body)
    895             msg += message_body
    896             message_body = None
--> 897         self.send(msg)
    898         if message_body is not None:
    899             #message_body was not a string (i.e. it is a file) and

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in send(self, data)
    857         if self.sock is None:
    858             if self.auto_open:
--> 859                 self.connect()
    860             else:
    861                 raise NotConnected()

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.pyc in connect(self)
   1276 
   1277             self.sock = self._context.wrap_socket(self.sock,
-> 1278                                                   server_hostname=server_hostname)
   1279 
   1280     __all__.append("HTTPSConnection")

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname)
    351                          suppress_ragged_eofs=suppress_ragged_eofs,
    352                          server_hostname=server_hostname,
--> 353                          _context=self)
    354 
    355     def set_npn_protocols(self, npn_protocols):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in __init__(self, sock, keyfile, certfile, server_side, cert_reqs, ssl_version, ca_certs, do_handshake_on_connect, family, type, proto, fileno, suppress_ragged_eofs, npn_protocols, ciphers, server_hostname, _context)
    599                         # non-blocking
    600                         raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
--> 601                     self.do_handshake()
    602 
    603             except (OSError, ValueError):

/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/ssl.pyc in do_handshake(self, block)
    828             if timeout == 0.0 and block:
    829                 self.settimeout(None)
--> 830             self._sslobj.do_handshake()
    831         finally:
    832             self.settimeout(timeout)

IOError: [Errno socket error] [SSL: SSLV3_ALERT_HANDSHAKE_FAILURE] sslv3 alert handshake failure (_ssl.c:590)

我已經查看了 Stack Overflow 上的兩個相關解決辦法(這個和這個),但它們都沒有解決問題。

解決此問題的一種方法是改用 requests 庫:

import requests
from bs4 import BeautifulSoup

url = "https://www.census.gov/quickfacts/table/PST045215/01"
response = requests.get(url)
soup = BeautifulSoup(response.content, "html.parser")
print(soup.title.get_text())

打印:

Alabama QuickFacts from the US Census Bureau

請注意,這可能還需要安裝 requests[security] 軟件包:

pip install requests[security]

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM