Boto3 throws ConnectionReset and Protocol Errors when reading files from S3

I am performing analysis in a Jupyter Notebook on my local computer, reading the data in from S3. When I close one notebook and open another to read in a different file, I get the following error:

ProtocolError: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

Since the problem seems to be with an existing connection, I followed this thread and tried waiting and then closing my existing connections. However, the client returned by boto3.client('s3') does not appear to have a .close() or equivalent for s3client.get_object() (see my full code further below).
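The closest thing I can see is that response['Body'] is a botocore StreamingBody, which does have a close() method of its own. Here is a minimal sketch of what explicitly closing that stream would look like (the bucket and key are the same placeholders as in my full code further below):

import contextlib

import boto3
import pandas as pd

s3client = boto3.client('s3')  # credentials resolved the same way as in my full code

response = s3client.get_object(Bucket='mydatabucket', Key='mydata.csv')
# response['Body'] is a botocore StreamingBody; contextlib.closing ensures
# its close() runs even if read_csv raises partway through the download.
with contextlib.closing(response['Body']) as body:
    df = pd.read_csv(body)

(contextlib.closing is just one way to guarantee the close() call; calling response['Body'].close() directly after pd.read_csv() would do the same thing.)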

My first connection on startup does not produce this error.

After I have seen the error, shutting down my computer and starting it up again makes it go away.

When I merely restart the notebook kernel, however, the error persists.

How can I close the connection without needing to restart my computer?

import pandas as pd
import boto3
import boto3.session
from botocore.client import Config

config = Config(connect_timeout=500, retries={'max_attempts': 5}, read_timeout=1000)


cred = boto3.Session().get_credentials()
ACCESS_KEY = cred.access_key
SECRET_KEY = cred.secret_key
SESSION_TOKEN = cred.token

s3client = boto3.client('s3', 
                        aws_access_key_id = ACCESS_KEY, 
                        aws_secret_access_key = SECRET_KEY, 
                        aws_session_token = SESSION_TOKEN,
                        config = config
                       )

response = s3client.get_object(Bucket='mydatabucket', Key='mydata.csv')
df = pd.read_csv(response['Body'])

Here is the traceback and error I get instead of the expected pandas DataFrame stored as df:

---------------------------------------------------------------------------
ConnectionResetError                      Traceback (most recent call last)
C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in _error_catcher(self)
    359             try:
--> 360                 yield
    361 

C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
    441                 cache_content = False
--> 442                 data = self._fp.read(amt)
    443                 if amt != 0 and not data:  # Platform-specific: Buggy versions of Python.

C:\ProgramData\Anaconda3\lib\http\client.py in read(self, amt)
    446             b = bytearray(amt)
--> 447             n = self.readinto(b)
    448             return memoryview(b)[:n].tobytes()


C:\ProgramData\Anaconda3\lib\http\client.py in readinto(self, b)
    490         # (for example, reading in 1k chunks)
--> 491         n = self.fp.readinto(b)
    492         if not n and b:

C:\ProgramData\Anaconda3\lib\socket.py in readinto(self, b)
    588             try:
--> 589                 return self._sock.recv_into(b)
    590             except timeout:

C:\ProgramData\Anaconda3\lib\ssl.py in recv_into(self, buffer, nbytes, flags)
   1051                   self.__class__)
-> 1052             return self.read(nbytes, buffer)
   1053         else:

C:\ProgramData\Anaconda3\lib\ssl.py in read(self, len, buffer)
    910             if buffer is not None:
--> 911                 return self._sslobj.read(len, buffer)
    912             else:

ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host

During handling of the above exception, another exception occurred:

ProtocolError                             Traceback (most recent call last)
<ipython-input-5-4d25be33c7b8> in <module>
      1 response = s3client.get_object(Bucket='mydatabucket', Key='mydata.csv')
----> 2 audit = pd.read_csv(response['Body'])

C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, delim_whitespace, low_memory, memory_map, float_precision)
    700                     skip_blank_lines=skip_blank_lines)
    701 
--> 702         return _read(filepath_or_buffer, kwds)
    703 
    704     parser_f.__name__ = name

C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in _read(filepath_or_buffer, kwds)
    433 
    434     try:
--> 435         data = parser.read(nrows)
    436     finally:
    437         parser.close()

C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   1137     def read(self, nrows=None):
   1138         nrows = _validate_integer('nrows', nrows)
-> 1139         ret = self._engine.read(nrows)
   1140 
   1141         # May alter columns / col_dict

C:\ProgramData\Anaconda3\lib\site-packages\pandas\io\parsers.py in read(self, nrows)
   1993     def read(self, nrows=None):
   1994         try:
-> 1995             data = self._reader.read(nrows)
   1996         except StopIteration:
   1997             if self._first_chunk:

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader.read()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_low_memory()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._read_rows()

pandas/_libs/parsers.pyx in pandas._libs.parsers.TextReader._tokenize_rows()

pandas/_libs/parsers.pyx in pandas._libs.parsers.raise_parser_error()

C:\ProgramData\Anaconda3\lib\site-packages\botocore\response.py in read(self, amt)
     76         """
     77         try:
---> 78             chunk = self._raw_stream.read(amt)
     79         except URLLib3ReadTimeoutError as e:
     80             # TODO: the url will be None as urllib3 isn't setting it yet

C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in read(self, amt, decode_content, cache_content)
    457                         # raised during streaming, so all calls with incorrect
    458                         # Content-Length are caught.
--> 459                         raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
    460 
    461         if data:

C:\ProgramData\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)
    128                 value = type()
    129             try:
--> 130                 self.gen.throw(type, value, traceback)
    131             except StopIteration as exc:
    132                 # Suppress StopIteration *unless* it's the same exception that

C:\ProgramData\Anaconda3\lib\site-packages\urllib3\response.py in _error_catcher(self)
    376             except (HTTPException, SocketError) as e:
    377                 # This includes IncompleteRead.
--> 378                 raise ProtocolError('Connection broken: %r' % e, e)
    379 
    380             # If no exception is thrown, we should avoid cleaning up

ProtocolError: ("Connection broken: ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None)", ConnectionResetError(10054, 'An existing connection was forcibly closed by the remote host', None, 10054, None))

There is no need to close or delete the client in order to re-open it later. Each call to AWS is a distinct API request to an endpoint for that service, with no long-term connection maintained.

So you can access multiple files with the same client, and you don't have to worry about closing and re-opening connections to S3.
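For example (a minimal sketch; the bucket name and the second key are just placeholders), the same client can serve any number of get_object() requests, each of which is a separate HTTPS call:

import boto3
import pandas as pd

s3client = boto3.client('s3')  # credentials come from the default credential chain

frames = {}
for key in ['mydata.csv', 'otherdata.csv']:  # placeholder object keys
    # Each get_object() is its own HTTPS request; there is nothing on the
    # client to close or re-open between calls.
    response = s3client.get_object(Bucket='mydatabucket', Key=key)
    frames[key] = pd.read_csv(response['Body'])

If one of those requests does hit a connection reset, simply re-issuing the get_object() call picks up a fresh connection from the underlying connection pool; there is nothing that needs to be torn down first.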
