繁体   English   中英

请求超时错误 (requests.exceptions.ConnectionError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')")))

[英]Requests Timeout Error (requests.exceptions.ConnectionError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')")))

我已经看到很多关于这个主题的问题,但没有一个包含对我有用的解决方案。 这是我的完整代码:

import pandas as pd
import requests
import time

# Widen pandas' console output so frames with many columns print on one line.
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# Game Id
# Kept as a string: the identifier starts with zeros, which an integer
# would drop before the value is interpolated into the request URL.
game_id = '0021900001'

# Headers for API Request
# Passed as ``headers=`` to requests.get in extract_data.
# NOTE(review): 'Accept-Encoding' advertises 'br'; presumably requests can only
# decode Brotli responses when a brotli package is installed - confirm, or
# drop 'br' if r.json() ever fails on a compressed body.
# NOTE(review): 'Referer' is a bare host name rather than a full URL - verify
# that the server accepts it in this form.
header_data = {
    'Host': 'stats.nba.com',
    'Connection': 'keep-alive',
    'Cache-Control': 'max-age=0',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    'Referer': 'stats.nba.com',
    'Accept-Encoding': 'gzip, deflate, br',
    'Accept-Language': 'en-US,en;q=0.9',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
}


###
### Calculate Players on court at the start of each period
###

# Build advanced boxscore url
def advanced_boxscore_url(game_id, start, end):
    """Return the boxscoretraditionalv2 URL for one game and time range.

    Args:
        game_id: Game identifier substituted into the ``gameId`` parameter.
        start: Value for the ``startRange`` query parameter.
        end: Value for the ``endRange`` query parameter.

    Returns:
        The fully formatted request URL as a string.
    """
    url_template = 'https://stats.nba.com/stats/boxscoretraditionalv2/?gameId={0}&startPeriod=0&endPeriod=14&startRange={1}&endRange={2}&rangeType=2'
    return url_template.format(game_id, start, end)


# Helper functions
def calculate_time_at_period(period):
    """Return the elapsed game time at the start of ``period``.

    Periods up to 5 are offset by 720 per preceding period; later periods
    add ``5 * 60`` for each period beyond the fifth on top of four
    720-long periods. The total is multiplied by 10 (presumably seconds
    converted to tenths of a second - confirm against the API's range
    units).

    Args:
        period: 1-based period number.

    Returns:
        The scaled elapsed time at which ``period`` begins.
    """
    past_first_overtime = period > 5
    if past_first_overtime:
        elapsed = 720 * 4 + (period - 5) * (5 * 60)
    else:
        elapsed = 720 * (period - 1)
    return elapsed * 10


def split_subs(df, tag):
    """Extract one side of the substitution events as a long-format frame.

    Args:
        df: Frame with the columns ``tag``, ``'PERIOD'`` and ``'EVENTNUM'``.
        tag: Name of the column holding the player ids to extract; the same
            value is written into every row of the new ``'SUB'`` column.

    Returns:
        A new frame with columns ``PLAYER_ID``, ``PERIOD``, ``EVENTNUM`` and
        ``SUB``. ``df`` itself is left untouched.
    """
    # Work on an explicit copy: assigning a column on the result of a column
    # selection is chained assignment and triggers SettingWithCopyWarning.
    subs = df[[tag, 'PERIOD', 'EVENTNUM']].copy()
    subs.columns = ['PLAYER_ID', 'PERIOD', 'EVENTNUM']
    subs['SUB'] = tag
    return subs


def frame_to_row(df):
    """Flatten a two-team player frame into a single list.

    Args:
        df: Frame with ``'TEAM_ID'`` and ``'PLAYER_ID'`` columns containing
            at least two distinct team ids.

    Returns:
        ``[team1, players1, team2, players2]`` where the teams are the first
        two distinct ``TEAM_ID`` values in order of appearance and each
        player list is sorted ascending.

    Raises:
        IndexError: If ``df`` contains fewer than two distinct team ids.
    """
    team_ids = df['TEAM_ID'].unique()
    first_team = team_ids[0]
    second_team = team_ids[1]

    def sorted_roster(team_id):
        # Player ids of the rows belonging to ``team_id``, in ascending order.
        return sorted(df[df['TEAM_ID'] == team_id]['PLAYER_ID'].tolist())

    return [
        first_team,
        sorted_roster(first_team),
        second_team,
        sorted_roster(second_team),
    ]


# extracts data from api response
def extract_data(url, timeout=(10, 60)):
    """Fetch ``url`` and return its first result set as a DataFrame.

    Args:
        url: Address to request; it is printed before the request is sent.
        timeout: Forwarded to ``requests.get``. Either a single number or a
            ``(connect, read)`` tuple of seconds. Without it a stalled
            connection blocks until the operating system aborts the socket.

    Returns:
        A DataFrame built from ``resultSets[0]['rowSet']`` with
        ``resultSets[0]['headers']`` as column names. An empty ``rowSet``
        yields an empty frame that still carries those columns.

    Raises:
        requests.exceptions.RequestException: On connection failures,
            timeouts, or a non-2xx HTTP status.
    """
    print(url)
    r = requests.get(url, headers=header_data, timeout=timeout)
    # Fail on the HTTP error itself instead of a confusing JSON/KeyError later.
    r.raise_for_status()
    results = r.json()['resultSets'][0]
    # Passing columns= (rather than assigning .columns afterwards) keeps an
    # empty rowSet from raising a length-mismatch ValueError.
    return pd.DataFrame(results['rowSet'], columns=results['headers'])


# Column layout of the per-period lineup rows written to the output CSV.
cols = ['TEAM_ID_1', 'TEAM_1_PLAYERS', 'TEAM_ID_2', 'TEAM_2_PLAYERS', 'PERIOD']


def _players_on_court_at_start(pbp, game_id):
    """Return one row per period with the players on court when it starts.

    For each period that has at least one player whose first substitution
    event is an 'IN', the box score for that period's time range is
    downloaded and every player who was subbed in during the period is
    removed; the remaining players are reported per team.

    Args:
        pbp: Play-by-play frame with the columns ``EVENTMSGTYPE``,
            ``PERIOD``, ``EVENTNUM``, ``PLAYER1_ID`` and ``PLAYER2_ID``.
        game_id: Game identifier used to build the box score URLs.

    Returns:
        A DataFrame with the columns listed in ``cols``; empty (but with
        those columns) when no qualifying period exists.
    """
    # Event type 8 rows are the substitutions: PLAYER1 leaves, PLAYER2 enters.
    substitutions = pbp.loc[
        pbp['EVENTMSGTYPE'] == 8,
        ['PERIOD', 'EVENTNUM', 'PLAYER1_ID', 'PLAYER2_ID'],
    ].rename(columns={'PLAYER1_ID': 'OUT', 'PLAYER2_ID': 'IN'})

    full_subs = pd.concat(
        [split_subs(substitutions, 'OUT'), split_subs(substitutions, 'IN')],
        ignore_index=True,
    )[['PLAYER_ID', 'PERIOD', 'EVENTNUM', 'SUB']]

    # A player's earliest event in a period tells whether they began it on
    # the bench ('IN' first) or on the court ('OUT' first).
    first_event_of_period = full_subs.loc[
        full_subs.groupby(by=['PERIOD', 'PLAYER_ID'])['EVENTNUM'].idxmin()
    ]
    subbed_in_first = first_event_of_period[
        first_event_of_period['SUB'] == 'IN'
    ][['PLAYER_ID', 'PERIOD', 'SUB']]

    periods = subbed_in_first['PERIOD'].drop_duplicates().values.tolist()

    rows = []
    for period in periods:
        # Stay 5 units inside the period on both ends of the range.
        low = calculate_time_at_period(period) + 5
        high = calculate_time_at_period(period + 1) - 5
        boxscore = advanced_boxscore_url(game_id, low, high)

        # .copy() so that adding PERIOD is not a chained assignment.
        boxscore_players = extract_data(boxscore)[
            ['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID']
        ].copy()
        boxscore_players['PERIOD'] = period

        subbed_in_this_period = subbed_in_first[subbed_in_first['PERIOD'] == period]

        # Left join, then keep rows without a match: players in the box score
        # who were never subbed in must have started the period on court.
        joined_players = pd.merge(
            boxscore_players, subbed_in_this_period,
            on=['PLAYER_ID', 'PERIOD'], how='left',
        )
        starters = joined_players[pd.isnull(joined_players['SUB'])][
            ['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID', 'PERIOD']
        ]

        row = frame_to_row(starters)
        row.append(period)
        rows.append(row)

    # columns= keeps an empty result from failing on a length mismatch.
    return pd.DataFrame(rows, columns=cols)


# First game: uses the module-level game_id and its own play-by-play file.
play_by_play = pd.read_csv('pbp_v3_0210.csv')
lineup_frames = [_players_on_court_at_start(play_by_play, game_id)]

for x in range(2, 10):
    # Pause between games to avoid hammering the API.
    time.sleep(2)
    # NOTE(review): unreachable with the current range; presumably a game to
    # skip once the range is widened - confirm.
    if x == 707:
        continue

    # Game ids are the "00219" prefix followed by a zero-padded 5-digit number.
    game_id = "00219" + str(x).zfill(5)

    # NOTE(review): the same file is read for every game regardless of
    # game_id - confirm this is the intended per-game input.
    holder_play_by_play = pd.read_csv('pbp_for_parsing')
    lineup_frames.append(_players_on_court_at_start(holder_play_by_play, game_id))

# Concatenate once at the end; pd.concat is a module-level function -
# DataFrame objects have no .concat method.
players_on_court_at_start_of_period = pd.concat(lineup_frames, ignore_index=True)[cols]

players_on_court_at_start_of_period.to_csv("onoff0210.csv", index=False)

我收到的完整错误消息是:

Traceback (most recent call last):
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
    httplib_response = conn.getresponse()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 1344, in getresponse
    response.begin()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 306, in begin
    version, status, reason = self._read_status()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 267, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
    raise SocketError(str(e))
OSError: (10060, 'WSAETIMEDOUT')

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\adapters.py", line 449, in send
    timeout=timeout
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 720, in urlopen
    method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\util\retry.py", line 400, in increment
    raise six.reraise(type(error), error, _stacktrace)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\packages\six.py", line 734, in reraise
    raise value.with_traceback(tb)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 672, in urlopen
    chunked=chunked,
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 421, in _make_request
    six.raise_from(e, None)
  File "<string>", line 3, in raise_from
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\connectionpool.py", line 416, in _make_request
    httplib_response = conn.getresponse()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 1344, in getresponse
    response.begin()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 306, in begin
    version, status, reason = self._read_status()
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\http\client.py", line 267, in _read_status
    line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\urllib3\contrib\pyopenssl.py", line 318, in recv_into
    raise SocketError(str(e))
urllib3.exceptions.ProtocolError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:/Users/xxxxx/PycharmProjects/NBAdata/V.3/On Off V3.py", line 100, in <module>
    boxscore_players = extract_data(boxscore)[['PLAYER_NAME', 'PLAYER_ID', 'TEAM_ID']]
  File "C:/Users/xxxxx/PycharmProjects/NBAdata/V.3/On Off V3.py", line 69, in extract_data
    r = requests.get(url, headers=header_data)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\api.py", line 75, in get
    return request('get', url, params=params, **kwargs)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\api.py", line 60, in request
    return session.request(method=method, url=url, **kwargs)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\sessions.py", line 533, in request
    resp = self.send(prep, **send_kwargs)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\sessions.py", line 646, in send
    r = adapter.send(request, **kwargs)
  File "C:\Users\xxxxx\Anaconda3\envs\NBAdata\lib\site-packages\requests\adapters.py", line 498, in send
    raise ConnectionError(err, request=request)
requests.exceptions.ConnectionError: ('Connection aborted.', OSError("(10060, 'WSAETIMEDOUT')"))

我对这类代码不是很有经验,所以我来到 Stack Overflow 试图找到解决方案。我看到的方案包括:创建一个 User-Agent(我已经做了);在控制面板中更改高级 LAN 设置(我甚至找不到高级 LAN 设置,可能已从 Windows 中移除);尝试使用在线 IDE(但我发现没有一个允许我导入 csv 并在完成后将结果输出到 csv);尝试为我的 requests.get 添加超时(这实际上只会导致更多错误);可能还有一些我现在想不起来的其他方法。 我还写了一些格式和目标网址类似的其他文件,它们运行良好。 是的,我的互联网连接完全正常,其他一切都运行顺利,包括其他 Python 文件。

这确实给我当前的项目造成了障碍,在解决这个问题之前我无法继续做任何事情,所以如果有人能提出一个神奇的解决方案,那就太棒了。 这段代码的预期结果是:for 循环一直遍历到最大参数,并输出一个 csv,然后我会把它用作包含刚刚抓取的所有数据的数据帧。

事实证明,之所以会引发 KeyError,是因为该值在字符串的其余部分之前以“00”开头,但数据帧将该值作为整数读取并去掉了这两个零,从而生成了一个无效的 URL。 更改数据类型后,程序现在可以正常工作了。

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM