# Scrape per-game three-point stats (player, team, 3PA, 3P%) from the
# stats.nba.com "traditional" player table, one season at a time, by driving
# the page's Season dropdown with Selenium.
#
# NOTE: webdriver, ChromeDriverManager, Select, BeautifulSoup and pd are
# expected to be imported earlier in the file; the three imports below are the
# only additional names this section needs.
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

#Initialization for beautifulsoup to access site for per game stats
url = "https://stats.nba.com/players/traditional/?sort=PTS&dir=-1&Season=2018-19&SeasonType=Regular%20Season"
d = webdriver.Chrome(ChromeDriverManager().install())
d.get(url)

# Column order of the resulting frame (a list, because a set has no order).
_COLUMNS = ['Player', 'Team', '3PA', '3P%', '3PaTotal', 'Season']
# Seconds to wait for the page to (re)render before giving up.
_RENDER_TIMEOUT = 20


def _wait_for(locator):
    """Block until the element matching *locator* is in the DOM and return it."""
    return WebDriverWait(d, _RENDER_TIMEOUT).until(
        EC.presence_of_element_located(locator))


# One small frame per scraped page; they are concatenated once at the end
# instead of growing data_df with a copy on every page.
_page_frames = []
for yearCount in range(0, 20):
    # Pick the season *before* scraping, so each dropdown entry is scraped
    # exactly once (selecting it after the page loop scraped the first season
    # twice and never scraped the last selection).
    season_select = Select(_wait_for((By.NAME, "Season")))
    previous_table = _wait_for((By.TAG_NAME, "table"))
    previous_season = season_select.first_selected_option.text.strip()
    season_select.select_by_index(yearCount)

    # Re-locate the dropdown: the page may have re-rendered it.
    season_text = Select(
        d.find_element_by_name("Season")).first_selected_option.text.strip()
    if season_text != previous_season:
        # Changing the season removes the table while the new data loads;
        # reading page_source in that window is what raised
        # "'NoneType' object has no attribute 'find_all'". Wait for the old
        # table to go away, then for the new one to appear.
        WebDriverWait(d, _RENDER_TIMEOUT).until(EC.staleness_of(previous_table))
    _wait_for((By.TAG_NAME, "table"))

    # Label rows with the season actually selected rather than a hard-coded
    # "18/19". Assumes the option text looks like "2018-19" -- TODO confirm.
    seasonStr = season_text[2:4] + "/" + season_text[-2:]

    for pageCounter in range(0, 11):
        #Scrapes all of the data putting it into headers
        soup = BeautifulSoup(d.page_source, 'html.parser').find('table')
        headers = [th.text for th in soup.find_all('th')]
        # The first <tr> is the header row; drop it.
        _, *data = [[td.text for td in tr.find_all('td')]
                    for tr in soup.find_all('tr')]
        final_data = [row for row in data if len(row) > 1]
        #Creates a dictionary of headers
        data_attrs = [dict(zip(headers, row)) for row in final_data]
        #Collects stats that are used for graph
        _page_frames.append(pd.DataFrame(
            {
                'Player': [row['PLAYER'] for row in data_attrs],
                'Team': [row['TEAM'] for row in data_attrs],
                '3PA': [row['3PA'] for row in data_attrs],
                '3P%': [row['3P%'] for row in data_attrs],
                '3PaTotal': 0,
                'Season': seasonStr,
            },
            columns=_COLUMNS,
        ))
        #Goes to next page
        nxt = d.find_element_by_class_name("stats-table-pagination__next")
        nxt.click()

#Adds the data collected to the dataframe
# drop_duplicates: a season with fewer than 11 pages would otherwise contribute
# its last page more than once (presumably "next" is a no-op there -- confirm).
data_df = (pd.concat(_page_frames, ignore_index=True)
           .drop_duplicates()
           .reset_index(drop=True))
My error code:
Traceback (most recent call last):
  File "C:/Users/brenn/PycharmProjects/NBAstats/venv/Lib/site-packages/Player 3-Point.py", line 44, in <module>
    headers, [_, *data] = [i.text for i in soup.find_all('th')], [[i.text for i in b.find_all('td')] for b in soup.find_all('tr')]
AttributeError: 'NoneType' object has no attribute 'find_all'
I am having an issue when attempting to collect data for past seasons on the NBA site. My code collects all of the player data for the current season (iterating through each page with no issues), but when I try to collect the data for a past season by navigating through the dropdown, it does not work. If I load the URL of the past season directly, without using the dropdown menu, it collects the data with no issue. Also, in the Selenium Chrome tab, the page does switch to the past season, but the script fails when it attempts to read the data.
I love working with sports data!
I'd like to suggest a slightly different method. The data is rendered from a request URL that returns a JSON response. You can use its query parameters to iterate through the seasons (starting in 1996). Then you can dump the whole thing into a dataframe and filter/manipulate that dataframe any way you want.
import requests
import pandas as pd
# Download per-game player stats for every season from 1996-97 through 2018-19
# from the stats.nba.com JSON endpoint and stack them into one DataFrame,
# `results`, with an added 'Season' column such as '96/97'.
request_url = 'https://stats.nba.com/stats/leaguedashplayerstats'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36'}

# Seconds before a stalled request is abandoned instead of hanging forever.
_REQUEST_TIMEOUT = 30

# Query parameters that are identical for every season; only 'Season' varies,
# so it is filled in per iteration below.
_base_params = {
    'College': '',
    'Conference': '',
    'Country': '',
    'DateFrom': '',
    'DateTo': '',
    'Division': '',
    'DraftPick': '',
    'DraftYear': '',
    'GameScope': '',
    'GameSegment': '',
    'Height': '',
    'LastNGames': '0',
    'LeagueID': '00',
    'Location': '',
    'MeasureType': 'Base',
    'Month': '0',
    'OpponentTeamID': '0',
    'Outcome': '',
    'PORound': '0',
    'PaceAdjust': 'N',
    'PerMode': 'PerGame',
    'Period': '0',
    'PlayerExperience': '',
    'PlayerPosition': '',
    'PlusMinus': 'N',
    'Rank': 'N',
    'SeasonSegment': '',
    'SeasonType': 'Regular Season',
    'ShotClockRange': '',
    'StarterBench': '',
    'TeamID': '0',
    'TwoWay': '0',
    'VsConference': '',
    'VsDivision': '',
    'Weight': '',
}

# One frame per season, concatenated once after the loop.
_season_frames = []
for yearCount in range(1996, 2019):
    # Two-digit label, e.g. 1999 -> '99/00'.
    seasonStr = '%02d/%02d' % (yearCount % 100, (yearCount + 1) % 100)
    # Format used in the 'Season' query parameter, e.g. '1999-00'.
    season_query = '%s-%s' % (yearCount, str(yearCount + 1)[-2:])
    params = dict(_base_params, Season=season_query)

    response = requests.get(request_url, headers=headers, params=params,
                            timeout=_REQUEST_TIMEOUT)
    # Fail with the HTTP status rather than an opaque JSON decoding error.
    response.raise_for_status()
    jsonObj = response.json()

    cols = jsonObj['resultSets'][0]['headers']
    rows = jsonObj['resultSets'][0]['rowSet']
    # Build the season's frame in one step; appending one single-row frame per
    # player copies the whole frame each time.
    temp_df = pd.DataFrame(rows, columns=cols)
    temp_df['Season'] = seasonStr
    print('Aquired %s stats' % seasonStr)
    _season_frames.append(temp_df)

results = pd.concat(_season_frames, ignore_index=True)
Output:
print(results)
PLAYER_ID PLAYER_NAME ... CFPARAMS Season
0 1489 None ... 1489, 96/97
1 902 None ... 902, 96/97
2 2179 None ... 2179, 96/97
3 1049 None ... 1049, 96/97
4 775 None ... 775, 96/97
5 93 None ... 93, 96/97
6 920 A.C. Green ... 920,1610612742 96/97
7 243 Aaron McKie ... 243,1610612765 96/97
8 1425 Aaron Williams ... 1425,1610612763 96/97
9 768 Acie Earl ... 768,1610612749 96/97
10 228 Adam Keefe ... 228,1610612762 96/97
11 154 Adrian Caldwell ... 154,1610612755 96/97
12 673 Alan Henderson ... 673,1610612737 96/97
13 1059 Aleksandar Djordjevic ... 1059,1610612757 96/97
14 275 Allan Houston ... 275,1610612752 96/97
15 947 Allen Iverson ... 947,1610612755 96/97
16 297 Alonzo Mourning ... 297,1610612748 96/97
17 175 Alton Lister ... 175,1610612738 96/97
18 1043 Amal McCaskill ... 1043,1610612753 96/97
19 692 Andrew DeClercq ... 692,1610612744 96/97
20 457 Andrew Lang ... 457,1610612749 96/97
21 358 Anfernee Hardaway ... 358,1610612753 96/97
22 924 Anthony Goldwire ... 924,1610612743 96/97
23 193 Anthony Mason ... 193,1610612766 96/97
24 292 Anthony Miller ... 292,1610612737 96/97
25 324 Anthony Peeler ... 324,1610612763 96/97
26 156 Antoine Carr ... 156,1610612762 96/97
27 952 Antoine Walker ... 952,1610612738 96/97
28 213 Antonio Davis ... 213,1610612754 96/97
29 176 Antonio Harvey ... 176,1610612760 96/97
... ... ... ... ...
10599 204020 Tyler Johnson ... 204020,1610612756 18/19
10600 1628399 Tyler Lydon ... 1628399,1610612743 18/19
10601 1627755 Tyler Ulis ... 1627755,1610612741 18/19
10602 203092 Tyler Zeller ... 203092,1610612737 18/19
10603 201936 Tyreke Evans ... 201936,1610612754 18/19
10604 1627820 Tyrone Wallace ... 1627820,1610612746 18/19
10605 2199 Tyson Chandler ... 2199,1610612747 18/19
10606 1626145 Tyus Jones ... 1626145,1610612750 18/19
10607 2617 Udonis Haslem ... 2617,1610612748 18/19
10608 203506 Victor Oladipo ... 203506,1610612754 18/19
10609 1713 Vince Carter ... 1713,1610612737 18/19
10610 1629053 Vincent Edwards ... 1629053,1610612745 18/19
10611 1627735 Wade Baldwin IV ... 1627735,1610612757 18/19
10612 201961 Wayne Ellington ... 201961,1610612765 18/19
10613 1627782 Wayne Selden ... 1627782,1610612741 18/19
10614 1628976 Wendell Carter Jr. ... 1628976,1610612741 18/19
10615 1628411 Wes Iwundu ... 1628411,1610612753 18/19
10616 202325 Wesley Johnson ... 202325,1610612764 18/19
10617 202083 Wesley Matthews ... 202083,1610612754 18/19
10618 203115 Will Barton ... 203115,1610612743 18/19
10619 1626161 Willie Cauley-Stein ... 1626161,1610612758 18/19
10620 1626195 Willy Hernangomez ... 1626195,1610612766 18/19
10621 201163 Wilson Chandler ... 201163,1610612746 18/19
10622 1627812 Yogi Ferrell ... 1627812,1610612758 18/19
10623 1629139 Yuta Watanabe ... 1629139,1610612763 18/19
10624 1628380 Zach Collins ... 1628380,1610612757 18/19
10625 203897 Zach LaVine ... 203897,1610612741 18/19
10626 1629155 Zach Lofton ... 1629155,1610612765 18/19
10627 2585 Zaza Pachulia ... 2585,1610612765 18/19
10628 1627753 Zhou Qi ... 1627753,1610612745 18/19
[10629 rows x 66 columns]
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.