![](/img/trans.png)
[英]Error response ''NoneType' object is not iterable' while scraping website using scrapy
[英]Scraping website to get game data: NoneType error
我正在運行以下代碼來抓取www.oddsportal.com
雖然代碼運行良好,但是,一些 URL 在頁面上沒有任何數據,因此,由於頁面缺少元素,我收到 NoneType 錯誤。
如何將“如果頁面上沒有數據,go 到下一個 URL”添加到此代碼:
import os
import pandas as pd
from selenium import webdriver
from tabulate import tabulate
from datetime import datetime
import time
from bs4 import BeautifulSoup as bs
browser = webdriver.Chrome()
class GameData:
def __init__(self):
self.date = []
self.time = []
self.game = []
self.score = []
self.home_odds = []
self.draw_odds = []
self.away_odds = []
self.country = []
self.league = []
def parse_data(url):
browser.get(url)
df = pd.read_html(browser.page_source, header=0)[0]
html = browser.page_source
soup = bs(html, "lxml")
cont = soup.find('div', {'id': 'wrap'})
content = cont.find('div', {'id': 'col-content'})
content = content.find('table', {'class': 'table-main'}, {'id': 'tournamentTable'})
main = content.find('th', {'class': 'first2 tl'})
count = main.findAll('a')
country = count[1].text
league = count[2].text
game_data = GameData()
game_date = None
for row in df.itertuples():
if not isinstance(row[1], str):
continue
elif ':' not in row[1]:
game_date = row[1].split('-')[0]
continue
game_data.date.append(game_date)
game_data.time.append(row[1])
game_data.game.append(row[2])
game_data.score.append(row[3])
game_data.home_odds.append(row[4])
game_data.draw_odds.append(row[5])
game_data.away_odds.append(row[6])
game_data.country.append(country)
game_data.league.append(league)
return game_data
urls = {
"https://www.oddsportal.com/soccer/ghana/premier-league-2014-2015/results/#/page/3/"
}
if __name__ == '__main__':
results = None
for url in urls:
game_data = parse_data(url)
result = pd.DataFrame(game_data.__dict__)
if results is None:
results = result
else:
results = results.append(result, ignore_index=True)
print(tabulate(results, headers='keys', tablefmt="github"))
錯誤:
Traceback (most recent call last):
File "C:/Users/user/AppData/Roaming/JetBrains/PyCharmCE2020.3/scratches/scratch_26.py", line 76, in <module>
game_data = parse_data(url)
File "C:/Users/user/AppData/Roaming/JetBrains/PyCharmCE2020.3/scratches/scratch_26.py", line 44, in parse_data
count = main.findAll('a')
AttributeError: 'NoneType' object has no attribute 'findAll'
此外,有時網頁無法加載。 如果 selenium 未加載,如何觸發 url 重試?
謝謝
為了讓我的評論更清楚,這里是 parse_data 的編輯:
def parse_data(url):
browser.get(url)
df = pd.read_html(browser.page_source, header=0)[0]
html = browser.page_source
soup = bs(html, "lxml")
cont = soup.find('div', {'id': 'wrap'})
content = cont.find('div', {'id': 'col-content'})
content = content.find('table', {'class': 'table-main'}, {'id': 'tournamentTable'})
main = content.find('th', {'class': 'first2 tl'})
# return if not found.
if main is None:
return None
count = main.findAll('a')
country = count[1].text
league = count[2].text
game_data = GameData()
game_date = None
for row in df.itertuples():
if not isinstance(row[1], str):
continue
elif ':' not in row[1]:
game_date = row[1].split('-')[0]
continue
game_data.date.append(game_date)
game_data.time.append(row[1])
game_data.game.append(row[2])
game_data.score.append(row[3])
game_data.home_odds.append(row[4])
game_data.draw_odds.append(row[5])
game_data.away_odds.append(row[6])
game_data.country.append(country)
game_data.league.append(league)
return game_data
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.