[英]Selenium and chromedriver
我正在從https://www.flashscore.com/basketball/china/cba/results/抓取一些數據。
我想在沒有“event__stage”的情況下提取“event__time”。
我怎樣才能做到這一點?
這是我的腳本:
from selenium import webdriver
import time
import pandas as pd
# Target league and season; these values also form the output CSV file name.
country = 'china'
ligue = 'cba'
year = '2021-2022'
url = f'https://www.flashscore.es/baloncesto/{country}/{ligue}/resultados/'

# Open the results page in a new Chrome session.
driver = webdriver.Chrome()
call = driver.get(url)

# Collect every element that carries the "event__time" class.
data = driver.find_elements_by_class_name('event__time')

# Flatten each element's text onto a single line by turning newlines into
# ";" separators (whatever follows the first newline is kept, not removed).
data_clean = [element.text.replace("\n", ";") for element in data]

# One row per element, written without the index column.
dataframe = pd.DataFrame(data_clean)
dataframe.to_csv(country + ligue + year + '.csv', index=False)
用 '\n' 分割每個 div 的文本,然後只取分割結果的第一個元素(即時間部分,不含 event__stage)。
工作代碼 -
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
# Start-up flags for the Chrome session used by the scraper.
options = webdriver.ChromeOptions()
# Uncomment to run without a visible browser window.
# options.add_argument("--headless")
options.add_argument("--no-sandbox")
options.add_argument("--disable-gpu")
# Chrome's --window-size switch takes "width,height" (comma-separated);
# the "1920x1080" spelling is not the documented format.
options.add_argument("--window-size=1920,1080")
options.add_argument("--disable-extensions")

# Module-level driver shared with flashscore_scraper(). The chromedriver
# binary path is supplied by webdriver_manager's ChromeDriverManager.
chrome_driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
    options=options,
)
def flashscore_scraper(country='china', ligue='cba', year='2021-2022'):
    """Save the start times listed on a flashscore.es basketball results page.

    Loads the results page for ``country``/``ligue``, reads the text of every
    ``div.event__time`` element, keeps only the part before the first newline
    of each text, and writes the values to ``<country><ligue><year>.csv`` in
    the current working directory (one row per element, no index column).

    Args:
        country: Country slug used in the URL and in the CSV file name.
        ligue: League slug used in the URL and in the CSV file name.
        year: Season label; only used in the CSV file name.

    Returns:
        None. The result is the CSV file written to disk.
    """
    url = 'https://www.flashscore.es/baloncesto/' + country + '/' + ligue + '/resultados/'

    # NOTE(review): leaving the ``with`` block is expected to quit the shared
    # module-level driver, so this function can presumably run only once per
    # process - confirm against the Selenium WebDriver documentation.
    with chrome_driver as driver:
        # Let element lookups wait up to 15 seconds for the page to render.
        driver.implicitly_wait(15)
        driver.get(url)

        # By.CLASS_NAME with 'event__time' would also work; the CSS selector
        # additionally restricts the match to <div> elements.
        data = driver.find_elements(By.CSS_SELECTOR, 'div.event__time')

        # Read the text while the browser session is still open.
        event_time_data = [d.text for d in data]

    # Keep only the first line of each text; anything after the first '\n'
    # (the extra stage text the question wants removed) is dropped.
    data_clean = [event_time.split('\n')[0] for event_time in event_time_data]

    dataframe = pd.DataFrame(data_clean)
    dataframe.to_csv(country + ligue + year + '.csv', index=False)
# Run the scraper only when this file is executed as a script, so that
# importing it from another module does not trigger a scrape.
if __name__ == "__main__":
    flashscore_scraper()
輸出 -
0
26.04. 13:35
24.04. 13:35
22.04. 13:35
20.04. 13:35
17.04. 13:35
16.04. 13:35
15.04. 13:35
14.04. 13:35
13.04. 13:35
12.04. 13:35
10.04. 13:35
10.04. 09:00
09.04. 13:35
09.04. 09:00
08.04. 13:35
08.04. 09:00
07.04. 13:35
07.04. 09:00
05.04. 13:35
04.04. 13:35
04.04. 09:00
03.04. 13:35
03.04. 09:00
02.04. 13:35
02.04. 09:00
01.04. 13:35
01.04. 09:00
22.03. 14:00
22.03. 14:00
22.03. 09:30
22.03. 09:30
22.03. 05:00
22.03. 05:00
21.03. 13:35
21.03. 13:35
21.03. 09:00
21.03. 09:00
20.03. 14:00
20.03. 14:00
20.03. 09:30
20.03. 09:30
20.03. 05:00
20.03. 05:00
19.03. 13:35
19.03. 13:35
19.03. 09:00
19.03. 09:00
18.03. 13:35
18.03. 13:35
18.03. 09:00
18.03. 09:00
17.03. 14:00
17.03. 14:00
17.03. 09:30
17.03. 09:30
17.03. 05:00
17.03. 05:00
16.03. 13:35
16.03. 13:35
15.03. 14:00
15.03. 14:00
15.03. 09:30
15.03. 09:30
15.03. 05:00
15.03. 05:00
14.03. 13:35
14.03. 13:35
13.03. 14:00
13.03. 14:00
13.03. 09:30
13.03. 09:30
13.03. 05:00
13.03. 05:00
12.03. 13:35
12.03. 13:35
12.03. 09:00
12.03. 09:00
11.03. 13:35
11.03. 13:35
11.03. 09:00
11.03. 09:00
10.03. 14:00
10.03. 14:00
10.03. 09:30
10.03. 09:30
10.03. 05:00
10.03. 05:00
09.03. 13:35
09.03. 13:35
08.03. 14:00
08.03. 13:35
08.03. 09:30
08.03. 09:00
08.03. 05:00
08.03. 05:00
07.03. 13:35
07.03. 13:35
06.03. 14:00
06.03. 13:35
06.03. 09:30
06.03. 09:00
06.03. 05:00
06.03. 05:00
05.03. 13:35
05.03. 13:35
05.03. 09:00
05.03. 09:00
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.