I am trying to scrape a webpage. However, despite using a CSS selector that Chrome's inspector confirms is correct, Selenium does not scrape all of the data: it only scrapes the odds on the first page, as seen below, and then gives an error message.
I have re-tested the CSS selector and changed it multiple times; however, Selenium with Python still does not scrape the data correctly.
I also tend to get:
Traceback (most recent call last):
File "C:/Users/Bain3/PycharmProjects/untitled4/Vpalmerbet1.py", line 1365, in <module>
EC.element_to_be_clickable((By.CSS_SELECTOR, ('.match-pop-market a[href*="/sports/soccer/"]'))))
File "C:\Users\Bain3\Anaconda3\lib\site-packages\selenium\webdriver\support\wait.py", line 80, in until
raise TimeoutException(message, screen, stacktrace)
selenium.common.exceptions.TimeoutException: Message:
I have tried changing CSS as well as using xpath for:
# Attempt 1 (disabled): wait for the match link using a CSS selector.
#clickMe = wait(driver, 15).until(EC.element_to_be_clickable((By.CSS_SELECTOR, ('.match-pop-market a[href*="/sports/soccer/"]'))))
# Attempt 2: the same wait expressed as XPath.
# NOTE: `a[href*='/sports/soccer/']` is CSS attribute-selector syntax; an XPath
# attribute substring test is written `a[contains(@href, '/sports/soccer/')]`,
# so this locator does not select the intended links.
clickMe = wait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, ("//*[@class='match-pop-market']//a[href*='/sports/soccer/']"))))
You can see that Chrome's inspector detects this CSS selector:
My full code is:
"""Scrape soccer team names, odds and match links from palmerbet.com into a CSV.

The script opens the soccer page, expands the tournament filter, then visits
the tournament checkboxes in random order.  For each one it prints and
collects the team names, the odds and the match links, and appends the
combined rows to ``vtg121.csv``.
"""
import csv
import io  # unused here; kept because the original script imported it
import os
import sys  # unused here; kept because the original script imported it
import time
from random import shuffle

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

OUTPUT_CSV = 'vtg121.csv'
START_URL = 'https://www.palmerbet.com/sports/soccer'

FILTER_LABEL_XPATH = '//*[contains(@class,"filter_labe")]'
TOURNAMENT_INPUT_XPATH = '(//ul[@id="tournaments"]//li//input)[%d]'
TEAM_CSS = "#mta_row td:nth-child(1)"
ODDS_CSS = "#mta_row .mpm_teams_cell_click:nth-child(2) .mpm_teams_bet_val"
MATCH_LINK_CSS = '.match-pop-market a[href*="/sports/soccer/"]'
# XPath has no `href*=` operator (that is CSS syntax); an attribute substring
# test must be written with contains(@href, ...).
MATCH_LINK_XPATH = "//*[@class='match-pop-market']//a[contains(@href, '/sports/soccer/')]"

driver = webdriver.Chrome()
driver.set_window_size(1024, 600)
driver.maximize_window()

# Start from a clean output file; a missing file is not an error.
try:
    os.remove(OUTPUT_CSV)
except OSError:
    pass

driver.get(START_URL)
time.sleep(1)

# Open the tournament filter, then wait until its label is clickable again
# before counting the available options.
filter_label = WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, FILTER_LABEL_XPATH)))
filter_label.click()
WebDriverWait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, FILTER_LABEL_XPATH)))

# find_elements(By, value) is used instead of find_elements_by_*, which newer
# Selenium releases no longer provide.
options = driver.find_elements(By.XPATH, FILTER_LABEL_XPATH)
indexes = list(range(len(options)))
shuffle(indexes)

# NOTE(review): the pasted source lost its indentation; everything below is
# assumed to run once per tournament (the CSV is opened in append mode).
for index in indexes:
    # XPath positions are 1-based, hence index + 1.
    tournament_input = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable(
            (By.XPATH, TOURNAMENT_INPUT_XPATH % (index + 1))))
    tournament_input.click()

    # Team names
    team_names = []
    for cell in driver.find_elements(By.CSS_SELECTOR, TEAM_CSS):
        text = cell.text
        print(text)
        team_names.append(text)

    # Team odds
    team_odds = []
    for cell in driver.find_elements(By.CSS_SELECTOR, ODDS_CSS):
        text = cell.text
        print(text)
        team_odds.append(text)

    # Match links: wait for at least one to become clickable.  A timeout means
    # this tournament exposes no link; report it and move on instead of
    # aborting the whole run.
    try:
        WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, MATCH_LINK_XPATH)))
    except TimeoutException:
        print("No link was found")

    match_links = []
    for anchor in driver.find_elements(By.CSS_SELECTOR, MATCH_LINK_CSS):
        href = anchor.get_attribute("href")
        print(href)
        match_links.append(href)
    print("NEW LINE BREAK")

    # Append this tournament's rows; zip() stops at the shortest list, so
    # nothing is written when any of the three lists is empty.
    with open(OUTPUT_CSV, 'a', newline='', encoding="utf-8") as outfile:
        writer = csv.writer(outfile)
        for row in zip(team_odds, team_names, match_links):
            writer.writerow(row)
            print(row)
Your XPath is incorrect. Note that a predicate like [href*="/sports/soccer/"] can be used in a CSS selector, while in XPath you should use [contains(@href, "/sports/soccer/")]. So the complete line should be:
from selenium.common.exceptions import TimeoutException

try:
    # XPath tests an attribute substring with contains(@href, ...); the
    # `href*=` form is CSS-only.
    clickMe = wait(driver, 15).until(EC.element_to_be_clickable((By.XPATH, "//*[@class='match-pop-market']//a[contains(@href, '/sports/soccer/')]")))
    # NOTE(review): this clicks clickMe1 (located earlier in the question's
    # loop), not the link that was just awaited - confirm this is intended.
    clickMe1.click()
except TimeoutException:
    # until() raises TimeoutException when no matching clickable element
    # appears within the 15-second limit.
    print("No link was found")
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.