Good afternoon, I'm trying to download a csv file programmatically through python and selenium because I need to do this hundreds of times. The manual steps to do the work are:
I have everything working programmatically through Step 5, and I believe Step 6 (box2... below) works as well. However, when I run the submit2 line in the code, nothing seems to download. I'm assuming this is probably an easy catch/fix for those of you who know Selenium far better than I do. I've also tried:
# Alternative attempt: double-click the report list box instead of pressing the button.
source = driver.find_element(By.ID, 'DTLNavigator_Report2_ReportsListBox')
action = webdriver.ActionChains(driver)
# ActionChains only queues actions; nothing reaches the browser until perform() is called.
action.double_click(source).perform()
but it didn't seem to work either. So I'm either messing something up with the code or I just can't seem to find the downloaded file. Any help you can provide would be greatly appreciated. I hope I've included enough information for you to follow.
Below is my code so far:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
# Chrome preferences: save downloads to a fixed folder without showing a prompt.
download_prefs = {
    "download.default_directory": r"D://Users//User//Downloads",
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True,
}

chrome_options = Options()
chrome_options.add_experimental_option("prefs", download_prefs)
chrome_options.headless = True
chrome_options.add_argument("--window-size=1920,1200")

DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"
driver = webdriver.Chrome(options=chrome_options, executable_path=DRIVER_PATH)

# Open the disclaimer page first, then the advanced search page itself.
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
wait = WebDriverWait(driver, 60)
driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")


def when_clickable(css_selector):
    """Wait (up to the WebDriverWait timeout) for the element to be clickable and return it."""
    return wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, css_selector)))


# Accept the disclaimer.
when_clickable("#btAgree").click()

# Add each search criterion: choose the criteria option by index, type its
# value into the text box, then press "Add".
for option_index, criterion_text in ((4, 'IOWA'), (3, '358407')):
    criteria_dropdown = Select(when_clickable("#sCriteria"))
    criteria_dropdown.select_by_index(option_index)
    criterion_input = driver.find_element(By.ID, "txtCrit")
    criterion_input.send_keys(criterion_text)
    when_clickable("#btAdd").click()

# Run the search and click into the results table.
driver.find_element(By.ID, "btSearch").click()
results_table = driver.find_element(By.CLASS_NAME, 'SearchResults')
results_table.click()

# Choose the CSV mailing-list report and request it.
report_dropdown = Select(when_clickable("#DTLNavigator_Report2_ReportsListBox"))
report_dropdown.select_by_value('CSVMailingList')
driver.find_element(By.ID, "ReportListButton").click()
A workaround for downloading files in headless mode is to specify the download path by sending a command through driver.command_executor.
I was able to download the CSV to the current directory in headless mode using the following code:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.select import Select
from selenium.webdriver.common.by import By
import os
import time
# Headless Chrome configuration.
options = Options()
options.headless = True
options.add_argument("--window-size=1920,1200")
DRIVER_PATH = "C://temp/webscraping/chromedriver.exe"

# Download target (current working directory in this example) and how long
# to wait for the file to arrive, in seconds.
DOWNLOAD_DIR = os.getcwd()
DOWNLOAD_TIMEOUT = 60


def _file_mtimes(directory):
    """Return a {file name: modification time} mapping for the regular files in *directory*."""
    mtimes = {}
    with os.scandir(directory) as entries:
        for entry in entries:
            try:
                if entry.is_file():
                    mtimes[entry.name] = entry.stat().st_mtime
            except FileNotFoundError:
                # The file disappeared (e.g. was renamed) while we were scanning.
                continue
    return mtimes


def _wait_for_download(directory, before, timeout):
    """Block until a new or modified file appears in *directory* and return the changed names.

    *before* is a snapshot from ``_file_mtimes`` taken before the download was
    triggered. A download counts as finished once at least one changed file
    exists and none of the changed files is a Chrome ``.crdownload`` partial.

    Raises:
        TimeoutError: if no finished download shows up within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        current = _file_mtimes(directory)
        changed = [name for name, mtime in current.items() if before.get(name) != mtime]
        finished = [name for name in changed if not name.endswith(".crdownload")]
        if finished and len(finished) == len(changed):
            return finished
        time.sleep(0.5)
    raise TimeoutError(f"no completed download appeared in {directory!r} within {timeout} seconds")


driver = webdriver.Chrome(options=options, executable_path=DRIVER_PATH)
try:
    # Register a custom "send_command" endpoint so a DevTools command can be
    # sent through the driver, then use it to allow downloads into DOWNLOAD_DIR.
    driver.command_executor._commands["send_command"] = ("POST", '/session/$sessionId/chromium/send_command')
    params = {'cmd': 'Page.setDownloadBehavior', 'params': {'behavior': 'allow', 'downloadPath': DOWNLOAD_DIR}}
    driver.execute("send_command", params)

    # Open the disclaimer page first, then the advanced search page itself.
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/Search/Disclaimer.aspx?FromUrl=../search/advancedsearch.aspx?mode=advanced")
    wait = WebDriverWait(driver, 60)
    driver.get("https://propertyinfo.revenue.wi.gov/WisconsinProd/search/advancedsearch.aspx?mode=advanced")
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAgree"))).click()

    # First criterion: option at index 4 with value 'IOWA'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(4)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('IOWA')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Second criterion: option at index 3 with value '358407'.
    box = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#sCriteria"))))
    box.select_by_index(3)
    iE = driver.find_element(By.ID, "txtCrit")
    iE.send_keys('358407')
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#btAdd"))).click()

    # Run the search; wait explicitly for the results table instead of
    # assuming it is already present right after the click.
    driver.find_element(By.ID, "btSearch").click()
    wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'SearchResults'))).click()

    # Choose the CSV mailing-list report.
    box2 = Select(wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "#DTLNavigator_Report2_ReportsListBox"))))
    box2.select_by_value('CSVMailingList')

    # Snapshot the directory, trigger the download, then wait until the file
    # has actually arrived rather than sleeping for a fixed time.
    files_before = _file_mtimes(DOWNLOAD_DIR)
    driver.find_element(By.ID, "ReportListButton").click()
    _wait_for_download(DOWNLOAD_DIR, files_before, DOWNLOAD_TIMEOUT)
finally:
    # Always shut the browser down so repeated runs do not leak Chrome processes.
    driver.quit()
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.