[英]Python/Selenium: save to pdf not saving
我正在使用 selenium/python 將一系列網頁保存為 pdf。 網頁上有一個用 javascript 渲染的表格; 我使用“find_element_by_xpath”來確認該 js 表格中的 pdf 圖標已經出現,然後才繼續打印。 理想情況下,我不想設置固定的等待/睡眠時間,因為我有數千個頁面要保存。
該代碼似乎有效,但沒有保存 pdf。
代碼如下:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
import json
# Script: open a page in Chrome, wait for the JavaScript-rendered PDF icon to
# show up, then print the page through Chrome's "Save as PDF" destination.
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By

options = webdriver.ChromeOptions()

# Pre-select "Save as PDF" as the print destination so no printer has to be
# chosen by hand in the print preview.
settings = {
    "recentDestinations": [{
        "id": "Save as PDF",
        "origin": "local",
        "account": "",
    }],
    "selectedDestinationId": "Save as PDF",
    "version": 2,
}
prefs = {'printing.print_preview_sticky_settings.appState': json.dumps(settings)}
options.add_experimental_option('prefs', prefs)
options.add_experimental_option('excludeSwitches', ['enable-logging'])
# Chrome switch intended to make window.print() print straight away instead
# of stopping at the preview dialog.
options.add_argument('--kiosk-printing')

CHROMEDRIVER_PATH = 'chromedriver.exe'
# Create exactly one browser session; a second webdriver.Chrome(...) call
# would start another browser and leave the first one orphaned.
driver = webdriver.Chrome(options=options, executable_path=CHROMEDRIVER_PATH)

# Load the page BEFORE looking for anything on it; searching first only
# inspects the blank start page.
driver.get("url")

try:
    # Poll for up to 10 seconds until the PDF icon exists in the DOM. This
    # returns as soon as the icon is found, so no fixed sleep is needed.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located(
            (By.XPATH, "//div[@class='fas fa-file-pdf']")
        )
    )
except (NoSuchElementException, TimeoutException):
    # The icon never appeared within the timeout; print the page anyway.
    element = None
print(element)

driver.execute_script('window.print();')
# NOTE(review): driver.quit() is intentionally not called here; quitting right
# after window.print() may close Chrome before the PDF has been written.
# Confirm the file exists before quitting.
import os
from selenium import webdriver
# Script: prepare an empty download directory, start Chrome configured to
# download PDFs into it, then fetch the first link on the page and store the
# response body as a PDF file.
import shutil

import requests
from selenium.webdriver.common.by import By

# Define the download directory BEFORE it is used below.
temp_down_path = os.path.join(os.getcwd(), 'temp_files')

if os.path.exists(temp_down_path):
    # Empty the directory so leftovers from a previous run are removed.
    for entry in os.listdir(temp_down_path):
        entry_path = os.path.join(temp_down_path, entry)
        if os.path.isdir(entry_path):
            shutil.rmtree(entry_path)
        elif os.path.isfile(entry_path):
            os.remove(entry_path)
else:
    os.makedirs(temp_down_path)

options = webdriver.ChromeOptions()
options.add_experimental_option('prefs', {
    "download.default_directory": temp_down_path,  # Change default directory for downloads
    "download.prompt_for_download": False,  # To auto download the file
    "download.directory_upgrade": True,
    "plugins.always_open_pdf_externally": True,  # It will not show PDF directly in chrome
})
driver = webdriver.Chrome(options=options, executable_path='chromedriver.exe')

search_url = 'your site url'
driver.get(search_url)

# Take the href of the first <a> element on the page and fetch it directly.
link = driver.find_element(By.TAG_NAME, "a").get_attribute('href')
response = requests.get(link)

# Join with a path separator; plain string concatenation would glue
# 'file_name' onto the last component of the working directory.
Document = os.path.join(os.getcwd(), 'file_name' + '.pdf')
if response.status_code == 200:
    # The context manager closes the file even if the write fails.
    with open(Document, 'wb') as pdf_file:
        pdf_file.write(response.content)
else:
    print('Document download problem')
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.