簡體   English   中英

在python中使用Selenium Webdriver保存pdf

[英]Save the pdf using the selenium webdriver in python

我無法保存單擊打開的pdf。 據我介紹,使用Selenium WebDriver可使代碼自動運行。 我希望打開的pdf應該在python中使用硒自動通過代碼保存。 請協助以下代碼將pdf保存在文件夾中。

enter code here
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium.webdriver.support.select import Select
import time
url = 'https://maharerait.mahaonline.gov.in'
chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

driver = webdriver.Chrome(executable_path=chrome_path)
driver.get(url)
WebDriverWait(driver, 
    20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='search- 
    pro-details']//a[contains(.,'Search Project Details')]"))).click()
Registered_Project_radio= WebDriverWait(driver, 
    10).until(EC.element_to_be_clickable((By.ID,"Promoter")))
driver.execute_script("arguments[0].click();",Registered_Project_radio)
Application = driver.find_element_by_id("CertiNo")
Application.send_keys("P50500000005")
Search = WebDriverWait(driver, 
    10).until(EC.element_to_be_clickable((By.ID,"btnSearch")))
driver.execute_script("arguments[0].click();",Search)
View = [item.get_attribute('href') for item in 
      driver.find_elements_by_tag_name("a") if
      item.get_attribute('href') is not None]

View = View[0]
request = urllib.request.Request(View)
btn = WebDriverWait(driver, 
     20).until(EC.element_to_be_clickable((By.XPATH, 
     "//a[@class='btn btn-md btn-success' and @id='btnShow_2017']")))

driver.execute_script("arguments[0].click();",btn)

單擊后,添加<object data="application/pdf;base64,..."> ,其中所有PDF都以文本編碼的bas64形式存在於data=

driver.execute_script("arguments[0].click();",btn)

time.sleep(5)

# get tag <object>
obj = driver.find_element_by_tag_name('object')

# get `data=`
data = obj.get_attribute('data')

# get text after `base64,`
text = data.split(',')[1]

# encode text to PDF's content (as bytes)
import base64
bytes = base64.b64decode(text)

# save bytes in file
with open('output.pdf', 'wb') as fp:
    fp.write(bytes)

現在您已經擁有了所有output.pdf

經過Firefox測試

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
import urllib.request
from bs4 import BeautifulSoup
import os
from selenium.webdriver.support.select import Select
import time

url = 'https://maharerait.mahaonline.gov.in'
#chrome_path = r'C:/Users/User/AppData/Local/Programs/Python/Python36/Scripts/chromedriver.exe'

driver = webdriver.Firefox()#executable_path=chrome_path)
driver.get(url)
WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH,"//div[@class='search-pro-details']//a[contains(.,'Search Project Details')]"))).click()
Registered_Project_radio = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID,"Promoter")))

driver.execute_script("arguments[0].click();",Registered_Project_radio)

Application = driver.find_element_by_id("CertiNo")
Application.send_keys("P50500000005")

Search = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID,"btnSearch")))
driver.execute_script("arguments[0].click();", Search)
View = [item.get_attribute('href') for item in driver.find_elements_by_tag_name("a") if item.get_attribute('href') is not None]

btn = WebDriverWait(driver, 
     20).until(EC.element_to_be_clickable((By.XPATH, 
     "//a[@class='btn btn-md btn-success' and @id='btnShow_2017']")))

driver.execute_script("arguments[0].click();",btn)

time.sleep(5)

obj = driver.find_element_by_tag_name('object')
data = obj.get_attribute('data')
text = data.split(',')[1]

import base64
bytes = base64.b64decode(text)

with open('output.pdf', 'wb') as fp:
    fp.write(bytes)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM