[英]pop up warning message when scraping
我正在使用硒來刮擦此網站: https : //www.fedsdatacenter.com/federal-pay-rates/index.php? y = all &n=& l =&a=& o =
我的代碼可以很好地工作,方法是繼續單擊下一步並解析表,直到出現警告消息為止:
DataTables警告:table id = table-example-無效的JSON響應。
由於此錯誤,我的代碼停止了。 即使是手動操作,單擊“下一步”也會給我同樣的警告。
這是我的代碼。 我該怎么辦? 如果有任何改進我的代碼的方法,請幫助我。
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.common.exceptions import ElementNotVisibleException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import csv
import time
def has_class_onclick(tag):
return tag.has_attr('onclick')
def extract_table_content_into_rows(website_lists):
# This function is to extract all the table content from and put them into a list of row.
list_of_row = []
for table_page in website_lists:
soup_page = BeautifulSoup(table_page, "html.parser")
soup_table_raw = soup_page.find("table")
if soup_table_raw:
soup_table = soup_table_raw.find("tbody")
for soup_row in soup_table.find_all("tr"):
row_content = []
for soup_column in soup_row.find_all("td"):
if not soup_column.contents:
row_content.append(".")
else:
column_content = soup_column.contents[0].strip()
row_content.append(column_content)
list_of_row.append(row_content)
else:
continue
return list_of_row
def csv_writer(lists_of_row):
# This function is to write the table contents into a csv file.
with open("federal.csv", "at", newline="") as csvfile:
for row_to_write in lists_of_row:
writer = csv.writer(csvfile)
writer.writerow(row_to_write)
driver = webdriver.Chrome('chromedriver') # Optional argument, if not specified will search path.
driver.get('https://www.fedsdatacenter.com/federal-pay-rates/index.php?y=all&n=&l=&a=&o=')
driver.find_element_by_xpath('//*[@id="table-example_length"]/label/select').click()
time.sleep(3)
driver.find_element_by_xpath('//*[@id="table-example_length"]/label/select/option[4]').click()
time.sleep(3)
page_num = 1
while page_num > 0 and page_num <= 5:
html = driver.page_source
website_list = [html]
row_list = extract_table_content_into_rows(website_list)
print(row_list)
csv_writer(row_list)
driver.find_element_by_xpath('//*[@id="table-example_next"]/a').click()
time.sleep(3)
print(page_num)
page_num += 1
while page_num > 5:
html = driver.page_source
website_list = [html]
row_list = extract_table_content_into_rows(website_list)
print(row_list)
csv_writer(row_list)
driver.find_element_by_xpath('//*[@id="table-example_next"]/a').click()
not_find = 1
while not_find == 1:
try:
driver.find_element_by_xpath('//*[@id="table-example_paginate"]/ul/li[6]/a')
while driver.find_element_by_xpath('//*[@id="table-example_pagina'
'te"]/ul/li[6]/a').text != str(page_num + 2):
time.sleep(0.1)
not_find = 0
except StaleElementReferenceException:
continue
print(page_num)
page_num += 1
一種方法是使用某些JavaScript禁用頁面上的所有警報:
driver.execute_script('window.alert = function() {};')
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.