[英]Python BeautifulSoup4 Web Scraping Multiple Pages on one Web Site
[英]Python Web Scraping: Clicking links one by one on Ajax Site
我有一個保存在 csv 文件中的搜索條件列表。我想遍歷每個搜索條件,在網站上生成相應的搜索結果。對於生成的每組搜索結果(都是鏈接),我想逐一單擊鏈接,然後從打開的新頁面中獲取數據。不幸的是,我在進入每個鏈接時遇到了問題。如果有人能提供一些見解,將不勝感激。
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
# Read the list of CAS Numbers to be searched.
data = pd.read_csv("NPRI CACs.csv", names=["CAS Number", "Chemical Name"])
# dropna() returns a new frame rather than modifying in place, so keep the
# result; renumber the rows so the remaining entries are contiguous.
data = data.dropna().reset_index(drop=True)
CAS = data["CAS Number"]

# Parameters to be called
url = 'http://www.lifelabs.msdss.com/Login.aspx?ReturnUrl=%2fMainMenu.aspx%3ffm%3d0%26tb%3d0'

# Sign into SafeTec
browser = webdriver.Firefox()
browser.get(url)
browser.find_element_by_class_name("text").click()

# Conduct MSDS searches on SafeTec for (at most) the first 10 CAS Numbers.
# head(10) avoids a lookup failure when the CSV holds fewer than 10 rows.
for cas_number in CAS.head(10):
    try:
        Ingredient_CAS_Number = browser.find_element_by_id("placeBody_dynField48_txtTextBox")
        Ingredient_CAS_Number.send_keys(cas_number)
        browser.find_element_by_id("placeBody_linkSearchBottom").click()

        # Copy the hrefs out as plain strings *before* navigating: the
        # WebElement handles stop being usable once another page is loaded.
        links = [
            anchor.get_attribute('href')
            for anchor in browser.find_elements_by_css_selector("a[href*='MSDSDetail']")
        ]

        Product_Name = []
        for link in links:
            browser.get(link)
            product = browser.find_element_by_id("placeBody_dynField1_txtTextBox")
            # Store the field's content now, not the element handle, which
            # goes stale on the next browser.get().
            # NOTE(review): the id suggests a text input, so "value" is read
            # first with the visible text as a fallback -- confirm on the page.
            Product_Name.append(product.get_attribute("value") or product.text)
        print(Product_Name)

        # Reload the starting URL for the next search.
        browser.get(url)
    except Exception as exc:
        # Report which CAS Number failed and why, then move on to the next
        # one. `Exception` (not a bare except) lets Ctrl-C still stop the run.
        print(cas_number, repr(exc))
我設法用下面的代碼解決了這個問題,不過這個解決方案還不夠完善……
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
# Read the list of CAS Numbers to be searched.
data = pd.read_csv("NPRI CACs.csv", names=["CAS Number", "Chemical Name"])
# dropna() returns a new frame rather than modifying in place, so keep the
# result; renumber the rows so the remaining entries are contiguous.
data = data.dropna().reset_index(drop=True)
CAS = data["CAS Number"]

# Parameters to be called
url = 'http://www.lifelabs.msdss.com/Login.aspx?ReturnUrl=%2fMainMenu.aspx%3ffm%3d0%26tb%3d0'

# Sign into SafeTec
browser = webdriver.Firefox()
browser.get(url)
browser.find_element_by_class_name("text").click()

# Conduct MSDS searches on SafeTec for (at most) the first 2 CAS Numbers.
# head(2) avoids a lookup failure when the CSV holds fewer than 2 rows.
for cas_number in CAS.head(2):
    Ingredient_CAS_Number = browser.find_element_by_id("placeBody_dynField48_txtTextBox")
    Ingredient_CAS_Number.send_keys(cas_number)
    browser.find_element_by_id("placeBody_linkSearchBottom").click()

    # Record each result link's visible text as a plain string, so the links
    # can be looked up again after the page has been left and revisited.
    all_results = [
        anchor.text
        for anchor in browser.find_elements_by_css_selector("a[href*='MSDSDetail']")
    ]

    # Open each result by its link text, then step back to the result list.
    # A separate loop variable is used so the outer loop's is not shadowed.
    for link_text in all_results:
        browser.find_element_by_link_text(link_text).click()
        browser.back()

    # Reload the starting URL for the next search.
    browser.get(url)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.