[英]How to navigate to next page in selenium?
我是这个领域的新手。 因此,我试图导航到 web 页面以抓取数据,当我执行代码时,它抓取了第一页数据,但从不导航到下一页。 我尝试了很多方法,但找不到。 请在我的代码下方检查我已经编写了分页代码。 请任何人帮助我。 提前致谢
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
class kotsovolosmobiles:
def __init__(self):
self.url='https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60'
self.country='GR'
self.currency='euro'
self.VAT= 'Included'
self.shipping = 'Available for shipment'
self.Pre_PromotionPrice ='N/A'
def kotsovolos(self):
wb = xlwt.Workbook()
ws = wb.add_sheet('Sheet1',cell_overwrite_ok=True)
ws.write(0,0,"Product_Url")
ws.write(0,0,"Product_Manufacturer")
ws.write(0,1,"Product_Url")
ws.write(0,2,"Product_Price")
ws.write(0,3,"Product_Model")
ws.write(0,4,"Memory")
ws.write(0,5,"Currency")
ws.write(0,6,"Color")
ws.write(0,7,"VAT")
ws.write(0,8,"Shipping Cost")
ws.write(0,9,"Pre-PromotionPrice")
ws.write(0,10,"Country")
ws.write(0,11,"Date")
ws.write(0,12,"Raw_Model")
wb.save(r"C:\Users\Karthick R\Desktop\VS code\kotsovolos.xls")
driver=webdriver.Chrome()
driver.get(self.url)
today = date.today()
time.sleep(5)
cookies = driver.find_element_by_css_selector('a[id="CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"]')
cookies.click()
print("cookies accepted")
driver.maximize_window()
time.sleep(5)
titles = []
models = []
memorys = []
prod_prices = []
p_links =[]
p_colors = []
while True:
storage_box = []
storage_box = driver.find_elements_by_css_selector('div[class="product"]')
for storage_boxes in storage_box:
product_url = storage_boxes.find_element_by_css_selector('div[class="title"] a').get_attribute('href')
print(product_url)
p_links.append(product_url)
p_model = storage_boxes.find_element_by_css_selector('div[class="title"] a').text
print(p_model)
models.append(p_model)
manufacturer1 = p_model.split(" ")
print(manufacturer1[0])
titles.append(manufacturer1[0])
memory = []
memory = re.findall('\d+ ?[gG][bB]',p_model)
print(memory)
memory1 = str(memory).replace("['",'').replace("']",'').replace("[]",'').strip()
if "," in memory1:
arr=memory1.split(",")
for str1 in arr:
str2=str1.replace("GB", "").replace("gb", "").replace("'", "").strip()
if len(str2)!=1:
memory_str=str1
break
elif (memory1 == ""):
memory_str ='N/A'
else:
memory_str=memory1
memory_str = memory_str.replace("'", "").strip()
print(memory_str)
memorys.append(memory_str)
colors= []
prod_color = p_model.split(" ")
length = len(prod_color)
indexcolor = length-3
colors.append(prod_color[indexcolor])
color1 = str(colors).replace("['",'').replace("']",'').strip()
print(color1)
p_colors.append(color1)
p_price = storage_boxes.find_element_by_css_selector('.priceWithVat > .price').text
print(p_price)
prod_prices.append(p_price)
next = driver.find_element_by_css_selector('.pagination_next a')
time.sleep(3)
next.click()
print("next page")
time.sleep(3)
kotsovolos_gr = kotsovolosmobiles()
kotsovolos_gr.kotsovolos()
也许页面还没有加载,尝试将循环中的最后一个块替换为
next = driver.find_element_by_css_selector('.pagination_next a')
url = next.get_attribute('href')
driver.get(url)
sleep(3) # Maybe it's not necessary
像下面这样尝试。 我能够访问所有其他 5 页。
driver.implicitly_wait(10)
driver.get("https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60")
time.sleep(5) # Would be better to apply Explicit wait to click on `Απόρριψη`
driver.find_element_by_xpath("//a[contains(text(),'Απόρριψη')]").click()
nextbuttons = driver.find_element_by_xpath("//ul[@class='pagination']/li[5]/a")
length = int(nextbuttons.get_attribute("innerText"))
for i in range(2,length+1):
nextopt = driver.find_element_by_xpath("//ul[@class='pagination']/li/a[contains(text(),'{}')]".format(i))
nextopt.click()
time.sleep(5)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.