繁体   English   中英

如何导航到 selenium 中的下一页?

[英]How to navigate to next page in selenium?

我是这个领域的新手。 因此,我试图导航到 web 页面以抓取数据,当我执行代码时,它抓取了第一页数据,但从不导航到下一页。 我尝试了很多方法,但找不到。 请在我的代码下方检查我已经编写了分页代码。 请任何人帮助我。 提前致谢

import xlwt
from selenium import webdriver
import re
import time
from datetime import date
class kotsovolosmobiles:
    def __init__(self):
        self.url='https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60'
        self.country='GR'
        self.currency='euro'
        self.VAT= 'Included'
        self.shipping = 'Available for shipment'
        self.Pre_PromotionPrice ='N/A'
    def kotsovolos(self):
        
        wb = xlwt.Workbook()
        ws = wb.add_sheet('Sheet1',cell_overwrite_ok=True)
        ws.write(0,0,"Product_Url")
        ws.write(0,0,"Product_Manufacturer")
        ws.write(0,1,"Product_Url")
        ws.write(0,2,"Product_Price")
        ws.write(0,3,"Product_Model")
        ws.write(0,4,"Memory")
        ws.write(0,5,"Currency")
        ws.write(0,6,"Color")
        ws.write(0,7,"VAT")
        ws.write(0,8,"Shipping Cost")
        ws.write(0,9,"Pre-PromotionPrice")
        ws.write(0,10,"Country")
        ws.write(0,11,"Date")
        ws.write(0,12,"Raw_Model")
        wb.save(r"C:\Users\Karthick R\Desktop\VS code\kotsovolos.xls")
        driver=webdriver.Chrome()            
        driver.get(self.url)
        today = date.today()
        time.sleep(5)
        cookies = driver.find_element_by_css_selector('a[id="CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"]')
        cookies.click()
        print("cookies accepted")
        driver.maximize_window() 
        time.sleep(5)
        titles = []
        models = []
        memorys = []
        prod_prices = []
        p_links =[]
        p_colors = []
        while True:
            storage_box = []
            storage_box = driver.find_elements_by_css_selector('div[class="product"]')
            for storage_boxes in storage_box:
                product_url = storage_boxes.find_element_by_css_selector('div[class="title"] a').get_attribute('href')
                print(product_url)
                p_links.append(product_url)

                p_model = storage_boxes.find_element_by_css_selector('div[class="title"] a').text
                print(p_model)
                models.append(p_model)

                manufacturer1 = p_model.split(" ")
                print(manufacturer1[0])
                titles.append(manufacturer1[0])

                memory = []
                memory = re.findall('\d+ ?[gG][bB]',p_model)
                print(memory)
                memory1 = str(memory).replace("['",'').replace("']",'').replace("[]",'').strip()
                if "," in memory1:
                    arr=memory1.split(",")
                    for str1 in arr:
                        str2=str1.replace("GB", "").replace("gb", "").replace("'", "").strip() 
                        if len(str2)!=1: 
                            memory_str=str1
                            break 
                elif (memory1 == ""):
                    memory_str ='N/A'
                else:
                    memory_str=memory1 
                memory_str = memory_str.replace("'", "").strip() 
                print(memory_str)
                memorys.append(memory_str)

                colors= []
                prod_color = p_model.split(" ")
                length = len(prod_color)
                indexcolor = length-3 
                colors.append(prod_color[indexcolor])
                color1 = str(colors).replace("['",'').replace("']",'').strip()
                print(color1)
                p_colors.append(color1)

                p_price = storage_boxes.find_element_by_css_selector('.priceWithVat > .price').text
                print(p_price)
                prod_prices.append(p_price)
 
            next = driver.find_element_by_css_selector('.pagination_next a')
            time.sleep(3)
            next.click()
            print("next page") 
            time.sleep(3)

kotsovolos_gr = kotsovolosmobiles()
kotsovolos_gr.kotsovolos()

也许页面还没有加载,尝试将循环中的最后一个块替换为

next = driver.find_element_by_css_selector('.pagination_next a')
url = next.get_attribute('href')
driver.get(url)
sleep(3) # Maybe it's not necessary

像下面这样尝试。 我能够访问所有其他 5 页。

driver.implicitly_wait(10)
driver.get("https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60")
time.sleep(5) # Would be better to apply Explicit wait to click on `Απόρριψη`
driver.find_element_by_xpath("//a[contains(text(),'Απόρριψη')]").click()
nextbuttons = driver.find_element_by_xpath("//ul[@class='pagination']/li[5]/a")
length = int(nextbuttons.get_attribute("innerText"))
for i in range(2,length+1):
    nextopt = driver.find_element_by_xpath("//ul[@class='pagination']/li/a[contains(text(),'{}')]".format(i))
    nextopt.click()
    time.sleep(5)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM