[英]How to navigate to next page in selenium?
I'm a fresher in this field.我是这个领域的新手。 So I'm trying to navigate to a web page to scrape a data, when I execute the code, its scrapes the 1st page data but never navigates to the next pages.
因此,我试图导航到 web 页面以抓取数据,当我执行代码时,它抓取了第一页数据,但从不导航到下一页。 I have tried in a many ways but I couldn't find.
我尝试了很多方法,但找不到。 Please check below my code I have written a code for pagination.
请在我的代码下方检查我已经编写了分页代码。 please anyone help me on this.
请任何人帮助我。 thanks in advance
提前致谢
import xlwt
from selenium import webdriver
import re
import time
from datetime import date
class kotsovolosmobiles:
def __init__(self):
self.url='https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60'
self.country='GR'
self.currency='euro'
self.VAT= 'Included'
self.shipping = 'Available for shipment'
self.Pre_PromotionPrice ='N/A'
def kotsovolos(self):
wb = xlwt.Workbook()
ws = wb.add_sheet('Sheet1',cell_overwrite_ok=True)
ws.write(0,0,"Product_Url")
ws.write(0,0,"Product_Manufacturer")
ws.write(0,1,"Product_Url")
ws.write(0,2,"Product_Price")
ws.write(0,3,"Product_Model")
ws.write(0,4,"Memory")
ws.write(0,5,"Currency")
ws.write(0,6,"Color")
ws.write(0,7,"VAT")
ws.write(0,8,"Shipping Cost")
ws.write(0,9,"Pre-PromotionPrice")
ws.write(0,10,"Country")
ws.write(0,11,"Date")
ws.write(0,12,"Raw_Model")
wb.save(r"C:\Users\Karthick R\Desktop\VS code\kotsovolos.xls")
driver=webdriver.Chrome()
driver.get(self.url)
today = date.today()
time.sleep(5)
cookies = driver.find_element_by_css_selector('a[id="CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll"]')
cookies.click()
print("cookies accepted")
driver.maximize_window()
time.sleep(5)
titles = []
models = []
memorys = []
prod_prices = []
p_links =[]
p_colors = []
while True:
storage_box = []
storage_box = driver.find_elements_by_css_selector('div[class="product"]')
for storage_boxes in storage_box:
product_url = storage_boxes.find_element_by_css_selector('div[class="title"] a').get_attribute('href')
print(product_url)
p_links.append(product_url)
p_model = storage_boxes.find_element_by_css_selector('div[class="title"] a').text
print(p_model)
models.append(p_model)
manufacturer1 = p_model.split(" ")
print(manufacturer1[0])
titles.append(manufacturer1[0])
memory = []
memory = re.findall('\d+ ?[gG][bB]',p_model)
print(memory)
memory1 = str(memory).replace("['",'').replace("']",'').replace("[]",'').strip()
if "," in memory1:
arr=memory1.split(",")
for str1 in arr:
str2=str1.replace("GB", "").replace("gb", "").replace("'", "").strip()
if len(str2)!=1:
memory_str=str1
break
elif (memory1 == ""):
memory_str ='N/A'
else:
memory_str=memory1
memory_str = memory_str.replace("'", "").strip()
print(memory_str)
memorys.append(memory_str)
colors= []
prod_color = p_model.split(" ")
length = len(prod_color)
indexcolor = length-3
colors.append(prod_color[indexcolor])
color1 = str(colors).replace("['",'').replace("']",'').strip()
print(color1)
p_colors.append(color1)
p_price = storage_boxes.find_element_by_css_selector('.priceWithVat > .price').text
print(p_price)
prod_prices.append(p_price)
next = driver.find_element_by_css_selector('.pagination_next a')
time.sleep(3)
next.click()
print("next page")
time.sleep(3)
kotsovolos_gr = kotsovolosmobiles()
kotsovolos_gr.kotsovolos()
Maybe the page isn't loaded yet, try to replace the last block in the loop by也许页面还没有加载,尝试将循环中的最后一个块替换为
next = driver.find_element_by_css_selector('.pagination_next a')
url = next.get_attribute('href')
driver.get(url)
sleep(3) # Maybe it's not necessary
Try like below.像下面这样尝试。 I was able to visit all the other 5 pages.
我能够访问所有其他 5 页。
driver.implicitly_wait(10)
driver.get("https://www.kotsovolos.gr/mobile-phones-gps/mobile-phones/smartphones?pageSize=60")
time.sleep(5) # Would be better to apply Explicit wait to click on `Απόρριψη`
driver.find_element_by_xpath("//a[contains(text(),'Απόρριψη')]").click()
nextbuttons = driver.find_element_by_xpath("//ul[@class='pagination']/li[5]/a")
length = int(nextbuttons.get_attribute("innerText"))
for i in range(2,length+1):
nextopt = driver.find_element_by_xpath("//ul[@class='pagination']/li/a[contains(text(),'{}')]".format(i))
nextopt.click()
time.sleep(5)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.