I have a Python script that I divided into several functions. My first function, 'get_url', is supposed to build the product URL after the user has entered the required product name at the prompt. I want to get the product details from all the pages of the site. However, when I run my code, I get product details ONLY from the first web page instead of all 25 pages. Please help. @Nathan Mills
HERE'S MY CODE BLOCK
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
def get_url(product):
    """Build a Konga search URL for ``product``.

    Spaces in ``product`` are replaced with ``%20`` before the name is
    substituted into the URL template.

    NOTE(review): only the lost indentation is restored here; the logic is
    kept exactly as posted because it is the behaviour being asked about:
      * ``return`` sits inside the ``for`` loop, so the function exits during
        the first iteration (x == 1) and pages 2-25 are never reached;
      * the template uses named fields (``{product}``, ``{x}``) but
        ``str.format`` receives positional arguments, which raises
        ``KeyError`` when that line executes.
    """
    for x in range(1, 26):
        product = product.replace(' ', '%20')
        template = 'https://www.konga.com/search?search=={product}&page=={x}'
        url = template.format(product, x)
        # Returning here ends the loop on its first pass.
        return url
def get_all_products(nest):
    """Extract one product's details from a product-card element.

    Args:
        nest: An element exposing ``find_element(by, value)`` (here, one of
            the cards returned by ``driver.find_elements``).

    Returns:
        A ``(name, current_price, reviews)`` tuple of whitespace-stripped
        text values.

    NOTE(review): the class names below look like generated CSS class names
    of the target site -- confirm they are still current. ``find_element``
    is called without any fallback, so a card that lacks one of the three
    elements will propagate whatever error the driver raises.
    """
    name = nest.find_element(By.CLASS_NAME, 'af885_1iPzH').text.strip()
    current_price = nest.find_element(By.CLASS_NAME, 'd7c0f_sJAqi').text.strip()
    reviews = nest.find_element(By.CLASS_NAME, 'eea9b_1Ma8-').text.strip()
    product_info = (name, current_price, reviews)
    return product_info
def main(product):
    """Scrape the search page returned by ``get_url(product)`` into a CSV.

    Opens Chrome through the local chromedriver, loads the URL, reads every
    product card on the page via ``get_all_products`` and appends the rows
    to ``MainTest.csv``.

    Args:
        product: The product name typed by the user.
    """
    product_list = []
    url = get_url(product)
    service = Service(executable_path="C:/driver/chromedriver_win32/chromedriver.exe")
    driver = webdriver.Chrome(service=service)
    try:
        driver.get(url)
        driver.maximize_window()
        # Fixed pauses give the page time to render before/after the lookup.
        time.sleep(5)
        product_cards = driver.find_elements(By.CLASS_NAME, 'bbe45_3oExY')
        time.sleep(5)
        for everyCard in product_cards:
            productDetails = get_all_products(everyCard)
            product_list.append(productDetails)
    finally:
        # Always close the browser, even if scraping fails part-way.
        driver.quit()

    col = ['Product_Name', 'Current_Price', 'Product_Reviews']
    df = pd.DataFrame(product_list, columns=col)
    # Raw string: same path as before, without invalid escape sequences.
    # mode='a' appends to an existing file (the header row is written again).
    df.to_csv(r"C:\Users\LP\Documents\MainTest\MainTest.csv", index=False, encoding='utf-8', mode='a')
# Script entry: prompt for the product name, then run the scraper with it.
product = input('Enter Product You Are Looking For : ')
main(product)
The function `get_url()` has a loop that ends with `return`, so the function stops after the first iteration: it generates one URL, returns it, and that is all it does. To fix that, you can either collect all the URLs in a list inside the function and return that list, or change `return` to `yield`. Using `yield` turns your function into a generator, which you can iterate over just like a list.
I also changed the line `url = template.format(product, x)` to `url = template.format(product=product, x=x)`, because the template uses the named fields `{product}` and `{x}` and the positional call was not working.
So the code with `yield` is the following:
from selenium import webdriver
import time
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import pandas as pd
def get_url(product, pages=25):
    """Yield the Konga search URL for each result page of ``product``.

    Args:
        product: The product name as typed by the user.
        pages: Number of result pages to generate URLs for (pages are
            numbered from 1). Defaults to 25, the previously hard-coded value.

    Yields:
        One URL string per page, for pages ``1`` through ``pages``.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Percent-encode once, outside the loop. A space still becomes %20, and
    # characters such as '&' or '#' can no longer break the query string.
    encoded = quote(product, safe='')
    template = 'https://www.konga.com/search?search={product}&page={x}'
    for x in range(1, pages + 1):
        yield template.format(product=encoded, x=x)
def get_all_products(nest):
    """Read the name, price and reviews text from one product card.

    Args:
        nest: An element exposing ``find_element(by, value)``, i.e. a single
            product card located by the caller.

    Returns:
        Tuple ``(name, current_price, reviews)``; each item is the element's
        ``.text`` with surrounding whitespace stripped.

    NOTE(review): the three class names appear to be site-generated and may
    change -- verify against the live page. No fallback is provided when an
    element is missing from a card; the driver's error propagates.
    """
    name = nest.find_element(By.CLASS_NAME, 'af885_1iPzH').text.strip()
    current_price = nest.find_element(By.CLASS_NAME, 'd7c0f_sJAqi').text.strip()
    reviews = nest.find_element(By.CLASS_NAME, 'eea9b_1Ma8-').text.strip()
    return (name, current_price, reviews)
def main(product):
    """Scrape every search-result page for ``product`` and save it as CSV.

    Iterates over the URLs produced by ``get_url(product)``, collects the
    details of each product card with ``get_all_products`` and appends all
    rows to ``MainTest.csv`` in one write.

    Args:
        product: The product name typed by the user.
    """
    product_list = []
    service = Service(executable_path="C:/driver/chromedriver_win32/chromedriver.exe")
    # One browser session is reused for all pages instead of launching a new
    # Chrome per URL.
    driver = webdriver.Chrome(service=service)
    try:
        driver.maximize_window()
        for one_url in get_url(product):
            driver.get(one_url)
            # Fixed pauses give the page time to render before/after lookup.
            time.sleep(5)
            product_cards = driver.find_elements(By.CLASS_NAME, 'bbe45_3oExY')
            time.sleep(5)
            for everyCard in product_cards:
                product_list.append(get_all_products(everyCard))
    finally:
        # Guarantee the browser is closed even if a page fails to scrape.
        driver.quit()

    col = ['Product_Name', 'Current_Price', 'Product_Reviews']
    df = pd.DataFrame(product_list, columns=col)
    # Written once, after all pages, so accumulated rows are not appended
    # repeatedly. Raw string: same path value, no invalid escape sequences.
    df.to_csv(r"C:\Users\LP\Documents\MainTest\MainTest.csv", index=False, encoding='utf-8', mode='a')
# Script entry: prompt for the product name, then scrape all pages for it.
product = input('Enter Product You Are Looking For : ')
main(product)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.