I have a requirement to scrape some data from web pages. I'm using Selenium with Python for that, but I was unable to get the desired results. If anyone can suggest what to do, or how I can change the code to get the desired results, it would be very helpful.
I need to get the results in the following order: Position, Name of Product, URL, and Company Name.
The script below is able to get all the title names, but with extra "\n" characters, and it is not able to get the other details. Can anyone please help me with it or offer suggestions?
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
# Scrape the shopping-ad cards shown on a Google results page.
# Start Chrome in incognito mode through the local chromedriver binary.
option = webdriver.ChromeOptions()
# The switch must begin with two ASCII hyphens; " — incognito" (a space plus an
# em dash, typically produced by auto-formatting) is not the --incognito flag.
option.add_argument("--incognito")
#browser = webdriver.Chrome(executable_path='/usr/local/bin/chromedriver/', chrome_options=option)
browser = webdriver.Chrome(executable_path='/users/user_1566/downloads/chrome_driver/chromedriver', chrome_options=option)
browser.get('https://www.google.com/search?q=samsung+note10')
#items = len(browser.find_elements_by_class_name("cu-container"))
#items = len(browser.find_elements_by_class_name("mnr-c pla-unit"))
#print(items)

# Wait up to `timeout` seconds for the ad container to become visible before
# querying it. On timeout the script only reports the problem and carries on,
# so the lookups below may simply return empty lists.
timeout = 20
try:
    WebDriverWait(browser, timeout).until(
        EC.visibility_of_element_located((By.XPATH, "//div[@class='top-pla-group-inner']"))
    )
except TimeoutException:
    print('Timed out waiting for page to load')
    #browser.quit()

# Each matching <div> is one ad card.
titles_element = browser.find_elements_by_xpath("//div[@class='mnr-c pla-unit']")
# Keep the text of each card rather than the Selenium WebElement objects.
# A card's text spans several lines, so each entry contains "\n" characters.
titles = [x.text for x in titles_element]
# Print out all the titles.
print('titles:')
print(titles, '\n')

# The original expression had a doubled quote (@class=''plantl ...'), which is
# not a well-formed XPath string literal; a single opening quote is required.
language_element = browser.find_elements_by_xpath("//a[@class='plantl pla-unit-single-clickable-target clickable-card']")
print(language_element)
# Same approach as above: reduce the elements to their text.
languages = [x.text for x in language_element]
print("languages:")
print(languages, "\n")

# Pair each card text with the corresponding link text; zip() stops at the
# shorter of the two lists.
for title, language in zip(titles, languages):
    print("RepoName : Language")
    print(title + ": " + language, "\n")
Expected output from the URL https://www.google.com/search?q=samsung+note10
Position Company_Name Product_Name URL
1 CHECK24 Samsung Galaxy Note10 256 GB
glow mit Vertrag + Allnet Flat bei
Try this:
# Collect every element with the "pymv4e" class from the open `browser` session.
details = browser.find_elements_by_class_name("pymv4e")
for i in details:
    # NOTE(review): str(i) is the WebElement object's string form, not the text
    # shown on the page; presumably `i.text` is what is wanted here — confirm.
    print(str(i))
After that, you can format and display the results however you want.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.