I have a list of similar elements on a page, and I want to extract an attribute from each of them. Once one page is done, the code should click the Next button and repeat the same extraction on every page up to the last one; after that it should move on to the next link in the for loop. Can anyone help me achieve this? Below is the code I am using. It clicks the Next button but never exits the while loop.
# Question snippet: for each link, load the page and try to walk the result
# pages, writing every result's data-asin to f.
# NOTE(review): the indentation of this snippet was lost, so the intended
# nesting of the loops and try/except blocks cannot be recovered with certainty.
# Process the links starting at position currentIndex.
for link in content[currentIndex:]:
# Print a timestamp for this link.
now = datetime.now()
# Format: dd/mm/YYYY HH:MM:SS
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
print("date and time =", dt_string)
# Load the page, then pause 3 seconds before querying it.
driver.get(link)
time.sleep(3)
try:
# NOTE(review): WebElement.click() returns None, so this `while` condition is
# always falsy: the element is clicked once and the loop body is never entered.
while driver.find_element_by_css_selector('.a-last').click():
try:
# NOTE(review): '.a-disabled a-last.a-last' is a descendant selector that looks
# for an <a-last> tag inside an .a-disabled element; an element carrying both
# classes would be matched by '.a-disabled.a-last' instead.
while driver.find_element_by_css_selector('.a-disabled a-last.a-last'):
print("nothing here")
break
except NoSuchElementException:
# Collect every element matching this long class selector.
aa = driver.find_elements_by_css_selector(
'.sg-col-4-of-24.sg-col-4-of-12.sg-col-4-of-36.s-result-item.sg-col-4-of-28.sg-col-4-of-16.sg-col.sg-col-4-of-20.sg-col-4-of-32')
for j in aa:
hd = j.get_attribute('data-asin')
# Prefix the value with an apostrophe (presumably so a spreadsheet keeps it as
# text - TODO confirm).
hd1 = "'" + hd
print(hd1)
# Write one "link,'ASIN" line through f (f is defined outside this snippet).
f.write(link + "," + hd1 + "\n")
# Click the element with class a-last again to advance.
driver.find_element_by_css_selector('.a-last').click()
# A missing element is silently ignored here.
except NoSuchElementException:
pass
The link that I am using is "https://www.amazon.com/s/ref=sr_in_h_p_89_87?fst=as%3Aoff&rh=i%3Atoys-and-games-intl-ship%2Cn%3A%2116225015011%2Cn%3A276729011%2Cp_89%3AHobby+Products+International&bbn=16225015011&ie=UTF8&qid=1578465210&rnid=2528832011". On this page I want to extract the "data-asin" attribute for all the elements.
Can anyone help? I am seriously stuck on this.
I think using the click() action as the condition of the while loop is not a good approach.
I recommend using Explicit Waits instead of try-except.
Try removing all the try-except blocks and the while loop, and modify your code as below:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Explicit wait: each wait.until(...) call below polls for up to 10 seconds
# until its expected condition holds. `driver`, `link` and `f` come from the
# surrounding code of the question.
wait = WebDriverWait(driver, 10)

# Wait until the result tiles are present in the DOM, then collect them.
aa = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.sg-col-4-of-24.sg-col-4-of-12.sg-col-4-of-36.s-result-item.sg-col-4-of-28.sg-col-4-of-16.sg-col.sg-col-4-of-20.sg-col-4-of-32')))
for j in aa:
    hd = j.get_attribute('data-asin')
    # Prefix the value with an apostrophe before printing and writing it.
    hd1 = "'" + hd
    print(hd1)
    f.write(link + "," + hd1 + "\n")

# Click "Next" only when its class attribute does not contain 'disabled'.
next_btn = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.a-last')))
if 'disabled' not in next_btn.get_attribute("class"):
    next_btn.click()
Reference of Explicit Waits: https://selenium-python.readthedocs.io/waits.html
To get the data-asin value from all elements and from all pages for a given link, use an infinite while loop and check the number of matching Next-button elements; if there are none, exit the loop.
To get all the data-asin values, induce WebDriverWait with visibility_of_all_elements_located() and the following CSS selector.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Collect the data-asin value of every result on every page of one search URL.
driver = webdriver.Chrome()
dataAsin = []
try:
    driver.get("https://www.amazon.com/s/ref=sr_in_h_p_89_87?fst=as%3Aoff&rh=i%3Atoys-and-games-intl-ship%2Cn%3A%2116225015011%2Cn%3A276729011%2Cp_89%3AHobby+Products+International&bbn=16225015011&ie=UTF8&qid=1578465210&rnid=2528832011")
    while True:
        # Wait (up to 20 s) until the result tiles of the current page are visible.
        elements = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.s-result-list.s-search-results.sg-row >div[data-asin]")))
        dataAsin.extend(element.get_attribute("data-asin") for element in elements)

        # A disabled "Next" item means this was the last page.
        if driver.find_elements(By.CSS_SELECTOR, "li.a-disabled.a-last"):
            break

        # find_elements returns an empty list instead of raising, so a missing
        # "Next" link ends the loop; otherwise reuse the element just found.
        next_links = driver.find_elements(By.CSS_SELECTOR, "li.a-last >a")
        if not next_links:
            break
        next_links[0].click()
finally:
    # Always close the browser, even if a wait times out.
    driver.quit()
print(dataAsin)
print(len(dataAsin))
Output :
['B018EMT6SC', 'B076SYTFBD', 'B0044TPTTU', 'B002KCU5NI', 'B078HD9T7Q', 'B01HBX9OXW', 'B002KCQHJE', 'B0734D66VZ', 'B006ON7DB6', 'B0163MYTDM', 'B000LZPJQO', 'B00XI6BVU6', 'B00KDKJ3GO', 'B00XI6CC9K', 'B001S79QXQ', 'B002KD02KI', 'B016QR5YP6', 'B00XI7I5AE', 'B00YASDU4Q', 'B00S9YG31E', 'B0017KI18K', 'B07FQTGYZJ', 'B00G7NZ41E', 'B001FECAO4', 'B000BLM2WM', 'B001FQF6NE', 'B07NZ8DGHJ', 'B0734F3YGR', 'B01N3A5XK0', 'B00LUEGNJC', 'B00G7NZHO8', 'B00G7NZHJ8', 'B00G7NZHFC', 'B00G7NZGFI', 'B00G7NZ5CM', 'B00CIENEA4', 'B00B8NS17W', 'B00B8NNG40', 'B00B8NNAIC', 'B009OY7VQE', 'B00731MJ8U', 'B00731KXII', 'B006YBD30S', 'B006P196I0', 'B0057CM7XY', 'B004NF7IV2', 'B004NF29OI', 'B004G8ULMY', 'B00423K95C', 'B0038VXJSI', 'B0037Y922S', 'B002UNDO76', 'B001L2G79E', 'B001FQGMDC', 'B001BHI88C', 'B0017KHCF8', 'B000WTD4JS', 'B07BHDFNF5', 'B07BHD1GWC', 'B0734FPYDK', 'B0734DD7VG', 'B0734D65HG', 'B0734D5D55', 'B0734CCTGY', 'B0734CCQ2H', 'B0734C1JW6', 'B0734BP4PZ', 'B0734BFLX1', 'B0734B724T', 'B07349ZPJQ', 'B073496PC4', 'B073492BX5', 'B073489R7T', 'B07347C8CY', 'B01N4U9OGM', 'B01MTFAV1H', 'B0163MZ7UG', 'B0163MYQJO', 'B0163MYPWM', 'B00G7NZLU8', 'B00G7NZI4C', 'B00G7NZH9S', 'B00G7NZFPO', 'B00G7NZEWS', 'B00G7NZEFK', 'B00G7NZE78', 'B00G7NZ914', 'B00G7NZ3GU', 'B00G7NZ2ES', 'B00BCU7EDI', 'B00731MLT2', 'B00731MBOW', 'B006ZZAZ0Y', 'B006ZZAYUK', 'B0057CM9K0', 'B0057CM01S', 'B0038VSHGC', 'B002KCQJ5G', 'B00261J7WS', 'B001S77QME', 'B001HSBXDC', 'B001FQMG8M', 'B001CBEWDC', 'B001BG4SLO', 'B000UZEOKC', 'B000LZSFH4', 'B000LZNVDM', 'B000FTOKN4', 'B000BN2XQK', 'B000BLGA1G']
110
driver.find_element_by_css_selector('.a-last').click():
Change it to:
driver.find_element_by_css_selector('a-disabled a-last').click():
Hope this helps.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.