简体   繁体   中英

Unable to exit the while loop once the Next button can no longer be clicked

I have a list of similar elements on a page, and I want to extract an attribute from each of them. Once one page is done, the code should click the Next button and repeat the same extraction on every page until the last one; after that, it should move on to the next link in the for loop. Can anyone help me achieve this? Below is the code I am using. It clicks the Next button but never exits the while loop.

# For every link from content[currentIndex:] onward: print a timestamp, open
# the link, then try to page through the results, writing one
# "link,'data-asin" line to `f` per matched result element.
for link in content[currentIndex:]:
    now = datetime.now()
    # dd/mm/YY H:M:S
    dt_string = now.strftime("%d/%m/%Y %H:%M:%S")
    print("date and time =", dt_string)
    driver.get(link)
    # Fixed 3-second pause after navigation before touching the page.
    time.sleep(3)
    try:
        # NOTE(review): the loop condition is the *return value* of click().
        # Selenium's WebElement.click() is documented to return None, which is
        # falsy, so this would click once and never enter the body -- confirm.
        while driver.find_element_by_css_selector('.a-last').click():
            try:
                # NOTE(review): as written, this selector is a descendant
                # selector for an <a-last> tag (with class "a-last") inside an
                # element with class "a-disabled". Matching one element that
                # carries both classes would be '.a-disabled.a-last' -- confirm
                # the intent.
                # The `while` runs at most once because of the unconditional
                # break, so it behaves like an `if`.
                while driver.find_element_by_css_selector('.a-disabled a-last.a-last'):
                    print("nothing here")
                    break
            except NoSuchElementException:
                # The lookup above found nothing: collect the result elements
                # on the current page.
                aa = driver.find_elements_by_css_selector(
                    '.sg-col-4-of-24.sg-col-4-of-12.sg-col-4-of-36.s-result-item.sg-col-4-of-28.sg-col-4-of-16.sg-col.sg-col-4-of-20.sg-col-4-of-32')
                for j in aa:
                    hd = j.get_attribute('data-asin')
                    # Prefix the value with a single quote before printing and
                    # writing it.
                    hd1 = "'" + hd
                    print(hd1)
                    f.write(link + "," + hd1 + "\n")
                # Click the '.a-last' element again after processing the page.
                driver.find_element_by_css_selector('.a-last').click()
    except NoSuchElementException:
        # A missing '.a-last' element is silently ignored and the outer loop
        # moves on to the next link.
        pass

The link that I am using is " https://www.amazon.com/s/ref=sr_in_h_p_89_87?fst=as%3Aoff&rh=i%3Atoys-and-games-intl-ship%2Cn%3A%2116225015011%2Cn%3A276729011%2Cp_89%3AHobby+Products+International&bbn=16225015011&ie=UTF8&qid=1578465210&rnid=2528832011 ". From this page I want to extract the "data-asin" attribute of all the elements.

Can anyone help? I am seriously stuck on this.

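I think using the click() action as the condition of the while loop is not a good approach. click() returns None, so its result cannot tell the loop whether another page exists.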

I recommend using explicit waits instead of try-except.

Try removing all the try-except blocks and the while loops, and modify your code as below:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Explicit wait: poll for up to 10 seconds until an expected condition holds.
wait = WebDriverWait(driver, 10)

# Wait until the result elements are present in the DOM, then write one
# "link,'data-asin" line to `f` for each of them.
aa = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, '.sg-col-4-of-24.sg-col-4-of-12.sg-col-4-of-36.s-result-item.sg-col-4-of-28.sg-col-4-of-16.sg-col.sg-col-4-of-20.sg-col-4-of-32')))
for j in aa:
    hd = j.get_attribute('data-asin')
    hd1 = "'" + hd
    print(hd1)
    f.write(link + "," + hd1 + "\n")

# Click the '.a-last' element only when its class attribute does not contain
# "disabled".
next_btn = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, '.a-last')))
if 'disabled' not in next_btn.get_attribute("class"):
    next_btn.click()

Reference of Explicit Waits: https://selenium-python.readthedocs.io/waits.html

To get the data-asin value from all elements on all pages for a given link:

Use an infinite while loop and check the length of the list of matching Next-button elements; if the button is disabled or missing, exit the loop.

To get all the data-asin values, use WebDriverWait with visibility_of_all_elements_located() and the following CSS selector.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://www.amazon.com/s/ref=sr_in_h_p_89_87?fst=as%3Aoff&rh=i%3Atoys-and-games-intl-ship%2Cn%3A%2116225015011%2Cn%3A276729011%2Cp_89%3AHobby+Products+International&bbn=16225015011&ie=UTF8&qid=1578465210&rnid=2528832011")
dataAsin = []
while True:
    # Wait (up to 20 seconds) until the result elements on the current page
    # are visible, then record the data-asin attribute of each one.
    elements = WebDriverWait(driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.s-result-list.s-search-results.sg-row >div[data-asin]")))
    dataAsin.extend(element.get_attribute("data-asin") for element in elements)

    # find_elements returns an empty list (instead of raising) when nothing
    # matches, so plain truthiness is enough to test for presence.
    if driver.find_elements(By.CSS_SELECTOR, "li.a-disabled.a-last"):
        # The "next" item is disabled: stop paging.
        break

    # Look the next link up once and reuse the result for the click.
    next_links = driver.find_elements(By.CSS_SELECTOR, "li.a-last >a")
    if not next_links:
        break
    next_links[0].click()

print(dataAsin)
print(len(dataAsin))

Output :

['B018EMT6SC', 'B076SYTFBD', 'B0044TPTTU', 'B002KCU5NI', 'B078HD9T7Q', 'B01HBX9OXW', 'B002KCQHJE', 'B0734D66VZ', 'B006ON7DB6', 'B0163MYTDM', 'B000LZPJQO', 'B00XI6BVU6', 'B00KDKJ3GO', 'B00XI6CC9K', 'B001S79QXQ', 'B002KD02KI', 'B016QR5YP6', 'B00XI7I5AE', 'B00YASDU4Q', 'B00S9YG31E', 'B0017KI18K', 'B07FQTGYZJ', 'B00G7NZ41E', 'B001FECAO4', 'B000BLM2WM', 'B001FQF6NE', 'B07NZ8DGHJ', 'B0734F3YGR', 'B01N3A5XK0', 'B00LUEGNJC', 'B00G7NZHO8', 'B00G7NZHJ8', 'B00G7NZHFC', 'B00G7NZGFI', 'B00G7NZ5CM', 'B00CIENEA4', 'B00B8NS17W', 'B00B8NNG40', 'B00B8NNAIC', 'B009OY7VQE', 'B00731MJ8U', 'B00731KXII', 'B006YBD30S', 'B006P196I0', 'B0057CM7XY', 'B004NF7IV2', 'B004NF29OI', 'B004G8ULMY', 'B00423K95C', 'B0038VXJSI', 'B0037Y922S', 'B002UNDO76', 'B001L2G79E', 'B001FQGMDC', 'B001BHI88C', 'B0017KHCF8', 'B000WTD4JS', 'B07BHDFNF5', 'B07BHD1GWC', 'B0734FPYDK', 'B0734DD7VG', 'B0734D65HG', 'B0734D5D55', 'B0734CCTGY', 'B0734CCQ2H', 'B0734C1JW6', 'B0734BP4PZ', 'B0734BFLX1', 'B0734B724T', 'B07349ZPJQ', 'B073496PC4', 'B073492BX5', 'B073489R7T', 'B07347C8CY', 'B01N4U9OGM', 'B01MTFAV1H', 'B0163MZ7UG', 'B0163MYQJO', 'B0163MYPWM', 'B00G7NZLU8', 'B00G7NZI4C', 'B00G7NZH9S', 'B00G7NZFPO', 'B00G7NZEWS', 'B00G7NZEFK', 'B00G7NZE78', 'B00G7NZ914', 'B00G7NZ3GU', 'B00G7NZ2ES', 'B00BCU7EDI', 'B00731MLT2', 'B00731MBOW', 'B006ZZAZ0Y', 'B006ZZAYUK', 'B0057CM9K0', 'B0057CM01S', 'B0038VSHGC', 'B002KCQJ5G', 'B00261J7WS', 'B001S77QME', 'B001HSBXDC', 'B001FQMG8M', 'B001CBEWDC', 'B001BG4SLO', 'B000UZEOKC', 'B000LZSFH4', 'B000LZNVDM', 'B000FTOKN4', 'B000BN2XQK', 'B000BLGA1G']
110
 driver.find_element_by_css_selector('.a-last').click(): 

Change it to:

 driver.find_element_by_css_selector('a-disabled a-last').click():

hope this helps

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM