selenium.common.exceptions.NoSuchElementException：消息：在将 GeckoDriver Firefox 和 Selenium 与 Python 一起使用之前未看到 Web 元素引用

Question

I'm trying to get the data for several different tests from a test prep site.我正在尝试从考试准备站点获取几个不同测试的数据。 There are different subjects, each of which has a specialization, each of which has a practice-test, each of which has several questions.有不同的科目，每个科目都有专业，每个科目都有练习测试，每个科目都有几个问题。

subject <--- specialization <---- practice-test *------ question

Here's my code:这是我的代码：

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import pathlib
import time
import json
import os

driver=webdriver.Firefox(executable_path="../../../geckodriver.exe")
wait = WebDriverWait(driver, 15)
data=[]

def setup():

   driver.get('https://www.varsitytutors.com/practice-tests')
   try:
      go_away_1= driver.find_element_by_class_name("ub-emb-iframe")
      driver.execute_script("arguments[0].style.visibility='hidden'", go_away_1)
      go_away_2= driver.find_element_by_class_name("ub-emb-iframe-wrapper")
      driver.execute_script("arguments[0].style.visibility='hidden'", go_away_2)
      go_away_3= driver.find_element_by_class_name("ub-emb-visible")
      driver.execute_script("arguments[0].style.visibility='hidden'", go_away_3)
   except:
      pass

def get_subjects(subs=[]):
   subject_clickables_xpath="/html/body/div[3]/div[9]/div/*/div[@data-subject]/div[1]"
   subject_clickables=driver.find_elements_by_xpath(subject_clickables_xpath)
   subject_names=map(lambda x : x.find_element_by_xpath('..').get_attribute('data-subject'), subject_clickables)
   subject_pairs=zip(subject_names, subject_clickables)
   return subject_pairs

def get_specializations(subject):

   specialization_clickables_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]"
   specialization_names_xpath="//div//div[@data-subject='"+subject+"']/following-sibling::div//div[@class='public_problem_set']//a[contains(.,'Practice Tests')]/../.."
   specialization_names=map(lambda x : x.get_attribute('data-subject'), driver.find_elements_by_xpath(specialization_names_xpath))
   specialization_clickables = driver.find_elements_by_xpath(specialization_clickables_xpath)
   specialization_pairs=zip(specialization_names, specialization_clickables)
   return specialization_pairs

def get_practices(subject, specialization):
   practice_clickables_xpath="/html/body/div[3]/div[8]/div[3]/*/div[1]/a[1]"
   practice_names_xpath="//*/h3[@class='subject_header']"
   lengths_xpath="/html/body/div[3]/div[8]/div[3]/*/div[2]"
   lengths=map(lambda x : x.text, driver.find_elements_by_xpath(lengths_xpath))
   print(lengths)
   practice_names=map(lambda x : x.text, driver.find_elements_by_xpath(practice_names_xpath))
   practice_clickables = driver.find_elements_by_xpath(practice_clickables_xpath)
   practice_pairs=zip(practice_names, practice_clickables)
   return practice_pairs

def remove_popup():
   try:

      button=wait.until(EC.element_to_be_clickable((By.XPATH,"//button[contains(.,'No Thanks')]")))
      button.location_once_scrolled_into_view
      button.click()
   except:
      print('could not find the popup')

def get_questions(subject, specialization, practice):
   remove_popup()
   questions=[]
   current_question=None
   while True:
      question={}
      try:
         WebDriverWait(driver,5).until(EC.presence_of_element_located((By.XPATH,"/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]")))
         question_number=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[1]').text.replace('.','')
         question_pre=driver.find_element_by_class_name('question_pre')
         question_body=driver.find_element_by_xpath('/html/body/div[3]/div[7]/div[1]/div[2]/div[2]/table/tbody/tr/td[2]/p')
         answer_choices=driver.find_elements_by_class_name('question_row')
         answers=map(lambda x : x.text, answer_choices)
         question['id']=question_number
         question['pre']=question_pre.text
         question['body']=question_body.text
         question['answers']=list(answers)
         questions.append(question)
         choice=WebDriverWait(driver,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR,"input.test_button")))
         driver.execute_script("arguments[0].click();", choice[3])
         time.sleep(3)
      except Exception as e:
         if 'results' in driver.current_url:
            driver.get(driver.current_url.replace('http://', 'https://'))
            # last question has been answered; record results
            remove_popup()
            pathlib.Path('data/'+subject+'/'+specialization).mkdir(parents=True, exist_ok=True)
            with open('data/'+subject+'/'+specialization+'/questions.json', 'w') as outfile:
               json.dump(list(questions), outfile)
               break
         else:
            driver.get(driver.current_url.replace('http://', 'https://'))
   return questions


def scrape():
   setup()
   subjects=get_subjects()
   for subject_name, subject_clickable in subjects:
      subject={}
      subject['name']=subject_name
      subject['specializations']=[]
      subject_clickable.click()
      subject_url=driver.current_url.replace('http://', 'https://')
      specializations=get_specializations(subject_name)
      for specialization_name, specialization_clickable in specializations:
         specialization={}
         specialization['name']=specialization_name
         specialization['practices']=[]
         specialization_clickable.click()
         specialization_url=driver.current_url.replace('http://', 'https://')
         practices=get_practices(subject_name, specialization_name)
         for practice_name, practice_clickable in practices:
            practice={}
            practice['name']=practice_name
            practice_clickable.click()
            questions=get_questions(subject_name, specialization_name, practice_name)
            practice['questions']=questions
            driver.get(specialization_url)
         driver.get(subject_url)
      data.append(subject)
   print(data)
scrape()

Running this produces the error message:运行它会产生错误消息：

Traceback (most recent call last):
  File "scrape.py", line 141, in <module>
    scrape()
  File "scrape.py", line 126, in scrape
    for practice_name, practice_clickable in practices:
  File "scrape.py", line 49, in <lambda>
    practice_names=map(lambda x : x.text, driver.find_elements_by_xpath(practice_names_xpath))
  File "C:\Users\Joseph\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 76, in text
    return self._execute(Command.GET_ELEMENT_TEXT)['value']
  File "C:\Users\Joseph\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webelement.py", line 628, in _execute
    return self._parent.execute(command, params)
  File "C:\Users\Joseph\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\webdriver.py", line 312, in execute
    self.error_handler.check_response(response)
  File "C:\Users\Joseph\AppData\Local\Programs\Python\Python36\lib\site-packages\selenium\webdriver\remote\errorhandler.py", line 237, in check_response
    raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.NoSuchElementException: Message: Web element reference not seen before: 980e5c29-e3af-4b13-979f-0f2bb58b3480

After getting the questions from one practice-test, the driver needs to return to specialization page where the next practice-test can be found.从一个练习测试中得到问题后，驱动程序需要返回到可以找到下一个练习测试的专业化页面。 Hence these lines (where the problem is):因此这些行（问题所在）：

for practice_name, practice_clickable in practices:
            practice={}
            practice['name']=practice_name
            practice_clickable.click()
            questions=get_questions(subject_name, specialization_name, practice_name)
            practice['questions']=questions
            driver.get(specialization_url)

Apparently, the next practice clickable is no longer found on the page.显然，页面上不再找到下一个可点击的练习。 Why not?为什么不？

Also, I'm not sure if this is worthy of its own question, but I couldn't get this program to work on my Ubuntu computer -- the get_questions function stops on the last question and won't go to the results page.另外，我不确定这是否值得自己提出问题，但我无法让这个程序在我的 Ubuntu 计算机上get_questions函数在最后一个问题上停止并且不会转到结果页面。

Here's a tentative suggestion I'm trying by Guy:这是我正在尝试的 Guy 的初步建议：

def scrape():
   setup()
   subjects=get_subjects()
   for subject_name, subject_clickable in subjects:
      subject={}
      subject['name']=subject_name
      subject['specializations']=[]
      subject_clickable.click()
      subject_url=driver.current_url.replace('http://', 'https://')
      specializations=get_specializations(subject_name)
      for specialization_name, specialization_clickable in specializations:
         specialization={}
         specialization['name']=specialization_name
         specialization['practices']=[]
         specialization_clickable.click()
         specialization_url=driver.current_url.replace('http://', 'https://')
         practices=get_practices(subject_name, specialization_name)

         practices_len = len(list(get_practices(subject_name, specialization_name)))
         for i in range(practices_len):
            practices_list = list(get_practices(subject_name, specialization_name))
            practice = {}
            practice['name'] = practices_list[i][0]
            practices_list[i][1].click()
#         for practice_name, practice_clickable in practices:
#            practice={}
#            practice['name']=practice_name
#            practice_clickable.click()
#            questions=get_questions(subject_name, specialization_name, practice_name)
#            practice['questions']=questions
            driver.get(specialization_url)
         driver.get(subject_url)
      data.append(subject)
   print(data)
scrape()

Edit: As suggested by Hubert, I tried the following out:编辑：正如 Hubert 所建议的，我尝试了以下方法：

 practices = get_practices(subject_name, specialization_name)
 practices = [item[0] for item in practices]
 for index, practice_name in enumerate(practices):
    practice={}
    practice['name'] = practice_name
    practice_row = driver.find_element_by_xpath('//*[text()="'+practice_name+'"]/..')
    practice_clickable_n = practice_row.find_element_by_link_text('Begin')
    print('old:', practice_clickable[index])
    print('new:', practice_clickable_n)
    practice_clickable_n.click()
    questions=get_questions(subject_name, specialization_name, practice_name)

And this was the result:这是结果：

<map object at 0x7fabc0129860>
<map object at 0x7fabc0129898>
Traceback (most recent call last):
  File "scrape.py", line 140, in <module>
    scrape()
  File "scrape.py", line 131, in scrape
    print('old:', practice_clickable[index])
IndexError: list index out of range

Answer 1

This error message...这个错误信息...

selenium.common.exceptions.NoSuchElementException: Message: Web element reference not seen before: 980e5c29-e3af-4b13-979f-0f2bb58b3480

...implies that the GeckoDriver was unable identify the WebElement . ...暗示GeckoDriver无法识别WebElement 。

This error is coming out from get(webEl, win) within the Marionette source code :此错误来自Marionette 源代码中的get(webEl, win) ：

get(webEl, win) {
  if (!(webEl instanceof WebElement)) {
    throw new TypeError(pprint`Expected web element, got: ${webEl}`);
  }
  if (!this.has(webEl)) {
    throw new NoSuchElementError(
      "Web element reference not seen before: " + webEl.uuid
    );
  }

@fc's comment in the discussion 'Element reference not seen before: undefined' using geckodriver, waitForElementVisible fails explains the actual issue: @fc 在使用 geckodriver、waitForElementVisible 失败的讨论“以前未见过的元素引用：未定义”中的评论解释了实际问题：

However, the core issue was discussed in Intermittent test_navigation.py TestRefresh.test_basic |但是， Intermittent test_navigation.py TestRefresh.test_basic |中讨论了核心问题。 NoSuchElementException: Failed to trigger opening a new tab: Web element reference not seen before and was subsequently and was addressed through the changeset NoSuchElementException：无法触发打开新选项卡：Web 元素引用以前未见，随后已通过变更集解决

Solution解决方案

Using the latest version of the binaries will solve the issue in terms of:使用最新版本的二进制文件将解决以下问题：

Selenium v3.141.59硒 v3.141.59
GeckoDriver v0.26.0 壁虎驱动程序 v0.26.0
Firefox v70.0火狐 v70.0

Answer 2

The problem is the iteration over practices .问题在于practices的迭代。 It holds WebElement s, but thier reference is lost when you are navigating to a new page, even if it actually the same page Selenium treats it as a new one.它包含WebElement s，但是当您导航到新页面时，它们的引用会丢失，即使它实际上是同一页面 Selenium 将其视为新页面。

You can solve it by iterating by index.您可以通过索引迭代来解决它。 To do it with zip you can do something like this要用zip做到这一点，你可以做这样的事情

practices_len = len(list(get_practices(subject_name, specialization_name)))
for i in range(practices_len):
    practices_list = list(get_practices(subject_name, specialization_name))
    practice = {}
    practice['name'] = practices_list[i][0]
    practices_list[i][1].click()

Answer 3

Guy is right.盖伊是对的。 The next time you load the specialization_url it is a new page with new elements but practices contains the web elements of the old page as web elements.下次加载 specialization_url 时，它是一个包含新元素的新页面，但实践包含旧页面的 Web 元素作为 Web 元素。

To only change the part where it happens the code below first creates a list of the practices and practice_clickables.为了仅更改它发生的部分，下面的代码首先创建一个 practices 和 practice_clickables 列表。 It then searches for a fresh clickable whenever it returns to the new specialization_url page and prints the ID of the old and the current practice_clickable.然后，每当它返回到新的 specialization_url 页面并打印旧的和当前的 practice_clickable 的 ID 时，它就会搜索新的可点击对象。 With that it is clearly visible that the element in the same row now is a different one than it was the first time the page was loaded.这样可以清楚地看到，同一行中的元素现在与第一次加载页面时不同。

In addition the map or zip function seems to create a generator so that even the iteration over the practices fails because in that step webdriver code is executed on old objects.此外，map 或 zip 函数似乎创建了一个生成器，因此即使对实践的迭代也会失败，因为在该步骤中，webdriver 代码是在旧对象上执行的。 That is why I first create lists and iterate over a list.这就是我首先创建列表并遍历列表的原因。

Changed snippet:更改片段：

practices = get_practices(subject_name, specialization_name)
practice_clickable = [item[1] for item in practices]
practices = get_practices(subject_name, specialization_name)
practices = [item[0] for item in practices]
for index, practice_name in enumerate(practices):
    practice={}
    practice['name'] = practice_name
    practice_row = driver.find_element_by_xpath(f'//*[text()="{practice_name}"]/..')
    practice_clickable_n = practice_row.find_element_by_link_text('Begin')
    print('old:', practice_clickable[index])
    print('new:', practice_clickable_n)
    practice_clickable_n.click()
    questions=get_questions(subject_name, specialization_name, practice_name)

Complete scrape function:完整的抓取功能：

def scrape():
    setup()
    subjects=get_subjects()
    for subject_name, subject_clickable in subjects:
        subject={}
        subject['name']=subject_name
        subject['specializations']=[]
        subject_clickable.click()
        if ('http://') in driver.current_url:
            subject_url=driver.current_url.replace('http://', 'https://')
        else:
            subject_url=driver.current_url
        specializations=get_specializations(subject_name)
        for specialization_name, specialization_clickable in specializations:
            specialization={}
            specialization['name']=specialization_name
            specialization['practices']=[]
            specialization_clickable.click()
            if 'http://' in driver.current_url:
                specialization_url=driver.current_url.replace('http://', 'https://')
            else:
                specialization_url=driver.current_url
            practices = get_practices(subject_name, specialization_name)
            practice_clickable = [item[1] for item in practices]
            practices = get_practices(subject_name, specialization_name)
            practices = [item[0] for item in practices]
            for index, practice_name in enumerate(practices):
                practice={}
                practice['name'] = practice_name
                practice_row = driver.find_element_by_xpath(f'//*[text()="{practice_name}"]/..')
                practice_clickable_n = practice_row.find_element_by_link_text('Begin')
                print('old:', practice_clickable[index])
                print('new:', practice_clickable_n)
                practice_clickable_n.click()
                questions=get_questions(subject_name, specialization_name, practice_name)
                practice['questions']=questions
                driver.get(specialization_url)
            driver.get(subject_url)
        data.append(subject)
    print(data)

selenium.common.exceptions.NoSuchElementException：消息：在将 GeckoDriver Firefox 和 Selenium 与 Python 一起使用之前未看到 Web 元素引用

问题描述

3 个解决方案

解决方案1
3 2019-11-27 15:05:25

Solution解决方案

解决方案2
0 2019-11-27 12:50:18

解决方案3
0 2019-12-06 09:06:03

selenium.common.exceptions.NoSuchElementException：消息：在将 GeckoDriver Firefox 和 Selenium 与 Python 一起使用之前未看到 Web 元素引用

问题描述

3 个解决方案

解决方案1 3 2019-11-27 15:05:25

Solution解决方案

解决方案2 0 2019-11-27 12:50:18

解决方案3 0 2019-12-06 09:06:03

解决方案1
3 2019-11-27 15:05:25

解决方案2
0 2019-11-27 12:50:18

解决方案3
0 2019-12-06 09:06:03