繁体   English   中英

Selenium python:元素不可交互

[英]Selenium python : element not interactable

我正在尝试从这个示例网站中抓取信息

我需要获取2021版本并按代码搜索。 这是我的代码:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, StaleElementReferenceException
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions

# Exceptions the explicit waits ignore while polling for an element.
ignored_exceptions = (NoSuchElementException, StaleElementReferenceException)

options = Options()
options.add_argument('--disable-extensions')
options.add_argument('--headless')
options.add_argument('--no-sandbox')
options.add_argument('--disable-dev-shm-usage')
options.add_argument('--remote-debugging-port=9515')
options.add_argument('--disable-setuid-sandbox')
options.add_argument("--start-maximized")

driver = webdriver.Chrome(service=Service("/usr/bin/chromedriver"), options=options)
try:
    url = "https://noc.esdc.gc.ca/"
    driver.get(url)

    # One wait object is reused; each .until() call gets its own 10 s budget.
    wait = WebDriverWait(driver, 10, ignored_exceptions=ignored_exceptions)

    # presence_of_element_located only proves the node is in the DOM; clicking
    # or typing into a node that is not yet visible/enabled is what raises
    # "element not interactable". element_to_be_clickable waits for both.
    search_by_code = wait.until(expected_conditions.element_to_be_clickable(
        (By.XPATH, "/html/body/main/div[2]/div/div/div/div/div/div/div/div/ul/li[2]/a")))
    # click to activate this option
    # NOTE(review): presumably this reveals the form located below -- confirm.
    search_by_code.click()

    # select version 2021
    version = Select(wait.until(expected_conditions.element_to_be_clickable(
        (By.XPATH, "/html/body/main/div[2]/div/div/div/div/div/div/div/div/div/details[2]/div/div/form/div/div[1]/select"))))
    version.select_by_value('2021.0')

    # click on text area
    text_area = wait.until(expected_conditions.element_to_be_clickable(
        (By.XPATH, "/html/body/main/div[2]/div/div/div/div/div/div/div/div/div/details[2]/div/div/form/div/div[2]/div/input")))
    text_area.click()

    # type the text
    text_area.send_keys("10010  –  Financial managers")

    # click the button
    search_button = wait.until(expected_conditions.element_to_be_clickable(
        (By.XPATH, '/html/body/main/div[2]/div/div/div/div/div/div/div/div/div/details[2]/div/div/form/div/div[2]/div/div/button')))
    search_button.click()

    # print() has no ``source`` keyword; passing one raises TypeError.
    source = driver.current_url
    print(source)
finally:
    # Always shut the browser down, even when a wait times out.
    driver.quit()

我不确定我遗漏了什么。我添加了一些注释来描述代码的逻辑。

对于这个网站,你可以直接使用 python-requests 库。我在下面写了一个小脚本来发送请求并获取数据。你可以把查询内容放在 SearchCriteria.CodeSearch 字段中发送。之后可以使用 BS4、LXML 或 Scrapy 的 Selector 类,通过 XPath 或 CSS 选择器来定位所需的值。

import re
import requests

# Values that occur more than once in the captured browser request.
_SITE_ORIGIN = 'https://noc.esdc.gc.ca'
_RESULTS_PAGE_ENCODED = 'noc.esdc.gc.ca%2FSearch%2FQuickSearchJobTitleResults'

# Cookies copied from a browser session.
cookies = {
    # session / load-balancer cookies
    'ASP.NET_SessionId': 'u4qffsgfreddkgvdphaxo3pc',
    'BIGipServernoc_esdc_gc_ca_http_443.app~noc_esdc_gc_ca_http_443_pool': '639114412.20480.0000',
    # remaining cookies from the same capture
    'gpv_pthl': 'blank%20theme',
    'gpv_pc': 'Employment%20and%20Social%20Development%20Canada',
    'gpv_pqs': 'blank%20query%20string',
    'gpv_pu': _RESULTS_PAGE_ENCODED,
    'gpv_pt': 'Search%20by%20job%20title%20-%20Results%20-%20Canada.ca',
    's_plt': '9.44',
    's_tp': '1206',
    'gpv_url': _RESULTS_PAGE_ENCODED,
    's_ips': '741',
}

# Request headers copied from the same browser session. No 'Cookie' header is
# set here: the cookies are supplied separately through ``cookies``.
headers = {
    'Accept': (
        'text/html,application/xhtml+xml,application/xml;q=0.9,'
        'image/avif,image/webp,image/apng,*/*;q=0.8,'
        'application/signed-exchange;v=b3;q=0.9'
    ),
    'Accept-Language': 'en-PK,en;q=0.9,ur-PK;q=0.8,ur;q=0.7,en-GB;q=0.6,en-US;q=0.5,sv;q=0.4,it;q=0.3',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Origin': _SITE_ORIGIN,
    'Pragma': 'no-cache',
    'Referer': f'{_SITE_ORIGIN}/Search/QuickSearchJobTitleResults',
    'Sec-Fetch-Dest': 'document',
    'Sec-Fetch-Mode': 'navigate',
    'Sec-Fetch-Site': 'same-origin',
    'Sec-Fetch-User': '?1',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/103.0.0.0 Safari/537.36'
    ),
    'sec-ch-ua': '".Not/A)Brand";v="99", "Google Chrome";v="103", "Chromium";v="103"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
}

# Form fields of the "search by code" form: the version and the code to look up.
data = {
    'SearchCriteria.CodeVersion': '2021.0',
    'SearchCriteria.CodeSearch': '10010  –  Financial managers',
    'btn-submitSearchNOC': 'Search',
}

# Without an explicit timeout, requests can block indefinitely on a stalled
# connection.
REQUEST_TIMEOUT = 30  # seconds

# Submit the search form.
r = requests.post(
    'https://noc.esdc.gc.ca/Search/QuickSearchJobTitleResults',
    cookies=cookies,
    headers=headers,
    data=data,
    timeout=REQUEST_TIMEOUT,
)
# Fail loudly on 4xx/5xx instead of regex-scanning an error page.
r.raise_for_status()

# Take the first "objectid=<value>&" occurrence in the returned HTML.
match = re.search(r'objectid=(.*?)&', r.text)
if match is None:
    # Replaces the bare IndexError the original [0] lookup would raise.
    raise RuntimeError('no objectid found in the search results page')
id_ = match.group(1)

# Fetch the profile page for that object id and dump its HTML.
url = f'https://noc.esdc.gc.ca/Structure/NocProfile?objectid={id_}'
r = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
r.raise_for_status()

print(r.text)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM