在不变 url 中选择多个选项

Question

我需要通过在下拉菜单中依次选择州（state）、地区（district）和区块（block）来抓取网站上的内容。

我尝试使用 Python 的 requests 库发送 POST 请求，但无法正确抓取内容，因为无论我选择哪个选项，网站的 URL 都不会改变。

这是我到目前为止尝试过的代码：

# Question script: fetch the report page, read the state dropdown, POST one
# state back and try to read the district dropdown from the response.
# NOTE: `csv` is imported but not used anywhere in the visible snippet.
import urllib3
import requests
from bs4 import BeautifulSoup
import csv

# Suppress urllib3's InsecureRequestWarning; the requests below pass verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

url = "http://swachhbharatmission.gov.in/tsc/Report_NBA/Panchayat/Rpt_SarpanchDetail.aspx"
session = requests.Session()
html = session.get(url, verify=False).content
soup = BeautifulSoup(html, "lxml")
# All <option> tags of the state <select>, looked up by its form field name.
option = soup.find("select",{"name":"ctl00$ContentPlaceHolder1$ddlState"}).findAll("option")

# Build 'states': option value (the state's code) -> option text (the state's name).
# option[1:] skips the first entry -- presumably the "All ..." placeholder; TODO confirm.
states = {}
for elem in option[1:]:
    key = elem['value']
    value = elem.text
    states[key] = value

# POST the selected state back to the same URL. The `break` stops after the
# first state, so only one request is sent.
# NOTE(review): this POST uses `requests.post`, not the `session` created
# above, and the payload contains only the dropdown field. The .aspx URL and
# the ctl00$... field names suggest an ASP.NET WebForms page, which likely
# also expects the page's hidden form fields (e.g. __VIEWSTATE,
# __EVENTVALIDATION, __EVENTTARGET) to treat this as a postback -- TODO confirm.
for state in states.keys():
    payload_ano = {'ctl00$ContentPlaceHolder1$ddlState': str(state)}
    r = requests.post(url, data=payload_ano,verify=False)
    break

# Parse the POST response and read the district dropdown from it.
soup = BeautifulSoup(r.text,"html.parser")
option = soup.find("select",{"name":"ctl00$ContentPlaceHolder1$ddlDistrict"}).findAll("option")

option # only gives [<option selected="selected" value="%">All District</option>] from the home page and not the districts inside the state chosen

我使用了 break 语句，以便代码可以提前终止。现在的问题是，最后一行中的变量 option 应该包含选定某个州（state）之后“地区（district）”下拉列表的内容，但它只显示主页的内容。

任何帮助或建议将不胜感激。

Answer 1

您可以使用 selenium 来选择（select）下拉菜单中的选项。

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.select import Select

REPORT_URL = 'http://swachhbharatmission.gov.in/tsc/Report_NBA/Panchayat/Rpt_SarpanchDetail.aspx'


def _choose_option(driver, element_id, label):
    """Print a dropdown's options, ask the user for one, and select it.

    Args:
        driver: WebDriver whose current page contains the dropdown.
        element_id: ``id`` attribute of the ``<select>`` element.
        label: Name used in the prompt, e.g. ``'state'``.

    Raises:
        NoSuchElementException: If the dropdown is missing or has no options.
    """
    # Look the element up fresh on every call: it is located only after the
    # previous selection has been made, never reused from an earlier page state.
    dropdown = Select(driver.find_element(By.ID, element_id))
    option_texts = [option.text for option in dropdown.options]
    if not option_texts:
        raise NoSuchElementException(f'no options found in #{element_id}')

    print(f'\nselect {label}:')
    for number, text in enumerate(option_texts, start=1):
        print(f'{number} - {text.strip()}')

    # Keep asking until the answer is one of the listed numbers, instead of
    # feeding arbitrary text into a locator.
    while True:
        try:
            number = int(input(':- '))
        except ValueError:
            number = 0
        if 1 <= number <= len(option_texts):
            break
        print(f'please enter a number from 1 to {len(option_texts)}')

    # The menu is numbered from 1; Select indexes options from 0.
    dropdown.select_by_index(number - 1)


driver = webdriver.Chrome()
try:
    driver.get(REPORT_URL)

    # Each choice determines what the next dropdown offers, so order matters.
    _choose_option(driver, 'ctl00_ContentPlaceHolder1_ddlState', 'state')
    _choose_option(driver, 'ctl00_ContentPlaceHolder1_ddlDistrict', 'district')
    _choose_option(driver, 'ctl00_ContentPlaceHolder1_ddlBlock', 'block')

    # get data of each record
    try:
        table_element = driver.find_element(By.CSS_SELECTOR, 'table.Table')
    except NoSuchElementException:
        print('\nRecord not found')
    else:
        print('\nGrampanchayat Sarpanch Details')
        # The first two rows are skipped -- presumably header rows; TODO confirm.
        for table_row in table_element.find_elements(By.CSS_SELECTOR, 'tr')[2:]:
            for table_col in table_row.find_elements(By.CSS_SELECTOR, 'td'):
                print(table_col.text, end=',\t')
            print()
finally:
    # Always close the browser and the chromedriver process, even on errors.
    driver.quit()

注意：您需要将Chrome 驱动程序下载到您的项目文件夹中。

在不变 url 中选择多个选项

问题描述

1 个解决方案

解决方案1
0 已采纳 2020-07-22 11:54:13

在不变 url 中选择多个选项

问题描述

1 个解决方案

解决方案1 0 已采纳 2020-07-22 11:54:13

解决方案1
0 已采纳 2020-07-22 11:54:13