[英]Scraping from dropdown with scroll
我想从长下拉列表中获取所有值。 首先需要打开下拉列表,然后滚动它直到加载所有值,获取它们并返回包含所有值的表。
网站链接: https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx
这是我必须选择的下拉列表:
它将在此处显示下拉列表:
并滚动直到出现所有值,然后返回表。
import requests
from bs4 import BeautifulSoup
from requests import get
# First attempt: download the page and look for <option> tags in the static HTML.
url = 'https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx'
page = requests.get(url)
print(page)
soup = BeautifulSoup(page.text, "lxml")

# soup.find() returns None when nothing matches, so guard each lookup instead
# of chaining straight into .findAll()/.find() and crashing with AttributeError.
# NOTE(review): the dropdown items are presumably loaded on demand by a
# callback request, so these lookups may legitimately come back empty.
arrow_cell = soup.find('td', class_="rcbArrowCell rcbArrowCellRight")
option = arrow_cell.findAll('option') if arrow_cell is not None else []

table = soup.find("table", {"style": "width: 900 px;"})
option_ = table.find("option") if table is not None else None

print(option)
print(option_)
import requests
from bs4 import BeautifulSoup
import re
import json
def lol(url):
    """Request the organization dropdown items via the page callback and print them.

    The function GETs ``url``, reads the ``__VIEWSTATE`` and
    ``__EVENTVALIDATION`` hidden inputs from the returned HTML, and POSTs the
    form back with ``__CALLBACKID`` set to ``ctl00$PageContent$ddlOrganization``.
    The first ``=[...]`` JSON array found in the response text is decoded,
    pretty-printed and returned.

    Args:
        url: Address of the account-creation page to query.

    Returns:
        The decoded JSON array (a list) extracted from the callback response.

    Raises:
        ValueError: If the callback response contains no ``=[...]`` array.
    """
    with requests.Session() as req:
        r = req.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')
        # Hidden state fields that must be echoed back with the POST.
        vs = soup.find("input", id="__VIEWSTATE").get("value")
        ev = soup.find("input", id="__EVENTVALIDATION").get("value")
        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": vs,
            "__VIEWSTATEGENERATOR": "FE3EF141",
            "": [
                "{2}",
                ""
            ],
            "ctl00_rwmWindowManager_ClientState": "",
            "ctl00_rwTimeoutWarning_ClientState": "",
            "s": "",
            "ctl00$PageContent$orgOption": "rbHaveOrg",
            "ctl00$PageContent$ddlOrganization": "-",
            "ddlOrganization_ClientState": "{\"logEntries\":[],\"value\":\"048447c3-0007-c47b-7c9e-0b3e39483880\",\"text\":\"-\",\"enabled\":true}",
            "ctl00$PageContent$tbLoginID": "",
            "ctl00$PageContent$tbPassword": "",
            "ctl00$PageContent$tbConfirmPassword": "",
            "ctl00$PageContent$tbIndividualFirstName": "",
            "ctl00$PageContent$tbIndividualLastName": "",
            "ctl00$PageContent$tbIndividualSuffix": "",
            "ctl00_PageContent_tbIndividualSuffix_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$tbIndividualEmail": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl02$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl03$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$hfIndividualAddressCode": "Home",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$hfIndividualAddressCode": "Work",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$ddlIndividualPreferredAddress": "048447c3-000f-c2ac-8bfc-0b3d04988fbc",
            "ctl00$PageContent$chkDoNotMail": "on",
            "ctl00$PageContent$chkDoNotFax": "on",
            "ctl00_PageContent_dlbMessageCategories_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_dlbMessageCategories_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox1": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox3": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox4": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox5": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox5_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox6": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox6_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox7": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox7_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox8": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox8_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_text": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11$dateInput": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_ClientState": "{\"enabled\":true,\"emptyMessage\":\"\",\"minDateStr\":\"1/1/1 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_SD": "[]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_AD": "[[1,1,1],[9999,12,31],[2020,3,24]]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_ClientState": "{\"minDateStr\":\"1/1/0001 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox12": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox12_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox13": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox13_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox14": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox14_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox15": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox15_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox16": "",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox18": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox19": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox19_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox20": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox20_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "__CALLBACKID": "ctl00$PageContent$ddlOrganization",
            "__CALLBACKPARAM": "{\"Command\":\"LOD\",\"Text\":\"-\",\"ClientState\":{\"value\":\"\",\"text\":\"\",\"enabled\":true,\"logEntries\":[]},\"Context\":{\"Text\":\"-\",\"NumberOfItems\":0},\"NumberOfItems\":0}",
            "__EVENTVALIDATION": ev
        }
        # Post through the same session so cookies set by the GET above are
        # sent along; a bare requests.post() would start from a clean slate.
        # NOTE(review): presumably the server ties the view state to those
        # cookies -- confirm against the live site.
        r = req.post(url, data=data)

    # The callback reply embeds the item list as "...=[ {...}, ... ]".
    match = re.search(r"\=(\[.+])", r.text)
    if match is None:
        raise ValueError("no item array found in the callback response")
    goal = match.group(1)
    clear = json.loads(goal)
    print(json.dumps(clear, indent=4))
    return clear


lol("https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx")
输出:
[
{
"text": "-",
"value": "048447c3-0007-c47b-7c9e-0b3e39483880",
"attributes": {
"ROW_NUMBER": "1",
"LocalID": "10619",
"EmailAddress": "jamiebolton@hotmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "-",
"value": "048447c3-0007-ced2-814b-0b400d0f470f",
"attributes": {
"ROW_NUMBER": "2",
"LocalID": "11477",
"EmailAddress": "rpt@gwu.edu",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Banco Internacional del Per\u00fa - Interbank",
"value": "048447c3-0007-c0e0-6c76-0b3e66b7e1ec",
"attributes": {
"ROW_NUMBER": "3",
"LocalID": "10703",
"EmailAddress": "dalvarezc84@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Bishop-McDonald Wealth Mgmt Group",
"value": "048447c3-0007-c6d0-d748-03bda528b59f",
"attributes": {
"ROW_NUMBER": "4",
"LocalID": "11697",
"EmailAddress": "",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "BOB-Caridif Life Insurance Co.,Ltd.",
"value": "048447c3-0007-c8b1-bbf2-0b3d578797ea",
"attributes": {
"ROW_NUMBER": "5",
"LocalID": "10094",
"EmailAddress": "bingxinshi@163.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CEFS Verm\u00f6gensverwaltungs- und Beteiligungs GmbH",
"value": "048447c3-0007-c88c-9064-0b3df4010a50",
"attributes": {
"ROW_NUMBER": "6",
"LocalID": "10467",
"EmailAddress": "sergiufala@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CGS-CIMB",
"value": "048447c3-0007-c74a-918b-0b3e7c42f6a0",
"attributes": {
"ROW_NUMBER": "7",
"LocalID": "10753",
"EmailAddress": "joelap0506@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "CMT - Denver Chapter",
"value": "048447c3-0007-c603-99e4-0b3deeb54833",
"attributes": {
"ROW_NUMBER": "8",
"LocalID": "10446",
"EmailAddress": "christopher@navwm.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "EFG-Hermes",
"value": "048447c3-0007-cc4c-fc57-0b3f2b5e69e0",
"attributes": {
"ROW_NUMBER": "9",
"LocalID": "11092",
"EmailAddress": "ahmedhusseinsalah@icloud.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Great-West Life",
"value": "048447c3-0007-c2f7-bb59-0b3e5b135cba",
"attributes": {
"ROW_NUMBER": "10",
"LocalID": "10677",
"EmailAddress": "karentsang@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "HSI- HONG SANG INVESTMENT",
"value": "048447c3-0007-c997-6d78-0b4041230224",
"attributes": {
"ROW_NUMBER": "11",
"LocalID": "11559",
"EmailAddress": "hosabank@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Huatai-Pinebridge",
"value": "048447c3-0007-cbe8-452c-0b3e800ee085",
"attributes": {
"ROW_NUMBER": "12",
"LocalID": "10779",
"EmailAddress": "brightblueme@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "IB Securities Company-Vietnam",
"value": "048447c3-0007-c51c-2535-0b3e2f55e160",
"attributes": {
"ROW_NUMBER": "13",
"LocalID": "10601",
"EmailAddress": "vd.huong283@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "ICMA-RC",
"value": "048447c3-0007-c622-d748-e234e10f75a0",
"attributes": {
"ROW_NUMBER": "14",
"LocalID": "11888",
"EmailAddress": "",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Infinit-O Global",
"value": "048447c3-0007-c939-a3a1-0b3fbe520f57",
"attributes": {
"ROW_NUMBER": "15",
"LocalID": "11334",
"EmailAddress": "gvvillacampa@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Knowledge-Tech Inc",
"value": "048447c3-0007-c77a-6642-0b3e2db60635",
"attributes": {
"ROW_NUMBER": "16",
"LocalID": "10595",
"EmailAddress": "brucehayii@aol.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Lewis-Michael & Associates",
"value": "048447c3-0007-c471-5e2b-0b3e2aade42d",
"attributes": {
"ROW_NUMBER": "17",
"LocalID": "10589",
"EmailAddress": "bruce.gill@lewis-michael.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Mega-x Stone Investments",
"value": "048447c3-0007-ced6-9803-0b3f487e6917",
"attributes": {
"ROW_NUMBER": "18",
"LocalID": "11190",
"EmailAddress": "rida.eldarwish@gmail.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Merrill Lynch-",
"value": "048447c3-0007-ce46-c1a6-0b3d828c2890",
"attributes": {
"ROW_NUMBER": "19",
"LocalID": "10183",
"EmailAddress": "jonathan.thibodeaux@ml.com",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
},
{
"text": "Mid-Continent Capital",
"value": "048447c3-0007-c73c-d748-e1030b68c5a0",
"attributes": {
"ROW_NUMBER": "20",
"LocalID": "11994",
"EmailAddress": "",
"_Preferred_Address_City": "",
"_Preferred_Address_State": "",
"Status.ShowInQuickSearches": "True"
}
}
]
现在您已经得到了一个 `list`,可以访问它并获取您想要的任何内容。
请注意,它只返回了前 20 行 :) 但不用担心。您可以循环发送 POST 请求:每次请求时,把参数 `__CALLBACKPARAM` 中的 `"NumberOfItems":0` 增加 20(即 0、20、40……),直到取回所有值 :)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.