繁体   English   中英

从带滚动条的下拉列表中抓取数据

[英]Scraping from dropdown with scroll

我想从长下拉列表中获取所有值。 首先需要打开下拉列表,然后滚动它直到加载所有值,获取它们并返回包含所有值的表。

网站链接: https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx

这是我必须选择的下拉列表:

下拉列表

它将在此处显示下拉列表:

滚动条

并滚动直到出现所有值,然后返回表。

import requests
from bs4 import BeautifulSoup
from requests import get

url = 'https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx'
# Download the page's HTML and echo the response object.
page = requests.get(url)
print(page)
soup = BeautifulSoup(page.text, "lxml")
# Attempt 1: collect every <option> tag nested in the combo box's arrow cell.
# NOTE: soup.find() returns None when nothing matches, in which case the
# chained call below raises AttributeError.
option = soup.find('td' , class_ = "rcbArrowCell rcbArrowCellRight").findAll('option')
# Attempt 2: take the first <option> inside the table with this inline style.
option_ = soup.find("table", {"style": "width: 900 px;"}).find("option")
print(option)
print(option_)
import requests
from bs4 import BeautifulSoup
import re
import json


def lol(url):
    """Fetch the organization drop-down items from *url* and print them as JSON.

    The function first GETs the page to read the ``__VIEWSTATE`` and
    ``__EVENTVALIDATION`` hidden inputs, then POSTs the form back with
    ``__CALLBACKID`` set to the ``ddlOrganization`` control and a
    ``__CALLBACKPARAM`` carrying the "LOD" command.  The JSON array found
    after an ``=`` sign in the response text is parsed and pretty-printed.

    Both requests go through the same ``requests.Session`` so that any
    cookies set by the initial GET accompany the POST.

    Args:
        url: Address of the page hosting the drop-down.

    Raises:
        ValueError: If the POST response contains no ``=[...]`` JSON array.
    """
    with requests.Session() as req:
        r = req.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')
        # Hidden state fields that must be echoed back in the POST.
        vs = soup.find("input", id="__VIEWSTATE").get("value")
        ev = soup.find("input", id="__EVENTVALIDATION").get("value")
        data = {
            "__EVENTTARGET": "",
            "__EVENTARGUMENT": "",
            "__VIEWSTATE": vs,
            "__VIEWSTATEGENERATOR": "FE3EF141",
            "": [
                "{2}",
                ""
            ],
            "ctl00_rwmWindowManager_ClientState": "",
            "ctl00_rwTimeoutWarning_ClientState": "",
            "s": "",
            "ctl00$PageContent$orgOption": "rbHaveOrg",
            "ctl00$PageContent$ddlOrganization": "-",
            "ddlOrganization_ClientState": "{\"logEntries\":[],\"value\":\"048447c3-0007-c47b-7c9e-0b3e39483880\",\"text\":\"-\",\"enabled\":true}",
            "ctl00$PageContent$tbLoginID": "",
            "ctl00$PageContent$tbPassword": "",
            "ctl00$PageContent$tbConfirmPassword": "",
            "ctl00$PageContent$tbIndividualFirstName": "",
            "ctl00$PageContent$tbIndividualLastName": "",
            "ctl00$PageContent$tbIndividualSuffix": "",
            "ctl00_PageContent_tbIndividualSuffix_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$tbIndividualEmail": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl02$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$gvIndividualPhoneNumbers$ctl03$tbIndividualPhoneNumber": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$hfIndividualAddressCode": "Home",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl00_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl00$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$hfIndividualAddressCode": "Work",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line1": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Line2": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$PostalCode": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$City": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$State": "",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_State_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$Country": "No Country",
            "ctl00_PageContent_rptIndividualAddresses_ctl01_acIndividualAddress_Country_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"No Country\",\"enabled\":true}",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$County": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CongressionalDistrict": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CASSCertificationDate": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$CarrierRoute": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCheckDigit": "",
            "ctl00$PageContent$rptIndividualAddresses$ctl01$acIndividualAddress$DeliveryPointCode": "",
            "ctl00$PageContent$ddlIndividualPreferredAddress": "048447c3-000f-c2ac-8bfc-0b3d04988fbc",
            "ctl00$PageContent$chkDoNotMail": "on",
            "ctl00$PageContent$chkDoNotFax": "on",
            "ctl00_PageContent_dlbMessageCategories_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_dlbMessageCategories_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox1": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox3": "",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox4": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox5": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox5_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox6": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox6_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox7": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox7_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox8": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox8_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox9_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox10_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_text": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadDatePicker11$dateInput": "",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_dateInput_ClientState": "{\"enabled\":true,\"emptyMessage\":\"\",\"minDateStr\":\"1/1/1 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_SD": "[]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_calendar_AD": "[[1,1,1],[9999,12,31],[2020,3,24]]",
            "ctl00_PageContent_cfsIndividualCustomFields_RadDatePicker11_ClientState": "{\"minDateStr\":\"1/1/0001 0:0:0\",\"maxDateStr\":\"12/31/9999 0:0:0\"}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox12": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox12_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox13": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox13_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox14": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox14_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox15": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox15_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox16": "",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox17_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00$PageContent$cfsIndividualCustomFields$TextBox18": "",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox19": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox19_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00$PageContent$cfsIndividualCustomFields$RadComboBox20": "---- Select ----",
            "ctl00_PageContent_cfsIndividualCustomFields_RadComboBox20_ClientState": "{\"logEntries\":[],\"value\":\"\",\"text\":\"---- Select ----\",\"enabled\":true}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbSrc_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "ctl00_PageContent_cfsIndividualCustomFields_DualListBox21_lbDest_ClientState": "{\"isEnabled\":true,\"logEntries\":[],\"selectedIndices\":[],\"checkedIndices\":[],\"scrollPosition\":0}",
            "__CALLBACKID": "ctl00$PageContent$ddlOrganization",
            "__CALLBACKPARAM": "{\"Command\":\"LOD\",\"Text\":\"-\",\"ClientState\":{\"value\":\"\",\"text\":\"\",\"enabled\":true,\"logEntries\":[]},\"Context\":{\"Text\":\"-\",\"NumberOfItems\":0},\"NumberOfItems\":0}",
            "__EVENTVALIDATION": ev
        }
        # Post through the session (not the bare ``requests`` module) so the
        # cookies obtained by the GET above are sent with the callback.
        r = req.post(url, data=data)
        # The item list is the JSON array that follows an "=" in the response.
        match = re.search(r"\=(\[.+])", r.text)
        if match is None:
            raise ValueError("no JSON item array found in the callback response")
        clear = json.loads(match.group(1))
        print(json.dumps(clear, indent=4))


# Run the scraper against the CreateAccount_CreateUser page.
lol("https://cmt.ps.membersuite.com/profile/CreateAccount_CreateUser.aspx")

输出:

[
    {
        "text": "-",
        "value": "048447c3-0007-c47b-7c9e-0b3e39483880",
        "attributes": {
            "ROW_NUMBER": "1",
            "LocalID": "10619",
            "EmailAddress": "jamiebolton@hotmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "-",
        "value": "048447c3-0007-ced2-814b-0b400d0f470f",
        "attributes": {
            "ROW_NUMBER": "2",
            "LocalID": "11477",
            "EmailAddress": "rpt@gwu.edu",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Banco Internacional del Per\u00fa - Interbank",
        "value": "048447c3-0007-c0e0-6c76-0b3e66b7e1ec",
        "attributes": {
            "ROW_NUMBER": "3",
            "LocalID": "10703",
            "EmailAddress": "dalvarezc84@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Bishop-McDonald Wealth Mgmt Group",
        "value": "048447c3-0007-c6d0-d748-03bda528b59f",
        "attributes": {
            "ROW_NUMBER": "4",
            "LocalID": "11697",
            "EmailAddress": "",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "BOB-Caridif Life Insurance Co.,Ltd.",
        "value": "048447c3-0007-c8b1-bbf2-0b3d578797ea",
        "attributes": {
            "ROW_NUMBER": "5",
            "LocalID": "10094",
            "EmailAddress": "bingxinshi@163.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "CEFS Verm\u00f6gensverwaltungs- und Beteiligungs GmbH",
        "value": "048447c3-0007-c88c-9064-0b3df4010a50",
        "attributes": {
            "ROW_NUMBER": "6",
            "LocalID": "10467",
            "EmailAddress": "sergiufala@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "CGS-CIMB",
        "value": "048447c3-0007-c74a-918b-0b3e7c42f6a0",
        "attributes": {
            "ROW_NUMBER": "7",
            "LocalID": "10753",
            "EmailAddress": "joelap0506@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "CMT - Denver Chapter",
        "value": "048447c3-0007-c603-99e4-0b3deeb54833",
        "attributes": {
            "ROW_NUMBER": "8",
            "LocalID": "10446",
            "EmailAddress": "christopher@navwm.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "EFG-Hermes",
        "value": "048447c3-0007-cc4c-fc57-0b3f2b5e69e0",
        "attributes": {
            "ROW_NUMBER": "9",
            "LocalID": "11092",
            "EmailAddress": "ahmedhusseinsalah@icloud.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Great-West Life",
        "value": "048447c3-0007-c2f7-bb59-0b3e5b135cba",
        "attributes": {
            "ROW_NUMBER": "10",
            "LocalID": "10677",
            "EmailAddress": "karentsang@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "HSI- HONG SANG INVESTMENT",
        "value": "048447c3-0007-c997-6d78-0b4041230224",
        "attributes": {
            "ROW_NUMBER": "11",
            "LocalID": "11559",
            "EmailAddress": "hosabank@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Huatai-Pinebridge",
        "value": "048447c3-0007-cbe8-452c-0b3e800ee085",
        "attributes": {
            "ROW_NUMBER": "12",
            "LocalID": "10779",
            "EmailAddress": "brightblueme@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "IB Securities Company-Vietnam",
        "value": "048447c3-0007-c51c-2535-0b3e2f55e160",
        "attributes": {
            "ROW_NUMBER": "13",
            "LocalID": "10601",
            "EmailAddress": "vd.huong283@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "ICMA-RC",
        "value": "048447c3-0007-c622-d748-e234e10f75a0",
        "attributes": {
            "ROW_NUMBER": "14",
            "LocalID": "11888",
            "EmailAddress": "",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Infinit-O Global",
        "value": "048447c3-0007-c939-a3a1-0b3fbe520f57",
        "attributes": {
            "ROW_NUMBER": "15",
            "LocalID": "11334",
            "EmailAddress": "gvvillacampa@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Knowledge-Tech Inc",
        "value": "048447c3-0007-c77a-6642-0b3e2db60635",
        "attributes": {
            "ROW_NUMBER": "16",
            "LocalID": "10595",
            "EmailAddress": "brucehayii@aol.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Lewis-Michael & Associates",
        "value": "048447c3-0007-c471-5e2b-0b3e2aade42d",
        "attributes": {
            "ROW_NUMBER": "17",
            "LocalID": "10589",
            "EmailAddress": "bruce.gill@lewis-michael.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Mega-x Stone Investments",
        "value": "048447c3-0007-ced6-9803-0b3f487e6917",
        "attributes": {
            "ROW_NUMBER": "18",
            "LocalID": "11190",
            "EmailAddress": "rida.eldarwish@gmail.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Merrill Lynch-",
        "value": "048447c3-0007-ce46-c1a6-0b3d828c2890",
        "attributes": {
            "ROW_NUMBER": "19",
            "LocalID": "10183",
            "EmailAddress": "jonathan.thibodeaux@ml.com",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    },
    {
        "text": "Mid-Continent Capital",
        "value": "048447c3-0007-c73c-d748-e1030b68c5a0",
        "attributes": {
            "ROW_NUMBER": "20",
            "LocalID": "11994",
            "EmailAddress": "",
            "_Preferred_Address_City": "",
            "_Preferred_Address_State": "",
            "Status.ShowInQuickSearches": "True"
        }
    }
]

现在您已经得到了一个列表(list),可以遍历它并取出所需的任何内容。

请注意,它只返回了前 20 行 :) 但不用担心。您可以循环发送 POST 请求,并在每次请求时把参数 __CALLBACKPARAM 中 "NumberOfItems":0 的值增加 20(例如 0、20、40……):)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM