![](/img/trans.png)
[英]How to get more than one item with identical html tag on BeautifulSoup
[英]How to get non attribute item of a html tag through beautifulsoup?
我有以下 HTML 需要解析,如何將 `window.WIZ_global_data` 中的所有項目作為普通字典訪問?`window.WIZ_global_data` 也是 script 標簽的屬性嗎?
我可以通過 `soup.head.script` 訪問 `script` 標簽:
<!DOCTYPE html>
<html dir="ltr" lang="no">
<head>
<base href="https://consent.google.com/"/>
<meta content="origin" name="referrer"/>
<link href="https://consent.google.com/m" rel="canonical"/>
<meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/>
<link href="//www.google.com/favicon.ico" rel="shortcut icon"/>
<script data-id="_gd" nonce="QI9+XeJ9TwHcCspiFyqIIQ">
window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%.@.1637316536530302,170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%.@.false,true,false,false,null,false,null,\"\",null,[[\"RelayState\",\"SAMLRequest\",\"SigAlg\",\"Signature\",\"TL\",\"af\",\"alwf\",\"btmpl\",\"c\",\"cbflow\",\"cd\",\"checkConnection\",\"checkedDomains\",\"client_id\",\"continue\",\"cpbps\",\"dsh\",\"emr\",\"faa\",\"flowEntry\",\"flowName\",\"followup\",\"forceOsidOriginForTest\",\"gae\",\"go\",\"hd\",\"hide_status_bar\",\"hl\",\"idvToken\",\"ifkv\",\"ifr\",\"ignoreShadow\",\"kdi\",\"kid_continue\",\"ltmpl\",\"marl\",\"migrate\",\"multilogin\",\"next\",\"oauth\",\"osid\",\"pageId\",\"passwdsession\",\"platform_variant\",\"pstMsg\",\"rart\",\"rip\",\"rm\",\"sarp\",\"scc\",\"scope\",\"secure\",\"sendvemail\",\"service\",\"session\",\"skipShadow\",\"skipvpage\",\"source\",\"ss\",\"ss_mode\",\"sspa\",\"t\",\"target\",\"theme\",\"ul\"]],null,null,[],[[null,null,\"https://accounts.google.com/AccountChooser?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],null,[null,null,\"https://accounts.google.com/signin/recovery?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.747
6727!4d10.1946726\\u0026hl\\u003dno\"],[null,null,\"https://accounts.google.com/restart?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0\u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgE\u003d","w2btAe":"%.@.null,null,\"\",false,null,null,true,false]","zChJod":"%.@.]"};
</script>
編輯:我也可以使用 `soup.head.script.text` 將內容作為字符串獲取,但希望以更 Pythonic 的方式將它們作為字典獲取。
您可以使用 `re` 搜索該變量並提取其值,然后用 `json.loads()` 解析,即可像字典(dictionary)一樣訪問它:
json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1))
html=r'''
<!DOCTYPE html>
<html dir="ltr" lang="no">
<head>
<base href="https://consent.google.com/"/>
<meta content="origin" name="referrer"/>
<link href="https://consent.google.com/m" rel="canonical"/>
<meta content="initial-scale=1,minimum-scale=1,maximum-scale=5,width=device-width" name="viewport"/>
<link href="//www.google.com/favicon.ico" rel="shortcut icon"/>
<script data-id="_gd" nonce="QI9+XeJ9TwHcCspiFyqIIQ">
window.WIZ_global_data = {"DndLYb":"","DpimGf":false,"EP1ykd":["/_/*"],"FdrFJe":"2318287307032857584","GVlsxf":"www.google.com","Im6cmf":"/_/ConsentUi","LVIXXb":1,"LoQv7e":false,"MT7f9b":[],"Mypbod":"https://www.googleapis.com/reauth","PYFuDc":"DUMMY_X_CLIENT_DATA_WIZ_GLOBAL_KEY_DO_NOT_USE","QrtxK":"","R6pIad":"%.@.]","S06Grb":"","TTHqvb":"https://kidsmanagement-pa.googleapis.com","Yllh3e":"%.@.1637316536530302,170695567,503968613]","cfb2h":"boq_identityfrontenduiserver_20211111.08_p0","eNnkwf":"1637316536","ejMLCd":"DUMMY_X_GEO_WIZ_GLOBAL_KEY_DO_NOT_USE","eptZe":"/_/ConsentUi/","fPDxwd":[1763433,1772879,1782333,45814370],"gGcLoe":false,"ksKYzf":"%.@.false,true,false,false,null,false,null,\"\",null,[[\"RelayState\",\"SAMLRequest\",\"SigAlg\",\"Signature\",\"TL\",\"af\",\"alwf\",\"btmpl\",\"c\",\"cbflow\",\"cd\",\"checkConnection\",\"checkedDomains\",\"client_id\",\"continue\",\"cpbps\",\"dsh\",\"emr\",\"faa\",\"flowEntry\",\"flowName\",\"followup\",\"forceOsidOriginForTest\",\"gae\",\"go\",\"hd\",\"hide_status_bar\",\"hl\",\"idvToken\",\"ifkv\",\"ifr\",\"ignoreShadow\",\"kdi\",\"kid_continue\",\"ltmpl\",\"marl\",\"migrate\",\"multilogin\",\"next\",\"oauth\",\"osid\",\"pageId\",\"passwdsession\",\"platform_variant\",\"pstMsg\",\"rart\",\"rip\",\"rm\",\"sarp\",\"scc\",\"scope\",\"secure\",\"sendvemail\",\"service\",\"session\",\"skipShadow\",\"skipvpage\",\"source\",\"ss\",\"ss_mode\",\"sspa\",\"t\",\"target\",\"theme\",\"ul\"]],null,null,[],[[null,null,\"https://accounts.google.com/AccountChooser?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"],null,[null,null,\"https://accounts.google.com/signin/recovery?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.747
6727!4d10.1946726\\u0026hl\\u003dno\"],[null,null,\"https://accounts.google.com/restart?continue\\u003dhttps://www.google.com/maps/place/Hauges%2Bgate%2B66,%2B3019%2BDrammen/@59.7476727,10.1924839,17z/data%3D!3m1!4b1!4m5!3m4!1s0x46412335e0db3d83:0x544c99fb4daa3946!8m2!3d59.7476727!4d10.1946726\\u0026hl\\u003dno\"]],null,false,[]]","nQyAE":{"vEMF5e":"false","LlkYkf":"false","WBBR0d":"true","ViN5Xd":"false","p6p11":"true","EoymAc":"false","FbHgvb":"false","P1ceCf":"false","tBSlob":"false","XqMd3":"false","Ee3RQb":"false","a76Enc":"false","G25Msb":"false","VL5wad":"false","XSgnJf":"false","APYQvd":"false","rONKDd":"false","Dpi3Gf":"false","lttELb":"false","YDrknb":"false","kQrwQd":"false","ihOA2":"false","Ozjmee":"false"},"qwAQke":"ConsentUi","qymVe":"YNnh99mAz0-TXbG5yo-NTKQ-0l4","rtQCxc":-60,"thykhd":"AKH95euIF6P3sAglrRifxgEcfcB9vtI_XXJFaZ5Oty53KFviM2VkkPyRfmncVF-cgLGCK0LYtfquTSFWlA3cjJWXXNjjUICQ5v3brxMomscQEgs9760XyDgPgf0\u003d","unNRMb":"AKJVzcpKsW1gHA6-l3WGdp1MQsU_eTjL-Ufc5L6-PjxIndj8D2t6PFy3SRwl7yc0EXpFbqSSCh50","vAyiz":"ChUI/o793Lf0vtdFEK6ek/ubmfGdvgE\u003d","w2btAe":"%.@.null,null,\"\",false,null,null,true,false]","zChJod":"%.@.]"};
</script>
'''
import json,re
json.loads(re.search(r'window.WIZ_global_data = ({.*})', html).group(1))
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.