[英]Python web scraping with BS using correct url?
初學者。 到目前為止,我有以下代碼:
# Log in to an Oracle APEX login page: fetch the form, copy its field values,
# then POST them together with the credentials inside one session.
import requests
from bs4 import BeautifulSoup

logurl = "https://login.flash.co.za/apex/f?p=pwfone:login"    # page that serves the login form
posturl = 'https://login.flash.co.za/apex/wwv_flow.accept'    # form action the login data is posted to

with requests.Session() as s:
    # Replace the session's default headers with a browser-like User-Agent.
    s.headers = {"User-Agent": "Mozilla/5.0"}

    # Fetch the login page so the per-request form values can be read from it.
    res = s.get(logurl)
    soup = BeautifulSoup(res.text, "lxml")

    # Each value below is taken from the first element whose `name` attribute
    # matches; select_one() returns None when nothing matches, in which case
    # the ['value'] lookup raises TypeError.
    values = {
        'p_flow_id': soup.select_one("[name='p_flow_id']")['value'],
        'p_flow_step_id': soup.select_one("[name='p_flow_step_id']")['value'],
        'p_instance': soup.select_one("[name='p_instance']")['value'],
        'p_page_submission_id': soup.select_one("[name='p_page_submission_id']")['value'],
        'p_request': 'LOGIN',
        'p_arg_names': soup.select_one("[name='p_arg_names']")['value'],
        'p_t01': 'username',
        # NOTE: duplicate key. A dict keeps only one 'p_arg_names' entry, and
        # select_one() returns the first match both times, so the second
        # p_arg_names field of the form is never sent. This is the problem the
        # question is about; it is left as asked.
        'p_arg_names': soup.select_one("[name='p_arg_names']")['value'],
        'p_t02': 'password',
        'p_md5_checksum': soup.select_one("[name='p_md5_checksum']")['value'],
        'p_page_checksum': soup.select_one("[name='p_page_checksum']")['value'],
    }

    # Submit the login form in the same session so cookies from the GET are reused.
    r = s.post(posturl, data=values)
    print(r.content)
logurl = 進行登錄的頁面 URL；posturl = 提交登錄數據的表單 action URL。
但是，當我運行這段代碼時，即使用戶名和密碼輸入正確，返回的內容仍然是“密碼錯誤”頁面。
當我手動成功登錄並查看包含所需數據的頁面時，我注意到實際的 URL 是下面列出的 Location URL（來自 Chrome 開發者工具，請參見下圖），其中包含代碼中的 flow_id 和 instance 值：
Location: https://login.flash.co.za/apex/f?p=1500:1:9004571425464
Request URL: https://login.flash.co.za/apex/wwv_flow.accept
Referer: https://login.flash.co.za/apex/f?p=pwfone:login
我是否應該向該 Location URL 發送 POST 請求，而不是向 Request URL 發送？
編輯1:
# Second attempt from the question: same login flow, but sending the full set
# of browser-like request headers suggested in the answer.
import requests
from bs4 import BeautifulSoup

logurl = "https://login.flash.co.za/apex/f?p=pwfone:login"    # page that serves the login form
posturl = 'https://login.flash.co.za/apex/wwv_flow.accept'    # form action the login data is posted to

with requests.Session() as s:
    # Replace the session's default headers with the ones a browser sent.
    s.headers = {
        "Host": "login.flash.co.za",
        "Connection": "keep-alive",
        "Origin": "https://login.flash.co.za",
        "Upgrade-Insecure-Requests": "1",
        "Content-Type": "application/x-www-form-urlencoded",
        "User-Agent": "Mozilla/5.0 (Windows NT x.y; rv:10.0) Gecko/20100101 Firefox/10.0",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Referer": "https://login.flash.co.za/apex/f?p=pwfone:login",
        # "br" is deliberately not advertised: requests can only decode Brotli
        # when an optional Brotli package is installed; without it r.content
        # would be the still-compressed bytes.
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "en-US,en;q=0.9",
    }

    # Fetch the login page so the per-request form values can be read from it.
    res = s.get(logurl)
    soup = BeautifulSoup(res.text, "html.parser")

    # Each value below is taken from the first element whose `name` attribute
    # matches; select_one() returns None when nothing matches, in which case
    # the ['value'] lookup raises TypeError.
    values = {
        'p_flow_id': soup.select_one("[name='p_flow_id']")['value'],
        'p_flow_step_id': soup.select_one("[name='p_flow_step_id']")['value'],
        'p_instance': soup.select_one("[name='p_instance']")['value'],
        'p_page_submission_id': soup.select_one("[name='p_page_submission_id']")['value'],
        'p_request': 'LOGIN',
        'p_arg_names': soup.select_one("[name='p_arg_names']")['value'],
        'p_t01': 'solar',
        # NOTE: duplicate key. A dict keeps only one 'p_arg_names' entry, and
        # select_one() returns the first match both times, so the second
        # p_arg_names field of the form is never sent. Left as asked in the
        # question.
        'p_arg_names': soup.select_one("[name='p_arg_names']")['value'],
        'p_t02': 'password',
        'p_md5_checksum': soup.select_one("[name='p_md5_checksum']")['value'],
        'p_page_checksum': soup.select_one("[name='p_page_checksum']")['value'],
    }

    # Submit the login form in the same session so cookies from the GET are reused.
    r = s.post(posturl, data=values)
    print(r.content)
我用 Fiddler 攔截了該請求。您發送 POST 請求的 URL 是正確的，只需設置以下請求頭，然後再次嘗試登錄：
# Browser-like request headers to send with the login requests.
headers = {
    "Host": "login.flash.co.za",
    "Connection": "keep-alive",
    "Origin": "https://login.flash.co.za",
    "Upgrade-Insecure-Requests": "1",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": "Mozilla/5.0 (Windows NT x.y; rv:10.0) Gecko/20100101 Firefox/10.0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
    "Referer": "https://login.flash.co.za/apex/f?p=pwfone:login",
    # "br" is deliberately not advertised: an HTTP client that has no Brotli
    # decoder available would hand back the still-compressed response body.
    "Accept-Encoding": "gzip, deflate",
    "Accept-Language": "en-US,en;q=0.9",
}
您的字典中有兩個相同的 'p_arg_names' 鍵，而它們本應對應兩個不同的值。請嘗試像下面這樣把這些值作為列表傳遞（由於我沒有用戶名和密碼，以下代碼未經測試）：
# Log in to an Oracle APEX login page, sending every p_arg_names field of the
# form instead of collapsing them into a single dict entry.
import requests
from bs4 import BeautifulSoup

logurl = "https://login.flash.co.za/apex/f?p=pwfone:login"    # page that serves the login form
posturl = 'https://login.flash.co.za/apex/wwv_flow.accept'    # form action the login data is posted to

with requests.Session() as s:
    # Replace the session's default headers with a browser-like User-Agent.
    s.headers = {"User-Agent": "Mozilla/5.0"}

    # Fetch the login page so the per-request form values can be read from it.
    res = s.get(logurl)
    soup = BeautifulSoup(res.text, "lxml")

    # Collect the value of every element named p_arg_names, in document order.
    arg_names = [field['value'] for field in soup.select("[name='p_arg_names']")]

    # The single-valued fields are read from the first element whose `name`
    # attribute matches; select_one() returns None when nothing matches, in
    # which case the ['value'] lookup raises TypeError.
    values = {
        'p_flow_id': soup.select_one("[name='p_flow_id']")['value'],
        'p_flow_step_id': soup.select_one("[name='p_flow_step_id']")['value'],
        'p_instance': soup.select_one("[name='p_instance']")['value'],
        'p_page_submission_id': soup.select_one("[name='p_page_submission_id']")['value'],
        'p_request': 'LOGIN',
        'p_t01': 'username',
        # A list value is form-encoded by requests as one p_arg_names entry
        # per element.
        'p_arg_names': arg_names,
        'p_t02': 'password',
        'p_md5_checksum': soup.select_one("[name='p_md5_checksum']")['value'],
        'p_page_checksum': soup.select_one("[name='p_page_checksum']")['value'],
    }

    # Present the login page as the referring page for the form submission.
    s.headers.update({'Referer': logurl})
    r = s.post(posturl, data=values)
    print(r.content)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.