[英]Need help understanding python snippet with regex and cURL
編輯 - 剛剛添加了整個cURL函數以供參考/更多信息,但需要if語句的幫助 - 正則表達式
尋求幫助以理解這個cURL函數中的if語句。 我已經閱讀了一些Python文檔,大致理解了每個部分:這是用正則表達式進行搜索和替換。 只是希望有人能夠提供更完整的解釋。 我真的不明白 .groups() 是做什麼的。
為了給出更多背景:這個腳本通過cURL訪問另一個站點並存儲一個cookie;運行時它會檢查cookie是否有效,如果無效,就在提交用戶名/密碼後抓取一個新的。 該網站最近發生了變化,我正在試圖弄清楚需要改變什麼才能讓它再次運作。
# Get the auth cookie for SSO: fetch the SSO page with credentials, then
# re-post the hidden SAML form fields found in the returned HTML.
# NOTE: indentation was lost when this snippet was pasted; the nesting of the
# `if` bodies below cannot be determined from this text alone.
def getAuthCookie( self ):
buffer = BytesIO()
c = pycurl.Curl()
# Certificate verification is switched off for this handle.
c.setopt(c.SSL_VERIFYPEER, False)
c.setopt(c.FOLLOWLOCATION, True)
c.setopt(c.TIMEOUT, 60)
c.setopt(c.USERPWD, self.user+":"+cred.getpasswd( self.encPasswd ) )
c.setopt(c.URL, 'https://sso.sample.com')
# The same path is used both to store cookies and to load them.
c.setopt(c.COOKIEJAR, self.cookieDir)
c.setopt(c.COOKIEFILE, self.cookieDir )
c.setopt(c.WRITEFUNCTION, buffer.write)
c.perform()
c.unsetopt(c.USERPWD)
c.setopt(c.URL, 'https://sample.com')
# Second request; its body is appended to the same buffer as the first.
c.perform()
html = str(buffer.getvalue())
----------------------------------------------------------
# Pull the hidden form values out of the HTML with one capture group each.
if "RelayState" in html:
rex = re.compile( "input type=\"hidden\" name=\"RelayState\" value=\"(.*)\"" )
RELAY = rex.search( html ).groups()[0]
if "SAMLResponse" in html:
rex = re.compile( "input type=\"hidden\" name=\"SAMLResponse\" value=\"(.*)\"" )
SAML = rex.search( html ).groups()[0]
datastuff = {'SAMLResponse':SAML,'RelayState':RELAY,'redirect':'Redirect','show_button':'true'}
if "form method=\"POST\" action=" in html:
rex = re.compile( "form method=\"POST\" action=\"(.*)\" " )
postUrl = rex.search( html ).groups()[0]
----------------------------------------------------------
# Post the SAML values obtained above to the form's action URL to reach the
# final destination.
c.setopt(c.URL, postUrl )
c.setopt(c.POST, True)
c.setopt(c.POSTFIELDS, urlencode( datastuff ))
c.perform()
c.close()
請參閱我在代碼中注入的注釋:
# Get the auth cookie for SSO.
# NOTE: indentation was lost when this snippet was pasted; the nesting of the
# `if` bodies below cannot be determined from this text alone.
def getAuthCookie( self ):
# In-memory buffer that collects the response bodies curl receives.
buffer = BytesIO()
c = pycurl.Curl()
# Certificate verification is switched off for this handle.
c.setopt(c.SSL_VERIFYPEER, False)
c.setopt(c.FOLLOWLOCATION, True)
c.setopt(c.TIMEOUT, 60)
c.setopt(c.USERPWD, self.user+":"+cred.getpasswd( self.encPasswd ) )
# Request sso.sample.com. Presumably it prompts for a login and curl answers
# with the user:password value set above (assumption - confirm against the site).
c.setopt(c.URL, 'https://sso.sample.com')
# Store cookies in, and load cookies from, the path in self.cookieDir.
c.setopt(c.COOKIEJAR, self.cookieDir)
c.setopt(c.COOKIEFILE, self.cookieDir )
c.setopt(c.WRITEFUNCTION, buffer.write)
# Run the request using all of the options set so far.
c.perform()
c.unsetopt(c.USERPWD)
# Now request sample.com with the same handle (credentials removed).
c.setopt(c.URL, 'https://sample.com')
c.perform()
# Save the collected output as the html variable. The buffer was not reset
# between the two perform() calls, so it holds both responses.
html = str(buffer.getvalue())
----------------------------------------------------------
# The following three if statements all work the same way:
# if "some string" is found in the html variable, run the lines that belong to that if.
if "RelayState" in html:
# Set up a regex that looks for: input type="hidden" name="RelayState" value="..."
# The parentheses (.*) form a capture group; whatever they match becomes RELAY.
# .* is greedy, so it matches as much text as it can before a closing quote.
rex = re.compile( "input type=\"hidden\" name=\"RelayState\" value=\"(.*)\"" )
# search() runs the regex against html; groups() returns a tuple of the
# captured groups, and [0] picks the first (here the only) one.
RELAY = rex.search( html ).groups()[0]
if "SAMLResponse" in html:
rex = re.compile( "input type=\"hidden\" name=\"SAMLResponse\" value=\"(.*)\"" )
# The same thing happens here: the captured value is stored as SAML.
SAML = rex.search( html ).groups()[0]
# Construct a dict from fixed strings plus the two values captured above.
datastuff = {'SAMLResponse':SAML,'RelayState':RELAY,'redirect':'Redirect','show_button':'true'}
if "form method=\"POST\" action=" in html:
rex = re.compile( "form method=\"POST\" action=\"(.*)\" " )
# Again a single capture group: the text inside action="..." becomes postUrl.
postUrl = rex.search( html ).groups()[0]
----------------------------------------------------------
# Post the SAML values obtained above to reach the final destination.
c.setopt(c.URL, postUrl ) # point curl at the URL found above
c.setopt(c.POST, True) # use the POST method
c.setopt(c.POSTFIELDS, urlencode( datastuff )) # send the fields built above, URL-encoded
c.perform()
c.close()
如果網站的某些內容發生了變化,而您現在收到錯誤,我會嘗試在 html = str(buffer.getvalue()) 之後加上 print html,看看取回的頁面是否仍然是這些正則表達式所要匹配的那個頁面。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.