[英]Python Regular Expression to capture API requests via CURL
我有一個 python 腳本來捕獲 curl 請求。
import re
import json
# Sample input: a mix of plain downloads, piped installers and API calls,
# one curl command per line.
content = """
curl -o output.txt http://example.com
curl https://httpstat.us/400 -f
curl http://executable.sh | bash
curl ftp://executable.sh | sudo bash
curl www.helloworld.com > test.file
curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
curl -X 'GET' 'http://localhost:8000' -H 'application/json'
curl -X 'GET' "http://localhost:8000" -H 'application/json'
RUN curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2
curl --header "Content-Type: application/json" -d '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"
curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>
curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data '{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}' https://secure.example.com/api/REST/1.0/data/contact/1
"""

# Literal "curl ", an optional run starting with "-", an optional run of
# non-space characters, then a token starting with http:/https:, www. or ftp:.
# The trailing "(.*)" belongs to the ftp alternative only.
curl_extractor_regex = re.compile(
    r'(curl (-.*)?(\S+)?(https?:\S+|www\.\S+|ftp:\S+(.*)))'
)

# One tuple of the five capture groups per match; groups that did not take
# part in a match are reported as empty strings.
data = [found.groups('') for found in curl_extractor_regex.finditer(content)]

rendered = json.dumps(data, indent=4)
print(rendered)
是否有一種良好/可靠的方法,可以只識別出調用 API 的 curl 實例?
預期結果:
curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'
curl -X 'GET' 'http://localhost:8000' -H 'application/json'
curl -X 'GET' "http://localhost:8000" -H 'application/json'
curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2
curl --header "Content-Type: application/json" -d '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"
curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>
curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data '{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}' https://secure.example.com/api/REST/1.0/data/contact/1
注意:上述 python 腳本中的 content 只是 curl 請求的示例集。正則表達式應該找到執行 API 調用的任何 curl 請求。使用 RegEx 的原因是要為各種 API 請求找到一種通用模式,而不是針對某些特定的 URL、請求方法或請求標頭。
它必須是正則表達式嗎? 可以只檢查行中的字符串....
# Print every line of the sample text that mentions the GET method or a
# "contact" endpoint. `content` is the multi-line sample string from the
# question's script.
for line in content.splitlines():
    if 'GET' in line or 'contact' in line:
        print(line)
curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'
curl -X 'GET' 'http://localhost:8000' -H 'application/json'
curl -X 'GET' "http://localhost:8000" -H 'application/json'
RUN curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2
curl --header "Content-Type: application/json" -d '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"
curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>
curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data '{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}' https://secure.example.com/api/REST/1.0/data/contact/1
您無法使用正則表達式驗證 URL 是否正確,它只能匹配某個模式。我假設 curl 之后的 -X、--user、--header 是有效 URL 的關鍵字。
import re
# Sample input, one curl command per line. Matching below is done line by
# line, so a command wrapped over several lines would only be captured up to
# the first line break.
content = """
curl -o output.txt http://example.com
curl https://httpstat.us/400 -f
curl http://executable.sh | bash
curl ftp://executable.sh | sudo bash
curl www.helloworld.com > test.file
curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
curl -X 'GET' 'http://localhost:8000' -H 'application/json'
curl -X 'GET' "http://localhost:8000" -H 'application/json'
RUN curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2
curl --header "Content-Type: application/json" -d '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"
curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>
curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data '{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}' https://secure.example.com/api/REST/1.0/data/contact/1
"""

content_split = content.split('\n')

# "curl", one whitespace character, then -X, --user or --header, followed by
# the rest of the line.
regex = r'(curl)\s(-X|--user|--header).*'
compiled_regex = re.compile(regex)  # compiled once, reused for every line

url_lst = []
for line in content_split:
    # group(0) is the whole match; it starts at "curl", so any prefix on the
    # line (such as "RUN ") is not part of the captured text. Empty lines
    # simply yield no matches.
    url_lst.extend(match.group(0) for match in compiled_regex.finditer(line))

print(url_lst)
>>>["curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'", "curl -X 'GET' 'http://localhost:8000' -H 'application/json'", 'curl -X \'GET\' "http://localhost:8000" -H \'application/json\'', 'curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2', 'curl --header "Content-Type: application/json" -d \'{"emailAddress":"george.washington@america.com"}\' https://secure.example.com/api/REST/1.0/data/contact', 'curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"', 'curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>', 'curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data \'{"emailAddress":"george.washington@america.com"}\' https://secure.example.com/api/REST/1.0/data/contact', 'curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data \'{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}\' https://secure.example.com/api/REST/1.0/data/contact/1']
如果要匹配的所有示例都在同一行上,則可以使用 re.findall,匹配 curl 之后帶有 -X、--header 或 --user 的行:
\bcurl\s.*(?:-X|--(?:header|user)).*
如果還應該存在另一部分(例如某個協議),您可以使用正向前瞻斷言(並根據需要擴展它):
\bcurl\s(?=.*(?:ht|f)tps?://).*(?:-X|--(?:header|user)).*
解釋
- `\bcurl\s` 匹配單詞 curl,后跟一個空白字符
- `(?=.*(?:ht|f)tps?://)` 正向前瞻,斷言行中存在 http 或 ftp 之類的協議
- `.*` 匹配整行
- `(?:` 用於備選項的非捕獲組
  - `-X` 按字面匹配
  - `|` 或者
  - `--(?:header|user)` 匹配 --header 或 --user
- `)` 關閉非捕獲組
- `.*` 匹配該行的其餘部分

例子
import re
import json
# Sample input: one curl command per line; only some of them are API calls.
content = """
curl -o output.txt http://example.com
curl https://httpstat.us/400 -f
curl http://executable.sh | bash
curl ftp://executable.sh | sudo bash
curl www.helloworld.com > test.file
curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'
curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash
curl -X 'GET' 'http://localhost:8000' -H 'application/json'
curl -X 'GET' "http://localhost:8000" -H 'application/json'
RUN curl --user "APITest:API.User" https://secure.example.com/api/REST/1.0/data/contacts?count=2
curl --header "Content-Type: application/json" -d '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl -X GET -H "Authorization: Bearer {ACCESS_TOKEN}" "https://api.server.io/posts"
curl --user "<companyName>:<userName>" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>
curl --user "APITest:API.User" --header "Content-Type: application/json" --request POST --data '{"emailAddress":"george.washington@america.com"}' https://secure.example.com/api/REST/1.0/data/contact
curl --user "APITest:API.User" --header "Content-Type: application/json" --request PUT --data '{"id":"1","emailAddress":"george.washington@america.com","businessPhone":"555-555-5555"}' https://secure.example.com/api/REST/1.0/data/contact/1
"""

# The word "curl" plus one whitespace character, then anywhere later on the
# same line either -X or the long options --header / --user, then the rest of
# the line. Only non-capturing groups are used, so each match is a plain string.
curl_extractor_regex = re.compile(
    r'\bcurl\s.*(?:-X|--(?:header|user)\b).*'
)

# Collect the full text of every match, in order of appearance.
data = [hit.group(0) for hit in curl_extractor_regex.finditer(content)]

as_json = json.dumps(data, indent=4)
print(as_json)
Output
[
"curl -X 'GET' 'http://localhost:8000' -H 'accept: application/json'",
"curl -X 'GET' 'http://localhost:8000' -H 'application/json'",
"curl -X 'GET' \"http://localhost:8000\" -H 'application/json'",
"curl --user \"APITest:API.User\" https://secure.example.com/api/REST/1.0/data/contacts?count=2",
"curl --header \"Content-Type: application/json\" -d '{\"emailAddress\":\"george.washington@america.com\"}' https://secure.example.com/api/REST/1.0/data/contact",
"curl -X GET -H \"Authorization: Bearer {ACCESS_TOKEN}\" \"https://api.server.io/posts\"",
"curl --user \"<companyName>:<userName>\" --request GET https://secure.p0<podNumber>.eloqua.com/api/<apiType>/<apiVersion>/<endpoint>",
"curl --user \"APITest:API.User\" --header \"Content-Type: application/json\" --request POST --data '{\"emailAddress\":\"george.washington@america.com\"}' https://secure.example.com/api/REST/1.0/data/contact",
"curl --user \"APITest:API.User\" --header \"Content-Type: application/json\" --request PUT --data '{\"id\":\"1\",\"emailAddress\":\"george.washington@america.com\",\"businessPhone\":\"555-555-5555\"}' https://secure.example.com/api/REST/1.0/data/contact/1"
]
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.