I have a script which collects response header
information of a given URLs from file. The Argument/Input File
which i'm passing right now is from outside. Execution Method: python collect.py <Input.txt>
.
Input File :
1,http://www.example.com
2,http://www.blahblah.com
3,......
Now,I wish to pass the ID with URL as single argument like,
python collect.py 1,http://www.example.com
and execute the results and write it to outfile.
#!/usr/bin/python
import subprocess
import json
import sys
import httplib
import urlparse
import pickle
import sys
class HeaderFetcher:
def __init__(self,url,headers={'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}):
self.report = {}
self.initial_url = url
self.request_headers = headers
def fetch(self):
self.fetchheaders(self.initial_url,self.request_headers)
def fetchheaders(self,url,req_headers):
try:
u = urlparse.urlparse(url)
scheme = u.scheme
ServerConnection = None
con = None
if 'http' == scheme:
ServerConnection = httplib.HTTPConnection
elif 'https' == scheme:
ServerConnection = httplib.HTTPSConnection
if None != ServerConnection:
con = ServerConnection(u.hostname,u.port,timeout=10)
con.request("GET",url,None,req_headers)
res = con.getresponse()
#print res
self.report[url] = res.getheaders()
if 301 == res.status or 302 == res.status:
redirect_url = res.getheader('Location')
if not redirect_url in self.report:
if len(self.report.keys())<40:
self.fetchheaders(redirect_url,req_headers)
except:
test="Error In Loading"
#print test
def process(infile='Input.txt'):
#f = open('Input.txt','r')
f = open(sys.argv[1],"r")
agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
finalJson = {}
for line in f.readlines():
App=line.strip().split(',')
#print "Processing " + App[1]
App_URL=App[1]
ua=agents
fetcher = HeaderFetcher(App_URL,{'User-Agent':ua})
fetcher.fetch()
url=fetcher.report
keys = list(url)
headerJson = {}
for k in keys:
headers=url[k]
for header in headers:
headerJson[header[0]] = header[1]
finalJson[App[0]] = headerJson
header_final=json.dumps(finalJson,indent=4)
#server_details = json.loads(response.read(header_final))
#print header_final
#json_data=open(header_final)
#server_details = json.loads(header_final)
server_details=json.loads(unicode(header_final),"ISO-8859-1")
with open("Headers_Final_List.txt",'wb') as f :
for appid, headers in server_details.iteritems():
htypes = [h for h in headers if h in (
'x-powered-by','server','x-aspnet-version','x-aspnetmvc-version')]
headers='{},{}'.format(appid, ','.join(htypes))
f.write(headers+'\n')
f.close()
if __name__ == '__main__':
process(sys.argv[-1])
Any suggestions on how to parse the single argument from command line.
sys.argv is not a file but you are trying to read it like one.
this:
def process(infile='Input.txt'):
#f = open('Input.txt','r')
f = open(sys.argv[1],"r")
agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
finalJson = {}
for line in f.readlines():
should be more like:
def process():
#f = open('Input.txt','r')
argu = sys.argv[1].split(',')
ID = argu[0]
URL = argu[1]
Then you can use the id and url in your other method.
#!/usr/bin/python
import sys
print 'Number of arguments:', len(sys.argv), 'arguments.'
print 'Argument List:', str(sys.argv)
n = len(sys.argv) -1
args = []
for i in sys.argv[n].split(','):
args.append(i)
for i in args:
print i
python arg.py 1,abc.txt
Number of arguments: 2 arguments.
Argument List: ['arg.py', '1,abc.txt']
1
abc.txt
This method doesn't fall over with IndexError: list index out of range , if the program is called without a parameter or the parameter doesn't have a comma in it
id, url = sys.argv[1].split(',')
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.