简体   繁体   中英

Python : Command Line Argument Issue

I have a script which collects response header information of a given URLs from file. The Argument/Input File which i'm passing right now is from outside. Execution Method: python collect.py <Input.txt> .

Input File :

1,http://www.example.com
2,http://www.blahblah.com
3,......

Now,I wish to pass the ID with URL as single argument like,

python collect.py 1,http://www.example.com

and execute the results and write it to outfile.

#!/usr/bin/python
import subprocess
import json
import sys
import httplib
import urlparse
import pickle
import sys



class HeaderFetcher:
        def __init__(self,url,headers={'User-Agent':'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB;    rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}):
                self.report = {}
                self.initial_url = url
                self.request_headers = headers

        def fetch(self):
                self.fetchheaders(self.initial_url,self.request_headers)

        def fetchheaders(self,url,req_headers):
                try:
                        u = urlparse.urlparse(url)
                        scheme = u.scheme
                        ServerConnection = None
                        con = None
                        if 'http' == scheme:
                                ServerConnection = httplib.HTTPConnection
                        elif 'https' == scheme:
                                ServerConnection = httplib.HTTPSConnection
                        if None != ServerConnection:
                                con = ServerConnection(u.hostname,u.port,timeout=10)
                                con.request("GET",url,None,req_headers)
                                res = con.getresponse()
                                #print res
                                self.report[url] = res.getheaders()
                                if 301 == res.status or 302 == res.status:
                                        redirect_url = res.getheader('Location')
                                        if not redirect_url in self.report:
                                                if len(self.report.keys())<40:
                                                        self.fetchheaders(redirect_url,req_headers)
                except:
                        test="Error In Loading"
                        #print test

def process(infile='Input.txt'):
        #f = open('Input.txt','r')
        f = open(sys.argv[1],"r")
        agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB;    rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
        finalJson = {}
        for line in f.readlines():
            App=line.strip().split(',')
            #print "Processing " + App[1]
            App_URL=App[1]
            ua=agents
            fetcher = HeaderFetcher(App_URL,{'User-Agent':ua})
            fetcher.fetch()
            url=fetcher.report
            keys = list(url)
            headerJson = {}
            for k in keys:
                headers=url[k]
                for header in headers:
                    headerJson[header[0]] = header[1]
            finalJson[App[0]] = headerJson
        header_final=json.dumps(finalJson,indent=4)
        #server_details = json.loads(response.read(header_final))
        #print header_final
        #json_data=open(header_final)
        #server_details = json.loads(header_final)
        server_details=json.loads(unicode(header_final),"ISO-8859-1")
        with open("Headers_Final_List.txt",'wb') as f :
                for appid, headers in server_details.iteritems():
                        htypes = [h for h in headers if h in (
                         'x-powered-by','server','x-aspnet-version','x-aspnetmvc-version')]
                        headers='{},{}'.format(appid, ','.join(htypes))
                        f.write(headers+'\n')
        f.close()

if __name__ == '__main__':
        process(sys.argv[-1])

Any suggestions on how to parse the single argument from command line.

sys.argv is not a file but you are trying to read it like one.

this:

def process(infile='Input.txt'):
        #f = open('Input.txt','r')
        f = open(sys.argv[1],"r")
        agents= {'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB;    rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3'}
        finalJson = {}
        for line in f.readlines():

should be more like:

def process():
        #f = open('Input.txt','r')
        argu = sys.argv[1].split(',')
        ID = argu[0]        
        URL = argu[1]

Then you can use the id and url in your other method.

#!/usr/bin/python

import sys

print 'Number of arguments:', len(sys.argv), 'arguments.'
print 'Argument List:', str(sys.argv)
n = len(sys.argv) -1
args = []
for i in sys.argv[n].split(','):
 args.append(i)
for i in args:
 print i

python arg.py 1,abc.txt

Number of arguments: 2 arguments.
Argument List: ['arg.py', '1,abc.txt']
1
abc.txt

This method doesn't fall over with IndexError: list index out of range , if the program is called without a parameter or the parameter doesn't have a comma in it

id, url = sys.argv[1].split(',')

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM