#!/usr/bin/env python
import threading
import urllib, sys,os
import Queue
concurrent = 200
queue = Queue.Queue(concurrent*2)
try:
aim = sys.argv[1].lower()
dic = open(sys.argv[2],'r')
except:
print "Usage: %s url wordlist" % sys.argv[0]
sys.exit(1)
class Scanner(threading.Thread):
def __init__(self,queue):
threading.Thread.__init__(self)
self.queue=queue
def run(self):
while True:
self.path = self.queue.get()
self.geturl = urllib.urlopen(aim+'/'+self.path)
self.status = self.geturl.getcode()
self.url = aim+self.path
self.result = self.url+'=>'+str(self.status)
print self.result
self.writeresult(self.result)
self.queue.task_done()
def writeresult(self,result):
fp = open('result.txt','a+')
fp.write(result+'\n')
fp.close()
def main():
for i in range(concurrent):
t = Scanner(queue)
t.setDaemon(True)
t.start()
for path in dic.readlines():
queue.put(path.strip())
queue.join()
if __name__ == '__main__':
main()
It is a python program to scan the dir of the website, when the scanning finish, it even not quit with the ctrl+ci want to know when it finish the scanning how to quit the program automatically.
and when it is in process, it also appear some problem like this:
Exception in thread Thread-130:
Traceback (most recent call last):
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 551, in __bootstrap_inner
self.run()
File "tt.py", line 28, in run
self.geturl = urllib.urlopen(aim+'/'+self.path)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 86, in urlopen
return opener.open(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 207, in open
return getattr(self, name)(url)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 344, in open_http
h.endheaders(data)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 954, in endheaders
self._send_output(message_body)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 814, in _send_output
self.send(msg)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 776, in send
self.connect()
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 757, in connect
self.timeout, self.source_address)
File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 553, in create_connection
for res in getaddrinfo(host, port, 0, SOCK_STREAM):
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known
The program as it is, it will close when all threads have finished. But to easily get rid of all those errors, in your function run, from the class, after the while True: claus, put everything that follows in a try: except: clause like this
try:
code
except:
pass
Its not exactly the cleanest way to do it, but considering what you are after, it will do the job, and will get you rid of those exceptions, which btw mean that some URLS have been timed out.
I wanted some practice so I tried this out and changed a lot. Does it get you a full set of results? You will need to replace paths with your original argument reading.
import threading
import urllib
import Queue
concurrent = 5
aim = 'http://edition.cnn.com'
paths = ['2013/10/12/opinion/kazin-tea-party/index.html?hpt=hp_t5',
'2013/10/11/opinion/opinion-hay-nobel-opcw/index.html?hpt=hp_t5',
'2013/10/11/opinion/rosin-women-in-charge/index.html?hpt=hp_t5',
'some invalid path',
'2013'] # also an invalid path
def main():
work_q = Queue.Queue()
result_q = Queue.Queue()
# start the scanners and the result writer
scanners = [Scanner(work_q, result_q) for i in range(concurrent)]
for s in scanners:
s.start()
results_file_path = 'results.txt'
result_writer = ResultWriter(result_q, 'results.txt')
result_writer.start()
# send all the work and wait for it to be completed
for path in paths:
work_q.put(path.strip())
work_q.join()
# tell everyone to stop
# you could just kill the threads but you writer needs to close the file
for s in scanners:
work_q.put(Scanner.STOP_TOKEN)
result_q.put(ResultWriter.STOP_TOKEN) # make sure file gets closed
# wait for everyone to actually stop
for s in scanners:
s.join()
result_writer.join()
print 'the scan has finished and results are in {}'.format(results_file_path)
class Scanner(threading.Thread):
STOP_TOKEN = '<<stop>>'
def __init__(self, work_q, result_q):
threading.Thread.__init__(self)
self.work_q = work_q
self.result_q = result_q
def run(self):
while True:
path = status = None # reset in case of error
try:
try:
path = self.work_q.get(timeout=0.00001)
except Queue.Empty:
continue
if path == self.STOP_TOKEN:
break # stop looking for work
get_url = urllib.urlopen(aim + '/' + path)
status = get_url.getcode()
except Exception as e:
status = 'unhandled error ({})'.format(e)
self.result_q.put((path, status))
self.work_q.task_done()
class ResultWriter(threading.Thread):
STOP_TOKEN = '<<stop>>'
def __init__(self, result_q, results_file_path):
threading.Thread.__init__(self)
self.result_q = result_q
self.results_file_path = results_file_path
def run(self):
with open(self.results_file_path, 'w') as results_file:
while True:
try:
result = self.result_q.get(timeout=0.00001)
except Queue.Empty:
continue
if result == self.STOP_TOKEN:
break # stop looking for results
path, status = result
results_file.write('{}=>{}\n'.format(path, status))
if __name__ == '__main__':
main()
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.