How to quit the program when all the threads have finished?

Question

#!/usr/bin/env python

import threading
import urllib, sys,os
import Queue


# Number of Scanner worker threads that main() starts.
concurrent = 200
# Bounded work queue: put() blocks once concurrent*2 paths are waiting.
queue = Queue.Queue(concurrent*2)

# Read the target URL and the wordlist file name from the command line.
try:
    # The whole argument is lower-cased, not only the host name.
    aim = sys.argv[1].lower()
    # Opened while the module loads; nothing in this script closes it.
    dic = open(sys.argv[2],'r')

# NOTE(review): the bare except reports every failure as a usage error,
# including a wordlist file that exists on the command line but cannot
# be opened.
except:
    print "Usage: %s url wordlist" % sys.argv[0]
    sys.exit(1)

class Scanner(threading.Thread):
    """Worker thread that requests each queued path under `aim` and records the status code."""
    def __init__(self,queue):
        """Store the queue that run() takes paths from."""
        threading.Thread.__init__(self)
        self.queue=queue

    def run(self):
        """Process queued paths forever; the loop has no exit condition."""

        while True:

            # Blocks until a path is available.
            self.path = self.queue.get()
            # NOTE(review): nothing here catches exceptions. If urlopen()
            # raises, this thread dies before task_done() is reached, the
            # item stays unfinished, and queue.join() in main() never
            # returns.
            self.geturl = urllib.urlopen(aim+'/'+self.path)
            self.status =  self.geturl.getcode()
            # NOTE(review): the request above inserts '/' between aim and
            # the path but this string does not, so the logged URL can
            # differ from the one that was requested.
            self.url = aim+self.path
            self.result = self.url+'=>'+str(self.status)
            print self.result
            self.writeresult(self.result)
            # Marks this path as finished so that queue.join() can return.
            self.queue.task_done()



    def writeresult(self,result):
        """Append `result` followed by a newline to result.txt."""

        # 'a+' appends to the file; it is reopened for every result.
        fp = open('result.txt','a+')
        fp.write(result+'\n')
        fp.close()  


def main():         
    """Start the worker threads, queue every wordlist entry and wait for completion."""

    for i in range(concurrent):
        t = Scanner(queue)
        # Daemon threads do not keep the interpreter alive after the main
        # thread has finished.
        t.setDaemon(True)
        t.start()

    # Each wordlist line becomes one work item, stripped of surrounding
    # whitespace (including the trailing newline).
    for path in dic.readlines():
        queue.put(path.strip())

    # Blocks until task_done() has been called once for every queued path.
    queue.join()

# Run the scan only when this file is executed as a script.
if __name__ == '__main__':
    main()

This is a Python program that scans the directories of a website. When the scan finishes, the program does not exit; it does not even quit when I press Ctrl+C. I want to know how to make the program quit automatically once the scan has finished.

While it is running, errors like the following also appear:

Exception in thread Thread-130:
Traceback (most recent call last):
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/threading.py", line 551, in __bootstrap_inner
    self.run()
  File "tt.py", line 28, in run
    self.geturl = urllib.urlopen(aim+'/'+self.path)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 86, in urlopen
    return opener.open(url)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 207, in open
    return getattr(self, name)(url)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/urllib.py", line 344, in open_http
    h.endheaders(data)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 954, in endheaders
    self._send_output(message_body)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 814, in _send_output
    self.send(msg)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 776, in send
    self.connect()
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/httplib.py", line 757, in connect
    self.timeout, self.source_address)
  File "/usr/local/Cellar/python/2.7.3/Frameworks/Python.framework/Versions/2.7/lib/python2.7/socket.py", line 553, in create_connection
    for res in getaddrinfo(host, port, 0, SOCK_STREAM):
IOError: [Errno socket error] [Errno 8] nodename nor servname provided, or not known

Answer 1

As written, the program will exit once all the threads have finished their work. To get rid of those errors easily, wrap everything that follows the `while True:` line in your class's `run` method in a `try:`/`except:` block, like this:

try:
     code  # placeholder for the existing body of the `while True:` loop in run()
except:
    # NOTE(review): a bare except silently discards every error. If the
    # task_done() call is part of `code`, it is skipped whenever an earlier
    # statement raises.
    pass

It's not exactly the cleanest way to do it, but considering what you are after, it will do the job and get rid of those exceptions, which, by the way, mean that some URLs timed out.

Answer 2

I wanted some practice, so I tried this out and changed a lot. Does it give you a full set of results? You will need to replace the hard-coded `paths` list with your original command-line argument handling.

- With those threads, maybe you are getting unhandled exceptions that result in missing results? I added a mechanism to catch any errors during reading and pass them to the result writer.
- I guess appending to a file from multiple threads is OK, but I added a writer thread to manage the file more cleanly.
- Most of the assignments to `self` were unnecessary.
- If you still get socket errors, check the paths in the result file and decide how you want to handle those results, if at all.
- I'm no expert, so don't take this as best practice.

import threading
import urllib
import Queue

# Number of Scanner threads that main() starts.
concurrent = 5

# Base URL; each Scanner requests aim + '/' + path.
aim = 'http://edition.cnn.com'
# Hard-coded sample paths used as the work list by main().
paths = ['2013/10/12/opinion/kazin-tea-party/index.html?hpt=hp_t5',
         '2013/10/11/opinion/opinion-hay-nobel-opcw/index.html?hpt=hp_t5',
         '2013/10/11/opinion/rosin-women-in-charge/index.html?hpt=hp_t5',
         'some invalid path',
         '2013']  # also an invalid path


def main():
    """Scan every entry of ``paths`` and write the results to a file.

    Starts ``concurrent`` Scanner threads and one ResultWriter, queues all
    paths, waits until each one has been processed, then shuts the threads
    down with their stop tokens so that the program exits on its own.
    """
    results_file_path = 'results.txt'
    work_q = Queue.Queue()
    result_q = Queue.Queue()

    # Start the scanners and the result writer.
    scanners = [Scanner(work_q, result_q) for _ in range(concurrent)]
    for scanner in scanners:
        scanner.start()
    # Pass the variable instead of repeating the literal, so the file that
    # is written and the file named in the final message cannot diverge.
    result_writer = ResultWriter(result_q, results_file_path)
    result_writer.start()

    # Send all the work and wait for it to be completed.
    for path in paths:
        work_q.put(path.strip())
    work_q.join()

    # Tell everyone to stop. You could just kill the threads, but the
    # writer needs to close the file. One stop token is queued per scanner.
    for _ in scanners:
        work_q.put(Scanner.STOP_TOKEN)
    result_q.put(ResultWriter.STOP_TOKEN)  # make sure the file gets closed

    # Wait for everyone to actually stop.
    for scanner in scanners:
        scanner.join()
    result_writer.join()
    print('the scan has finished and results are in {}'.format(results_file_path))


class Scanner(threading.Thread):
    """Worker thread that requests queued paths and reports their status.

    Paths are taken from ``work_q`` and requested as ``aim + '/' + path``.
    For each path a ``(path, status)`` tuple is put on ``result_q``, where
    ``status`` is the HTTP status code or an ``'unhandled error (...)'``
    string if the request raised. Putting ``STOP_TOKEN`` on ``work_q`` makes
    one scanner thread finish.
    """

    STOP_TOKEN = '<<stop>>'

    def __init__(self, work_q, result_q):
        """Remember the queue to read paths from and the queue to report to."""
        threading.Thread.__init__(self)
        self.work_q = work_q
        self.result_q = result_q

    def run(self):
        """Process paths until the stop token is received."""
        while True:
            # Block until an item arrives. Shutdown is signalled with
            # STOP_TOKEN, so polling with a tiny timeout only burns CPU.
            path = self.work_q.get()
            try:
                if path == self.STOP_TOKEN:
                    break  # stop looking for work
                self.result_q.put((path, self._fetch_status(path)))
            finally:
                # Every item taken from the queue is marked done, including
                # the stop token, so work_q.join() can never be left waiting.
                self.work_q.task_done()

    def _fetch_status(self, path):
        """Return the HTTP status code for ``path`` or an error description."""
        try:
            response = urllib.urlopen(aim + '/' + path)
            try:
                return response.getcode()
            finally:
                # Release the connection instead of leaving it to the
                # garbage collector.
                response.close()
        except Exception as e:
            # Deliberately broad: any failure becomes a result line rather
            # than killing the thread and losing the path.
            return 'unhandled error ({})'.format(e)


class ResultWriter(threading.Thread):
    """Thread that drains a result queue into a text file.

    Each queue item is a ``(path, status)`` tuple and is written as one
    ``path=>status`` line. Putting ``STOP_TOKEN`` on the queue makes the
    thread close the file and finish.
    """

    STOP_TOKEN = '<<stop>>'

    def __init__(self, result_q, results_file_path):
        """Remember the queue to read from and the file path to write to."""
        threading.Thread.__init__(self)
        self.result_q = result_q
        self.results_file_path = results_file_path

    def run(self):
        """Write queued results until the stop token arrives."""
        with open(self.results_file_path, 'w') as results_file:
            # iter(callable, sentinel) calls the blocking get() repeatedly
            # and ends the loop when it returns STOP_TOKEN. Blocking avoids
            # the busy-wait of polling with a tiny timeout.
            for path, status in iter(self.result_q.get, self.STOP_TOKEN):
                results_file.write('{}=>{}\n'.format(path, status))


# Run the scan only when this file is executed as a script.
if __name__ == '__main__':
    main()

How to quit the program when all the threads have finished?

Question

2 answers

solution1
0 2013-10-13 14:58:05

solution2
0 ACCEPTED 2013-10-13 17:36:53

How to quit the program when all the threads have finished?

Question

2 answers

solution1 0 2013-10-13 14:58:05

solution2 0 ACCEPTED 2013-10-13 17:36:53

solution1
0 2013-10-13 14:58:05

solution2
0 ACCEPTED 2013-10-13 17:36:53