![](/img/trans.png)
[英]txt file downloaded is being viewed as html and that is excluding the empty lines
[英]why is queue empty for the file to be downloaded
下面是我在各自獨立的線程中下載不同 URL 的代碼。我試圖在實現線程池之前先做一些更改,但更改之後隊列一直是空的,下載也沒有開始。
import Queue
import urllib2
import os
import utils as _fdUtils
import signal
import sys
import time
import threading
class ThreadedFetch(threading.Thread):
    """Thread that downloads a single URL, taken from ``queue``, to a file.

    NOTE(review): this listing lost its indentation when it was published;
    the nesting below is reconstructed from the syntax and should be
    confirmed against the original post.
    """

    def __init__(self, queue, out_queue):
        super(ThreadedFetch, self).__init__()
        # queue.get() is called here, in the constructor, so it runs in the
        # thread that creates the object (not in the new thread) and blocks
        # for as long as the queue is empty.
        self.queueItems = queue.get()
        # Each queue item is indexed as [url, save_to].
        self.__url = self.queueItems[0]
        self.__saveTo = self.queueItems[1]
        self.outQueue = out_queue

    def run(self):
        # The local file name is the last path component of the URL.
        fileName = self.__url.split('/')[-1]
        # The file is written under DESKTOP_PATH (defined outside this
        # listing); self.__saveTo is stored above but not used here.
        path = os.path.join(DESKTOP_PATH, fileName)
        # getUrlSizeInBytes is a project helper not shown here; presumably it
        # returns the remote file size in bytes -- TODO confirm.
        file_size = int(_fdUtils.getUrlSizeInBytes(self.__url))
        # The download is repeated until STOP_REQUEST is set; nothing in this
        # block sets it or breaks out of this outer loop, and task_done() is
        # never called on the queue.
        while not STOP_REQUEST.isSet():
            urlFh = urllib2.urlopen(self.__url)
            _log.info("Download: %s" , fileName)
            with open(path, 'wb') as fh:
                file_size_dl = 0
                block_sz = 8192
                # Copy the response to disk in block_sz-byte chunks.
                while True:
                    buffer = urlFh.read(block_sz)
                    if not buffer:
                        # An empty read means the response is exhausted.
                        break
                    file_size_dl += len(buffer)
                    fh.write(buffer)
                    # Progress line: bytes downloaded so far and percentage.
                    status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                    # chr(8) is the backspace character, appended so the next
                    # status overwrites this one on the terminal.
                    status = status + chr(8)*(len(status)+1)
                    sys.stdout.write('%s\r' % status)
                    time.sleep(.05)
                    sys.stdout.flush()
                    # NOTE(review): the nesting level of this check is a guess.
                    if file_size_dl == file_size:
                        _log.info("Download Completed %s%% for file %s, saved to %s",
                                  file_size_dl * 100. / file_size, fileName, DESKTOP_PATH)
下面是執行調用和初始化的 main 函數。
def main(appName):
    """Create one ThreadedFetch per URL, enqueue the URLs and wait on the queue.

    ``appName`` is accepted but not used in this listing.

    NOTE(review): ``queue``, ``out_queue`` and ``lgr`` are not defined in this
    listing and presumably come from code that was not included. The
    indentation was lost in publishing and is reconstructed from the syntax.
    """
    args = _fdUtils.getParser()
    urls_saveTo = {}
    # Create and start one thread per URL, passing each the queue instance.
    # At this point nothing has been put on the queue yet; it is only
    # populated further down.
    for i in range(len(args.urls)):
        t = ThreadedFetch(queue, out_queue)
        t.daemon = True
        t.start()
    try:
        # Map every URL to the same save location.
        for url in args.urls:
            urls_saveTo[url] = args.saveTo
        # urls_saveTo = {urls[0]: args.saveTo, urls[1]: args.saveTo, urls[2]: args.saveTo}
        # populate queue with data
        for item, value in urls_saveTo.iteritems():
            queue.put([item, value])
        # wait on the queue until everything has been processed
        queue.join()
        print '*** Done'
    except (KeyboardInterrupt, SystemExit):
        lgr.critical('! Received keyboard interrupt, quitting threads.')
您先創建了隊列,然後創建第一個線程,而該線程立即嘗試從仍然為空的隊列中獲取項目。ThreadedFetch.__init__() 方法不是異步運行的;只有在線程對象上調用 start() 時,run() 方法才會異步運行。
請在 __init__() 中只保存隊列,並把 get() 調用移到 run() 方法中。這樣,您可以先創建所有線程,它們會各自在自己的線程中阻塞,主線程便有機會把項目放入隊列。
class ThreadedFetch(threading.Thread):
    """Sketch of the fix: keep the queue in __init__ and call get() in run()."""

    def __init__(self, queue, out_queue):
        super(ThreadedFetch, self).__init__()
        # Only store the queue here; nothing is read from it in the constructor.
        self.queue = queue
        self.outQueue = out_queue

    def run(self):
        # get() is now called from run(); each item unpacks to (url, save_to).
        url, save_to = self.queue.get()
        # ...
對於此示例,由於每個線程恰好從隊列中獲得一項,因此隊列是不必要的。 您可以在創建線程對象時將該項目直接傳遞給線程:
class ThreadedFetch(threading.Thread):
    """Sketch of a variant that receives its URL directly, without an input queue."""

    def __init__(self, url, save_to, out_queue):
        super(ThreadedFetch, self).__init__()
        # The work item is passed in as arguments and stored on the instance.
        self.url = url
        self.save_to = save_to
        self.outQueue = out_queue

    def run(self):
        # Download logic elided in the original listing.
        # ...
另外,既然 ThreadedFetch 類實際上只由 __init__() 和 run() 方法組成,您可以考慮把 run() 方法改寫成一個普通函數,並以異步方式啟動它。
def fetch(url, save_to, out_queue):
    """Sketch of a plain function taking over the work of ThreadedFetch.run()."""
    # Body elided in the original listing.
    # ...
# ...
def main():
    """Sketch: start ``fetch`` in a daemon thread via ``Thread(target=...)``."""
    # Setup elided in the original listing; url, save_to and out_queue are
    # presumably defined there.
    # ...
    # NOTE(review): Thread is presumably threading.Thread; the import is not shown.
    thread = Thread(target=fetch, args=(url, save_to, out_queue))
    thread.daemon = True
    thread.start()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.