[英]Python Threading isn't working
設置:init.py
# --- Asker's original driver (init.py), reproduced as posted ---
import threading
...other imports ...
... vars ...
# NOTE(review): t.join() inside the loop blocks until this thread has
# finished before the next one is even created, so the threads run one
# at a time — this is exactly why the printed output is serialized.
for drive in drives:
series = folder.getFolders(drive)
for serie in series:
print(str(datetime.datetime.now()))
t = threading.Thread(target=serienchecker, args=(drive, serie, blacklist,apikeyv3,language,))
t.start()
t.join()
serienchecker.py
# --- Asker's serienchecker.py (abridged with "..." by the asker) ---
from threading import Thread
from themoviedb import *
from folderhelper import *
class serienchecker(Thread):
...
# NOTE(review): startSearch() is called from __init__, i.e. it runs in
# the CALLER's thread at construction time — the actual work never runs
# inside the new thread's run(). Thread.start() gains nothing here.
def __init__(self, path,seriesname, blacklist, apikeytmdb='', language='eng'):
...
self.startSearch()
...
def startSearch(self):
print("start")
...
輸出:
2017-02-08 21:29:04.481536
start
2017-02-08 21:29:17.385611
start
2017-02-08 21:30:00.548471
start
但我希望所有這些都可以在大約同一時間進行計算。 有沒有辦法將所有任務排入隊列，並由N個線程同時處理? [這只是一個小例子，實際腳本將檢查幾百個文件夾] 我是不是哪裡做錯了?
我曾嘗試過幾種方法,但沒有成功,請幫助我
謝謝!
編輯://
def job():
    """Drain the module-level ``jobs`` queue, running task() on each entry."""
    while jobs:
        drive, serie = jobs.pop()
        task(drive=drive, serie=serie)
def task(drive, serie):
    """Run a serienchecker for one (drive, serie) pair and print its results."""
    print("Serie[{0}]".format(serie))
    sc = serienchecker(drive, serie, blacklist, apikeyv3, language)
    sc.start()
    # Bug fix: the original read sc.result immediately after start(),
    # racing the worker thread. Wait for the thread to finish first.
    sc.join()
    result = sc.result
    # Each result line on its own line (same output as the original
    # manual string concatenation).
    resultString = "".join(obj + "\n" for obj in result)
    print(resultString)
# Fill the job queue: one [drive, serie] entry per series folder found.
# (``series``/``drive``/``serie`` stay module-level names, as before.)
for drive in drives:
    series = folder.getFolders(drive)
    for serie in series:
        jobs.append([drive, serie])

# Drain the queue; job() itself loops until the queue is empty.
while jobs:
    job()
join()
等到線程結束為止,因此您不應該在啟動線程之后立即調用它(否則您不能在前一個線程結束之前創建新線程)。
創建一個列表以在開始時存儲您的線程:
threads = []  # collect every started Thread so they can all be joined at the end
然后在創建線程時將其添加到列表中:
threads.append(t)  # keep the handle; join it later, not right after start()
在程序結束時加入所有線程
# Only after every worker has been started do we block waiting for them.
for worker in threads:
    worker.join()
前面已經提到，您需要將join
推遲到所有線程都已啟動之後再調用。 考慮使用ThreadPool
來限制並發線程的數量,如果python的GIL減慢了處理速度,可以將其重新實現為進程池。 它為您執行線程啟動,調度和加入。
import multiprocessing
import itertools
import platform
...
# helper functions for process pool
#
# linux - worker process gets a view of parent memory at time pool
# is created, including global variables that exist at that time.
#
# windows - a new process is created and all needed state must be
# passed to the child. we could pass these values on every call,
# but assuming blacklist is large, its more efficient to set it
# up once
# On Linux (fork) pool workers inherit the parent's globals at pool-creation
# time, so no per-worker setup is required. On Windows (spawn) every worker
# starts from scratch, so the shared config is installed once per worker
# through the pool initializer below.
do_init = platform.system() == "Windows"

if do_init:
    def init_serienchecker_process(_blacklist, _apikeyv3, _language):
        """Pool-worker initializer: publish the shared config as globals."""
        global blacklist, apikeyv3, language
        blacklist = _blacklist
        apikeyv3 = _apikeyv3
        language = _language
# Executed in a pool worker for each item that Pool.map iterates in the
# parent. Each item is one (drive, serie) tuple: unpack it and combine it
# with the globals installed when the pool was created.
def serienchecker_worker(drive_serie):
    """Run serienchecker on one (drive, serie) pair using the shared
    blacklist/apikeyv3/language set by init_serienchecker_process (or
    inherited from the parent on fork platforms)."""
    drive, serie = drive_serie
    return serienchecker(drive, serie, blacklist, apikeyv3, language)
def drive_serie_iter(folder, drives):
    """Yield every (drive, serie) pair across all drives.

    Bug fix: the original iterated a stale module-level ``series`` and
    ignored the ``folder`` argument entirely; query ``folder`` for each
    drive's series list instead.
    """
    for drive in drives:
        for serie in folder.getFolders(drive):
            yield drive, serie
# Materialize the full work list once so the pool can be sized from the real
# item count. (Bug fix: the original computed len(drive) * len(serie) on
# leftover loop variables — the lengths of two strings — which is wrong.)
work_items = list(drive_serie_iter(folder, drives))

# Cap the amount of concurrency; tune this value for your workload.
max_workers = 24
# Bug fix: Pool() rejects 0 processes, so guard the empty-work-list case.
num_workers = min(len(work_items), max_workers) or 1

# Windows needs the initializer to ship the shared config into each worker;
# forked workers on Linux already see the parent's globals.
# (Bug fix: initargs must be a tuple, not None, when there is no initializer.)
initializer = init_serienchecker_process if do_init else None
initargs = (blacklist, apikeyv3, language) if do_init else ()

pool = multiprocessing.Pool(num_workers, initializer=initializer,
                            initargs=initargs)
try:
    # map() blocks until every (drive, serie) pair has been processed and
    # yields the results in order.
    for result in pool.map(serienchecker_worker, work_items):
        print(result)
finally:
    # Bug fix: pool.join() raises ValueError unless the pool has been
    # closed (or terminated) first.
    pool.close()
    pool.join()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.