Multiprocessing: use tqdm to display a progress bar
To make my code more "pythonic" and faster, I use multiprocessing and a map function, sending it a) the function and b) the range of iterations.
The naive solution, calling tqdm directly on the range (tqdm.tqdm(range(0, 30))), does not work with multiprocessing (as the code below shows).
The progress bar jumps from 0 to 100% immediately (when Python reads the code?) and does not reflect the actual progress of the map function.
How can I display a progress bar that shows which step the map function is actually at?
from multiprocessing import Pool
import tqdm
import time

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    p = Pool(2)
    r = p.map(_foo, tqdm.tqdm(range(0, 30)))
    p.close()
    p.join()
Any help or suggestions are welcome...
Use imap instead of map; it returns an iterator over the processed values.
from multiprocessing import Pool
import tqdm
import time

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    with Pool(2) as p:
        r = list(tqdm.tqdm(p.imap(_foo, range(30)), total=30))
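Not part of the original answer, but worth noting: Pool.imap also accepts a chunksize argument, which helps a lot when the iterable is large and each item is cheap. A minimal sketch, assuming a trivial _square worker:

from multiprocessing import Pool
import tqdm

def _square(n):
    return n * n

if __name__ == '__main__':
    with Pool(2) as p:
        # chunksize batches items sent to each worker; the bar still advances once per result
        r = list(tqdm.tqdm(p.imap(_square, range(10000), chunksize=100), total=10000))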
Sorry for being late, but if you just need a concurrent map, I added this functionality in tqdm>=4.42.0:
from tqdm.contrib.concurrent import process_map  # or thread_map
import time

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    r = process_map(_foo, range(0, 30), max_workers=2)
References: https://tqdm.github.io/docs/contrib.concurrent/ and https://github.com/tqdm/tqdm/blob/master/examples/parallel_bars.py
It supports max_workers and chunksize, and you can also easily switch from process_map to thread_map.
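As a small illustration (not from the original answer), the same call with chunksize, and the drop-in thread_map variant:

from tqdm.contrib.concurrent import process_map, thread_map
import time

def _foo(my_number):
    time.sleep(1)
    return my_number * my_number

if __name__ == '__main__':
    # chunksize controls how many items are handed to a worker at once
    r1 = process_map(_foo, range(0, 30), max_workers=2, chunksize=3)
    # thread_map has the same interface but uses threads instead of processes
    r2 = thread_map(_foo, range(0, 30), max_workers=2)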
Solution found: Be careful! Because of multiprocessing, the time estimates (iterations per loop, total time, etc.) can be unstable, but the progress bar works perfectly.
Note: the context manager for Pool is only available from Python 3.3 onwards.
from multiprocessing import Pool
import time
from tqdm import *

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    with Pool(processes=2) as p:
        max_ = 30
        with tqdm(total=max_) as pbar:
            for i, _ in enumerate(p.imap_unordered(_foo, range(0, max_))):
                pbar.update()
You can use p_tqdm instead: https://github.com/swansonk14/p_tqdm
from p_tqdm import p_map
import time

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    r = p_map(_foo, list(range(0, 30)))
Based on Xavi Martínez's answer, I wrote the function imap_unordered_bar. It can be used in the same way as imap_unordered, the only difference being that a progress bar is displayed.
from multiprocessing import Pool
import time
from tqdm import *

def imap_unordered_bar(func, args, n_processes=2):
    p = Pool(n_processes)
    res_list = []
    with tqdm(total=len(args)) as pbar:
        for i, res in enumerate(p.imap_unordered(func, args)):
            pbar.update()
            res_list.append(res)
    p.close()
    p.join()
    return res_list

def _foo(my_number):
    square = my_number * my_number
    time.sleep(1)
    return square

if __name__ == '__main__':
    result = imap_unordered_bar(_foo, range(5))
import multiprocessing as mp
import tqdm

iterable = ...

def some_func(item):
    # your logic
    ...

if __name__ == '__main__':
    with mp.Pool(mp.cpu_count() - 2) as p:
        results = list(tqdm.tqdm(p.imap(some_func, iterable), total=len(iterable)))
Here is my take for when you need to get results back from the functions running in parallel. This function does a few things (another post of mine explains it further), but the key point is that there is a queue of pending tasks and a queue of completed tasks. As workers finish each task from the pending queue, they add the result to the completed queue. You can wrap the check on the completed queue with a tqdm progress bar. I am not including the implementation of the do_work() function here; it is not relevant, since the message is to monitor the completed-tasks queue and update the progress bar every time a result comes in.
import multiprocessing as mp
import pickle
import psutil
from tqdm import tqdm

SENTINEL = 'STOP'  # end-of-work marker (value assumed here; the author's original post defines its own)

def par_proc(job_list, num_cpus=None, verbose=False):

    # Get the number of cores
    if not num_cpus:
        num_cpus = psutil.cpu_count(logical=False)

    print('* Parallel processing')
    print('* Running on {} cores'.format(num_cpus))

    # Set-up the queues for sending and receiving data to/from the workers
    tasks_pending = mp.Queue()
    tasks_completed = mp.Queue()

    # Gather processes and results here
    processes = []
    results = []

    # Count tasks
    num_tasks = 0

    # Add the tasks to the queue
    for job in job_list:
        for task in job['tasks']:
            expanded_job = {}
            num_tasks = num_tasks + 1
            expanded_job.update({'func': pickle.dumps(job['func'])})
            expanded_job.update({'task': task})
            tasks_pending.put(expanded_job)

    # Set the number of workers here
    num_workers = min(num_cpus, num_tasks)

    # We need as many sentinels as there are worker processes so that ALL processes
    # exit when there is no more work left to be done.
    for c in range(num_workers):
        tasks_pending.put(SENTINEL)

    print('* Number of tasks: {}'.format(num_tasks))

    # Set-up and start the workers
    for c in range(num_workers):
        p = mp.Process(target=do_work, args=(tasks_pending, tasks_completed, verbose))
        p.name = 'worker' + str(c)
        processes.append(p)
        p.start()

    # Gather the results and advance the progress bar by one per completed task
    completed_tasks_counter = 0
    with tqdm(total=num_tasks) as bar:
        while completed_tasks_counter < num_tasks:
            results.append(tasks_completed.get())
            completed_tasks_counter = completed_tasks_counter + 1
            bar.update()

    for p in processes:
        p.join()

    return results
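The do_work() implementation is deliberately left out above. Purely to illustrate the queue protocol it has to follow (pull jobs from the pending queue, push results to the completed queue, exit on the sentinel), a hypothetical worker reusing the SENTINEL defined above might look like this:

import pickle

def do_work(tasks_pending, tasks_completed, verbose=False):
    # Keep pulling jobs until a sentinel is received, then exit
    while True:
        job = tasks_pending.get()
        if job == SENTINEL:
            break
        func = pickle.loads(job['func'])
        result = func(job['task'])
        tasks_completed.put(result)
        if verbose:
            print('Completed task: {}'.format(job['task']))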
For a progress bar with apply_async, we can use the following code, as suggested in:
https://github.com/tqdm/tqdm/issues/484
import time
import random
from multiprocessing import Pool
from tqdm import tqdm

def myfunc(a):
    time.sleep(random.random())
    return a ** 2

pool = Pool(2)
pbar = tqdm(total=100)

def update(*a):
    pbar.update()

for i in range(pbar.total):
    pool.apply_async(myfunc, args=(i,), callback=update)

pool.close()
pool.join()
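The snippet above discards the return values; as a small extension (not part of the linked issue), the callback can also collect them, keeping in mind that they arrive in completion order rather than submission order:

import time
import random
from multiprocessing import Pool
from tqdm import tqdm

def myfunc(a):
    time.sleep(random.random())
    return a ** 2

if __name__ == '__main__':
    results = []
    pbar = tqdm(total=100)

    def collect(result):
        # called in the parent process with each return value
        results.append(result)
        pbar.update()

    pool = Pool(2)
    for i in range(pbar.total):
        pool.apply_async(myfunc, args=(i,), callback=collect)
    pool.close()
    pool.join()
    pbar.close()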
Based on the answer by "user17242583", I created the following function. It should be about as fast as Pool.map, and the results are always ordered. In addition, you can pass as many arguments to your function as you want, not just a single iterable.
from multiprocessing import Pool
from functools import partial
from tqdm import tqdm

def imap_tqdm(function, iterable, processes, chunksize=1, desc=None, disable=False, **kwargs):
    """
    Run a function in parallel with a tqdm progress bar and an arbitrary number of arguments.
    Results are always ordered and the performance should be the same as of Pool.map.
    :param function: The function that should be parallelized.
    :param iterable: The iterable passed to the function.
    :param processes: The number of processes used for the parallelization.
    :param chunksize: The iterable is chopped into chunks of this size and each chunk is submitted to the process pool as a separate task.
    :param desc: The description displayed by tqdm in the progress bar.
    :param disable: Disables the tqdm progress bar.
    :param kwargs: Any additional arguments that should be passed to the function.
    """
    if kwargs:
        function_wrapper = partial(_wrapper, function=function, **kwargs)
    else:
        function_wrapper = partial(_wrapper, function=function)

    results = [None] * len(iterable)
    with Pool(processes=processes) as p:
        with tqdm(desc=desc, total=len(iterable), disable=disable) as pbar:
            for i, result in p.imap_unordered(function_wrapper, enumerate(iterable), chunksize=chunksize):
                results[i] = result
                pbar.update()
    return results

def _wrapper(enum_iterable, function, **kwargs):
    i = enum_iterable[0]
    result = function(enum_iterable[1], **kwargs)
    return i, result
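A usage sketch, assuming a hypothetical worker that takes one extra keyword argument forwarded through kwargs:

import time

def _foo(my_number, offset=0):
    time.sleep(1)
    return my_number * my_number + offset

if __name__ == '__main__':
    # "offset" is passed through **kwargs to every call of _foo
    r = imap_tqdm(_foo, range(30), processes=2, desc='squares', offset=1)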
This approach is simple and it works.
from multiprocessing.pool import ThreadPool
import time
from tqdm import tqdm

def job():
    time.sleep(1)
    pbar.update()

pool = ThreadPool(5)
with tqdm(total=100) as pbar:
    for i in range(100):
        pool.apply_async(job)
    pool.close()
    pool.join()