[英]Multiprocessing starmap_async python
我正在学习在 Python 中使用多进程(multiprocessing),遇到了一个问题。我想统计每个对象(即单词元组)在列表中出现的次数。我给出了两种方案:第一种使用 pool.starmap_async,第二种不使用多进程。
# Sample bigrams (pairs of words) in their original order; the pair
# ('productivity', 'satisfaction') occurs twice.
ngrams = [
    ('review', 'productivity'),
    ('productivity', 'satisfaction'),
    ('satisfaction', 'democratic'),
    ('democratic', 'autocratic'),
    ('autocratic', 'leadership'),
    ('leadership', 'empirical'),
    ('empirical', 'literature'),
    ('literature', 'explore'),
    ('explore', 'organizational_outcome'),
    ('organizational_outcome', 'democratic'),
    ('democratic', 'leadership'),
    ('leadership', 'task##oriented'),
    ('task##oriented', 'group'),
    ('group', 'individual'),
    ('individual', 'member'),
    ('member', 'productivity'),
    ('productivity', 'satisfaction'),
    ('satisfaction', 'receive'),
    ('receive', 'attention'),
    ('attention', 'emphasis'),
]

# The distinct bigrams of `ngrams`, each listed once (order is arbitrary).
ngrams_uniq = [
    ('satisfaction', 'democratic'),
    ('organizational_outcome', 'democratic'),
    ('review', 'productivity'),
    ('democratic', 'leadership'),
    ('member', 'productivity'),
    ('receive', 'attention'),
    ('empirical', 'literature'),
    ('group', 'individual'),
    ('literature', 'explore'),
    ('democratic', 'autocratic'),
    ('autocratic', 'leadership'),
    ('attention', 'emphasis'),
    ('task##oriented', 'group'),
    ('explore', 'organizational_outcome'),
    ('leadership', 'task##oriented'),
    ('satisfaction', 'receive'),
    ('productivity', 'satisfaction'),
    ('leadership', 'empirical'),
    ('individual', 'member'),
]
def count_ngrams(gram, ngrams):
    """Count how many times *gram* occurs in *ngrams*.

    Args:
        gram: The item to look for (here, a tuple of words).
        ngrams: The list that is searched with ``list.count``.

    Returns:
        A ``(gram, count)`` tuple, so a result stays paired with its item
        when many calls are collected into one list.
    """
    return (gram, ngrams.count(gram))
# Compare counting every unique n-gram with a process pool against a plain
# list comprehension, printing the elapsed wall-clock time of each variant.
import multiprocessing as mp
import time

# The guard keeps worker processes from re-running the benchmark when the
# start method re-imports this module (e.g. "spawn").
if __name__ == "__main__":
    ## With pool
    # perf_counter() is used because "%H:%M:%S" timestamps only have
    # one-second resolution, which is too coarse for this workload.
    start = time.perf_counter()
    # The context manager tears the pool down even if a task raises; the
    # results are already collected by .get() before the block exits.
    with mp.Pool(mp.cpu_count()) as pool:
        dict_freq_ngrams = pool.starmap_async(
            count_ngrams, [(gram, ngrams) for gram in ngrams_uniq]
        ).get()
    print(f"with pool:    {time.perf_counter() - start:.6f} s")

    ## Without pool
    start = time.perf_counter()
    dict_freq_ngrams = [count_ngrams(gram, ngrams) for gram in ngrams_uniq]
    print(f"without pool: {time.perf_counter() - start:.6f} s")
当我测量执行时间时,我总是发现第二种方案更快。我不明白为什么会这样……也许我哪里写错了,但我不知道错在哪里。
提前致谢
我认为您的代码没有错误,而是多进程(multiprocessing)把数据复制到新解释器进程的开销超过了并行计算所带来的速度提升;在我的 Surface 上,仅仅启动进程池就需要 0.2 到 0.3 秒。
import time
import multiprocessing as mp
import matplotlib.pyplot as plt
import numpy as np
import copy
# Sample bigrams (pairs of words), with the 20-item sequence repeated 40
# times to give the benchmark a larger starting list.
ngrams = [
    ('review', 'productivity'),
    ('productivity', 'satisfaction'),
    ('satisfaction', 'democratic'),
    ('democratic', 'autocratic'),
    ('autocratic', 'leadership'),
    ('leadership', 'empirical'),
    ('empirical', 'literature'),
    ('literature', 'explore'),
    ('explore', 'organizational_outcome'),
    ('organizational_outcome', 'democratic'),
    ('democratic', 'leadership'),
    ('leadership', 'task##oriented'),
    ('task##oriented', 'group'),
    ('group', 'individual'),
    ('individual', 'member'),
    ('member', 'productivity'),
    ('productivity', 'satisfaction'),
    ('satisfaction', 'receive'),
    ('receive', 'attention'),
    ('attention', 'emphasis'),
] * 40

# The distinct bigrams of `ngrams`, each listed once (order is arbitrary).
ngrams_uniq = [
    ('satisfaction', 'democratic'),
    ('organizational_outcome', 'democratic'),
    ('review', 'productivity'),
    ('democratic', 'leadership'),
    ('member', 'productivity'),
    ('receive', 'attention'),
    ('empirical', 'literature'),
    ('group', 'individual'),
    ('literature', 'explore'),
    ('democratic', 'autocratic'),
    ('autocratic', 'leadership'),
    ('attention', 'emphasis'),
    ('task##oriented', 'group'),
    ('explore', 'organizational_outcome'),
    ('leadership', 'task##oriented'),
    ('satisfaction', 'receive'),
    ('productivity', 'satisfaction'),
    ('leadership', 'empirical'),
    ('individual', 'member'),
]

# Shallow copy of the initial list, kept as a separate list object.
ngrams_copy = list(ngrams)
def count_ngrams(gram, ngrams):
    """Count how many times *gram* occurs in *ngrams*.

    Args:
        gram: The item to look for (here, a tuple of words).
        ngrams: The list that is searched with ``list.count``.

    Returns:
        A ``(gram, count)`` tuple, so a result stays paired with its item
        when many calls are collected into one list.
    """
    return (gram, ngrams.count(gram))
# Benchmark: for 100 rounds, time counting every unique n-gram once with a
# freshly created process pool and once with a plain list comprehension,
# growing the input list after each round, then plot both timing curves.
if __name__ == "__main__":
    # Elapsed seconds per round. Plain lists are used because np.append
    # copies the whole array on every call.
    serial_times = []
    pool_times = []

    for i in range(100):
        # Pool variant: the timed span starts before the pool is created and
        # ends after the `with` block has shut it down.
        # NOTE(review): indentation was lost in the source; stopping the
        # clock after the `with` block is the assumed original nesting.
        t = time.perf_counter()
        with mp.Pool(mp.cpu_count()) as pool:
            res = pool.starmap_async(
                count_ngrams, [(val, ngrams) for val in ngrams_uniq]
            )
            dict_freq_ngrams = res.get()
        pool_times.append(time.perf_counter() - t)
        print(i)  # progress indicator

        # Serial variant: same work in the current process.
        t = time.perf_counter()
        dict_freq_ngrams = [count_ngrams(gram, ngrams) for gram in ngrams_uniq]
        serial_times.append(time.perf_counter() - t)

        # Grow the input by one copy of the initial list for the next round.
        ngrams = ngrams + ngrams_copy

    plt.plot(serial_times, label="without pool")
    plt.plot(pool_times, label="with pool")
    plt.xlabel("round (input grows each round)")
    plt.ylabel("elapsed time [s]")
    plt.legend()
    plt.show()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.