[英]python multiprocessing,pathos slow
今天我運行了一些代碼,想讓它在我的多核 CPU 上運行,所以凡是寫了 map 的地方,我都改成了 pool.map。 令人驚訝的是,儘管代碼佔用了更多的處理能力和內存(據我所知),它的運行速度反而變慢了。 所以我寫了這個測試,它同時使用了 pathos 和 multiprocessing。
from pathos.pools import ProcessPool
from pathos.pools import ThreadPool
#from pathos.pools import ParallelPool
from pathos.pools import SerialPool
from multiprocessing import Pool
import time
def timeit(method):
    """Decorate ``method`` so that every call prints its wall-clock duration.

    The printed line has the form
    ``'<name>' (<positional args>, <keyword args>) <seconds> sec``.

    Args:
        method: The callable to time.

    Returns:
        A wrapper that calls ``method`` with the same arguments, prints the
        elapsed time, and returns ``method``'s result unchanged.
    """
    # Imported locally so the decorator does not require an additional
    # module-level import.
    from functools import wraps

    @wraps(method)  # keep the wrapped function's __name__ and docstring
    def timed(*args, **kw):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted.
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        print ('%r (%r, %r) %2.2f sec' % \
              (method.__name__, args, kw, te-ts))
        return result
    return timed
def times2(x):
    """Return ``2 * x``; a deliberately trivial workload for the benchmark."""
    doubled = 2 * x
    return doubled
@timeit
def test(max, p):
    """Map ``times2`` over ``range(max)`` on pool ``p``; the call is timed.

    Args:
        max: Number of items to map over. The name shadows the builtin but
            is kept so existing callers are unaffected.
        p: A pool object exposing ``map(func, iterable)``.

    Returns:
        None. The mapped values are computed and then discarded.
    """
    results = p.map(times2, range(max))
    # A pool whose map() returns a lazy iterator does no work until the
    # iterator is consumed, so the reported time would measure nothing.
    # Materialize only in that case, so pools that already return a list
    # are not charged for an extra copy.
    if not isinstance(results, list):
        results = list(results)
def main():
    """Time ``test`` on each pool type for input sizes 10**0 through 10**7.

    One pool of each kind is created up front and reused for every size, so
    pool start-up cost is not part of the per-size timings.
    """
    ppool = ProcessPool(4)
    tpool = ThreadPool(4)
    #parapool = ParallelPool(4)
    spool = SerialPool(4)
    pool = Pool(4)
    # NOTE(review): only the multiprocessing pool is cleaned up below; the
    # pathos pools are left as in the original -- confirm which cleanup
    # methods the installed pathos version provides before adding them.
    try:
        for i in range(8):
            # Named ``size`` rather than ``max`` to avoid shadowing the builtin.
            size = 10**i
            print(size)
            print('ThreadPool')
            test(size, tpool)
            #print('ParallelPool')
            #test(size, parapool)
            print('SerialPool')
            test(size, spool)
            print('Pool')
            test(size, pool)
            print('ProcessPool')
            test(size, ppool)
            print('===============')
    finally:
        # Release the multiprocessing worker processes even if a run fails.
        pool.close()
        pool.join()
# Run the benchmark only when executed as a script, not when this module is
# imported (for example by a worker process).
if __name__ == "__main__":
    main()
這些是結果
1
ThreadPool
'test' ((1, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((1, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.17 sec
ProcessPool
'test' ((1, <pool ProcessPool(ncpus=4)>), {}) 0.00 sec
===============
10
ThreadPool
'test' ((10, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((10, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((10, <pool ProcessPool(ncpus=4)>), {}) 0.01 sec
===============
100
ThreadPool
'test' ((100, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((100, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((100, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((100, <pool ProcessPool(ncpus=4)>), {}) 0.01 sec
===============
1000
ThreadPool
'test' ((1000, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((1000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((1000, <pool ProcessPool(ncpus=4)>), {}) 0.02 sec
===============
10000
ThreadPool
'test' ((10000, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((10000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((10000, <pool ProcessPool(ncpus=4)>), {}) 0.09 sec
===============
100000
ThreadPool
'test' ((100000, <pool ThreadPool(nthreads=4)>), {}) 0.04 sec
SerialPool
'test' ((100000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((100000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.01 sec
ProcessPool
'test' ((100000, <pool ProcessPool(ncpus=4)>), {}) 0.74 sec
===============
1000000
ThreadPool
'test' ((1000000, <pool ThreadPool(nthreads=4)>), {}) 0.42 sec
SerialPool
'test' ((1000000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1000000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.17 sec
ProcessPool
'test' ((1000000, <pool ProcessPool(ncpus=4)>), {}) 7.54 sec
===============
10000000
ThreadPool
'test' ((10000000, <pool ThreadPool(nthreads=4)>), {}) 4.57 sec
SerialPool
'test' ((10000000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10000000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 2.25 sec
ProcessPool
'test' ((10000000, <pool ProcessPool(ncpus=4)>), {}) 81.51 sec
===============
正如您所看到的,multiprocessing 的 Pool 通常比 pathos 的 ProcessPool 快,但兩者都比 SerialPool 慢。 我使用的是 i5-2500 處理器,pathos 是今天通過 pip 安裝的。
>pip freeze
colorama==0.3.9
decorator==4.1.2
dill==0.2.7.1
helper-htmlparse==0.1
htmldom==2.0
lxml==4.0.0
multiprocess==0.70.5
pathos==0.2.1
pox==0.2.3
ppft==1.6.4.7.1
py==1.4.34
pyfs==0.0.8
pyreadline==2.1
pytest==3.2.2
six==1.11.0
為什么會這樣?
我想自己驗證一下,看看它在一個真正耗時的函數(睡眠 1 秒)下表現如何。
from pathos.pools import ProcessPool
from pathos.pools import ThreadPool
from pathos.pools import ParallelPool
from pathos.pools import SerialPool
from multiprocessing import Pool
import time
def timeit(method):
    """Decorate ``method`` so that every call prints its wall-clock duration.

    The printed line has the form
    ``'<name>' (<positional args>, <keyword args>) <seconds> sec``.

    Args:
        method: The callable to time.

    Returns:
        A wrapper that calls ``method`` with the same arguments, prints the
        elapsed time, and returns ``method``'s result unchanged.
    """
    # Imported locally so the decorator does not require an additional
    # module-level import.
    from functools import wraps

    @wraps(method)  # keep the wrapped function's __name__ and docstring
    def timed(*args, **kw):
        # perf_counter() is monotonic and high-resolution; time.time() can
        # jump when the system clock is adjusted.
        ts = time.perf_counter()
        result = method(*args, **kw)
        te = time.perf_counter()
        print ('%r (%r, %r) %2.2f sec' % \
              (method.__name__, args, kw, te-ts))
        return result
    return timed
def slowfcn(n, delay=1.0):
    """Simulate a slow task by sleeping.

    Args:
        n: Work-item value supplied by ``map``; it is not used.
        delay: Seconds to sleep. Defaults to 1.0, the previously hard-coded
            value, so existing single-argument calls behave as before.

    Returns:
        None.
    """
    # Kept as a function-scope import, as in the original -- presumably so
    # the function carries its own dependency when run in a worker; confirm
    # before hoisting it to module level.
    from time import sleep
    sleep(delay)
@timeit
def test(n, p):
    """Map ``slowfcn`` over ``range(n)`` on pool ``p``; the call is timed.

    Args:
        n: Number of work items to map over.
        p: A pool object exposing ``map(func, iterable)``.

    Returns:
        None. The mapped values are computed and then discarded.
    """
    results = p.map(slowfcn, range(n))
    # A pool whose map() returns a lazy iterator does no work until the
    # iterator is consumed, so the reported time would measure nothing.
    # Materialize only in that case, so pools that already return a list
    # are not charged for an extra copy.
    if not isinstance(results, list):
        results = list(results)
def main():
    """Time eight ``slowfcn`` calls in a plain loop and on each pool type.

    The plain ``for`` loop is the sequential baseline; each pool is then
    asked to map ``slowfcn`` over the same number of items via ``test``.
    """
    npool = 4
    ppool = ProcessPool(npool)
    tpool = ThreadPool(npool)
    parapool = ParallelPool(npool)
    spool = SerialPool()
    pool = Pool(npool)
    nloops = 8
    # NOTE(review): only the multiprocessing pool is cleaned up below; the
    # pathos pools are left as in the original -- confirm which cleanup
    # methods the installed pathos version provides before adding them.
    try:
        print('For Loop')
        # perf_counter() is monotonic, unlike time.time().
        ts = time.perf_counter()
        for i in range(nloops):
            slowfcn(i)
        te = time.perf_counter()
        print ('%r () %2.2f sec' % ('test', te-ts))
        print('ThreadPool')
        test(nloops, tpool)
        print('ParallelPool')
        test(nloops, parapool)
        print('SerialPool')
        test(nloops, spool)
        print('Pool')
        test(nloops, pool)
        print('ProcessPool')
        test(nloops, ppool)
    finally:
        # Release the multiprocessing worker processes even if a run fails.
        pool.close()
        pool.join()
# Run the benchmark only when executed as a script, not when this module is
# imported (for example by a worker process).
if __name__ == "__main__":
    main()
結果如下:
For Loop
'test' () 8.00 sec
ThreadPool
'test' ((8, <pool ThreadPool(nthreads=4)>), {}) 2.00 sec
ParallelPool
'test' ((8, <pool ParallelPool(ncpus=4, servers=None)>), {}) 8.01 sec
SerialPool
'test' ((8, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((8, <multiprocessing.pool.Pool state=RUN pool_size=4>), {}) 2.00 sec
ProcessPool
'test' ((8, <pool ProcessPool(ncpus=4)>), {}) 2.01 sec
因此,雖然 ThreadPool、Pool 和 ProcessPool 都使用線程而不是並行處理,但看起來 Python 把這些線程分布到了各個 CPU 內核上,因此確實獲得了加速。 此外,ParallelPool 需要配置服務器,但我從文檔和示例中都看不出該如何配置。 我不清楚 SerialPool 在這裡做了什麼,也不知道如何解決這個問題。
只有計算量足夠大的任務才能從並行化中受益。 與多進程/多線程代碼所需的通信開銷相比,您的任務執行得太快了。 試試一個持續 1 秒的函數,您就會看到效果。 另外請記住,在 Python 中由於 GIL 的存在,只有 I/O 密集型(I/O-bound)任務才能從多線程中受益。 對於 CPU 密集型(CPU-bound)任務,請使用多進程(multiprocessing)。
請參閱Raymond 的演講。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.