Python 多进程（multiprocessing）与 pathos 运行缓慢

Question

今天我在运行一些代码，想让它利用我的多核 CPU，于是把原来使用 map 的地方都改成了 pool.map。令人惊讶的是，尽管（据我所知）代码占用了更多的处理能力和内存，它的运行速度反而变慢了。所以我写了下面这个测试，它同时使用了 pathos 和 multiprocessing。

from pathos.pools import ProcessPool
from pathos.pools import ThreadPool
#from pathos.pools import ParallelPool
from pathos.pools import SerialPool
from multiprocessing import Pool

import time

def timeit(method):
    """Decorate *method* so that every call prints its wall-clock duration.

    The printed line has the form ``'<name>' (<args>, <kwargs>) <secs> sec``.
    The wrapped function's return value is passed through unchanged; if
    *method* raises, the exception propagates and nothing is printed.

    Args:
        method: The callable to time.

    Returns:
        A wrapper with the same call signature and metadata as *method*.
    """
    # Imported locally so this decorator does not rely on a module-level
    # ``import functools`` being present in the file.
    import functools

    @functools.wraps(method)  # keep __name__/__doc__ of the timed function
    def timed(*args, **kw):
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can be coarse and may jump if the system clock is adjusted.
        started = time.perf_counter()
        result = method(*args, **kw)
        elapsed = time.perf_counter() - started
        print('%r (%r, %r) %2.2f sec' %
              (method.__name__, args, kw, elapsed))
        return result

    return timed

def times2(x):
    """Return ``2 * x``.

    Used as the per-item workload that the benchmark maps over each pool.
    """
    doubled = 2 * x
    return doubled

@timeit
def test(max, p):
    """Time one fully evaluated ``p.map(times2, range(max))`` call.

    Args:
        max: Number of items to map over (``range(max)``). The name shadows
            the builtin but is kept for backward compatibility with callers.
        p: Any object exposing a ``map(func, iterable)`` method.

    Returns:
        None. The mapped values are discarded; only the elapsed time,
        printed by the ``timeit`` decorator, is of interest.
    """
    outcome = p.map(times2, range(max))
    # A pool whose map() returns a lazy iterator would otherwise be timed
    # without doing any work. NOTE(review): presumably this is why SerialPool
    # shows near-zero timings -- confirm against the installed pathos version.
    if not isinstance(outcome, list):
        list(outcome)  # drain inside the timed region

def main():
    """Benchmark each pool flavour on ``10**0`` through ``10**7`` items.

    For every input size the size is printed, then each pool's label followed
    by the timing line emitted by ``test``, then a separator line.
    """
    ppool = ProcessPool(4)
    tpool = ThreadPool(4)
    # ParallelPool is left out of this run (its import is commented out).
    spool = SerialPool(4)

    # Managing the stdlib pool with ``with`` guarantees its worker processes
    # are terminated when the benchmark finishes or raises.
    with Pool(4) as pool:
        # (label, pool) pairs, in the order the results are reported.
        contenders = (
            ('ThreadPool', tpool),
            ('SerialPool', spool),
            ('Pool', pool),
            ('ProcessPool', ppool),
        )
        for exponent in range(8):
            size = 10 ** exponent  # local renamed from ``max`` (shadowed the builtin)
            print(size)
            for label, worker_pool in contenders:
                print(label)
                test(size, worker_pool)
            print('===============')

# Run the benchmark only when this file is executed as a script, not when it
# is imported.
# NOTE(review): with multiprocessing's "spawn" start method, worker processes
# re-import the main module, so this guard presumably also keeps workers from
# re-running main() -- confirm for the target platform.
if __name__ == '__main__':
    main()

这些是结果

1
ThreadPool
'test' ((1, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((1, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.17 sec
ProcessPool
'test' ((1, <pool ProcessPool(ncpus=4)>), {}) 0.00 sec
===============
10
ThreadPool
'test' ((10, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((10, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((10, <pool ProcessPool(ncpus=4)>), {}) 0.01 sec
===============
100
ThreadPool
'test' ((100, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((100, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((100, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((100, <pool ProcessPool(ncpus=4)>), {}) 0.01 sec
===============
1000
ThreadPool
'test' ((1000, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((1000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((1000, <pool ProcessPool(ncpus=4)>), {}) 0.02 sec
===============
10000
ThreadPool
'test' ((10000, <pool ThreadPool(nthreads=4)>), {}) 0.00 sec
SerialPool
'test' ((10000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.00 sec
ProcessPool
'test' ((10000, <pool ProcessPool(ncpus=4)>), {}) 0.09 sec
===============
100000
ThreadPool
'test' ((100000, <pool ThreadPool(nthreads=4)>), {}) 0.04 sec
SerialPool
'test' ((100000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((100000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.01 sec
ProcessPool
'test' ((100000, <pool ProcessPool(ncpus=4)>), {}) 0.74 sec
===============
1000000
ThreadPool
'test' ((1000000, <pool ThreadPool(nthreads=4)>), {}) 0.42 sec
SerialPool
'test' ((1000000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((1000000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 0.17 sec
ProcessPool
'test' ((1000000, <pool ProcessPool(ncpus=4)>), {}) 7.54 sec
===============
10000000
ThreadPool
'test' ((10000000, <pool ThreadPool(nthreads=4)>), {}) 4.57 sec
SerialPool
'test' ((10000000, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((10000000, <multiprocessing.pool.Pool object at 0x0000011E63D276A0>), {}) 2.25 sec
ProcessPool
'test' ((10000000, <pool ProcessPool(ncpus=4)>), {}) 81.51 sec
===============

正如您所看到的，multiprocessing 通常胜过 ProcessPool，但甚至比 SerialPool 还要慢。我使用的处理器是 i5-2500，pathos 是我今天刚通过 pip 安装的。

>pip freeze
colorama==0.3.9
decorator==4.1.2
dill==0.2.7.1
helper-htmlparse==0.1
htmldom==2.0
lxml==4.0.0
multiprocess==0.70.5
pathos==0.2.1
pox==0.2.3
ppft==1.6.4.7.1
py==1.4.34
pyfs==0.0.8
pyreadline==2.1
pytest==3.2.2
six==1.11.0

为什么会这样？

Answer 1

我想亲自验证一下，看看在一个确实很慢的函数（休眠 1 秒）上，各个池的表现如何。

from pathos.pools import ProcessPool
from pathos.pools import ThreadPool
from pathos.pools import ParallelPool
from pathos.pools import SerialPool
from multiprocessing import Pool
import time

def timeit(method):
    """Decorate *method* so that every call prints its wall-clock duration.

    The printed line has the form ``'<name>' (<args>, <kwargs>) <secs> sec``.
    The wrapped function's return value is passed through unchanged; if
    *method* raises, the exception propagates and nothing is printed.

    Args:
        method: The callable to time.

    Returns:
        A wrapper with the same call signature and metadata as *method*.
    """
    # Imported locally so this decorator does not rely on a module-level
    # ``import functools`` being present in the file.
    import functools

    @functools.wraps(method)  # keep __name__/__doc__ of the timed function
    def timed(*args, **kw):
        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can be coarse and may jump if the system clock is adjusted.
        started = time.perf_counter()
        result = method(*args, **kw)
        elapsed = time.perf_counter() - started
        print('%r (%r, %r) %2.2f sec' %
              (method.__name__, args, kw, elapsed))
        return result
    return timed

def slowfcn(n):
    """Block for one second and return None; *n* is accepted but ignored.

    The import stays inside the function body, as in the original.
    NOTE(review): presumably this keeps the function self-contained when it
    is shipped to pool workers -- confirm before hoisting the import.
    """
    import time as _clock
    _clock.sleep(1.0)

@timeit
def test(n, p):
    """Time one fully evaluated ``p.map(slowfcn, range(n))`` call.

    Args:
        n: Number of ``slowfcn`` invocations to map (``range(n)``).
        p: Any object exposing a ``map(func, iterable)`` method.

    Returns:
        None. The mapped values are discarded; only the elapsed time,
        printed by the ``timeit`` decorator, is of interest.
    """
    outcome = p.map(slowfcn, range(n))
    # A pool whose map() returns a lazy iterator would otherwise be timed
    # without calling slowfcn at all. NOTE(review): presumably this is why
    # SerialPool shows 0.00 sec -- confirm against the installed pathos version.
    if not isinstance(outcome, list):
        list(outcome)  # drain inside the timed region

def main():
    """Compare a plain for-loop against each pool on eight 1-second tasks.

    Prints a label and a timing line for the sequential baseline first, then
    for ThreadPool, ParallelPool, SerialPool, Pool and ProcessPool in turn.
    """
    npool = 4
    ppool = ProcessPool(npool)
    tpool = ThreadPool(npool)
    parapool = ParallelPool(npool)
    spool = SerialPool()
    nloops = 8

    # Managing the stdlib pool with ``with`` guarantees its worker processes
    # are terminated when the benchmark finishes or raises.
    with Pool(npool) as pool:
        # Sequential baseline, timed inline because it does not go through
        # the decorated ``test`` helper.
        print('For Loop')
        started = time.time()
        for i in range(nloops):
            slowfcn(i)
        elapsed = time.time() - started
        print('%r () %2.2f sec' % ('test', elapsed))

        # (label, pool) pairs, in the order the results are reported.
        contenders = (
            ('ThreadPool', tpool),
            ('ParallelPool', parapool),
            ('SerialPool', spool),
            ('Pool', pool),
            ('ProcessPool', ppool),
        )
        for label, worker_pool in contenders:
            print(label)
            test(nloops, worker_pool)


# Run the benchmark only when this file is executed as a script, not when it
# is imported.
# NOTE(review): with multiprocessing's "spawn" start method, worker processes
# re-import the main module, so this guard presumably also keeps workers from
# re-running main() -- confirm for the target platform.
if __name__ == '__main__':
    main()

结果如下：

For Loop
'test' () 8.00 sec
ThreadPool
'test' ((8, <pool ThreadPool(nthreads=4)>), {}) 2.00 sec
ParallelPool
'test' ((8, <pool ParallelPool(ncpus=4, servers=None)>), {}) 8.01 sec
SerialPool
'test' ((8, <pool SerialPool()>), {}) 0.00 sec
Pool
'test' ((8, <multiprocessing.pool.Pool state=RUN pool_size=4>), {}) 2.00 sec
ProcessPool
'test' ((8, <pool ProcessPool(ncpus=4)>), {}) 2.01 sec

因此，虽然 ThreadPool、Pool 和 ProcessPool 用的都是线程而不是并行处理，但看起来 Python 会把这些线程分配到各个 CPU 内核上，所以您确实能获得加速。此外，ParallelPool 需要配置服务器，但我从文档和示例中都没看明白该如何配置。我也不清楚 SerialPool 在这里到底做了什么，不知道该如何解决这个问题。

Answer 2

只有当任务本身足够耗时时，并行化才会带来收益。与多进程/多线程代码所需的通信开销相比，您的任务几乎瞬间就能完成。请换成一个耗时约 1 秒的函数试试，您就会看到效果。另外请记住，在 Python 中由于 GIL 的存在，只有 I/O 密集型（IO-bound）任务才能从多线程中受益；对于 CPU 密集型（CPU-bound）任务，请使用多进程（multiprocessing）。

请参阅Raymond 的演讲。

python多处理，pathos慢

问题描述

2 个解决方案

解决方案1
1 2020-04-09 01:49:33

解决方案2
0 已采纳 2017-09-24 08:34:36

python多处理，pathos慢

问题描述

2 个解决方案

解决方案1 1 2020-04-09 01:49:33

解决方案2 0 已采纳 2017-09-24 08:34:36

解决方案1
1 2020-04-09 01:49:33

解决方案2
0 已采纳 2017-09-24 08:34:36