![](/img/trans.png)
[英]RawArray not modified by processes as shared memory for Python multiprocessing
[英]Python multiprocessing with shared RawArray
我希望让多个进程并行读取 numpy 数组的不同行,以加快速度。但是,当我运行以下代码时,第一个执行到 func 的进程会引发错误,就好像 var 已经不在作用域(scope)中一样。为什么会这样?
# Question script: worker processes read rows of a shared RawArray that is
# handed to them through the pool initializer.
import numpy as np
import multiprocessing as mp

num_procs = 16
num_points = 2500000


def init_worker(X):
    """Pool initializer: store the shared array in the module global ``var``."""
    global var
    var = X


def func(proc):
    """Read every element of row ``proc`` from the shared array ``var``."""
    # View the shared buffer as a (num_procs, num_points) array.
    X_np = np.frombuffer(var).reshape((num_procs, num_points))
    for y in range(num_points):
        z = X_np[proc][y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    # Shared array of C doubles, filled through a numpy view of its buffer.
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4, initializer=init_worker, initargs=(X,))
    for proc in range(num_procs):
        # NOTE: this is the line that raises the NameError reported for this
        # script.  ``func(proc)`` is evaluated right here, in the parent
        # process, where init_worker never ran and ``var`` was never
        # assigned.  Submitting the callable and its arguments separately,
        # i.e. pool.apply_async(func, (proc,)), runs it in a worker instead.
        pool.apply_async(func(proc))
    pool.close()
    pool.join()
Traceback (most recent call last):
File "parallel_test.py", line 26, in <module>
pool.apply_async(func(proc))
File "parallel_test.py", line 13, in func
X_np = np.frombuffer(var).reshape((num_procs, num_points))
NameError: global name 'var' is not defined
更新:出于某种原因,如果我使用 Pool.map 而不是带有 Pool.apply_async 的 for 循环,它似乎可以工作。 我不明白为什么。
有什么理由不在顶层作用域(scope)中将 X 声明为 global 吗?这样可以消除 NameError。
# Answer script: keep the shared array in a module-level global ``X``.
import numpy as np
import multiprocessing as mp

num_procs = 16
num_points = 25000000


def init_worker(shared):
    """Pool initializer: bind the shared array to the module global ``X``.

    Going through the initializer makes ``X`` available in every worker
    whatever the process start method is; workers that are not forked do
    not inherit names assigned under the ``__main__`` guard.
    """
    global X
    X = shared


def func(proc):
    """Read every element of row ``proc`` from the shared array ``X``."""
    # View the shared buffer as a (num_procs, num_points) array.
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    row = X_np[proc]
    for y in range(num_points):
        z = row[y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    # Assigning at module level already makes X a global; no ``global``
    # statement is needed at this scope.
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4, initializer=init_worker, initargs=(X,))
    # Pass the callable and its arguments separately.  Writing
    # pool.apply_async(func(proc)) would run func in this (parent) process
    # and submit its return value, so the workers would never do the reads.
    pending = [pool.apply_async(func, (proc,)) for proc in range(num_procs)]
    pool.close()
    pool.join()
    for task in pending:
        # get() re-raises any exception that occurred in the worker, which
        # would otherwise be silently dropped.
        task.get()
当我运行此问题的简化实例时,n=20:
# Reduced instance of the answer script (4 rows x 5 points = 20 values) that
# prints the contents of the shared array after the workers have finished.
import numpy as np
import multiprocessing as mp

num_procs = 4
num_points = 5


def init_worker(shared):
    """Pool initializer: bind the shared array to the module global ``X``.

    Going through the initializer makes ``X`` available in every worker
    whatever the process start method is; workers that are not forked do
    not inherit names assigned under the ``__main__`` guard.
    """
    global X
    X = shared


def func(proc):
    """Read every element of row ``proc`` from the shared array ``X``."""
    # View the shared buffer as a (num_procs, num_points) array.
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    row = X_np[proc]
    for y in range(num_points):
        z = row[y]


if __name__ == '__main__':
    data = np.random.randn(num_procs, num_points)
    # Assigning at module level already makes X a global; no ``global``
    # statement is needed at this scope.
    X = mp.RawArray('d', num_procs*num_points)
    X_np = np.frombuffer(X).reshape((num_procs, num_points))
    np.copyto(X_np, data)
    pool = mp.Pool(processes=4, initializer=init_worker, initargs=(X,))
    # Pass the callable and its arguments separately.  Writing
    # pool.apply_async(func(proc)) would run func in this (parent) process
    # and submit its return value, so the workers would never do the reads.
    pending = [pool.apply_async(func, (proc,)) for proc in range(num_procs)]
    pool.close()
    pool.join()
    for task in pending:
        # get() re-raises any exception that occurred in the worker, which
        # would otherwise be silently dropped.
        task.get()
    # One value per line, in flat (row-major) buffer order.
    print("\n".join(map(str, X)))
我得到以下输出(output):
-0.6346037804619162 1.1005724710066107 0.33458763357165255 0.6409345714971889 0.7124888766851982 0.36760459213332963 0.23593304931386933 -0.8668969562941349 -0.8842756219923469 0.005979036105620422 1.386422154089567 -0.8770988782214508 0.25187448339771057 -0.2473967968471952 -0.4909708883978521 0.5423521489750244 0.018749603867333802 0.035304792504378055 1.3263872668956616 1.0199839603892742
您尚未提供预期输出的样本。这看起来与您预期的结果相似吗?
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.