[英]Python Multiprocessing : AssertionError: can only join a started process error when multiple process are spawned in loop
[英]Python multiprocessing - AssertionError: can only join a child process
我正在尝试python multiprocessing模块,但遇到了一些问题。 我对线程模块非常熟悉,但是我需要确保正在执行的进程并行运行。
这是我要做什么的概述。 请忽略未声明的变量/函数之类的内容,因为我无法完整粘贴代码。
import multiprocessing
import time
def wrap_func_to_run(host, args, output):
    """Process target: run do_something and stash its result in `output`."""
    value = do_something(host, args)
    output.append(value)
def func_to_run(host, args):
    """Synchronous variant: return do_something's result directly to the caller."""
    outcome = do_something(host, args)
    return outcome
def do_work(server, client, server_args, client_args):
    # Run the server workload and then the client workload sequentially
    # in the current process (no parallelism here).
    server_output = func_to_run(server, server_args)
    client_output = func_to_run(client, client_args)
    #handle this output and return a result
    # NOTE(review): `result` is never assigned in this sketch -- the post says
    # undeclared names are intentional placeholders.
    return result
def run_server_client(server, client, server_args, client_args, server_output, client_output):
    """Launch the server and client workloads as two child processes, then wait for both."""
    workers = []
    for host, host_args, sink in ((server, server_args, server_output),
                                  (client, client_args, client_output)):
        proc = multiprocessing.Process(target=wrap_func_to_run,
                                       args=(host, host_args, sink))
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join()
    #handle the output and return some result
def run_in_parallel(server, client):
    #set up commands for first process
    # NOTE(review): both names are bound to the SAME list object here; use two
    # separate lists if server and client output must stay distinct.
    server_output = client_output = []
    server_cmd = "cmd"
    client_cmd = "cmd"
    # NOTE(review): appends made inside the child process will not be visible
    # in this parent -- plain lists are not shared across processes; a
    # multiprocessing.Queue (as in the post's later edit) is needed.
    process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd, server_output, client_output))
    process_one.start()
    #set up second process to run - but this one can run here
    result = do_work(server, client, "some server args", "some client args")
    process_one.join()
    #use outputs above and the result to determine result
    # NOTE(review): `final_result` is never assigned -- placeholder per the post.
    return final_result
def main():
    #grab client
    # NOTE(review): `client()` / `server()` are undeclared placeholders per the
    # post; each call also rebinds the very name it calls.
    client = client()
    #grab server
    server = server()
    return run_in_parallel(server, client)
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
这是我得到的错误:
Error in sys.exitfunc:
Traceback (most recent call last):
File "/usr/lib64/python2.7/atexit.py", line 24, in _run_exitfuncs
func(*targs, **kargs)
File "/usr/lib64/python2.7/multiprocessing/util.py", line 319, in _exit_function
p.join()
File "/usr/lib64/python2.7/multiprocessing/process.py", line 143, in join
assert self._parent_pid == os.getpid(), 'can only join a child process'
AssertionError: can only join a child process
我尝试了许多不同的方法来解决此问题,但我的感觉是我使用此模块的方式出了问题。
编辑:
因此,我创建了一个文件,该文件通过模拟客户端/服务器及其所做的工作来重现此问题-我之前也漏掉了重要的一点,那就是我是在Unix上运行这段代码的。 另一个重要的信息是,在我的实际情况中, do_work
涉及使用os.fork()
。 如果不使用os.fork()
就无法重现该错误,所以我假设问题就在这里。 在我的现实世界中,那部分代码不是我的,所以我将其视为黑匣子(可能是我的错误)。 无论如何,这里是要复制的代码-
#!/usr/bin/python
import multiprocessing
import time
import os
import signal
import sys
class Host():
    """Base class for the simulated hosts; subclasses replace work()."""

    def __init__(self):
        self.name = "host"

    def work(self):
        # Overridden by Server/Client to simulate real work.
        pass
class Server(Host):
    """Simulated server: busy-counts to 10000, printing each value (Python 2)."""
    def __init__(self):
        # Does not call Host.__init__ -- harmless here, it only sets name.
        self.name = "server"
    def work(self):
        x = 0
        for i in range(10000):
            x+=1
            print x
        time.sleep(1)
class Client(Host):
    """Simulated client: busy-counts to 5000, printing each value (Python 2)."""
    def __init__(self):
        # Does not call Host.__init__ -- harmless here, it only sets name.
        self.name = "client"
    def work(self):
        x = 0
        for i in range(5000):
            x+=1
            print x
        time.sleep(1)
def func_to_run(host, args):
    """Drive one host's simulated work; always returns the string "done"."""
    print host.name + " is working"
    host.work()
    print host.name + ": " + args
    return "done"
def do_work(server, client, server_args, client_args):
    """Fork a child for the server work while the client runs in this process.

    NOTE(review): this os.fork() is the source of the question's
    AssertionError -- per the answer below, the forked child inherits the
    multiprocessing atexit handler and, on sys.exit(), tries to join a
    Process it did not start; the child should exit via os._exit() instead.
    """
    print "in do_work"
    server_output = client_output = ""
    child_pid = os.fork()
    if child_pid == 0:
        # Child branch: run the server workload, then exit with its output.
        server_output = func_to_run(server, server_args)
        sys.exit(server_output)
    time.sleep(1)
    client_output = func_to_run(client, client_args)
    # kill and wait for server to finish
    os.kill(child_pid, signal.SIGTERM)
    (pid, status) = os.waitpid(child_pid, 0)
    return (server_output == "done" and client_output =="done")
def run_server_client(server, client, server_args, client_args):
    """Run server and client workloads as two multiprocessing children and wait for both."""
    server_process = multiprocessing.Process(target=func_to_run, args=(server, server_args))
    print "Starting server process"
    server_process.start()
    client_process = multiprocessing.Process(target=func_to_run, args=(client, client_args))
    print "Starting client process"
    client_process.start()
    print "joining processes"
    server_process.join()
    client_process.join()
    print "processes joined and done"
def run_in_parallel(server, client):
    """Spawn run_server_client in a child while do_work runs here; return do_work's result."""
    #set up commands for first process
    server_cmd = "server command for run_server_client"
    client_cmd = "client command for run_server_client"
    process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd))
    print "Starting process one"
    process_one.start()
    #set up second process to run - but this one can run here
    print "About to do work"
    # do_work fork()s internally -- that is what triggers the reported error.
    result = do_work(server, client, "server args from do work", "client args from do work")
    print "Joining process one"
    process_one.join()
    #use outputs above and the result to determine result
    print "Process one has joined"
    return result
def main():
    """Construct the simulated client and server, then run them in parallel."""
    the_client = Client()
    the_server = Server()
    return run_in_parallel(the_server, the_client)
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
如果我在do_work
删除对os.fork()
的使用,我不会收到错误,并且代码的行为将像我以前期望的那样(除了传递输出,我已接受了我的错误/误解)。 我可以将旧代码更改为不使用os.fork(),但我也想知道为什么这会导致此问题,以及是否有可行的解决方案。
编辑2:
我开始研究一种解决方案,该解决方案在接受答案之前忽略了os.fork()。 这是我对可以完成的模拟工作量进行一些调整的结果-
#!/usr/bin/python
import multiprocessing
import time
import os
import signal
import sys
from Queue import Empty
class Host():
    """Base class for simulated hosts; work() is a no-op meant to be overridden."""

    def __init__(self):
        self.name = "host"

    def work(self, w):
        # Subclasses override this to simulate `w` units of work.
        pass
class Server(Host):
    """Simulated server: counts up to w, printing each step (Python 2)."""
    def __init__(self):
        self.name = "server"
    def work(self, w):
        # `w` controls how much simulated work is done.
        x = 0
        for i in range(w):
            x+=1
            print x
        time.sleep(1)
class Client(Host):
    """Simulated client: counts up to w, printing each step (Python 2)."""
    def __init__(self):
        self.name = "client"
    def work(self, w):
        # `w` controls how much simulated work is done.
        x = 0
        for i in range(w):
            x+=1
            print x
        time.sleep(1)
def func_to_run(host, args, w, q):
    """Run one host's work for w steps; report success by putting "ZERO" on q."""
    print host.name + " is working"
    host.work(w)
    print host.name + ": " + args
    # "ZERO" marks success; handle_queue treats any other element as failure.
    q.put("ZERO")
    return "done"
def handle_queue(queue):
    """Drain the queue; return 0 if every element is "ZERO", else 1."""
    done = False
    results = []
    return_val = 0
    while not done:
        #try to grab item from Queue
        tr = None
        try:
            tr = queue.get_nowait()
            print "found element in queue"
            print tr
        except Empty:
            # Queue drained -- only reliable because producers were join()ed
            # before this is called.
            done = True
        if tr is not None:
            results.append(tr)
    for el in results:
        if el != "ZERO":
            return_val = 1
    return return_val
def do_work(server, client, server_args, client_args):
    """Old fork()-based variant -- dead code now (its call site is commented out).

    NOTE(review): stale -- it calls func_to_run with 2 arguments although the
    signature now takes 4 (host, args, w, q), so invoking it would raise
    TypeError.
    """
    print "in do_work"
    server_output = client_output = ""
    child_pid = os.fork()
    if child_pid == 0:
        server_output = func_to_run(server, server_args)
        sys.exit(server_output)
    time.sleep(1)
    client_output = func_to_run(client, client_args)
    # kill and wait for server to finish
    os.kill(child_pid, signal.SIGTERM)
    (pid, status) = os.waitpid(child_pid, 0)
    return (server_output == "done" and client_output =="done")
def run_server_client(server, client, server_args, client_args, w, mq):
    """Run both workloads as children; put "ZERO" on mq if both succeeded."""
    # Children report into a queue local to this pair of processes.
    local_queue = multiprocessing.Queue()
    server_process = multiprocessing.Process(target=func_to_run, args=(server, server_args, w, local_queue))
    print "Starting server process"
    server_process.start()
    client_process = multiprocessing.Process(target=func_to_run, args=(client, client_args, w, local_queue))
    print "Starting client process"
    client_process.start()
    print "joining processes"
    server_process.join()
    client_process.join()
    print "processes joined and done"
    # Propagate overall success (0) up to the master queue.
    if handle_queue(local_queue) == 0:
        mq.put("ZERO")
def run_in_parallel(server, client):
    """Run one server/client pair in a child process and another inline; 0 means success."""
    #set up commands for first process
    master_queue = multiprocessing.Queue()
    server_cmd = "server command for run_server_client"
    client_cmd = "client command for run_server_client"
    process_one = multiprocessing.Process(target=run_server_client, args=(server, client, server_cmd, client_cmd, 400000000, master_queue))
    print "Starting process one"
    process_one.start()
    #set up second process to run - but this one can run here
    print "About to do work"
    #result = do_work(server, client, "server args from do work", "client args from do work")
    run_server_client(server, client, "server args from do work", "client args from do work", 5000, master_queue)
    print "Joining process one"
    process_one.join()
    #use outputs above and the result to determine result
    print "Process one has joined"
    return_val = handle_queue(master_queue)
    print return_val
    return return_val
def main():
    """Entry point: a 0/falsy result from run_in_parallel means success ("passed")."""
    #grab client
    client = Client()
    #grab server
    server = Server()
    val = run_in_parallel(server, client)
    if val:
        print "failed"
    else:
        print "passed"
    return val
# Script entry point: run only when executed directly, not when imported.
if __name__ == "__main__":
    main()
该代码具有一些经过调整的打印输出,目的只是为了确切地了解正在发生的事情。 我使用了multiprocessing.Queue在进程之间存储和共享输出,并返回到要处理的主线程中。 我认为这解决了我的python部分问题,但是我正在处理的代码中仍然存在一些问题。 我唯一能说的是,等效于func_to_run
包括通过ssh发送命令并获取所有输出的err。 出于某些原因,这对于执行时间短的命令非常有效,但对于执行时间/输出大得多的命令则不太好。 我尝试在这里的代码中使用完全不同的工作值来模拟这一点,但是无法重现相似的结果。
编辑3我正在使用的库代码(同样不是我的)对ssh命令使用Popen.wait()
,我只是读到以下内容:
Popen.wait()
等待子进程终止。 设置并返回returncode属性。警告使用stdout = PIPE和/或stderr = PIPE时,这将导致死锁,并且>子进程会向管道生成足够的输出,从而阻塞等待OS管道缓冲区接受更多数据的等待。 使用communication()可以避免这种情况。
我将代码调整为不进行缓冲,仅在收到时打印,一切正常。
我可以将旧代码更改为不使用
os.fork()
但我也想知道为什么这会导致此问题,以及是否有可行的解决方案。
理解问题的关键是确切知道fork()
作用。 CPython文档指出“分叉子进程”。 但这假定您了解C库调用fork()
。
这是glibc的联机帮助页中所说的内容:
fork()
通过复制调用进程来创建一个新进程。 新进程称为子进程,它与调用进程(称为父进程)完全相同,但以下几点除外:...
基本上就好像您采用了程序并以很小的差异制作了程序状态(堆,堆栈,指令指针等)的副本,并使其独立于原始程序执行一样。 当此子进程自然退出时,它将使用exit()
,这将触发由multiprocessing
模块注册的atexit()
处理程序。
您可以采取什么措施来避免这种情况?
os.fork()
:改为使用multiprocessing
,就像您现在正在探索 fork()
,仅在必要时在子代或父代中import multiprocessing
。 _exit()
(CPython文档指出,“注意,标准的退出方式是sys.exit(n)。_exit()通常只应在fork()之后的子进程中使用。”) 在我看来,您一次启动的并行进程太多了。 我不会在run_in_parallel
中再额外启动子进程,而是直接用适当的参数调用run_server_client
,因为它内部会自行启动子进程。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.