[英]python multiprocessing/threading cleanup
我有一个python工具,基本上有这种设置:
main process (P1) -> spawns a process (P2) that starts a tcp connection
-> spawns a thread (T1) that starts a loop to receive
messages that are sent from P2 to P1 via a Queue (Q1)
server process (P2) -> spawns two threads (T2 and T3) that start loops to
receive messages that are sent from P1 to P2 via Queues (Q2 and Q3)
我遇到的问题是,当我停止我的程序时(使用Ctrl + C),它不会退出。 服务器进程已经结束,但是主进程只是在那里挂起而且我必须将其杀掉。
线程循环函数看起来都一样:
def _loop(self):
    """Consume items from ``self.Q1`` until told to stop.

    The loop ends when ``self.running`` is false at the top of an
    iteration, or as soon as the ``None`` sentinel is read from the
    queue.  Every other item is passed to ``self._handle_msg``.
    """
    while self.running:
        # get() blocks until an item arrives, so clearing ``running``
        # alone cannot wake this loop; a ``None`` must be queued as well.
        res = self.Q1.get()
        if res is None:
            break
        self._handle_msg(res)
所有线程都作为守护线程启动:
t = Thread(target=self._loop)
t.setDaemon(True)
t.start()
在我的主进程中,我使用atexit来执行清理任务:
atexit.register(self.on_exit)
这些清理任务基本上如下:
1)在P1中将self.running设置为False,并向Q1发送None,使线程T1结束
self.running = False
self.Q1.put(None)
2)通过Q2向P2发送消息,通知该进程程序即将结束
self.Q2.put("stop")
3)在P2中,对“停止”消息做出反应并执行我们在P1中所做的操作
self.running = False
self.Q2.put(None)
self.Q3.put(None)
就是这样,在我的理解中,应该让一切都很好地关闭,但事实并非如此。
P1的主要代码还包含以下无限循环,否则程序将过早结束:
while running:
sleep(1)
也许这与问题有关,但我不明白为什么会这样。
那我做错了什么? 我的设置是否存在重大设计缺陷? 我忘了关闭一些东西吗?
编辑
好的,我修改了我的代码并设法让它在大多数情况下正确关闭。 不幸的是,它仍然时不时地卡住。
我设法写了一个小的可运行代码示例。 要演示发生的情况,只需启动脚本,然后使用Ctrl + C来停止它。 如果在启动工具后立即按Ctrl + C,似乎就会出现这个问题。
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import signal
import sys
import logging
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
    """Endpoint that drains two inbound queues on separate threads.

    Items read from ``q_in`` go to ``_handle_msg`` and items read from
    ``q_binary`` go to ``_handle_binary``; ``_put_msg`` writes to
    ``q_out``.  Both reader loops stop when ``running`` is cleared or a
    ``None`` sentinel is read.
    """

    def __init__(self, q_in, q_out, q_binary):
        """Store the three queues and start both reader threads."""
        self.q_in = q_in
        self.q_out = q_out
        self.q_binary = q_binary
        self.running = True
        # The threads are left non-daemon, so the process cannot exit
        # until both loops have returned.
        t = Thread(target=self._loop)
        t.start()
        t = Thread(target=self._loop_binary)
        t.start()

    def _loop(self):
        """Pass each ``q_in`` item to ``_handle_msg`` until stopped."""
        print("start of loop 2")
        while self.running:
            # Blocking read: only a queued item (e.g. ``None``) wakes it.
            res = self.q_in.get()
            if res is None:
                break
            self._handle_msg(res)
        print("end of loop 2")

    def _loop_binary(self):
        """Pass each ``q_binary`` item to ``_handle_binary`` until stopped."""
        print("start of loop 3")
        while self.running:
            res = self.q_binary.get()
            if res is None:
                break
            self._handle_binary(res)
        print("end of loop 3")

    def _handle_msg(self, msg):
        """React to a message; ``msg[0] == "stop2"`` shuts both loops down."""
        msg_type = msg[0]
        if msg_type == "stop2":
            print("STOP RECEIVED")
            self.running = False
            # One sentinel per inbound queue so a reader blocked in
            # get() wakes up and sees that it should exit.
            self.q_in.put(None)
            self.q_binary.put(None)

    def _put_msg(self, msg):
        """Put ``msg`` on the outbound queue."""
        self.q_out.put(msg)

    def _handle_binary(self, data):
        """Placeholder for binary payload handling; does nothing."""
        pass

    def handle_element(self):
        """Put the fixed message ``["something"]`` on the outbound queue."""
        self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
    """Build a SocketClientProtocol and emit an element every two seconds.

    Keeps going for as long as the protocol's ``running`` flag is set.
    """
    protocol = SocketClientProtocol(q_in, q_out, q_binary)
    while protocol.running:
        sleep(2)
        protocol.handle_element()
class MediatorSender(object):
    """Owns the three queues, the worker process and the ``q_out`` reader.

    ``start()`` creates the queues, launches ``run_twisted`` in a
    separate process and starts a thread that forwards everything read
    from ``q_out`` back onto ``q_in``.  ``stop()`` asks both sides to
    shut down.
    """

    def __init__(self):
        """Create an idle sender; nothing is allocated until ``start()``."""
        self.q_in = None
        self.q_out = None
        self.q_binary = None
        self.p = None
        self.running = False

    def start(self):
        """Create the queues, spawn the process and the reader thread.

        Does nothing when the sender is already running.
        """
        if self.running:
            return
        self.running = True
        self.q_in = Queue()
        self.q_out = Queue()
        self.q_binary = Queue()
        print("!!!!START")
        self.p = Process(target=run_twisted,
                         args=(self.q_in, self.q_out, self.q_binary))
        self.p.start()
        t = Thread(target=self._loop)
        t.start()

    def stop(self):
        """Request shutdown of the reader thread and the worker process.

        Does nothing beyond printing when the sender is not running.
        """
        print("!!!!STOP")
        if not self.running:
            return
        print("STOP2")
        self.running = False
        # Sentinel for our own reader thread, then the stop request for
        # the worker process.
        self.q_out.put(None)
        self.q_in.put(["stop2"])
        try:
            # NOTE: terminate() can run before the worker has consumed
            # "stop2"; the multiprocessing docs warn that terminating a
            # process that is using a queue may corrupt that queue.
            if self.p and self.p.is_alive():
                self.p.terminate()
        except Exception:
            # Best-effort cleanup.  Deliberately not a bare ``except``:
            # SystemExit / KeyboardInterrupt raised by a signal handler
            # while we are in here must still propagate.
            pass

    def _loop(self):
        """Pass each ``q_out`` item to ``_handle_msg`` until stopped."""
        print("start of loop 1")
        while self.running:
            res = self.q_out.get()
            if res is None:
                break
            self._handle_msg(res)
        print("end of loop 1")

    def _handle_msg(self, msg):
        """Forward a received message onto ``q_in``."""
        self._put_msg(msg)

    def _put_msg(self, msg):
        """Put ``msg`` on ``q_in``."""
        self.q_in.put(msg)

    def _put_binary(self, msg):
        """Put ``msg`` on ``q_binary``."""
        self.q_binary.put(msg)

    def send_chunk(self, chunk):
        """Queue one binary chunk on ``q_binary``."""
        self._put_binary(chunk)
# Cleared by the first Ctrl+C; the main loop polls it.
running = True


def signal_handler(signal, frame):
    """SIGINT handler: stop gracefully once, exit outright after that.

    The first call clears the module-level ``running`` flag and calls
    ``ms.stop()``; any later call exits the process with status 0.
    """
    global running
    if not running:
        sys.exit(0)
    running = False
    ms.stop()
if __name__ == "__main__":
    # Route Ctrl+C through signal_handler before any work starts.
    signal.signal(signal.SIGINT, signal_handler)
    ms = MediatorSender()
    ms.start()
    for _ in range(100):
        ms.send_chunk("some chunk of data")
    # Idle until signal_handler clears ``running``.
    while running:
        sleep(1)
也许你应该尝试使用signal.signal捕获由Ctrl + C产生的SIGINT信号,如下所示:
#!/usr/bin/env python
import signal
import sys
def signal_handler(signal, frame):
    """Print a notice and exit the process with status 0.

    Both arguments are ignored; they are whatever the caller passes
    when invoking the handler.
    """
    print('You pressed Ctrl+C!')
    sys.exit(0)
# Register signal_handler for SIGINT.
signal.signal(signal.SIGINT, signal_handler)
print('Press Ctrl+C')
# Sleep until a signal is received.
signal.pause()
代码摘自这里
我认为你通过在子进程上调用p.terminate()
来破坏你的multiprocessing.Queue
。 文档对此有一个警告:
警告:如果在关联进程使用管道或队列时使用此方法,则管道或队列可能会损坏,并可能被其他进程无法使用。 类似地,如果进程已获得锁或信号量等,则终止它可能导致其他进程死锁。
在某些情况下,看起来p在您的MediatorSender._loop方法取到您放入队列、用来通知它退出的哨兵值之前就已经被终止了。
此外,您安装的信号处理程序本意是只在主进程中工作,但实际上父进程和子进程都会收到SIGINT,这意味着signal_handler在两个进程中都会被调用;由于将ms.running设置为False的处理方式存在竞争条件,这可能导致ms.stop被调用两次。
我建议直接利用两个进程都会收到SIGINT这一点,让父进程和子进程各自直接处理KeyboardInterrupt。 这样,每个进程都会干净地关闭自己,而不是由父进程终止子进程。 以下代码演示了这一点,并且在我的测试中从未挂起。 我已经在几个地方简化了你的代码,但在功能上它完全相同:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import logging
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
    """Endpoint that drains two inbound queues on separate threads.

    Items read from ``q_in`` go to ``_handle_msg`` and items read from
    ``q_binary`` go to ``_handle_binary``; ``_put_msg`` writes to
    ``q_out``.  Each reader loop ends when it reads ``None``.
    """

    def __init__(self, q_in, q_out, q_binary):
        """Store the three queues and start both reader threads."""
        self.q_in = q_in
        self.q_out = q_out
        self.q_binary = q_binary
        t = Thread(target=self._loop)
        t.start()
        t = Thread(target=self._loop_binary)
        t.start()

    def _loop(self):
        """Pass each ``q_in`` item to ``_handle_msg`` until ``None``."""
        print("start of loop 2")
        # iter(callable, sentinel) keeps calling get() and stops at None.
        for res in iter(self.q_in.get, None):
            self._handle_msg(res)
        print("end of loop 2")

    def _loop_binary(self):
        """Pass each ``q_binary`` item to ``_handle_binary`` until ``None``."""
        print("start of loop 3")
        for res in iter(self.q_binary.get, None):
            self._handle_binary(res)
        print("end of loop 3")

    def _handle_msg(self, msg):
        """React to a message; ``msg[0] == "stop2"`` ends both loops."""
        msg_type = msg[0]
        if msg_type == "stop2":
            self._send_sentinels()

    def _put_msg(self, msg):
        """Put ``msg`` on the outbound queue."""
        self.q_out.put(msg)

    def _send_sentinels(self):
        """Queue one ``None`` per inbound queue so both readers return."""
        self.q_in.put(None)
        self.q_binary.put(None)

    def stop(self):
        """Announce the stop and end both reader loops."""
        print("STOP RECEIVED")
        self._send_sentinels()

    def _handle_binary(self, data):
        """Placeholder for binary payload handling; does nothing."""
        pass

    def handle_element(self):
        """Put the fixed message ``["something"]`` on the outbound queue."""
        self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
    """Build a SocketClientProtocol and emit an element every two seconds.

    Runs until a KeyboardInterrupt arrives, then calls the protocol's
    ``stop()``.
    """
    protocol = SocketClientProtocol(q_in, q_out, q_binary)
    try:
        while True:
            sleep(2)
            protocol.handle_element()
    except KeyboardInterrupt:
        protocol.stop()
class MediatorSender(object):
    """Owns the three queues, the worker process and the ``q_out`` reader.

    ``start()`` creates the queues, launches ``run_twisted`` in a
    separate process and starts ``self.loop``, a thread that forwards
    everything read from ``q_out`` back onto ``q_in``.  ``stop()`` ends
    that thread.
    """

    def __init__(self):
        """Create an idle sender; nothing is allocated until ``start()``."""
        self.q_in = None
        self.q_out = None
        self.q_binary = None
        self.p = None
        # Set by start(); defined here so the attribute always exists.
        self.loop = None
        self.running = False

    def start(self):
        """Create the queues, spawn the process and the reader thread.

        Does nothing when the sender is already running.
        """
        if self.running:
            return
        self.running = True
        self.q_in = Queue()
        self.q_out = Queue()
        self.q_binary = Queue()
        print("!!!!START")
        self.p = Process(target=run_twisted,
                         args=(self.q_in, self.q_out, self.q_binary))
        self.p.start()
        self.loop = Thread(target=self._loop)
        self.loop.start()

    def stop(self):
        """End the reader thread by queueing its ``None`` sentinel.

        Does nothing beyond printing when the sender is not running.
        """
        print("!!!!STOP")
        if not self.running:
            return
        print("STOP2")
        self.running = False
        self.q_out.put(None)

    def _loop(self):
        """Pass each ``q_out`` item to ``_handle_msg`` until ``None``."""
        print("start of loop 1")
        for res in iter(self.q_out.get, None):
            self._handle_msg(res)
        print("end of loop 1")

    def _handle_msg(self, msg):
        """Forward a received message onto ``q_in``."""
        self._put_msg(msg)

    def _put_msg(self, msg):
        """Put ``msg`` on ``q_in``."""
        self.q_in.put(msg)

    def _put_binary(self, msg):
        """Put ``msg`` on ``q_binary``."""
        self.q_binary.put(msg)

    def send_chunk(self, chunk):
        """Queue one binary chunk on ``q_binary``."""
        self._put_binary(chunk)
if __name__ == "__main__":
    ms = MediatorSender()
    try:
        ms.start()
        for i in range(100):
            ms.send_chunk("some chunk of data")
        # You actually have to join w/ a timeout in a loop on
        # Python 2.7. If you just call join(), SIGINT won't be
        # received by the main process, and the program will
        # hang. This is a bug, and is fixed in Python 3.x.
        # Checking is_alive() also lets the loop end once the thread
        # has finished, instead of spinning on an already-dead thread.
        while ms.loop.is_alive():
            ms.loop.join(9999999)
    except KeyboardInterrupt:
        ms.stop()
编辑:
如果您更喜欢使用信号处理程序而不是捕获KeyboardInterrupt,您只需要确保子进程使用自己的信号处理程序,而不是继承父进程的信号处理程序:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import signal
import logging
from functools import partial
from multiprocessing import Process, Queue
from threading import Thread
from time import sleep
logger = logging.getLogger("mepy-client")
class SocketClientProtocol(object):
    """Endpoint that drains two inbound queues on separate threads.

    Items read from ``q_in`` go to ``_handle_msg`` and items read from
    ``q_binary`` go to ``_handle_binary``; ``_put_msg`` writes to
    ``q_out``.  Each reader loop ends when it reads ``None``, and
    ``running`` is cleared by ``stop()``.
    """

    def __init__(self, q_in, q_out, q_binary):
        """Store the three queues and start both reader threads."""
        self.q_in = q_in
        self.q_out = q_out
        self.q_binary = q_binary
        self.running = True
        t = Thread(target=self._loop)
        t.start()
        t = Thread(target=self._loop_binary)
        t.start()

    def _loop(self):
        """Pass each ``q_in`` item to ``_handle_msg`` until ``None``."""
        print("start of loop 2")
        # iter(callable, sentinel) keeps calling get() and stops at None.
        for res in iter(self.q_in.get, None):
            self._handle_msg(res)
        print("end of loop 2")

    def _loop_binary(self):
        """Pass each ``q_binary`` item to ``_handle_binary`` until ``None``."""
        print("start of loop 3")
        for res in iter(self.q_binary.get, None):
            self._handle_binary(res)
        print("end of loop 3")

    def _handle_msg(self, msg):
        """React to a message; ``msg[0] == "stop2"`` triggers ``stop()``."""
        msg_type = msg[0]
        if msg_type == "stop2":
            # Go through stop() so ``running`` is cleared too; sending
            # only the sentinels would end the reader threads while a
            # caller polling ``running`` kept looping.
            self.stop()

    def _put_msg(self, msg):
        """Put ``msg`` on the outbound queue."""
        self.q_out.put(msg)

    def stop(self):
        """Clear ``running`` and queue a ``None`` for each reader loop."""
        print("STOP RECEIVED")
        self.running = False
        self.q_in.put(None)
        self.q_binary.put(None)

    def _handle_binary(self, data):
        """Placeholder for binary payload handling; does nothing."""
        pass

    def handle_element(self):
        """Put the fixed message ``["something"]`` on the outbound queue."""
        self._put_msg(["something"])
def run_twisted(q_in, q_out, q_binary):
    """Build a SocketClientProtocol and emit an element every two seconds.

    Installs a SIGINT handler bound to the new protocol, then keeps
    going for as long as the protocol's ``running`` flag is set.
    """
    protocol = SocketClientProtocol(q_in, q_out, q_binary)
    # Replace whatever SIGINT handler was in effect with one that is
    # bound to this protocol instance.
    on_sigint = partial(signal_handler_child, protocol)
    signal.signal(signal.SIGINT, on_sigint)
    while protocol.running:
        sleep(2)
        protocol.handle_element()
class MediatorSender(object):
    """Owns the three queues, the worker process and the ``q_out`` reader.

    ``start()`` creates the queues, launches ``run_twisted`` in a
    separate process and starts ``self.loop``, a thread that forwards
    everything read from ``q_out`` back onto ``q_in``.  ``stop()`` ends
    that thread.
    """

    def __init__(self):
        """Create an idle sender; nothing is allocated until ``start()``."""
        self.q_in = None
        self.q_out = None
        self.q_binary = None
        self.p = None
        # Set by start(); defined here so the attribute always exists.
        self.loop = None
        self.running = False

    def start(self):
        """Create the queues, spawn the process and the reader thread.

        Does nothing when the sender is already running.
        """
        if self.running:
            return
        self.running = True
        self.q_in = Queue()
        self.q_out = Queue()
        self.q_binary = Queue()
        print("!!!!START")
        self.p = Process(target=run_twisted,
                         args=(self.q_in, self.q_out, self.q_binary))
        self.p.start()
        self.loop = Thread(target=self._loop)
        self.loop.start()

    def stop(self):
        """End the reader thread by queueing its ``None`` sentinel.

        Does nothing beyond printing when the sender is not running.
        """
        print("!!!!STOP")
        if not self.running:
            return
        print("STOP2")
        self.running = False
        self.q_out.put(None)

    def _loop(self):
        """Pass each ``q_out`` item to ``_handle_msg`` until ``None``."""
        print("start of loop 1")
        for res in iter(self.q_out.get, None):
            self._handle_msg(res)
        print("end of loop 1")

    def _handle_msg(self, msg):
        """Forward a received message onto ``q_in``."""
        self._put_msg(msg)

    def _put_msg(self, msg):
        """Put ``msg`` on ``q_in``."""
        self.q_in.put(msg)

    def _put_binary(self, msg):
        """Put ``msg`` on ``q_binary``."""
        self.q_binary.put(msg)

    def send_chunk(self, chunk):
        """Queue one binary chunk on ``q_binary``."""
        self._put_binary(chunk)
def signal_handler_main(ms, *args):
    """Signal handler that calls ``ms.stop()``.

    ``ms`` is expected to be pre-bound (e.g. with ``functools.partial``);
    the remaining positional arguments are accepted and ignored.
    """
    ms.stop()
def signal_handler_child(s, *args):
    """Signal handler that calls ``s.stop()``.

    ``s`` is expected to be pre-bound (e.g. with ``functools.partial``);
    the remaining positional arguments are accepted and ignored.
    """
    s.stop()
if __name__ == "__main__":
    ms = MediatorSender()
    # Bind ``ms`` into the handler so it needs no module-level global.
    on_sigint = partial(signal_handler_main, ms)
    signal.signal(signal.SIGINT, on_sigint)
    ms.start()
    for _ in range(100):
        ms.send_chunk("some chunk of data")
    # Wait for the reader thread, re-checking after every timed join.
    while ms.loop.is_alive():
        ms.loop.join(9999999)
    print('done main')
如果我使用threading模块,这通常对我有效。 但如果使用multiprocessing模块,它就不起作用。 如果您从终端运行脚本,请尝试像下面这样在后台运行它。
python scriptFoo.py &
运行该过程后,它将输出这样的PID
[1] 23107
每当您需要退出脚本时,只需键入kill和脚本PID即可。
kill 23107
再次按Enter键,它应该杀死所有子进程并输出它。
[1]+ Terminated python scriptFoo.py
据我所知,你无法使用'Ctrl + C'杀死所有子进程
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.