[英]How to do dynamic creation of per-process queues in Python multiprocessing
我想动态创建多个 Process,其中每个实例都有一个队列,用于接收来自其他实例的消息,并且每个实例还可以创建新的实例。 这样我们最终会得到一个进程网络,其中每个进程都可以向其他任何进程发送消息。
下面的代码基本实现了我想要的功能:它使用 Manager.dict() 来存储队列,以确保更新能够传播到所有进程,并使用 Lock() 来保护对队列字典的写访问。 然而,当添加一个新队列时,它会抛出 "RuntimeError: Queue objects should only be shared between processes through inheritance"。
问题在于,启动时我们并不知道最终需要多少个队列,所以必须动态创建它们。 但是由于队列只能在进程构造时(通过继承)共享,我不知道该怎么做。
我知道一种可能的做法是让 queues 成为全局变量,而不是传递给 __init__ 的托管(Manager)变量;但据我所知,这样做的问题是:向 queues 变量中添加的内容不会传播到其他进程。
编辑我正在研究进化算法。 EA 是一种机器学习技术。 EA 模拟一个“种群”,该种群通过适者生存、交叉和突变而进化。 在并行EA 中,就像这里一样,我们也有群体之间的迁移,对应于进程间通信。 岛屿也可以产生新的岛屿,因此我们需要一种在动态创建的进程之间发送消息的方法。
import random, time
from multiprocessing import Process, Queue, Lock, Manager, current_process
try:
from queue import Empty as EmptyQueueException
except ImportError:
from Queue import Empty as EmptyQueueException
class MyProcess(Process):
    """Process that registers its own inbox queue and spawns further peers.

    Every instance adds one queue to the shared ``queues`` mapping under a
    freshly allocated integer id, then (in ``run``) keeps creating new
    ``MyProcess`` instances and exchanging messages until ten queues exist.

    Args:
        queues: Shared mapping of process id -> inbox queue.  The script
            passes a ``Manager().dict()`` proxy here.
        lock: Lock guarding id allocation and insertion into ``queues``.
    """

    def __init__(self, queues, lock):
        # run() is overridden below, so Process never consults a ``target``.
        # The former ``target=lambda x: self.run(x)`` was dead code, and a
        # lambda cannot be pickled when the "spawn" start method is used.
        super(MyProcess, self).__init__()
        self.queues = queues
        self.lock = lock
        # Allocate the next id and register the inbox under the lock so two
        # concurrently constructed processes cannot claim the same id.
        with self.lock:
            self.id = len(self.queues)
            # NOTE(review): when ``queues`` is a Manager dict this assignment
            # has to pickle the Queue, which is reported to fail with
            # "RuntimeError: Queue objects should only be shared between
            # processes through inheritance".  That limitation is inherent to
            # this design and is not addressed here.
            self.queues[self.id] = Queue()

    def run(self):
        """Spawn peers and exchange messages until ten queues are registered."""
        while len(self.queues) < 10:
            # Make a new process.  It needs the shared mapping as well as the
            # lock (the mapping was previously omitted -> TypeError).
            new = MyProcess(self.queues, self.lock)
            new.start()

            # Send a message to a random process (possibly ourselves).
            dest_key = random.choice(list(self.queues.keys()))
            dest = self.queues[dest_key]
            dest.put("hello to %s from %s" % (dest_key, self.id))

            # Receive everything currently waiting in our own inbox.
            inbox = self.queues[self.id]
            while True:
                try:
                    message = inbox.get(False)  # don't block
                except EmptyQueueException:
                    break
                print("%s received: %s" % (self.id, message))

            # What queues does this process know about?
            known = " ".join(str(key) for key in self.queues.keys())
            print("%d: I know of %s" % (self.id, known))

            time.sleep(1)
if __name__ == "__main__":
    # Construct MyProcess with a Manager dict for storing the queues and a
    # lock to protect write access.  The Manager is bound to a name because
    # its server process must outlive every process that uses the dict.
    manager = Manager()
    root = MyProcess(manager.dict(), Lock())
    root.start()
    # Wait for the root process so this (Manager-owning) process does not
    # start shutting down while the shared dict is still in use.
    root.join()
我不完全确定您的用例实际上是什么。 也许如果您详细说明为什么要让每个进程动态地生成一个具有连接队列的子进程,那么在这种情况下正确的解决方案将会更加清楚。
无论如何,对于现在的问题,似乎没有真正的好方法来动态创建多处理管道或队列。
我认为,如果您愿意在每个进程中生成线程,则可以使用 multiprocessing.connection.Listener/Client 来回通信。 不过我没有采用生成线程的方式,而是使用网络 sockets 和 select 在进程之间进行通信。
动态生成进程并配合网络 sockets 的做法可能仍然不够稳定,这取决于 multiprocessing 在生成/分叉(spawn/fork)新进程时如何清理文件描述符,而且这类方案很可能在 *nix 系统上更容易正常工作。 如果您担心套接字的开销,可以改用更轻量的 unix 域 sockets,但这样会增加在多台工作机器上运行节点的复杂性。
无论如何,这是一个使用网络 sockets 和全局进程列表来完成此任务的示例,因为我无法找到一个让multiprocessing
执行此操作的好方法。
import collections
import multiprocessing
import random
import select
import socket
import time
class MessagePassingProcess(multiprocessing.Process):
    """Process that listens on its own TCP socket and messages random peers.

    Written so that it runs unchanged on Python 2 and Python 3: every print
    is a single pre-formatted argument and the payload is a bytes literal.

    Args:
        id_: Key of this process in ``processes``.
        processes: Shared mapping ``id -> {"id", "address", "listening"}``;
            the script passes a ``Manager().dict()`` proxy.
    """

    def __init__(self, id_, processes):
        self.id = id_
        self.processes = processes
        # Raw payloads received from peers, oldest on the left.
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        """Bind the listening socket, then alternate receiving and sending."""
        print("Running")
        inputs = []
        outputs = []
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            address = self.processes[self.id]["address"]
            print("Process %s binding to %s" % (self.id, address))
            server.bind(address)
            server.listen(5)
            inputs.append(server)
            # Read-modify-write: the entry fetched from the shared mapping is
            # a copy, so it has to be assigned back for peers to see it.
            process = self.processes[self.id]
            process["listening"] = True
            self.processes[self.id] = process
            print("Process %s now listening!(%s)" % (self.id, process))
            while inputs:
                readable, writable, exceptional = select.select(inputs,
                                                                outputs,
                                                                inputs,
                                                                0.1)
                for sock in readable:
                    print("Process %s has a readable scoket: %s" % (self.id,
                                                                    sock))
                    if sock is server:
                        print("Process %s has a readable server scoket: %s"
                              % (self.id, sock))
                        conn, addr = sock.accept()
                        conn.setblocking(0)
                        inputs.append(conn)
                    else:
                        data = sock.recv(1024)
                        if data:
                            self.queue.append(data)
                            print("non server readable socket with data")
                        else:
                            # Empty read: the peer closed the connection.
                            inputs.remove(sock)
                            sock.close()
                            print("non server readable socket with no data")
                for sock in exceptional:
                    print("exception occured on socket %s" % (sock,))
                    # The socket may already have been dropped by the
                    # readable loop above; removing it twice would raise.
                    if sock in inputs:
                        inputs.remove(sock)
                        sock.close()
                self._drain_received()
                self._send_to_random_peer()
                time.sleep(1)
        finally:
            # Release the listening socket and any open connections.
            server.close()
            for sock in inputs:
                sock.close()

    def _drain_received(self):
        """Print and return all queued payloads in arrival (FIFO) order."""
        received = []
        while self.queue:
            # popleft(), not pop(): pop() would report the newest one first.
            data = self.queue.popleft()
            print("Received: %s" % (data,))
            received.append(data)
        return received

    def _send_to_random_peer(self):
        """Send one greeting to a randomly chosen process that is listening."""
        random_id = random.choice(list(self.processes.keys()))
        print("%s Attempting to send message to %s" % (self.id, random_id))
        random_process = self.processes[random_id]
        print("random_process: %s" % (random_process,))
        if not random_process["listening"]:
            return
        random_address = random_process["address"]
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(random_address)
        except socket.error:
            print("%s failed to send to %s" % (self.id, random_id))
        else:
            # sendall() keeps writing until the whole payload is sent;
            # send() may stop after a partial write.
            s.sendall(b"Hello World!")
        finally:
            s.close()
if __name__ == "__main__":
    # Single-argument print(...) calls and range() produce the same output
    # on Python 2 and Python 3.
    print("hostname: %s" % (socket.getfqdn(),))
    print(dir(multiprocessing))
    manager = multiprocessing.Manager()
    processes = manager.dict()
    joinable = []
    # One process per CPU, each with its own localhost port (7000 + n).
    for n in range(multiprocessing.cpu_count()):
        mpp = MessagePassingProcess(n, processes)
        # Register the descriptor before start(): run() reads
        # processes[n]["address"] as soon as the child begins executing.
        processes[n] = {"id": n,
                        "address": ("127.0.0.1", 7000 + n),
                        "listening": False,
                        }
        print("processes[%s] = %s" % (n, processes[n]))
        mpp.start()
        joinable.append(mpp)
    for process in joinable:
        process.join()
经过大量的润色和测试,这可能是对multiprocessing.Process
和/或multiprocessing.Pool
的逻辑扩展,因为如果它在标准库中可用,它看起来确实像人们会使用的东西。 创建一个 DynamicQueue class 也可能是合理的,它使用 sockets 可以被其他队列发现。
无论如何,希望它有所帮助。 如果您找到更好的方法来完成这项工作,请更新。
此代码基于被接受的答案。 它使用 Python 3 编写,因为在 OSX Snow Leopard 上,multiprocessing 的某些用法会出现段错误。
#!/usr/bin/env python3
import collections
from multiprocessing import Process, Manager, Lock, cpu_count
import random
import select
import socket
import time
import pickle
class Message:
    """Test payload sent between processes: a type tag, 3000 filler
    characters and the id of the sending process."""

    def __init__(self, origin):
        filler = "X"
        self.type = "long_msg"
        self.data = filler * 3000
        self.origin = origin

    def __str__(self):
        # "<type> <origin>", with origin rendered as an integer.
        origin_text = "%d" % self.origin
        return " ".join((self.type, origin_text))
class MessagePassingProcess(Process):
    """Process that self-registers, listens on TCP and occasionally spawns peers.

    Args:
        processes: Shared mapping ``id -> descriptor dict``; the script
            passes a ``Manager().dict()`` proxy.
        lock: Lock guarding id allocation and registration in ``processes``.
    """

    def __init__(self, processes, lock):
        self.lock = lock
        self.processes = processes
        # Allocate the id and register under the lock so that concurrently
        # created processes cannot end up with the same id / port.
        with self.lock:
            self.id = len(processes)
            process_dict = {"id": self.id,
                            "address": ("127.0.0.1", 7000 + self.id),
                            "listening": False
                            }
            self.processes[self.id] = process_dict
        print("new process: processes[%s] = %s" % (self.id, processes[self.id]))
        # Complete pickled messages waiting to be processed, oldest first.
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        """Serve incoming connections and send messages until 10 processes exist."""
        print("Running")
        outputs = []
        # Chunks received so far for each open connection.  A message is
        # complete only when the sender closes its end.
        buffers = {}
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        inputs = [server]
        try:
            address = self.processes[self.id]["address"]
            print("Process %s binding to %s" % (self.id, address))
            server.bind(address)
            server.listen(5)
            # Read-modify-write: the entry fetched from the shared mapping is
            # a copy, so changes only take effect once it is assigned back.
            # (Setting "joinable" directly on the fetched copy was lost.)
            process = self.processes[self.id]
            process["joinable"] = True
            process["listening"] = True
            self.processes[self.id] = process
            print("Process %s now listening!(%s)" % (self.id, process))
            while inputs and len(self.processes) < 10:
                readable, writable, exceptional = select.select(inputs,
                                                                outputs,
                                                                inputs,
                                                                0.1)
                # read incoming messages
                for sock in readable:
                    print("Process %s has a readable socket: %s" % (self.id, sock))
                    if sock is server:
                        print("Process %s has a readable server socket: %s" %
                              (self.id, sock))
                        conn, addr = sock.accept()
                        conn.setblocking(0)
                        inputs.append(conn)
                    else:
                        self._read_available(sock, inputs, buffers)
                for sock in exceptional:
                    print("exception occured on socket %s" % (sock))
                    self._drop(sock, inputs, buffers)
                self._drain_queue()
                self._send_to_random_peer()
                # make a new process
                if random.random() < 0.1:
                    mpp = MessagePassingProcess(self.processes, self.lock)
                    mpp.start()
                else:
                    time.sleep(1.0)
        finally:
            # Release the listening socket and any connections still open.
            for sock in inputs:
                sock.close()
        print("process %d finished looping" % self.id)

    def _read_available(self, sock, inputs, buffers):
        """Read what is currently available; queue the message on EOF."""
        chunks = buffers.setdefault(sock, [])
        while True:
            try:
                data = sock.recv(1024)
            except BlockingIOError:
                # Non-blocking socket with nothing more to read right now:
                # keep the partial message and wait for the next select().
                return
            if not data:
                break  # peer closed the connection: message is complete
            chunks.append(data)
        self._drop(sock, inputs, buffers)
        if chunks:
            item = b"".join(chunks)
            self.queue.append(item)
            print("non server readable socket: recvd %d bytes in %d parts"
                  % (len(item), len(chunks)))
        else:
            print("non server readable socket: nothing to read")

    @staticmethod
    def _drop(sock, inputs, buffers):
        """Forget and close ``sock``; safe to call more than once."""
        buffers.pop(sock, None)
        if sock in inputs:
            inputs.remove(sock)
            sock.close()

    def _drain_queue(self):
        """Unpickle, print and return queued messages in arrival order."""
        messages = []
        while self.queue:
            # SECURITY: pickle.loads can execute arbitrary code.  It is used
            # here on data from the network; that is only acceptable while
            # every peer is a trusted local process.
            msg = pickle.loads(self.queue.popleft())
            print("received:" + str(msg))
            messages.append(msg)
        return messages

    def _send_to_random_peer(self):
        """Send one pickled Message to a random process that is listening."""
        random_id = random.choice(list(self.processes.keys()))
        print("%s attempting to send message to %s" % (self.id, random_id))
        random_process = self.processes[random_id]
        if not random_process["listening"]:
            return
        random_address = random_process["address"]
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(random_address)
        except socket.error:
            print("%s failed to send to %s"%(self.id, random_id))
        else:
            item = pickle.dumps(Message(self.id))
            print("sending a total of %d bytes" % len(item))
            s.sendall(item)
        finally:
            # Closing is what tells the receiver the message is complete.
            s.close()
if __name__ == "__main__":
    # Shared state: a Manager dict of process descriptors plus the lock
    # that serialises registration in it.
    manager = Manager()
    processes = manager.dict()
    lock = Lock()

    # Seed the network with a single process; it creates the others.
    mpp = MessagePassingProcess(processes, lock)
    mpp.start()

    # Only the seed process is waited for here; the processes it created
    # subsequently are not joined.
    mpp.join()
标准库提供了 socketserver 模块,可以帮助我们避免手动编写 select() 代码。 在这个版本中,我们在一个单独的线程中启动套接字服务器,这样每个进程都可以在其主循环中进行(好吧,是假装进行)计算。
#!/usr/bin/env python3
# Each Node is an mp.Process. It opens a client-side socket to send a
# message to another Node. Each Node listens using a separate thread
# running a socketserver (so avoiding manual programming of select()),
# which itself starts a new thread to handle each incoming connection.
# The socketserver puts received messages on an mp.Queue, where they
# are picked up by the Node for processing once per loop. This setup
# allows the Node to do computation in its main loop.
import multiprocessing as mp
import threading, random, socket, socketserver, time, pickle, queue
class Message:
    """Test payload: a type tag, a random-length run of filler characters
    (0 to 2000 of them) and the id of the sending node."""

    def __init__(self, origin):
        payload_len = random.randint(0, 2000)
        self.type = "long_message"
        self.data = "X" * payload_len
        self.origin = origin

    def __str__(self):
        # Summarise the message without dumping the filler itself.
        fields = (self.type, len(self.data), self.origin)
        return "Message of type %s, length %d from %d" % fields
class Node(mp.Process):
    """A process that sends pickled Messages to peers and receives them
    through a socketserver feeding an ``mp.Queue``.

    Args:
        nodes: Shared mapping ``id -> {"id", "address", "listening"}``; the
            script passes a ``Manager().dict()`` proxy.
        lock: Lock guarding writes to ``nodes``.

    NOTE(review): ``__init__`` is executed by whichever process constructs
    the Node, so the server thread started there runs in the creator, not in
    the process that later executes ``run()``.  Messages presumably reach
    ``run()`` only because ``self.queue`` is an ``mp.Queue`` -- confirm
    before restructuring.
    """

    def __init__(self, nodes, lock):
        super().__init__()
        # Add this node to the Manager.dict of node descriptors.
        # Write-access is protected by a Lock.
        self.nodes = nodes
        self.lock = lock
        host = "127.0.0.1"
        with self.lock:
            self.id = len(nodes)
            port = 7022 + self.id
            node = {"id": self.id, "address": (host, port), "listening": False}
            self.nodes[self.id] = node
        print("new node: nodes[%s] = %s" % (self.id, nodes[self.id]))

        # Incoming pickled messages, filled by the socketserver handler and
        # emptied by receive().
        self.queue = mp.Queue()

        # This MixIn usage is directly from the python.org socketserver docs.
        class ThreadedTCPServer(socketserver.ThreadingMixIn,
                                socketserver.TCPServer):
            pass

        class HandlerWithQueue(socketserver.BaseRequestHandler):
            # Something of a hack: class variables give the handler access
            # to this Node-specific data.
            handler_queue = self.queue
            handler_id = self.id

            def handle(self):
                # The payload may arrive in several chunks, so read until
                # the sender closes the connection.
                item, recvs = Node._recv_all(self.request)
                if item:
                    # The pickle goes straight onto the queue and is
                    # unpickled when taken off it.
                    print("%d: socketserver received %d bytes in %d recv()s"
                          % (self.handler_id, len(item), recvs))
                    self.handler_queue.put(item)

        self.server = ThreadedTCPServer((host, port), HandlerWithQueue)
        # Daemon thread: it exits together with the process that owns it.
        self.server_thread = threading.Thread(target=self.server.serve_forever,
                                              daemon=True)
        self.server_thread.start()
        print("%d: server loop running in thread %s" %
              (self.id, self.server_thread.name))

        # Now ready to receive.
        with self.lock:
            # Careful: assigning directly to self.nodes[self.id]["listening"]
            # would only change a local copy.  Re-assigning the whole entry
            # is what propagates the new value to other Nodes.
            node = self.nodes[self.id]
            node["listening"] = True
            self.nodes[self.id] = node

    @staticmethod
    def _recv_all(conn, bufsize=4096):
        """Read from ``conn`` until EOF.

        Returns:
            ``(payload, chunks)``: the concatenated bytes and the number of
            non-empty ``recv()`` results they arrived in.
        """
        chunks = []
        while True:
            data = conn.recv(bufsize)
            if not data:
                break
            chunks.append(data)
        return b"".join(chunks), len(chunks)

    def send(self):
        """Send one pickled Message to a random listening node other than self."""
        # Find a destination. All listening nodes are eligible except self.
        dests = [node for node in self.nodes.values()
                 if node["id"] != self.id and node["listening"]]
        if not dests:
            print("%d: no node to send to" % self.id)
            return
        dest = random.choice(dests)
        print("%d: sending to %s" % (self.id, dest["id"]))
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(dest["address"])
        except socket.error:
            print("%s: failed to send to %s" % (self.id, dest["id"]))
        else:
            item = pickle.dumps(Message(self.id))
            s.sendall(item)
        finally:
            # Closing signals end-of-message to the receiving handler.
            s.close()

    def receive(self):
        """Check our queue for incoming messages and print each one."""
        while True:
            try:
                raw = self.queue.get(False)  # don't block
            except queue.Empty:
                break
            # SECURITY: pickle.loads can execute arbitrary code; acceptable
            # only while every sender is a trusted local node.
            message = pickle.loads(raw)
            print("%d: received %s" % (self.id, str(message)))

    def run(self):
        """Send, receive and occasionally spawn until at least 10 Nodes exist."""
        print("%d: in run()" % self.id)
        # Main loop. Loop until at least 10 Nodes exist. Because of
        # parallel processing we might get a few more.
        while len(self.nodes) < 10:
            time.sleep(random.random() * 0.5)  # simulate heavy computation
            self.send()
            time.sleep(random.random() * 0.5)  # simulate heavy computation
            self.receive()
            # maybe make a new node
            if random.random() < 0.1:
                new = Node(self.nodes, self.lock)
                new.start()
        # The original author found that shutting the server down here
        # hangs; the server thread is a daemon and ends with its process.
        print("%d: finished" % self.id)
if __name__ == "__main__":
    # Shared state: a Manager dict of node descriptors plus the lock that
    # protects writes to it.
    manager = mp.Manager()
    nodes = manager.dict()
    lock = mp.Lock()

    # Seed the network with a single node; it creates the others.
    node0 = Node(nodes, lock)
    node0.start()

    # Only the seed node is waited for; nodes created subsequently are not
    # joined here.  The original author reports this works out in practice.
    node0.join()
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.