[英]How to do dynamic creation of per-process queues in Python multiprocessing
我想動態創建多個 Process 實例,其中每個實例都有一個隊列,用於接收來自其他實例的消息,並且每個實例還可以創建新的實例。 這樣我們最終會得到一個進程網絡,其中每個進程都可以向其他任何進程發送消息。
下面的代碼會做我想做的事:它使用Manager.dict()
來存儲隊列,確保更新被傳播,並使用Lock()
來保護對隊列的寫訪問。 然而,當添加一個新隊列時,它會拋出"RuntimeError: Queue objects should only be shared between processes through inheritance"
。
問題是在啟動時,我們不知道最終需要多少個隊列,所以我們必須動態創建它們。 但由於隊列只能在構造進程時(通過繼承)共享,我不知道該怎么做。
我知道一種可能性是使queues
成為全局變量,而不是傳遞給__init__
的托管變量:據我所知,問題是queues
變量的添加不會傳播到其他進程。
編輯我正在研究進化算法。 EA 是一種機器學習技術。 EA 模擬一個“種群”,該種群通過適者生存、交叉和突變而進化。 在並行EA 中,就像這里一樣,我們也有群體之間的遷移,對應於進程間通信。 島嶼也可以產生新的島嶼,因此我們需要一種在動態創建的進程之間發送消息的方法。
import random, time
from multiprocessing import Process, Queue, Lock, Manager, current_process
try:
from queue import Empty as EmptyQueueException
except ImportError:
from Queue import Empty as EmptyQueueException
class MyProcess(Process):
    """Process that registers an inbox queue for itself in a shared mapping.

    Args:
        queues: Mapping from process id to that process's inbox queue. It is
            shared by every ``MyProcess`` so that any process can look up any
            other process's inbox.
        lock: Lock guarding the "pick the next id and register a queue" step.

    NOTE(review): when ``queues`` is a ``Manager().dict()`` the registration in
    ``__init__`` is the statement that raises ``RuntimeError: Queue objects
    should only be shared between processes through inheritance`` -- that
    failure is the subject of the surrounding question and is deliberately
    left in place here.
    """

    def __init__(self, queues, lock):
        # run() is overridden, so no ``target`` callable is passed to Process.
        super(MyProcess, self).__init__()
        self.queues = queues
        self.lock = lock
        # Acquire the lock and add a new queue for this process. The id is
        # derived from the current size of the mapping, so the read and the
        # insert are kept under the same lock.
        with self.lock:
            self.id = len(self.queues)
            self.queues[self.id] = Queue()

    def run(self):
        """Spawn peers and exchange messages until ten queues are registered."""
        while len(self.queues) < 10:
            # Make a new process; it shares this process's mapping and lock.
            child = MyProcess(self.queues, self.lock)
            child.start()

            # Send a message to a random process.
            dest_key = random.choice(list(self.queues.keys()))
            dest = self.queues[dest_key]
            dest.put("hello to %s from %s" % (dest_key, self.id))

            # Receive every message currently waiting, without blocking. The
            # loop ends only when the inbox is empty, not on a falsy message.
            inbox = self.queues[self.id]
            while True:
                try:
                    message = inbox.get(False)
                except EmptyQueueException:
                    break
                print("%s received: %s" % (self.id, message))

            # What queues does this process know about?
            known = " ".join(str(key) for key in self.queues.keys())
            print("%d: I know of %s" % (self.id, known))
            time.sleep(1)
if __name__ == "__main__":
    # Construct MyProcess with a Manager.dict for storing the queues
    # and a lock to protect write access. Start.
    # The manager is bound to a name so it clearly outlives the processes that
    # use its dict.
    manager = Manager()
    first = MyProcess(manager.dict(), Lock())
    first.start()
    # Wait for the first process before the interpreter starts shutting down.
    # NOTE(review): multiprocessing's exit handling is understood to stop the
    # manager before it joins remaining children -- confirm. This join does not
    # cover processes spawned later by ``first``.
    first.join()
我不完全確定您的用例實際上是什么。 也許如果您詳細說明為什么要讓每個進程動態地生成一個具有連接隊列的子進程,那么在這種情況下正確的解決方案將會更加清楚。
無論如何,對於現在的問題,似乎沒有真正的好方法來動態創建多處理管道或隊列。
我認為,如果您願意在每個進程中生成線程,則可以使用multiprocessing.connection.Listener/Client
來回通信。 不過我沒有生成線程,而是使用網絡 sockets 和 select 在進程之間進行通信。
動態進程生成和網絡 sockets 可能仍然不穩定,這取決於multiprocessing
在生成/分叉新進程時如何清理文件描述符,而且這種方案很可能在 *nix 系列系統上更容易正常工作。 如果您擔心套接字開銷,可以改用更輕量的 unix 域 sockets,但這會增加在多台工作機器上運行節點的復雜性。
無論如何,這是一個使用網絡 sockets 和全局進程列表來完成此任務的示例,因為我無法找到一個讓multiprocessing
執行此操作的好方法。
import collections
import multiprocessing
import random
import select
import socket
import time
class MessagePassingProcess(multiprocessing.Process):
    """Process that exchanges messages with its peers over TCP sockets.

    Args:
        id_: Key of this process in ``processes``.
        processes: Shared mapping from process id to a descriptor dict with
            the keys ``"address"`` (a ``(host, port)`` pair to listen on) and
            ``"listening"`` (set to True by the process once it accepts
            connections).

    The print calls take a single pre-formatted string so that the class runs
    unchanged on Python 2 and Python 3.
    """

    def __init__(self, id_, processes):
        self.id = id_
        self.processes = processes
        # Messages received but not yet reported, oldest on the left.
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        """Listen for peers, report received data, and send one message per loop."""
        print("Running")
        inputs = []
        outputs = []
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            address = self.processes[self.id]["address"]
            print("Process %s binding to %s" % (self.id, address))
            server.bind(address)
            server.listen(5)
            inputs.append(server)
            # Read the descriptor, modify it and store it back under its key.
            process = self.processes[self.id]
            process["listening"] = True
            self.processes[self.id] = process
            print("Process %s now listening!(%s)" % (self.id, process))
            while inputs:
                readable, writable, exceptional = select.select(inputs,
                                                                outputs,
                                                                inputs,
                                                                0.1)
                for sock in readable:
                    self._handle_readable(sock, server, inputs)
                for sock in exceptional:
                    print("exception occured on socket %s" % (sock))
                    # The readable pass above may already have dropped it.
                    if sock in inputs:
                        inputs.remove(sock)
                    sock.close()
                self._drain_queue()
                self._send_to_random_peer()
                time.sleep(1)
        finally:
            # Release the listening socket and any connection still open,
            # whether the loop ended normally or through an exception.
            for sock in inputs + [server]:
                sock.close()

    def _handle_readable(self, sock, server, inputs):
        """Accept a new connection on *server* or read data from a peer."""
        print("Process %s has a readable scoket: %s" % (self.id, sock))
        if sock is server:
            print("Process %s has a readable server scoket: %s" % (self.id,
                                                                    sock))
            conn, addr = sock.accept()
            conn.setblocking(0)
            inputs.append(conn)
            return
        data = sock.recv(1024)
        if data:
            self.queue.append(data)
            print("non server readable socket with data")
        else:
            # An empty read means the peer closed the connection.
            inputs.remove(sock)
            sock.close()
            print("non server readable socket with no data")

    def _drain_queue(self):
        """Print every queued message in the order it was received."""
        while self.queue:
            print("Received: %s" % (self.queue.popleft(),))

    def _send_to_random_peer(self):
        """Send a greeting to one randomly chosen process, if it is listening."""
        random_id = random.choice(list(self.processes.keys()))
        print("%s Attempting to send message to %s" % (self.id, random_id))
        random_process = self.processes[random_id]
        print("random_process: %s" % (random_process,))
        if not random_process["listening"]:
            return
        random_address = random_process["address"]
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(random_address)
        except socket.error:
            print("%s failed to send to %s" % (self.id, random_id))
        else:
            # sendall() keeps writing until the whole message is sent.
            s.sendall(b"Hello World!")
        finally:
            s.close()
if __name__ == "__main__":
    # print() is given one pre-formatted string and range() replaces xrange()
    # so this entry point produces the same output on Python 2 and also runs
    # on Python 3.
    print("hostname: %s" % socket.getfqdn())
    print(dir(multiprocessing))
    manager = multiprocessing.Manager()
    processes = manager.dict()
    joinable = []
    # One process per CPU, each listening on its own localhost port.
    for n in range(multiprocessing.cpu_count()):
        mpp = MessagePassingProcess(n, processes)
        # The descriptor is published before start() because run() reads
        # processes[n]["address"].
        processes[n] = {"id": n,
                        "address": ("127.0.0.1", 7000 + n),
                        "listening": False,
                        }
        print("processes[%s] = %s" % (n, processes[n]))
        mpp.start()
        joinable.append(mpp)
    for process in joinable:
        process.join()
經過大量的潤色和測試,這可能是對multiprocessing.Process
和/或multiprocessing.Pool
的邏輯擴展,因為如果它在標准庫中可用,它看起來確實像人們會使用的東西。 創建一個 DynamicQueue class 也可能是合理的,它使用 sockets 可以被其他隊列發現。
無論如何,希望它有所幫助。 如果您找到更好的方法來完成這項工作,請更新。
此代碼基於被接受的答案。 它使用 Python 3 編寫,因為在 OSX Snow Leopard 上,multiprocessing 的某些用法會導致段錯誤。
#!/usr/bin/env python3
import collections
from multiprocessing import Process, Manager, Lock, cpu_count
import random
import select
import socket
import time
import pickle
class Message:
    """Test payload sent between processes.

    Carries a fixed type tag, 3000 characters of filler data and the id of
    the process that created it.
    """

    def __init__(self, origin):
        self.origin = origin
        self.type = "long_msg"
        self.data = 3000 * "X"

    def __str__(self):
        # The type tag followed by the originating id, separated by a space.
        origin_text = "%d" % self.origin
        return " ".join((self.type, origin_text))
class MessagePassingProcess(Process):
    """Process that sends pickled messages to its peers over TCP sockets.

    Args:
        processes: Shared mapping from process id to a descriptor dict
            (``"id"``, ``"address"``, ``"listening"``). The constructor adds
            this process's own descriptor to it.
        lock: Lock guarding the "pick the next id and register" step.

    Each sender opens a connection, writes one pickled message and closes the
    connection, so the receiver treats end-of-stream as end-of-message.
    """

    def __init__(self, processes, lock):
        self.lock = lock
        self.processes = processes
        # The id is derived from the mapping's size, so the size read and the
        # insert are kept under the same lock.
        with self.lock:
            self.id = len(processes)
            process_dict = {"id": self.id,
                            "address": ("127.0.0.1", 7000 + self.id),
                            "listening": False
                            }
            self.processes[self.id] = process_dict
        print("new process: processes[%s] = %s" % (self.id, processes[self.id]))
        # Complete messages received but not yet unpickled, oldest on the left.
        self.queue = collections.deque()
        super(MessagePassingProcess, self).__init__()

    def run(self):
        """Serve, send and occasionally spawn until ten processes are registered."""
        print("Running")
        server = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        inputs = []
        outputs = []
        # Data received so far on each open connection, keyed by socket.
        pending = {}
        try:
            address = self.processes[self.id]["address"]
            print("Process %s binding to %s" % (self.id, address))
            server.bind(address)
            server.listen(5)
            inputs.append(server)
            # Assigning into self.processes[self.id][...] directly would only
            # change a local copy, so the descriptor is read, modified and
            # stored back under its key.
            process = self.processes[self.id]
            process["joinable"] = True
            process["listening"] = True
            self.processes[self.id] = process
            print("Process %s now listening!(%s)" % (self.id, process))
            while inputs and len(self.processes) < 10:
                readable, writable, exceptional = select.select(inputs,
                                                                outputs,
                                                                inputs,
                                                                0.1)
                # read incoming messages
                for sock in readable:
                    print("Process %s has a readable socket: %s" % (self.id, sock))
                    if sock is server:
                        print("Process %s has a readable server socket: %s" %
                              (self.id, sock))
                        conn, addr = sock.accept()
                        conn.setblocking(0)
                        inputs.append(conn)
                    else:
                        self._receive(sock, inputs, pending)
                for sock in exceptional:
                    print("exception occured on socket %s" % (sock))
                    pending.pop(sock, None)
                    # The readable pass above may already have dropped it.
                    if sock in inputs:
                        inputs.remove(sock)
                    sock.close()
                self._drain_queue()
                self._send_to_random_peer()
                # make a new process
                if random.random() < 0.1:
                    mpp = MessagePassingProcess(self.processes, self.lock)
                    mpp.start()
                else:
                    time.sleep(1.0)
        finally:
            # Release the listening socket and any connection still open,
            # whether the loop ended normally or through an exception.
            for sock in inputs + [server]:
                sock.close()
        print("process %d finished looping" % self.id)

    def _read_available(self, conn, chunks):
        """Append everything currently readable on *conn* to *chunks*.

        Returns:
            True once the peer has closed the connection, False if the
            connection is still open but has no more data right now.
        """
        while True:
            try:
                data = conn.recv(1024)
            except BlockingIOError:
                # Non-blocking socket with nothing more to read for now.
                return False
            if not data:
                return True
            chunks.append(data)

    def _receive(self, sock, inputs, pending):
        """Collect data from *sock*; queue the message once the peer closes."""
        chunks = pending.setdefault(sock, [])
        if not self._read_available(sock, chunks):
            # The sender has not closed yet: the message may be incomplete,
            # so keep the connection and wait for the next select() round.
            return
        item = b"".join(chunks)
        if item:
            self.queue.append(item)
            print("non server readable socket: recvd %d bytes in %d parts"
                  % (len(item), len(chunks)))
        else:
            print("non server readable socket: nothing to read")
        del pending[sock]
        inputs.remove(sock)
        sock.close()

    def _drain_queue(self):
        """Unpickle and print queued messages in the order they arrived."""
        while self.queue:
            # NOTE(review): pickle.loads() executes arbitrary code from its
            # input; acceptable only because peers are trusted local processes.
            msg = pickle.loads(self.queue.popleft())
            print("received:" + str(msg))

    def _send_to_random_peer(self):
        """Send one pickled Message to a randomly chosen listening process."""
        random_id = random.choice(list(self.processes.keys()))
        print("%s attempting to send message to %s" % (self.id, random_id))
        random_process = self.processes[random_id]
        if not random_process["listening"]:
            return
        random_address = random_process["address"]
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(random_address)
        except socket.error:
            print("%s failed to send to %s" % (self.id, random_id))
        else:
            item = pickle.dumps(Message(self.id))
            print("sending a total of %d bytes" % len(item))
            s.sendall(item)
        finally:
            s.close()
if __name__ == "__main__":
    # Shared state for the whole process network: a managed dict of process
    # descriptors and a lock for registering new entries.
    manager = Manager()
    # Seed the network with a single process: it will make more.
    seed = MessagePassingProcess(manager.dict(), Lock())
    seed.start()
    # Only the seed is waited for here; processes created subsequently by
    # the seed or its descendants are not joined.
    seed.join()
標准庫提供了 socketserver,可以幫助我們避免手動編寫 select() 代碼。 在這個版本中,我們在一個單獨的線程中啟動一個套接字服務器,這樣每個進程都可以在其主循環中進行(好吧,是假裝進行)計算。
#!/usr/bin/env python3
# Each Node is an mp.Process. It opens a client-side socket to send a
# message to another Node. Each Node listens using a separate thread
# running a socketserver (so avoiding manual programming of select()),
# which itself starts a new thread to handle each incoming connection.
# The socketserver puts received messages on an mp.Queue, where they
# are picked up by the Node for processing once per loop. This setup
# allows the Node to do computation in its main loop.
import multiprocessing as mp
import threading, random, socket, socketserver, time, pickle, queue
class Message:
    """Test payload sent between nodes.

    Carries a fixed type tag, a random amount (0 to 2000 characters) of
    filler data and the id of the node that created it.
    """

    def __init__(self, origin):
        payload_length = random.randint(0, 2000)
        self.type = "long_message"
        self.data = payload_length * "X"
        self.origin = origin

    def __str__(self):
        # Summarise the message without printing the filler itself.
        details = (self.type, len(self.data), self.origin)
        return "Message of type %s, length %d from %d" % details
class Node(mp.Process):
    """Process that sends pickled messages to peers and receives via socketserver.

    Args:
        nodes: Shared mapping (a Manager.dict in this script) from node id to
            a descriptor dict with the keys ``"id"``, ``"address"`` and
            ``"listening"``. The constructor adds this node's descriptor.
        lock: Lock protecting write access to ``nodes``.

    The socket server is started from run(), not from __init__: __init__
    executes in whichever process constructs the Node, whereas run() executes
    in the Node's own process, which is where the listening thread belongs.
    """

    def __init__(self, nodes, lock):
        super().__init__()

        # Add this node to the Manager.dict of node descriptors.
        # Write-access is protected by a Lock.
        self.nodes = nodes
        self.lock = lock
        with self.lock:
            self.id = len(nodes)
            host = "127.0.0.1"
            port = 7022 + self.id
            node = {"id": self.id, "address": (host, port), "listening": False}
            self.nodes[self.id] = node
        print("new node: nodes[%s] = %s" % (self.id, nodes[self.id]))

        # Inbox filled by the server's handler threads and emptied by receive().
        self.queue = mp.Queue()
        # Both are created by _start_server() once the node is running.
        self.server = None
        self.server_thread = None

    def _start_server(self):
        """Start the threaded TCP server and mark this node as listening."""

        # This MixIn usage is directly from the python.org socketserver docs.
        class ThreadedTCPServer(socketserver.ThreadingMixIn,
                                socketserver.TCPServer):
            # Handler threads must not keep the process alive or delay
            # server_close().
            daemon_threads = True

        class HandlerWithQueue(socketserver.BaseRequestHandler):
            def handle(self):
                # The sender writes one message and closes the connection, so
                # keep reading until end-of-stream; the data may arrive in
                # several chunks.
                chunks = []
                while True:
                    data = self.request.recv(4096)
                    if not data:
                        break
                    chunks.append(data)
                item = b"".join(chunks)
                if item:
                    # Receive a pickle here and put it straight on the
                    # queue. It is unpickled when taken off the queue.
                    print("%d: socketserver received %d bytes in %d recv()s"
                          % (self.server.node_id, len(item), len(chunks)))
                    self.server.node_queue.put(item)

        address = self.nodes[self.id]["address"]
        self.server = ThreadedTCPServer(address, HandlerWithQueue)
        # Node-specific data for the handlers, reached via handler.server.
        # Set before serve_forever() starts, so no handler can run without it.
        self.server.node_id = self.id
        self.server.node_queue = self.queue
        self.server_thread = threading.Thread(target=self.server.serve_forever,
                                              daemon=True)
        self.server_thread.start()
        print("%d: server loop running in thread %s" %
              (self.id, self.server_thread.name))

        # Now ready to receive.
        with self.lock:
            # Careful: assigning directly to self.nodes[self.id]["listening"]
            # would change only a local copy, and the new value would not be
            # propagated to other Nodes by the Manager.dict. The descriptor
            # has to be re-assigned under its key.
            node = self.nodes[self.id]
            node["listening"] = True
            self.nodes[self.id] = node

    def send(self):
        """Send one pickled Message to a random listening node other than self."""
        dests = [node for node in self.nodes.values()
                 if node["id"] != self.id and node["listening"]]
        if not dests:
            print("%d: no node to send to" % self.id)
            return
        dest = random.choice(dests)
        print("%d: sending to %s" % (self.id, dest["id"]))

        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect(dest["address"])
        except socket.error:
            print("%s: failed to send to %s" % (self.id, dest["id"]))
        else:
            item = pickle.dumps(Message(self.id))
            s.sendall(item)
        finally:
            s.close()

    def receive(self):
        """Unpickle and print every message currently waiting on the queue."""
        while True:
            try:
                item = self.queue.get(False)  # don't block
            except queue.Empty:
                break
            # NOTE(review): pickle.loads() executes arbitrary code from its
            # input; acceptable only because peers are trusted local processes.
            message = pickle.loads(item)
            print("%d: received %s" % (self.id, str(message)))

    def run(self):
        """Serve, send, receive and occasionally spawn a new Node."""
        print("%d: in run()" % self.id)
        self._start_server()
        try:
            # Main loop. Loop until at least 10 Nodes exist. Because of
            # parallel processing we might get a few more.
            while len(self.nodes) < 10:
                time.sleep(random.random() * 0.5)  # simulate heavy computation
                self.send()
                time.sleep(random.random() * 0.5)  # simulate heavy computation
                self.receive()
                # maybe make a new node
                if random.random() < 0.1:
                    new = Node(self.nodes, self.lock)
                    new.start()
        finally:
            # serve_forever() is running in a thread of this same process, so
            # shutdown() can stop it; server_close() releases the socket.
            self.server.shutdown()
            self.server.server_close()
        print("%d: finished" % self.id)
if __name__ == "__main__":
    # Shared state for the whole network: a managed dict of node
    # descriptors and a lock protecting writes to it.
    manager = mp.Manager()
    # Make just one node: it will make more.
    first_node = Node(manager.dict(), mp.Lock())
    first_node.start()
    # Only the first node is waited for; nodes created subsequently are
    # not joined here. But everything seems to work out ok.
    first_node.join()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.