![](/img/trans.png)
[英]Asynchronous multiprocessing with a worker pool in Python: how to keep going after timeout?
[英]how to keep track of asynchronous results returned from a multiprocessing pool
我試圖將多處理添加到一些代碼中,這些代碼包含我無法修改的函數。 我想將這些函數作為作業異步提交到多處理池。 我正在做的事情很像這里顯示的代碼。 但是,我不確定如何跟蹤結果。 如何知道每個返回的結果對應的是哪一個被提交的函數?
要強調的重點是我無法修改現有函數(其他依賴於它們的東西保持原樣),並且結果可以按照與函數作業應用於池的順序不同的順序返回。
感謝您的任何想法!
編輯:一些嘗試代碼如下:
import multiprocessing
from multiprocessing import Pool
import os
import signal
import time
import inspect
def multiply(multiplicand1=0, multiplicand2=0):
    """Return the product of the two multiplicands (each defaults to 0)."""
    product = multiplicand1 * multiplicand2
    return product
def workFunctionTest(**kwargs):
    """Sleep for three seconds, then return the received keyword arguments.

    The return value is the dict of keyword arguments exactly as collected
    by ``**kwargs``.
    """
    delaySeconds = 3
    time.sleep(delaySeconds)
    return kwargs
def printHR(object):
    """
    Print the specified object in a human readable way.

    A dict is printed as one "key: value" line per item, sorted by item;
    a list or tuple is printed one element per line; anything else is
    printed as-is.
    """
    if isinstance(object, dict):
        # dictionary: one line per (key, value) pair, in sorted order
        for pair in sorted(object.items()):
            print(u'{a1}: {a2}'.format(a1=pair[0], a2=pair[1]))
        return
    if isinstance(object, (list, tuple)):
        # list or tuple: one line per element
        for item in object:
            print(item)
        return
    # other
    print(object)
class Job(object):
    """
    This class describes one unit of work: a function, its keyword
    arguments and a timeout, plus slots for the handle and the result.

    Data attributes set by __init__:
    workFunction                 -- the callable to run
    workFunctionKeywordArguments -- dict of keyword arguments for workFunction
    workFunctionTimeout          -- timeout value; execute() in this file
                                    passes it to resultGetter.get()
    naturalLanguageString        -- stored as given
    classInstance                -- always the name of this class (the
                                    classInstance argument is accepted but
                                    not used)
    resultGetter                 -- stored as given; execute() in this file
                                    overwrites it with the object returned
                                    by pool.apply_async()
    result                       -- stored as given; execute() in this file
                                    overwrites it with the job's result
    """

    def __init__(
        self,
        workFunction=workFunctionTest,
        workFunctionKeywordArguments=None,
        workFunctionTimeout=1,
        naturalLanguageString=None,
        classInstance=None,
        resultGetter=None,
        result=None
    ):
        # Build the default keyword arguments per instance. A dict literal in
        # the signature would be one object shared by every Job created
        # without this argument, so mutating it on one job would silently
        # change all the others.
        if workFunctionKeywordArguments is None:
            workFunctionKeywordArguments = {'testString': "hello world"}
        self.workFunction = workFunction
        self.workFunctionKeywordArguments = workFunctionKeywordArguments
        self.workFunctionTimeout = workFunctionTimeout
        self.naturalLanguageString = naturalLanguageString
        self.classInstance = self.__class__.__name__
        self.resultGetter = resultGetter
        self.result = result

    def description(self):
        """
        This method returns all data attributes as a single string of
        "name:value " entries (each followed by a space), sorted by name.
        """
        return "".join(
            "{a1}:{a2} ".format(a1=key, a2=value)
            for key, value in sorted(vars(self).items())
        )

    def printout(self):
        """
        This method prints a dictionary of all data attributes.
        """
        printHR(vars(self))
class JobGroup(object):
    """
    Container for jobs. The data attribute jobs holds the list of job
    objects that belong to the group.
    """

    def __init__(
        self,
        jobs=None,
        naturalLanguageString="null",
        classInstance=None,
        result=None
    ):
        # The classInstance argument is accepted but not used: the attribute
        # is always set to the name of this class.
        self.classInstance = type(self).__name__
        self.jobs = jobs
        self.naturalLanguageString = naturalLanguageString
        self.result = result

    def description(self):
        """
        Return every data attribute as a "name:value " entry, sorted by
        name and concatenated into one string.
        """
        entries = []
        for name, content in sorted(vars(self).items()):
            entries.append("{a1}:{a2} ".format(a1=name, a2=content))
        return "".join(entries)

    def printout(self):
        """
        Print a dictionary of all data attributes.
        """
        attributes = vars(self)
        printHR(attributes)
def initialise_processes():
    """
    Set the SIGINT handler of the calling process to "ignore".

    execute() in this file passes this function to multiprocessing.Pool as
    the worker initialiser.
    """
    ignoreHandler = signal.SIG_IGN
    signal.signal(signal.SIGINT, ignoreHandler)
def execute(
    jobObject=None,
    numberOfProcesses=multiprocessing.cpu_count()
):
    """
    Run a Job or a JobGroup in a freshly created process pool and return
    the outcome.

    jobObject         -- a Job or a JobGroup; anything else is reported as
                         invalid and None is returned
    numberOfProcesses -- number of worker processes in the pool (the default
                         is the CPU count, evaluated once at definition time)

    For a Job, the result is stored in job.result and returned.
    For a JobGroup, each job's result is stored in its job.result, the list
    of results (in the order of jobObject.jobs) is stored in
    jobObject.results and returned.

    A timeout while waiting for a job is reported and the wait is retried,
    so this function blocks until every submitted job has produced a result.
    An exception raised by a work function propagates out of this function.
    The pool is terminated and joined on every exit path.
    """
    # Determine the current function name.
    functionName = str(inspect.stack()[0][3])

    def getResults(job):
        """
        Wait for the result of a submitted job, reporting each time the
        job's timeout elapses, and return the result once available.
        """
        # Determine the current function name.
        functionName = str(inspect.stack()[0][3])
        while True:
            try:
                return job.resultGetter.get(job.workFunctionTimeout)
            except multiprocessing.TimeoutError:
                # Keep going after a timeout: report it and wait again.
                print("{a1}: subprocess timeout for job {a2}".format(a1=functionName, a2=job.description()))

    # Create a process pool.
    pool1 = multiprocessing.Pool(numberOfProcesses, initialise_processes)
    try:
        print("{a1}: pool {a2} of {a3} processes created".format(a1=functionName, a2=str(pool1), a3=str(numberOfProcesses)))
        # Unpack the input job object and submit it to the process pool.
        print("{a1}: unpacking and applying job object {a2} to pool...".format(a1=functionName, a2=jobObject))
        if isinstance(jobObject, Job):
            # If the input job object is a job, apply it to the pool with its associated timeout specification.
            # Return the single result.
            job = jobObject
            print("{a1}: job submitted to pool: {a2}".format(a1=functionName, a2=job.description()))
            # Apply the job to the pool, saving the object pool.ApplyResult to the job object.
            job.resultGetter = pool1.apply_async(
                func=job.workFunction,
                kwds=job.workFunctionKeywordArguments
            )
            # Get results.
            # Acquire the job result with respect to the specified job timeout and apply this result to the job data attribute result.
            print("{a1}: getting results for job...".format(a1=functionName))
            job.result = getResults(job)
            print("{a1}: job completed: {a2}".format(a1=functionName, a2=job.description()))
            print("{a1}: job result: {a2}".format(a1=functionName, a2=job.result))
            # Return the job result from execute.
            return job.result
        elif isinstance(jobObject, JobGroup):
            # If the input job object is a job group, cycle through each job and apply it to the pool with its associated timeout specification.
            for job in jobObject.jobs:
                print("{a1}: job submitted to pool: {a2}".format(a1=functionName, a2=job.description()))
                # Apply the job to the pool, saving the object pool.ApplyResult to the job object.
                job.resultGetter = pool1.apply_async(
                    func=job.workFunction,
                    kwds=job.workFunctionKeywordArguments
                )
            # Get results.
            # Cycle through each job and append the result for the job to a list of results.
            results = []
            for job in jobObject.jobs:
                # Acquire the job result with respect to the specified job timeout and apply this result to the job data attribute result.
                print("{a1}: getting results for job...".format(a1=functionName))
                job.result = getResults(job)
                print("{a1}: job completed: {a2}".format(a1=functionName, a2=job.description()))
                # Collate the results.
                results.append(job.result)
            # Apply the list of results to the job group data attribute results.
            jobObject.results = results
            print("{a1}: job group results: {a2}".format(a1=functionName, a2=jobObject.results))
            # Return the job result list from execute.
            return jobObject.results
        else:
            # invalid input object
            print("{a1}: invalid job object {a2}".format(a1=functionName, a2=jobObject))
            return None
    finally:
        # Shut the pool down on every exit path (normal return, invalid
        # input, or an exception), so worker processes are never leaked.
        pool1.terminate()
        pool1.join()
def main():
    """
    Interactive demonstration: build two jobs, group them and execute the
    group, then execute a third job whose timeout is shorter than its run
    time. Waits for Enter (raw_input) between the stages.
    """
    rule = '-' * 80
    print(rule)
    print("MULTIPROCESSING SYSTEM DEMONSTRATION\n")

    # First job: the sleeping test function with a 4 second timeout.
    print("# creating a job...\n")
    firstJob = Job(
        workFunction=workFunctionTest,
        workFunctionKeywordArguments={'testString': "hello world"},
        workFunctionTimeout=4
    )
    print("- printout of new job object:")
    firstJob.printout()
    print("\n- printout of new job object in logging format:")
    print(firstJob.description())

    # Second job: a multiplication with a 6 second timeout.
    print("\n# creating another job...\n")
    secondJob = Job(
        workFunction=multiply,
        workFunctionKeywordArguments={'multiplicand1': 2, 'multiplicand2': 3},
        workFunctionTimeout=6
    )
    print("- printout of new job object:")
    secondJob.printout()
    print("\n- printout of new job object in logging format:")
    print(secondJob.description())

    # Group both jobs.
    print("\n# creating a job group (of jobs 1 and 2)...\n")
    group = JobGroup(
        jobs=[firstJob, secondJob],
    )
    print("- printout of new job group object:")
    group.printout()
    print("\n- printout of new job group object in logging format:")
    print(group.description())

    # Submit the job group.
    print("\nready to submit job group")
    raw_input("\nPress Enter to continue...\n")
    execute(group)
    raw_input("\nNote the results printed above. Press Enter to continue the demonstration.\n")

    # Demonstrate timeout: the work function sleeps longer than the timeout.
    print("\n # creating a new job in order to demonstrate timeout functionality...\n")
    timeoutJob = Job(
        workFunction=workFunctionTest,
        workFunctionKeywordArguments={'testString': "hello world"},
        workFunctionTimeout=1
    )
    print("- printout of new job object:")
    timeoutJob.printout()
    print("\n- printout of new job object in logging format:")
    print(timeoutJob.description())
    print("\nNote the timeout specification of only 1 second.")

    # Submit the job.
    print("\nready to submit job")
    raw_input("\nPress Enter to continue...\n")
    execute(timeoutJob)
    raw_input("\nNote the recognition of timeouts printed above. This concludes the demonstration.")
    print(rule)
# Run the demonstration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
編輯:由於以下原因,此問題已被暫停[暫停]:
“要求代碼的問題必須表明對正在解決的問題的最小理解。包括嘗試的解決方案,為什么它們不起作用,以及預期的結果。另請參閱: Stack Overflow問題清單 ”
這個問題不是要求代碼; 它要求思想,一般指導。 對所考慮問題的最小理解得到了證明(注意正確使用術語“多處理”,“池”和“異步”並注意對先前代碼的引用 )。 關於嘗試的解決方案,我承認在解決方案上的嘗試努力將是有益的。 我現在已經添加了這樣的代碼。 我希望我已經解決了導致[暫停]狀態的問題。
沒有看到實際的代碼,我只能回答一般性問題。 但有兩種一般的解決方案。
首先,不要使用callback而忽略AsyncResult,而是將這些AsyncResult存儲在某種集合中。 然后你就可以使用那個集合了。 例如,如果您希望能夠以函數作為鍵來查找該函數的結果,只需創建一個以函數為鍵的dict:
def in_parallel(funcs):
    """Call every function in *funcs* (with no arguments) in a worker pool.

    The AsyncResult of each submission is kept under the function that
    produced it, so the returned dict maps each function to its return
    value regardless of completion order.
    """
    pool = mp.Pool()
    pending = dict((func, pool.apply_async(func)) for func in funcs)
    # No more submissions; wait for the workers to finish.
    pool.close()
    pool.join()
    finished = {}
    for func, handle in pending.items():
        finished[func] = handle.get()
    return finished
或者,您可以更改回調函數以按鍵將結果存儲在集合中。 例如:
def in_parallel(funcs):
    """Call every function in *funcs* (with no arguments) in a worker pool.

    Each submission gets its own callback that files the result under the
    function that produced it; the returned dict maps function to result.
    """
    collected = {}

    def recorder_for(key):
        # Bind the key now, so each callback stores under its own function
        # rather than under whatever the loop variable holds later.
        def record(value):
            collected[key] = value
        return record

    pool = mp.Pool()
    for func in funcs:
        pool.apply_async(func, callback=recorder_for(func))
    pool.close()
    pool.join()
    return collected
我這里使用函數本身作為鍵。 但如果你想改用索引,也同樣容易。 你手上的任何值,都可以用作鍵。
同時,您鏈接的示例實際上只是對一堆參數調用同一個函數,等待所有調用完成,並將結果保存為一個任意順序的可迭代對象。 這正是imap_unordered所做的事情,而且簡單得多。 你可以用下面的代碼替換鏈接代碼中那一整套復雜的東西:
# Apply foo_pool to each of 0..9 in a worker pool and collect the results
# in whatever order they are yielded.
pool = mp.Pool()
results = list(pool.imap_unordered(func=foo_pool, iterable=range(10)))
# No more work will be submitted; wait for the workers to exit.
pool.close()
pool.join()
然后,如果您希望結果按原始順序而不是按任意順序排列,則可以切換到imap或map。 所以:
# Apply foo_pool to each of 0..9 in a worker pool; map returns the results
# as a list in the same order as the inputs.
pool = mp.Pool()
results = pool.map(func=foo_pool, iterable=range(10))
# No more work will be submitted; wait for the workers to exit.
pool.close()
pool.join()
如果你需要類似的功能,但它過於復雜而無法套入map范例,那么concurrent.futures可能會比multiprocessing讓你更輕松。 如果您使用的是Python 2.x,則必須安裝backport。 不過這樣一來,你就可以做到一些用AsyncResult或callback(或map)很難做到的事情,比如將一大堆future組合成一個大的future。 請參閱鏈接文檔中的示例。
最后一點:
需要強調的重點是我無法修改現有函數......
如果無法修改函數,則總是可以將其包裝起來。 例如,假設我有一個返回數字平方的函數,但我想異步地構建一個將數字映射到其平方的dict,所以我需要把原始數字作為結果的一部分。 這很容易:
def number_and_square(x):
    """Return the pair ``(x, square(x))``.

    Wrapping ``square`` this way carries the input along with the result,
    so the caller can tell which input a returned value belongs to.
    """
    squared = square(x)
    return (x, squared)
現在,我只需使用apply_async(number_and_square)而不是apply_async(square),就能得到我想要的結果。
我沒有在上面的例子中這樣做,因為在第一種情況下,我在調用端把鍵存儲在集合中,在第二種情況下,我把鍵綁定到回調函數中。 但是把鍵綁定到函數外層的包裝器,和這兩種做法一樣簡單,並且在這兩者都不合適時可能正合適。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.