I need to use a pool to asynchronously parse results coming from an extraction method and send those results to a write queue.
I have tried this, but it seems to just run iteratively, one process after the other:
results = []
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    res = process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)
    results.append(res)
for result in results:
    result.wait()
process_pool.close()
process_pool.join()
I have also tried just waiting on each result, but that does the same thing as the above:
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    res = process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)
    res.wait()
process_pool.close()
process_pool.join()
I also tried just scheduling the tasks and letting the pool block on its own when it runs out of free workers:
process_pool = Pool(processes=30, maxtasksperchild=1)
while True:
    filepath = read_queue.get(True)
    if filepath is None:
        break
    process_pool.apply_async(func=process.run, args=(filepath, final_path), callback=write_queue.put)
process_pool.close()
process_pool.join()
This doesn't work either: it just runs through the loop over and over without actually executing any of the functions, and I'm not sure why. It seems I have to do something with the AsyncResult for the pool to actually schedule the task.
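To test that assumption in isolation, here's a minimal standalone sketch of my own (the worker count, task count, and sleep are arbitrary, not from my real pipeline). In my understanding, close() plus join() should be enough on their own; if this prints 100, the tasks ran even though every AsyncResult was thrown away.

# hypothetical standalone test, separate from the real pipeline
import time
from multiprocessing import Pool


def work(n):
    time.sleep(0.1)  # simulate parsing
    return n


if __name__ == '__main__':
    completed = []
    pool = Pool(processes=4, maxtasksperchild=1)
    for i in range(100):
        # the AsyncResult is deliberately discarded; the callback
        # runs in the parent process, so a plain list is fine here
        pool.apply_async(func=work, args=(i,), callback=completed.append)
    pool.close()
    pool.join()  # by the time join() returns, every callback has fired
    print(len(completed))  # 100 means the tasks ran without .get()/.wait()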
However, I can't seem to get it to work asynchronously correctly; it only works iteratively, because I have to do something with the result (.get(), .wait(), whatever) to actually get the task to schedule properly. I need it to work like this:
# write.py
import bz2
from multiprocessing import Process, Queue
from pathlib import Path
from queue import Empty


def write(p_list):
    outfile = Path('outfile.txt.bz2')
    for data in p_list:
        # append if the archive already exists, otherwise create it
        mode = 'ab' if outfile.exists() else 'wb'
        with bz2.open(filename=outfile, mode=mode, compresslevel=9) as output:
            output.write((str(data) + '\n').encode('utf-8'))
    print('JSON files written', flush=True)


class Write(Process):
    def __init__(self, write_queue: Queue):
        Process.__init__(self)
        self.write_queue = write_queue

    def run(self):
        while True:
            try:
                p_list = self.write_queue.get(True, 900)
            except Empty:
                continue
            if p_list is None:  # sentinel: no more results are coming
                break
            write(p_list)
-
# process.py
import time

json_list = []


def parse(data: int):
    global json_list
    time.sleep(.1)  # simulate parsing the JSON
    json_list.append(data)


def read(data: int):
    time.sleep(.1)  # simulate reading the file
    parse(data)


def run(data: int):
    global json_list
    json_list = []  # fresh list for every task
    read(data)
    return json_list
-
# main.py
from multiprocessing import Pool, Queue

import process
from write import Write

if __name__ == '__main__':
    read_queue = Queue()
    write_queue = Queue()

    write = Write(write_queue=write_queue)
    write.daemon = True
    write.start()

    for i in range(0, 1000000):
        read_queue.put(i)
    read_queue.put(None)  # sentinel: no more input

    process_pool = Pool(processes=30, maxtasksperchild=1)
    while True:
        data = read_queue.get(True)
        if data is None:
            break
        res = process_pool.apply_async(func=process.run, args=(data,), callback=write_queue.put)
    process_pool.close()
    process_pool.join()
    # only stop the writer once the pool has drained, so no callbacks are lost
    write_queue.put(None)
    write.join()
    print('process done')
So, the problem is that there is no problem. I'm just stupid. If you define a max tasks per child of 1, the tasks get scheduled very quickly, and since apply_async returns immediately it will look like nothing is happening (or maybe I'm the only one who thought that).
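To see why it looks that way, here's a small timing sketch of my own (worker counts and sleeps are illustrative): apply_async only enqueues work and returns immediately, so the submission loop finishes in milliseconds, while the actual processing happens between close() and join().

import time
from multiprocessing import Pool


def work(n):
    time.sleep(0.5)  # simulate a slow task
    return n


if __name__ == '__main__':
    pool = Pool(processes=4, maxtasksperchild=1)

    t0 = time.perf_counter()
    for i in range(40):
        pool.apply_async(func=work, args=(i,))
    t1 = time.perf_counter()
    pool.close()
    pool.join()
    t2 = time.perf_counter()

    # the submit loop takes milliseconds; the work happens during join()
    print(f'submitted in {t1 - t0:.3f}s, finished in {t2 - t0:.1f}s')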
Here's a reasonable way to use an asynchronous process pool correctly within a while loop with a maxtasksperchild of 1:
import multiprocessing
import time


def func(elem):
    time.sleep(0.5)  # simulate work
    return elem


def callback(elem):
    # do something with the processed data
    pass


if __name__ == '__main__':
    num_processes = 30

    queue = multiprocessing.Queue()
    for i in range(0, 10000):
        queue.put(i)
    queue.put(None)  # sentinel so the loop below can terminate

    process_pool = multiprocessing.Pool(processes=num_processes, maxtasksperchild=1)
    results = []
    while True:
        data = queue.get(True)
        if data is None:
            break
        res = process_pool.apply_async(func=func, args=(data,), callback=callback)
        results.append(res)

    flag = False
    for i, res in enumerate(results):
        try:
            # note: wait() never raises on timeout, so use get() here
            res.get(timeout=600)
            # do some logging
            results[i] = None  # release the AsyncResult
        except multiprocessing.TimeoutError:
            flag = True
            # do some logging

    process_pool.close()
    if flag:
        # at least one task timed out; kill whatever is still running
        process_pool.terminate()
    process_pool.join()
    # done!
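As a side note (a variation of my own, not part of the pattern above): if you don't need per-task callbacks or timeouts, imap_unordered avoids keeping a list of AsyncResult objects around entirely and yields values as the workers finish.

import time
import multiprocessing


def func(elem):
    time.sleep(0.5)  # simulate work
    return elem


if __name__ == '__main__':
    with multiprocessing.Pool(processes=30, maxtasksperchild=1) as pool:
        # pulls inputs lazily and yields results in completion order
        for result in pool.imap_unordered(func, range(10000), chunksize=1):
            pass  # do something with each processed element

The with block tears the pool down on exit; by that point the loop has already consumed every result.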