
How to run Python async in while loop independently

I am using FastAPI with the fastapi_utils package. My API collects users' texts over a 3-second window and sends them to the model in a batch to compute their lengths (just a simple demo). So I use fastapi_utils to schedule the background task, and at the end I fetch the result from a dictionary. But I found that the program blocks in the while loop and feed_data_into_model never puts a value into shared_dict, so the while loop never ends.


import asyncio
import uuid
import logging
from typing import Union, List
import threading
from fastapi import FastAPI, Request, Body
from fastapi_utils.tasks import repeat_every
import uvicorn
logger = logging.getLogger(__name__)
app = FastAPI()
queue = asyncio.Queue(maxsize=64)

shared_dict = {} # model result saved here!

lock = threading.Lock()

def handle_dict(key, value = None, action = "put"):
    lock.acquire()
    try:
        if action == "put":
            shared_dict[key] = value
        elif action == "delete":
            del shared_dict[key]
        elif action == "get":
            value = shared_dict[key]
        elif action == "exist":
            value = key in shared_dict
        else:
            pass
    finally:
        # Always called, even if exception is raised in try block
        lock.release()
    return value

def model_work(x:Union[str,List[str]]):
    if isinstance(x,str):
        result = [len(x)]
    else:
        result = [len(_) for _ in x]
    return result

@app.on_event("startup")
@repeat_every(seconds=4, logger=logger, wait_first=True)
async def feed_data_into_model():
    if queue.qsize() != 0:
        data = []
        ids = []
        while queue.qsize() != 0:
            task = await queue.get()
            task_id = task[0]
            ids.append(task_id)
            text = task[1]
            data.append(text)
        result = model_work(data)
        # print("model result:",result)
        for index,task_id in enumerate(ids):
            value = result[index]
            handle_dict(task_id,value,action = "put")

async def get_response(task_id):
    not_exist_flag = True
    while not_exist_flag:
        not_exist_flag = handle_dict(task_id, None, action= "exist") is False # BUG: it doesn't work
    value = handle_dict(task_id, None, action= "get")
    handle_dict(task_id, None, action= "delete")
    return value

@app.get("/{text}")
async def demo(text:str):
    task_id = str(uuid.uuid4())
    state = "pending"
    item= [task_id,text,state,""]
    await queue.put(item)
    # !: await query_from_answer_dict
    value = await get_response(task_id)
    return value

if __name__ == "__main__":
    # !: single process run every 4s, if queue not empty then pop them out to model
    # !: and model will save result in thread-safe dict, key is task-id
    uvicorn.run("api:app", host="0.0.0.0", port=5555)

服務運行后,您應該使用文本訪問 Web API。 而且您會發現即使在 3 秒后您也被阻止了。 我猜 fastapi_utils 不會打開一個新線程來執行后台任務,因此主線程在 while 循環中被阻塞,因為 dict 始終為空。

The current problem is the use of blocking code inside the asyncio loop. If you insert a small delay it will work:

    while not_exist_flag:
        not_exist_flag = handle_dict(task_id, None, action="exist") is False
        await asyncio.sleep(0.1)

The reason is that you need to let the scheduler go elsewhere and actually do the processing! Sadly, asyncio is not a free pass to write blocking code. But adding a delay is a very non-optimal solution.*

The better solution is to have your get_response await the task directly: since currently everything is in one thread, there is no advantage to handing processing off to a separate queue. Alternatively, use multiprocessing and submit the task while keeping a local reference to it. Then you can await the future directly and avoid polling altogether.
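A minimal sketch of the future-based hand-off (names here are illustrative, not the original API): each request registers an asyncio.Future keyed by its task id, and the batching worker resolves that future directly, so the caller simply awaits the result instead of polling a shared dict.

```python
import asyncio

pending = {}  # task_id -> asyncio.Future, resolved by the worker

def model_work(batch):
    # same toy "model" as the question: length of each text
    return [len(text) for text in batch]

async def batch_worker(queue):
    while True:
        await asyncio.sleep(0.05)             # stand-in for repeat_every
        ids, data = [], []
        while not queue.empty():
            task_id, text = queue.get_nowait()
            ids.append(task_id)
            data.append(text)
        for task_id, value in zip(ids, model_work(data)):
            pending.pop(task_id).set_result(value)

async def get_response(queue, task_id, text):
    fut = asyncio.get_running_loop().create_future()
    pending[task_id] = fut
    await queue.put((task_id, text))
    return await fut                          # no busy loop: wakes on set_result

async def main():
    worker_queue = asyncio.Queue()
    worker = asyncio.create_task(batch_worker(worker_queue))
    value = await get_response(worker_queue, "task-1", "hello")
    worker.cancel()
    return value

result = asyncio.run(main())
print(result)  # 5
```

The event loop stays free while `await fut` is pending, so other requests keep being served.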

By the time you've done this, you will have nearly reinvented celery. The fastapi project generator includes celery by default: if you genuinely need to hand these tasks off to another process, you may want to consider that.

In general, try to avoid polling in asyncio. You want to await everything.
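As a concrete illustration of awaiting rather than polling, blocking model work can be submitted to an executor and awaited directly (sketched here with a ThreadPoolExecutor for brevity; ProcessPoolExecutor has the same interface if the work must escape the GIL):

```python
import asyncio
from concurrent.futures import ThreadPoolExecutor

def model_work(text):
    # blocking work; in the real service this would be the model call
    return len(text)

async def handle(pool, text):
    loop = asyncio.get_running_loop()
    # run_in_executor wraps the pool's future as an awaitable: no polling
    return await loop.run_in_executor(pool, model_work, text)

async def main():
    with ThreadPoolExecutor(max_workers=2) as pool:
        return await asyncio.gather(*(handle(pool, t) for t in ("ab", "cde")))

result = asyncio.run(main())
print(result)  # [2, 3]
```

Each request holds its own reference to the future, so no shared dict or lock is needed at all.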

*It is non-optimal because:

  • the polling is done at the highest level, so it's already slower than it would be in C
  • the polling here involves calling a whole function which acquires a lock, so we have context-switching costs (from the function call), lock costs, and blocking of anything else trying to use the lock
  • your polling interval directly impacts the time available for other code to run

Note that your polling loop could have been written as:

while not handle_dict(task_id, None, action="exist"):
    pass

which shows the busy loop more clearly.

The updated server code; the sleep-polling in get_response still needs to be removed, since it is ugly:


import asyncio
import uuid
from typing import Union, List
import threading
from queue import Queue
from fastapi import FastAPI, Request, Body, APIRouter
from fastapi_utils.tasks import repeat_every
import uvicorn
import time
import logging
import datetime
logger = logging.getLogger(__name__)

app = APIRouter()
def feed_data_into_model(queue,shared_dict,lock): 
    if queue.qsize() != 0:
        data = []
        ids = []
        while queue.qsize() != 0:
            task = queue.get()
            task_id = task[0]
            ids.append(task_id)
            text = task[1]
            data.append(text)
        result = model_work(data)
        # print("model result:",result)
        for index,task_id in enumerate(ids):
            value = result[index]
            handle_dict(task_id,value,action = "put",lock=lock, shared_dict = shared_dict)

class TestThreading(object):
    def __init__(self, interval, queue,shared_dict,lock):
        self.interval = interval

        thread = threading.Thread(target=self.run, args=(queue,shared_dict,lock))
        thread.daemon = True
        thread.start()

    def run(self,queue,shared_dict,lock):
        while True:
            # More statements comes here
            # print(datetime.datetime.now().__str__() + ' : Start task in the background')
            feed_data_into_model(queue,shared_dict,lock)
            time.sleep(self.interval)

if __name__ != "__main__":
    # uvicorn imports and reloads this file, so __name__ is not "__main__";
    # initialising the shared state here avoids creating a second (idle)
    # background thread, which is confusing to debug
    global queue, shared_dict, lock
    queue = Queue(maxsize=64)
    shared_dict = {} # model result saved here!
    lock = threading.Lock()
    tr = TestThreading(3, queue,shared_dict,lock)

def handle_dict(key, value = None, action = "put", lock = None, shared_dict = None):
    lock.acquire()
    try:
        if action == "put":
            shared_dict[key] = value
        elif action == "delete":
            del shared_dict[key]
        elif action == "get":
            value = shared_dict[key]
        elif action == "exist":
            value = key in shared_dict
        else:
            pass
    finally:
        # Always called, even if exception is raised in try block
        lock.release()
    return value

def model_work(x:Union[str,List[str]]):
    time.sleep(3)
    if isinstance(x,str):
        result = [len(x)]
    else:
        result = [len(_) for _ in x]
    return result

async def get_response(task_id, lock, shared_dict):
    not_exist_flag = True
    while not_exist_flag:
        not_exist_flag = handle_dict(task_id, None, action= "exist",lock=lock, shared_dict = shared_dict) is False 
        await asyncio.sleep(0.02)
    value = handle_dict(task_id, None, action= "get", lock=lock, shared_dict = shared_dict)
    handle_dict(task_id, None, action= "delete",lock=lock, shared_dict = shared_dict)
    return value

@app.get("/{text}")
async def demo(text:str):
    global queue, shared_dict, lock 
    task_id = str(uuid.uuid4())
    logger.info(task_id)
    state = "pending"
    item= [task_id,text,state,""]
    queue.put(item)
    # TODO: await query_from_answer_dict , need to change since it's ugly to while wait the answer
    value = await get_response(task_id, lock, shared_dict)
    return value

if __name__ == "__main__":
    # what I want to do:
    #  single process run every 3s, if queue not empty then pop them out to model
    #  and model will save result in thread-safe dict, key is task-id
    
    uvicorn.run("api:app", host="0.0.0.0", port=5555)

Client test code:

for n in {1..5}; do curl http://localhost:5555/a & done
