如何異步執行按鈕命令？

Question

我正在嘗試使用pydub和speech_recognition庫從音頻文件生成轉錄。 我正在嘗試通過 Tkinter 中制作的 GUI 來執行此操作，我想在其中異步顯示轉錄。 但是，有些東西在我的代碼中不起作用，因為 GUI 在生成轉錄時一直凍結。

這是代碼：

import customtkinter
import asyncio
from tkinter import filedialog
from async_tkinter_loop import async_handler, async_mainloop
import speech_recognition as sr
import os
from pathlib import Path
from pydub import AudioSegment
from pydub.silence import split_on_silence


class App(customtkinter.CTk):
    def __init__(self):
        super().__init__()
        self.filepath = None
        self.transcription = None

        self.grid_rowconfigure(2, weight=1)

        self.btn_select_file = customtkinter.CTkButton(
            self, text="Select audio file", command=self.open_file
        )
        self.btn_select_file.grid(row=0, column=0, padx=20, pady=30)

        self.btn_generate_text = customtkinter.CTkButton(
            self,
            fg_color="green",
            text="Generate text",
            command=async_handler(self.get_transcription)
        )
        self.btn_generate_text.grid(row=1, column=0, padx=20, pady=30)

        self.tbx_transcription = customtkinter.CTkTextbox(self, wrap="word")
        self.tbx_transcription.grid(row=2, column=0, padx=20, pady=20, sticky="nsew")

    def open_file(self):
        # Open the file dialog
        filepath = filedialog.askopenfilename(
            initialdir="/",
            title="Select a file",
            filetypes=[("Audio files", ["*.mp3", "*.wav", "*.ogg", "*.opus", "*.mpeg"])]
        )

        if filepath:
            self.filepath = filepath

    async def get_transcription(self):
        if not self.filepath:
            self.tbx_transcription.insert(
                "0.0",
                "Error: No audio file selected, please select one before generating text."
            )
            return

        # Create a task to get the transcription
        task = [asyncio.create_task(self.generate_transcription(self.filepath))]

        completed, pending = await asyncio.wait(task)
        self.transcription = [task.result() for task in completed]

        # Display the transcription
        self.tbx_transcription.insert("0.0", self.transcription)

    @staticmethod
    async def generate_transcription(filepath):
        """
        Splitting a large audio file into chunks
        and applying speech recognition on each of these chunks
        """
        # create a speech recognition object
        r = sr.Recognizer()

        # open the audio file using pydub
        content_type = Path(filepath).suffix

        if "wav" in content_type:
            sound = AudioSegment.from_wav(filepath)
        elif "ogg" in content_type or "opus" in content_type:
            sound = AudioSegment.from_ogg(filepath)
        elif "mp3" in content_type or "mpeg" in content_type:
            sound = AudioSegment.from_mp3(filepath)

        # split audio sound where silence is 700 miliseconds or more and get chunks
        chunks = split_on_silence(
            sound,
            # experiment with this value for your target audio file
            min_silence_len=500,
            # adjust this per requirement
            silence_thresh=sound.dBFS - 14,
            # keep the silence for 1 second, adjustable as well
            keep_silence=500,
        )

        folder_name = "audio-chunks"
        # create a directory to store the audio chunks
        if not os.path.isdir(folder_name):
            os.mkdir(folder_name)

        whole_text = ""
        # process each chunk
        for i, audio_chunk in enumerate(chunks, start=1):
            # export audio chunk and save it in the `folder_name` directory.
            chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
            audio_chunk.export(chunk_filename, format="wav")
            # recognize the chunk
            with sr.AudioFile(chunk_filename) as source:
                audio_listened = r.record(source)
                # try converting it to text
                try:
                    text = r.recognize_google(audio_listened, language="es")
                except sr.UnknownValueError as e:
                    print("Error:", str(e))
                else:
                    text = f"{text.capitalize()}. "
                    whole_text += text

        # return the text for all chunks detected
        return whole_text


if __name__ == "__main__":
    app = App()
    async_mainloop(app)

我出於絕望嘗試使用async_tkinter_loop庫，但使用它不是強制性的。

編輯：我試過 httpSteve 的解決方案，但 GUI 一直凍結，就像我上面提供的代碼一樣。 這是代表應用程序異常行為的 gif。

可能不領情，但我嘗試移動window點擊按鈕沒有任何反應。 在生成轉錄之前，GUI 不會響應。

Answer 1

看起來問題是 get_transcription 方法同步運行並阻塞了 Tkinter 主循環，導致 GUI 凍結。 要解決此問題，您應該在 get_transcription 方法中使用 await 關鍵字來異步運行 generate_transcription 協程。 另外，您可以使用 Tkinter 的方法在轉錄完成后更新文本框。

這是更新后的代碼：

class App(customtkinter.CTk):
    def __init__(self):
        # ...
        self.btn_generate_text = customtkinter.CTkButton(
            self,
            fg_color="green",
            text="Generate text",
            command=self.get_transcription
        )
        self.btn_generate_text.grid(row=1, column=0, padx=20, pady=30)
        # ...

    def get_transcription(self):
        if not self.filepath:
            self.tbx_transcription.insert(
                "0.0",
                "Error: No audio file selected, please select one before generating text."
            )
            return
        asyncio.create_task(self._get_transcription())

    async def _get_transcription(self):
        self.transcription = await self.generate_transcription(self.filepath)
        self.tbx_transcription.insert("0.0", self.transcription)

這樣 generate_transcription 協程在后台運行，而 Tkinter 主循環繼續運行並更新 GUI。

Answer 2

由於threading ，我終於設法防止 GUI 凍結。 這里的關鍵是使用

threading.Thread(
    target=lambda loop: loop.run_until_complete(self.async_get_transcription()),
    args=(asyncio.new_event_loop(),)
).start()

並在創建self.btn_generate_text object 時傳遞command=lambda: self.get_transcription() 。

這是固定代碼：

import asyncio
import customtkinter
import threading
from tkinter import filedialog
import speech_recognition as sr
import os
from pathlib import Path
from pydub import AudioSegment
from pydub.silence import split_on_silence


class App(customtkinter.CTk):
    def __init__(self):
        super().__init__()
        self.filepath = None
        self.transcription = None

        self.grid_rowconfigure(2, weight=1)

        self.btn_select_file = customtkinter.CTkButton(
            self,
            text="Select audio file",
            command=self.open_file
        )
        self.btn_select_file.grid(row=0, column=0, padx=20, pady=30)

        self.btn_generate_text = customtkinter.CTkButton(
            self,
            fg_color="green",
            text="Generate text",
            command=lambda: self.get_transcription()
        )
        self.btn_generate_text.grid(row=1, column=0, padx=20, pady=30)

        self.tbx_transcription = customtkinter.CTkTextbox(self, wrap="word")
        self.tbx_transcription.grid(row=2, column=0, padx=20, pady=20, sticky="nsew")

    def open_file(self):
        # Open the file dialog
        filepath = filedialog.askopenfilename(
            initialdir="/",
            title="Select a file",
            filetypes=[("Audio files", ["*.mp3", "*.wav", "*.ogg", "*.opus", "*.mpeg"])]
        )

        if filepath:
            self.filepath = filepath

    def get_transcription(self):
        if not self.filepath:
            self.tbx_transcription.insert(
                "0.0",
                "Error: No audio file selected, please select one before generating text."
            )
            return

        threading.Thread(
            target=lambda loop: loop.run_until_complete(self.async_get_transcription()),
            args=(asyncio.new_event_loop(),)
        ).start()

        self.progressbar_1 = customtkinter.CTkProgressBar(self)
        self.progressbar_1.grid(row=2, column=0, padx=40, pady=0, sticky="ew")
        self.progressbar_1.configure(mode="indeterminnate")
        self.progressbar_1.start()

    async def async_get_transcription(self):
        self.transcription = await self.generate_transcription(self.filepath)
        self.progressbar_1.grid_forget()
        self.tbx_transcription.insert("0.0", self.transcription)

    @staticmethod
    async def generate_transcription(filepath):
        """
        Splitting a large audio file into chunks
        and applying speech recognition on each of these chunks
        """
        # create a speech recognition object
        r = sr.Recognizer()

        # open the audio file using pydub
        content_type = Path(filepath).suffix

        if "wav" in content_type:
            sound = AudioSegment.from_wav(filepath)
        elif "ogg" in content_type or "opus" in content_type:
            sound = AudioSegment.from_ogg(filepath)
        elif "mp3" in content_type or "mpeg" in content_type:
            sound = AudioSegment.from_mp3(filepath)

        # split audio sound where silence is 700 miliseconds or more and get chunks
        chunks = split_on_silence(
            sound,
            # experiment with this value for your target audio file
            min_silence_len=500,
            # adjust this per requirement
            silence_thresh=sound.dBFS - 14,
            # keep the silence for 1 second, adjustable as well
            keep_silence=500,
        )

        folder_name = "audio-chunks"
        # create a directory to store the audio chunks
        if not os.path.isdir(folder_name):
            os.mkdir(folder_name)

        whole_text = ""
        # process each chunk
        for i, audio_chunk in enumerate(chunks, start=1):
            # export audio chunk and save it in the `folder_name` directory.
            chunk_filename = os.path.join(folder_name, f"chunk{i}.wav")
            audio_chunk.export(chunk_filename, format="wav")
            # recognize the chunk
            with sr.AudioFile(chunk_filename) as source:
                audio_listened = r.record(source)
                # try converting it to text
                try:
                    text = r.recognize_google(audio_listened, language="es")
                except sr.UnknownValueError as e:
                    print("Error:", str(e))
                else:
                    text = f"{text.capitalize()}. "
                    whole_text += text

        # return the text for all chunks detected
        return whole_text


if __name__ == "__main__":
    app = App()
    app.mainloop()

如何異步執行按鈕命令？

問題描述

2 個解決方案

解決方案1
0 2023-01-26 19:45:48

解決方案2
0 2023-01-27 18:02:18

如何異步執行按鈕命令？

問題描述

2 個解決方案

解決方案1 0 2023-01-26 19:45:48

解決方案2 0 2023-01-27 18:02:18

解決方案1
0 2023-01-26 19:45:48

解決方案2
0 2023-01-27 18:02:18