Real-time audio transcription from a WebSocket stream with Python and Google Cloud Speech-to-Text

I'm trying to transcribe phone calls in real time using the Vonage API and Google Speech-to-Text, but I keep getting an error when I try to use the Google STT.

Here is my code:

from flask import Flask, request, jsonify
from flask_sockets import Sockets
from google.cloud import speech # speech to text service
import sys
import os

# Audio recording parameters
RATE = 16000
CHUNK = int(RATE / 10)  # 100ms

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = PATH


app = Flask(__name__)
sockets = Sockets(app)

language_code = "iw-IL"  # a BCP-47 language tag

client = speech.SpeechClient()
config = speech.RecognitionConfig(
    encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=RATE,
    language_code=language_code,
)

streaming_config = speech.StreamingRecognitionConfig(
    config=config, interim_results=True
)


def listen_print_loop(responses):
    """Iterates through server responses and prints them.

    The responses passed is a generator that will block until a response
    is provided by the server.

    Each response may contain multiple results, and each result may contain
    multiple alternatives; for details, see .  Here we
    print only the transcription for the top alternative of the top result.

    In this case, responses are provided for interim results as well. If the
    response is an interim one, print a line feed at the end of it, to allow
    the next result to overwrite it, until the response is a final one. For the
    final one, print a newline to preserve the finalized transcription.
    """


    num_chars_printed = 0
    for response in responses:
        if not response.results:
            continue

        # The `results` list is consecutive. For streaming, we only care about
        # the first result being considered, since once it's `is_final`, it
        # moves on to considering the next utterance.
        result = response.results[0]
        if not result.alternatives:
            continue

        # Display the transcription of the top alternative.
        transcript = result.alternatives[0].transcript
        # Display interim results, but with a carriage return at the end of the
        # line, so subsequent lines will overwrite them.
        #
        # If the previous result was longer than this one, we need to print
        # some extra spaces to overwrite the previous result
        overwrite_chars = " " * (num_chars_printed - len(transcript))

        if not result.is_final:

            sys.stdout.write(transcript + overwrite_chars + "\r")
            sys.stdout.flush()

            num_chars_printed = len(transcript)


        else:
            print('==>'+transcript + overwrite_chars)


            num_chars_printed = 0


@app.route("/ncco")
def answer_call():
    ncco = [
        {
            "action": "talk",
            "text": "Please wait while we connect you to the echo server",
        },
        {
            "action": "connect",
            "from": "NUMBER",
            "endpoint": [
                {
                    "type": "websocket",
                    "uri": "wss://{0}/socket".format(request.host),
                    "content-type": "audio/l16;rate=16000",
                }
            ],
        },
    ]

    return jsonify(ncco)


@app.route("/webhooks/event", methods=["POST"])
def events():
    return "200"


@sockets.route("/socket", methods=["GET"])
def echo_socket(ws):
    while not ws.closed:
        message = ws.receive()
        if type(message) == str:
            print(message)
        elif message:
            requests = (speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in message)
            responses = client.streaming_recognize(streaming_config, requests)
            # listen_print_loop(responses)
            # ws.send(message)


if __name__ == "__main__":
    from gevent import pywsgi
    from geventwebsocket.handler import WebSocketHandler
    server = pywsgi.WSGIServer(("", 3000), app, handler_class=WebSocketHandler)
    print('server is Up')
    server.serve_forever()


And here is the error I get:

  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\grpc_helpers.py", line 166, in error_remapped_callable
    return _StreamingResponseIterator(result, prefetch_first_result=prefetch_first)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\grpc_helpers.py", line 89, in __init__
    self._stored_first_result = six.next(self._wrapped)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\grpc\_channel.py", line 426, in __next__
    return self._next()
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\grpc\_channel.py", line 826, in _next
    raise self
grpc._channel._MultiThreadedRendezvous: <_MultiThreadedRendezvous of RPC that terminated with:
        status = StatusCode.UNKNOWN
        details = "Exception iterating requests!"
        debug_error_string = "None"
>

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\gevent\pywsgi.py", line 999, in handle_one_response
    self.run_application()
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\geventwebsocket\handler.py", line 75, in run_application
    self.run_websocket()
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\geventwebsocket\handler.py", line 52, in run_websocket
    list(self.application(self.environ, lambda s, h, e=None: []))
  File "C:\Python39\lib\site-packages\flask\app.py", line 2464, in __call__
    return self.wsgi_app(environ, start_response)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\flask_sockets.py", line 45, in __call__
    handler(environment, **values)
  File "C:\Users\ADMIN\Desktop\python\vonageBot\echoServer.py", line 122, in echo_socket
    responses = client.streaming_recognize(streaming_config, requests)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\cloud\speech_v1\helpers.py", line 81, in streaming_recognize
    return super(SpeechHelpers, self).streaming_recognize(
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\cloud\speech_v1\services\speech\client.py", line 616, in streaming_recognize
    response = rpc(requests, retry=retry, timeout=timeout, metadata=metadata,)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\gapic_v1\method.py", line 145, in __call__
    return wrapped_func(*args, **kwargs)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\retry.py", line 281, in retry_wrapped_func
    return retry_target(
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\retry.py", line 184, in retry_target
    return target()
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\timeout.py", line 102, in func_with_timeout
    return func(*args, **kwargs)
  File "C:\Users\ADMIN\AppData\Roaming\Python\Python39\site-packages\google\api_core\grpc_helpers.py", line 168, in error_remapped_callable
    six.raise_from(exceptions.from_grpc_error(exc), exc)
  File "<string>", line 3, in raise_from
google.api_core.exceptions.Unknown: None Exception iterating requests!
2021-04-19T08:52:01Z {'REMOTE_ADDR': '::1', 'REMOTE_PORT': '50054', 'HTTP_HOST': 'xxc8573d03e9.ngrok.io', (hidden keys: 30)} failed with Unknown

Basically, I tried to implement Google's microphone streaming example with a stream from a WebSocket instead of the microphone, but there is a problem with that implementation.
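For context, the failure most likely comes from the request generator in echo_socket: iterating over a bytes object yields integers, so audio_content receives an int instead of a bytes chunk, the request constructor raises while gRPC consumes the generator, and the error surfaces as "Exception iterating requests!". A minimal sketch of the difference (illustrative only, not a complete fix, since opening a new streaming_recognize call per WebSocket frame is still the wrong shape for a continuous call):

from google.cloud import speech

message = b"\x00\x01\x02\x03"  # one binary WebSocket frame

# What the failing code builds: iterating bytes yields ints (0, 1, 2, ...),
# so each request gets an int for audio_content and raises when consumed.
bad_requests = (
    speech.StreamingRecognizeRequest(audio_content=chunk) for chunk in message
)

# What the API expects: each request carries a bytes chunk of audio.
good_requests = iter(
    [speech.StreamingRecognizeRequest(audio_content=message)]
)

That is why the working version below keeps a single long-lived stream and feeds every incoming frame into it through a queue.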

I found a solution; here it is:

#!/usr/bin/env python3
import base64
import json
import threading
import os, sys

from flask import Flask, request, jsonify
from flask_sockets import Sockets
from google.cloud.speech import RecognitionConfig, StreamingRecognitionConfig
from SpeechClientBridge import SpeechClientBridge

os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = PATH


config = RecognitionConfig(
    encoding=RecognitionConfig.AudioEncoding.LINEAR16,
    sample_rate_hertz=16000,
    language_code="iw-IL",
)
streaming_config = StreamingRecognitionConfig(config=config, interim_results=True)


app = Flask(__name__)
sockets = Sockets(app)


@app.route("/ncco")
def answer_call():
    ncco = [
        {
            "action": "connect",
            "from": "+97223764024",
            "endpoint": [
                {
                    "type": "websocket",
                    "uri": "wss://{0}/socket".format(request.host),
                    "content-type": "audio/l16;rate=16000",
                }
            ],
        },
    ]

    return jsonify(ncco)


def on_transcription_response(response):
    num_chars_printed = 0
    if not response.results:
        return

    result = response.results[0]
    if not result.alternatives:
        return

    transcript = result.alternatives[0].transcript
    overwrite_chars = " " * (num_chars_printed - len(transcript))

    print(result.is_final)

    if not result.is_final:
        sys.stdout.write(transcript + overwrite_chars + "\r")
        sys.stdout.flush()
        num_chars_printed = len(transcript)
    else:
        print('==>' + transcript + overwrite_chars)


@app.route("/webhooks/event", methods=["POST"])
def events():
    return "200"


@sockets.route("/socket", methods=["GET"])
def transcript(ws):
    print("WS connection opened")
    bridge = SpeechClientBridge(streaming_config, on_transcription_response)
    t = threading.Thread(target=bridge.start)
    t.start()

    while not ws.closed:
        message = ws.receive()
        if message is None:
            bridge.add_request(None)
            bridge.terminate()
            break

        # print(message)
        if isinstance(message, str):
            # Text frames (e.g. the opening JSON event from Vonage).
            print(message)
        else:
            # Binary audio frames go to the recognizer thread.
            bridge.add_request(message)


    bridge.terminate()
    print("WS connection closed")


# def echo_socket(ws):
#     print("WS connection opened")
#     while not ws.closed:
#         message = ws.receive()
#         ws.send(message)


if __name__ == "__main__":
    from gevent import pywsgi
    from geventwebsocket.handler import WebSocketHandler
    server = pywsgi.WSGIServer(("", 3000), app, handler_class=WebSocketHandler)
    print('server is Up')
    server.serve_forever()

And an important class, SpeechClientBridge:

import queue

from google.cloud import speech


class SpeechClientBridge:
    def __init__(self, streaming_config, on_response):
        self._on_response = on_response
        self._queue = queue.Queue()
        self._ended = False
        self.streaming_config = streaming_config

    def start(self):
        client = speech.SpeechClient()
        stream = self.generator()
        requests = (
            speech.StreamingRecognizeRequest(audio_content=content)
            for content in stream
        )
        responses = client.streaming_recognize(self.streaming_config, requests)
        self.process_responses_loop(responses)

    def terminate(self):
        self._ended = True

    def add_request(self, buffer):
        # None is the end-of-stream sentinel consumed by generator();
        # bytes(None) would raise TypeError, so pass it through as-is.
        if buffer is None:
            self._queue.put(None, block=False)
        else:
            self._queue.put(bytes(buffer), block=False)

    def process_responses_loop(self, responses):
        for response in responses:
            self._on_response(response)

            if self._ended:
                break

    def generator(self):
        while not self._ended:
            # Use a blocking get() to ensure there's at least one chunk of
            # data, and stop iteration if the chunk is None, indicating the
            # end of the audio stream.
            chunk = self._queue.get()
            if chunk is None:
                return
            data = [chunk]

            # Now consume whatever other data's still buffered.
            while True:
                try:
                    chunk = self._queue.get(block=False)
                    if chunk is None:
                        return
                    data.append(chunk)
                except queue.Empty:
                    break

            yield b"".join(data)
