I'm trying to use Google Cloud Speech-to-Text. So far I've got the Python transcribe_streaming_mic sample working, and it outputs a live speech transcription to my terminal. How do I get it to output that text live to a text box on a website, like the example on their front page?
I've looked through the documentation for example code that does this, but unless I've overlooked it, I can't find any example of outputting to a website.
Thank you!
The demo featured on Google's Speech-to-Text landing page uses some JavaScript to handle the uploading of audio files and live recording in order to show off the API:
<div class="l-showcase">
<div class="text-center">
<p class="text-title">Convert your speech to text right now</p>
<p class="text-body">Select a language and click "Start Now" to begin recording</p>
</div>
<!-- DEMO: placeholder element for the embedded streaming demo; the data-* attributes below carry its settings (embed name, polyfill URL, demo URL) -->
<div
id="streaming_demo_section"
data-embed="sp-app"
data-force-polling="true"
data-polyfill-url="https://www.gstatic.com/external_hosted/polymer/v2/webcomponents-lite.js"
data-url="https://www.gstatic.com/cloud-site-ux/speech/speech.min.html">
</div>
</div>
Google provides some examples of how to record audio from a browser user in their Web Fundamentals document "Recording Audio from the User".
You would have to 1) record the user's audio, 2) post the audio to the Speech-To-Text API, and 3) display the response back to the user's browser.
For the Python server part, you can follow this code. On the client side, you have to send the audio stream to the server through a WebSocket connection.
For testing the Python server, you can use this code:
import asyncio
import websockets
import json
import threading
from six.moves import queue
from google.cloud import speech
from google.cloud.speech import types
# Host and port the WebSocket server is bound to (passed to websockets.serve).
# '0.0.0.0' accepts connections on every IPv4 interface of the machine.
IP = '0.0.0.0'
PORT = 8000
class Transcoder(object):
    """
    Converts audio chunks to text.

    Audio chunks are handed over with write(), buffered in a queue, and
    streamed to the speech API from a worker thread launched by start().
    The latest final transcript is published on the ``transcript``
    attribute; it is None while nothing is pending, and the consumer is
    expected to reset it to None after reading it.
    """

    def __init__(self, encoding, rate, language):
        """
        Store the recognition settings and create the audio buffer.

        encoding, rate and language are forwarded unchanged to
        RecognitionConfig (encoding, sample_rate_hertz, language_code).
        """
        self.buff = queue.Queue()
        self.encoding = encoding
        self.language = language
        self.rate = rate
        # True while no stream is active: before start() and after stop().
        self.closed = True
        self.transcript = None

    def start(self):
        """Start up streaming speech call"""
        # Mark the stream open *before* the worker runs; otherwise the
        # generator's `while not self.closed` check can fail on its first
        # evaluation and the stream ends before any audio was written.
        self.closed = False
        threading.Thread(target=self.process).start()

    def stop(self):
        """
        Shut the stream down.

        Sets ``closed`` and pushes the None sentinel so a worker blocked in
        the buffer wakes up and ends its request stream. Audio still queued
        at that point is not sent.
        """
        self.closed = True
        self.buff.put(None)

    def response_loop(self, responses):
        """
        Pick up the final result of Speech to text conversion

        Only the first result and its first alternative of each response are
        inspected; interim (non-final) results are ignored.
        """
        for response in responses:
            if not response.results:
                continue
            result = response.results[0]
            if not result.alternatives:
                continue
            transcript = result.alternatives[0].transcript
            if result.is_final:
                self.transcript = transcript

    def process(self):
        """
        Audio stream recognition and result parsing

        Runs in the worker thread: builds the recognition config, feeds the
        buffered audio to the streaming API and consumes the responses.
        """
        # You can add speech contexts for better recognition
        cap_speech_context = types.SpeechContext(phrases=["Add your phrases here"])
        client = speech.SpeechClient()
        config = types.RecognitionConfig(
            encoding=self.encoding,
            sample_rate_hertz=self.rate,
            language_code=self.language,
            speech_contexts=[cap_speech_context],
            model='command_and_search'
        )
        streaming_config = types.StreamingRecognitionConfig(
            config=config,
            interim_results=False,
            single_utterance=False)
        audio_generator = self.stream_generator()
        requests = (types.StreamingRecognizeRequest(audio_content=content)
                    for content in audio_generator)
        responses = client.streaming_recognize(streaming_config, requests)
        try:
            self.response_loop(responses)
        except Exception as exc:
            # Best-effort recovery kept from the original: a failed stream is
            # replaced by a fresh one. Unlike the original bare `except:`,
            # the error is reported and no restart happens once the stream
            # has been closed, so a finished session cannot respawn workers.
            if self.closed:
                return
            print("Speech stream error, restarting:", repr(exc))
            # Spawn the worker directly (not via start()) so a concurrent
            # stop() is not undone by start() clearing `closed` again.
            threading.Thread(target=self.process).start()

    def stream_generator(self):
        """
        Yield the buffered audio as byte strings until the stream ends.

        Blocks for the next chunk, then drains whatever else is already
        queued and yields it as a single joined byte string. A None chunk
        ends the stream; audio collected before the sentinel in the same
        batch is yielded first instead of being discarded.
        """
        while not self.closed:
            chunk = self.buff.get()
            if chunk is None:
                return
            data = [chunk]
            finished = False
            while True:
                try:
                    chunk = self.buff.get(block=False)
                except queue.Empty:
                    break
                if chunk is None:
                    finished = True
                    break
                data.append(chunk)
            yield b''.join(data)
            if finished:
                return

    def write(self, data):
        """
        Writes data to the buffer

        Passing None marks the end of the audio stream.
        """
        self.buff.put(data)
async def audio_processor(websocket, path=None):
    """
    Collects audio from the stream, writes it to buffer and return the output of Google speech to text

    Protocol: the first message must be a text frame holding a JSON object
    with the keys "format", "rate" and "language"; every following message
    is written to the transcoder as audio. Whenever a final transcript is
    available after a chunk was received, it is sent back to the client.

    ``path`` is optional so the handler works both with websockets versions
    that call ``handler(websocket, path)`` and with those that call
    ``handler(websocket)``; it is not used.
    """
    config = await websocket.recv()
    if not isinstance(config, str):
        print("ERROR, no config")
        return
    try:
        config = json.loads(config)
        encoding = config["format"]
        rate = config["rate"]
        language = config["language"]
    except (ValueError, KeyError, TypeError) as exc:
        # Invalid JSON, a non-object payload or a missing key: reject the
        # session the same way as a missing config message.
        print("ERROR, bad config:", repr(exc))
        return

    transcoder = Transcoder(
        encoding=encoding,
        rate=rate,
        language=language
    )
    # Open the stream before the worker thread starts so its audio generator
    # does not terminate before the first chunk has been written.
    transcoder.closed = False
    transcoder.start()
    try:
        while True:
            data = await websocket.recv()
            transcoder.write(data)
            # Take and clear the transcript before awaiting, so a result the
            # worker thread publishes during the send is not wiped out.
            transcript = transcoder.transcript
            if transcript:
                transcoder.transcript = None
                print(transcript)
                await websocket.send(transcript)
    except websockets.ConnectionClosed:
        print("Connection closed")
    finally:
        # End the audio stream so the worker thread is not left blocked on
        # the buffer forever: flag the stream closed and push the None
        # end-of-stream sentinel.
        transcoder.closed = True
        transcoder.write(None)
async def main():
    """
    Run the WebSocket server on IP:PORT until the process is stopped.

    The server is created inside a running event loop (instead of through
    asyncio.get_event_loop() at import time, which is deprecated when no loop
    is running) and is closed cleanly when the coroutine is cancelled.
    """
    async with websockets.serve(audio_processor, IP, PORT):
        # A future that never completes keeps the server alive indefinitely.
        await asyncio.Future()


asyncio.run(main())
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.