IBM Watson Speech to Text Service 未在 Unity3d 中提供响应

Question

我有一个 ExampleSstreaming 类，它实际上是我从IBM Watson SDK (speech to text service demo) 的 GitHub获得的。 这里是

public class ExampleStreaming : MonoBehaviour
{
  private int m_RecordingRoutine = 0;
  private string m_MicrophoneID = null;
  private AudioClip m_Recording = null;
  private int m_RecordingBufferSize = 5;
  private int m_RecordingHZ = 22050;

  private SpeechToText m_SpeechToText = new SpeechToText();

  void Start()
  {
    LogSystem.InstallDefaultReactors();
    Log.Debug("ExampleStreaming", "Start();");

    Active = true;
        Debug.Log("start");
    StartRecording();
  }

    public void Update() {
        Debug.Log(m_SpeechToText.IsListening);
    }

  public bool Active
  {
    get { return m_SpeechToText.IsListening; }
    set
    {
      if (value && !m_SpeechToText.IsListening)
      {
        m_SpeechToText.DetectSilence = true;
        m_SpeechToText.EnableWordConfidence = false;
        m_SpeechToText.EnableTimestamps = false;
        m_SpeechToText.SilenceThreshold = 0.03f;
        m_SpeechToText.MaxAlternatives = 1;
        m_SpeechToText.EnableContinousRecognition = true;
        m_SpeechToText.EnableInterimResults = true;
        m_SpeechToText.OnError = OnError;
        m_SpeechToText.StartListening(OnRecognize);
      }
      else if (!value && m_SpeechToText.IsListening)
      {
        m_SpeechToText.StopListening();
      }
    }
  }

  private void StartRecording()
  {
    if (m_RecordingRoutine == 0)
    {
            Debug.Log("m_RecordingRoutine");
            UnityObjectUtil.StartDestroyQueue();
      m_RecordingRoutine = Runnable.Run(RecordingHandler());
    }
  }

  private void StopRecording()
  {
    if (m_RecordingRoutine != 0)
    {
      Microphone.End(m_MicrophoneID);
      Runnable.Stop(m_RecordingRoutine);
      m_RecordingRoutine = 0;
    }
  }

  private void OnError(string error)
  {
    Active = false;

    Log.Debug("ExampleStreaming", "Error! {0}", error);
  }

  private IEnumerator RecordingHandler()
  {
    Log.Debug("ExampleStreaming", "devices: {0}", Microphone.devices);

        m_MicrophoneID = Microphone.devices[0];
        Debug.Log("m_MicrophoneID : " + m_MicrophoneID);
        m_Recording = Microphone.Start(m_MicrophoneID, true, m_RecordingBufferSize, m_RecordingHZ);
    yield return null;      // let m_RecordingRoutine get set..
        Debug.Log("m_Recording : " + m_Recording.length);
        if (m_Recording == null)
    {
            Debug.Log("m_Recording is null");
            StopRecording();
      yield break;
    }

    bool bFirstBlock = true;
    int midPoint = m_Recording.samples / 2;
    float[] samples = null;

    while (m_RecordingRoutine != 0 && m_Recording != null)
    {
      int writePos = Microphone.GetPosition(m_MicrophoneID);
      if (writePos > m_Recording.samples || !Microphone.IsRecording(m_MicrophoneID))
      {
        Log.Error("MicrophoneWidget", "Microphone disconnected.");

        StopRecording();
        yield break;
      }

      if ((bFirstBlock && writePos >= midPoint)
        || (!bFirstBlock && writePos < midPoint))
      {
        // front block is recorded, make a RecordClip and pass it onto our callback.
        samples = new float[midPoint];
        m_Recording.GetData(samples, bFirstBlock ? 0 : midPoint);

        AudioData record = new AudioData();
        record.MaxLevel = Mathf.Max(samples);
        record.Clip = AudioClip.Create("Recording", midPoint, m_Recording.channels, m_RecordingHZ, false);
        record.Clip.SetData(samples, 0);

        m_SpeechToText.OnListen(record);

        bFirstBlock = !bFirstBlock;
      }
      else
      {
        // calculate the number of samples remaining until we ready for a block of audio, 
        // and wait that amount of time it will take to record.
        int remaining = bFirstBlock ? (midPoint - writePos) : (m_Recording.samples - writePos);
        float timeRemaining = (float)remaining / (float)m_RecordingHZ;

        yield return new WaitForSeconds(timeRemaining);
      }

    }

    yield break;
  }

  private void OnRecognize(SpeechRecognitionEvent result)
  {
        Debug.Log("OnRecognize");
        if (result != null && result.results.Length > 0)
    {
      foreach (var res in result.results)
      {
        foreach (var alt in res.alternatives)
        {
          string text = alt.transcript;
                    Debug.Log(text);

          Log.Debug("ExampleStreaming", string.Format("{0} ({1}, {2:0.00})\n", text, res.final ? "Final" : "Interim", alt.confidence));
        }
      }
    }
  }
}

这是我添加的用于获取麦克风的行。 我只是在函数RecordingHandler 中编辑它以在零索引处提供麦克风设备，该索引实际上为空（我不知道为什么，这是故意留下还是错误）。

 m_MicrophoneID = Microphone.devices[0];

但不幸的是，它没有在我认为应该执行的 Event OnRecognize 中显示任何输出日志。

几秒钟后，它会显示这些日志（因为我给出了音频的长度 5）。 我做错了什么，我无法理解如何语音到文本。

[DEBUG] OnListenClosed(), State = DISCONNECTED 
[DEBUG] KeepAlive exited.

我也试过 IBM Watson Speech To text Scene 它也没有显示任何内容。

Answer 1

我还不能流式传输实时输出，但可以通过 watson 服务将音频剪辑转换为文本，这是简单的代码（花了三天时间）。

using UnityEngine;
using System.Collections;
using IBM.Watson.DeveloperCloud.Services.SpeechToText.v1;

public class AudioClipToTextWatson : MonoBehaviour {
    // Non-streaming
    SpeechToText m_SpeechToText = new SpeechToText();
    public AudioClip m_AudioClip = new AudioClip();
    public bool on = false;

    void Start () {
        m_AudioClip = Microphone.Start(Microphone.devices[0], false, 4, 44100);

            m_SpeechToText.Recognize(m_AudioClip, OnRecognize);
            //  Streaming
            m_SpeechToText.StartListening(OnRecognize);
            //  Stop listening
            m_SpeechToText.StopListening();
    }


    private void OnRecognize(SpeechRecognitionEvent result)
    {
        Debug.Log("result : " + result);
        if (result != null && result.results.Length > 0)
        {
            foreach (var res in result.results)
            {
                foreach (var alt in res.alternatives)
                {
                    string text = alt.transcript;
                    Debug.Log(text);
                    Debug.Log(res.final);
                }
            }
        }
    }

}

注意：您可以使用麦克风录制音频剪辑并将其转换为文本。 如果您已经有音频，则将其拖放到检查器并注释掉开始事件中的第一行。

Answer 2

我解决了错误

我在 Unity 2018.3.14f1 中遇到了同样的问题。 我只是更改播放器设置然后工作正常

文件 -> 构建设置 -> 播放器设置 -> 其他设置

配置

脚本运行时版本：.Net 4x 等效版本
API 兼容级别：.Net 4x

IBM Watson Speech to Text Service 未在 Unity3d 中提供响应

问题描述

2 个解决方案

解决方案1
1 已采纳 2017-09-09 12:46:46

解决方案2
0 2020-02-18 15:18:17

IBM Watson Speech to Text Service 未在 Unity3d 中提供响应

问题描述

2 个解决方案

解决方案1 1 已采纳 2017-09-09 12:46:46

解决方案2 0 2020-02-18 15:18:17

解决方案1
1 已采纳 2017-09-09 12:46:46

解决方案2
0 2020-02-18 15:18:17