
How can I extract the preceding audio (from microphone) as a buffer when silence is detected (JS)?

I am using the Google Cloud API for Speech-to-Text, with a NodeJS back-end. The app needs to be able to listen for voice commands and transmit them to the back-end as a buffer. For this, I need to send a buffer of the preceding audio whenever silence is detected.

Any help would be appreciated, including the JS code below.

if (!navigator.getUserMedia)
    navigator.getUserMedia = navigator.webkitGetUserMedia ||
        navigator.mozGetUserMedia || navigator.msGetUserMedia;

if (navigator.getUserMedia) {
    navigator.getUserMedia({audio: true}, success, function (e) {
        alert('Error capturing audio.');
    });
} else alert('getUserMedia not supported in this browser.');

var recording = false;

window.startRecording = function () {
    recording = true;
};

window.stopRecording = function () {
    recording = false;
    // window.Stream.end();
};

function success(e) {
    var audioContext = window.AudioContext || window.webkitAudioContext;
    var context = new audioContext();

    // the sample rate is in context.sampleRate
    var audioInput = context.createMediaStreamSource(e);

    var bufferSize = 4096;
    var recorder = context.createScriptProcessor(bufferSize, 1, 1);

    recorder.onaudioprocess = function (e) {
        if (!recording) return;
        console.log('recording');
        var left = e.inputBuffer.getChannelData(0);
        console.log(convertoFloat32ToInt16(left));
    };

    audioInput.connect(recorder);
    recorder.connect(context.destination);
}
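
The snippet calls convertoFloat32ToInt16, which is not shown in the question. A minimal sketch of what such a helper presumably does, i.e. map Web Audio's Float32 samples in [-1, 1] to 16-bit signed PCM (the LINEAR16 encoding Google Cloud Speech-to-Text accepts):

// Assumed implementation of the helper referenced above:
// scale each Float32 sample in [-1, 1] to a signed 16-bit integer.
function convertoFloat32ToInt16(buffer) {
    var result = new Int16Array(buffer.length);
    for (var i = 0; i < buffer.length; i++) {
        // clamp first so values slightly outside [-1, 1] do not overflow
        var s = Math.max(-1, Math.min(1, buffer[i]));
        result[i] = s < 0 ? s * 0x8000 : s * 0x7FFF;
    }
    return result;
}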

I'm not too sure what is actually being asked in the question, so this answer is just meant to provide a way to detect silence in an AudioStream.


To detect silence in an AudioStream, you can use an AnalyserNode, on which you call the getByteFrequencyData method at regular intervals and check whether there has been any sound above your expected level for a given duration.

You can set the threshold level directly with the minDecibels property: frequency data at or below that level is reported as zero, so any non-zero byte in the array means there is sound above the threshold.

function detectSilence(
  stream,
  onSoundEnd = _ => {},
  onSoundStart = _ => {},
  silence_delay = 500,
  min_decibels = -80
) {
  const ctx = new AudioContext();
  const analyser = ctx.createAnalyser();
  const streamNode = ctx.createMediaStreamSource(stream);
  streamNode.connect(analyser);
  analyser.minDecibels = min_decibels;

  const data = new Uint8Array(analyser.frequencyBinCount); // will hold our data
  let silence_start = performance.now();
  let triggered = false; // trigger only once per silence event

  function loop(time) {
    requestAnimationFrame(loop); // we'll loop every 60th of a second to check
    analyser.getByteFrequencyData(data); // get current data
    if (data.some(v => v)) { // if there is data above the given db limit
      if (triggered) {
        triggered = false;
        onSoundStart();
      }
      silence_start = time; // set it to now
    }
    if (!triggered && time - silence_start > silence_delay) {
      onSoundEnd();
      triggered = true;
    }
  }
  loop();
}

function onSilence() {
  console.log('silence');
}

function onSpeak() {
  console.log('speaking');
}

navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    detectSilence(stream, onSilence, onSpeak);
    // do something else with the stream
  })
  .catch(console.error);

And as a fiddle, since Stack Snippets may block getUserMedia.
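
To connect this to the actual question (extracting the preceding audio as a buffer once silence is detected), one possible approach, sketched here as an assumption rather than a tested recipe, is to pair detectSilence from above with a MediaRecorder on the same stream: start a recorder when sound starts, and on silence stop it and read the accumulated Blob as an ArrayBuffer to send to the back-end. The /speech-to-text endpoint is a hypothetical placeholder:

// Hedged sketch: wire detectSilence() from above to a MediaRecorder
// so each burst of speech between silences becomes one ArrayBuffer.
navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    let recorder = null;

    const onSoundStart = () => {
      recorder = new MediaRecorder(stream);
      recorder.ondataavailable = async (e) => {
        // e.data is a Blob of the audio captured since start()
        const buffer = await e.data.arrayBuffer();
        // hypothetical endpoint; the question's NodeJS back-end is not shown
        fetch('/speech-to-text', { method: 'POST', body: buffer });
      };
      recorder.start();
    };

    const onSoundEnd = () => {
      if (recorder && recorder.state === 'recording') {
        recorder.stop(); // fires dataavailable with the preceding audio
      }
    };

    detectSilence(stream, onSoundEnd, onSoundStart);
  })
  .catch(console.error);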

You can use the SpeechRecognition result event to determine when a word or phrase has been recognized, for example ls, cd, pwd or other commands. Read the .transcript of the SpeechRecognitionAlternative and pass it to speechSynthesis.speak(); at the attached start and end events of the SpeechSynthesisUtterance, call .start() or .resume() on the MediaRecorder object to which the MediaStream was passed; then convert the Blob from the dataavailable event to an ArrayBuffer using FileReader or Response.arrayBuffer().

We could alternatively use the audiostart or soundstart and audioend or soundend events of SpeechRecognition to record the user's actual voice, though those events may not fire consistently relative to the actual start and end of the audio captured by a standard system microphone; a minimal sketch of that wiring follows.
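
A minimal sketch of that alternative, assuming soundstart and soundend fire usably on the target browser (their timing varies, as noted above):

// Hedged sketch: drive the MediaRecorder from SpeechRecognition's sound
// events instead of from SpeechSynthesisUtterance as the full example below does.
navigator.mediaDevices.getUserMedia({ audio: true })
  .then(stream => {
    const recorder = new MediaRecorder(stream);
    const recognition = new webkitSpeechRecognition();

    recognition.onsoundstart = () => {
      if (recorder.state === "inactive") recorder.start();
      else if (recorder.state === "paused") recorder.resume();
    };
    recognition.onsoundend = () => {
      if (recorder.state === "recording") {
        recorder.pause();
        recorder.requestData(); // fires dataavailable with the chunk so far
      }
    };
    recorder.ondataavailable = e => console.log(e.data); // Blob of the voice
    recognition.continuous = true;
    recognition.start();
  })
  .catch(console.error);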

<!DOCTYPE html>
<html>

<head>
  <title>Speech Recognition Recording</title>
</head>

<body>
  <input type="button" value="Stop speech command recognition" id="stop">
  <script>
    navigator.mediaDevices.getUserMedia({
        audio: true
      })
      .then(stream => {
        const recorder = new MediaRecorder(stream);
        const recognition = new webkitSpeechRecognition();
        const synthesis = new SpeechSynthesisUtterance();
        const handleResult = e => {
          recognition.onresult = null;
          console.log(e.results);
          const result = e.results[e.results.length - 1];

          if (result.isFinal) {
            const [{transcript}] = result;
            console.log(transcript);
            synthesis.text = transcript;
            window.speechSynthesis.speak(synthesis);
          }
        }
        synthesis.onstart = () => {
          if (recorder.state === "inactive") {
            recorder.start()
          } else {
            if (recorder.state === "paused") {
              recorder.resume();
            }
          }
        }
        synthesis.onend = () => {
          recorder.pause();
          recorder.requestData();
        }
        recorder.ondataavailable = async(e) => {
          if (stream.active) {
            try {
              const blobURL = URL.createObjectURL(e.data);
              const request = await fetch(blobURL);
              const ab = await request.arrayBuffer();
              console.log(blobURL, ab);
              recognition.onresult = handleResult;
              // URL.revokeObjectURL(blobURL);
            } catch (err) {
              throw err
            }
          }
        }
        recorder.onpause = e => {
          console.log("recorder " + recorder.state);
        }
        recognition.continuous = true;
        recognition.interimResults = false;
        recognition.maxAlternatives = 1;
        recognition.start();
        recognition.onend = e => {
          console.log("recognition ended, stream.active", stream.active);

          if (stream.active) {
            console.log(e);
            // the service disconnects after a period of time
            recognition.start();
          }
        }
        recognition.onresult = handleResult;

        stream.oninactive = () => {
          console.log("stream ended");
        }

        document.getElementById("stop")
          .onclick = () => {
            console.log("stream.active:", stream.active);
            if (stream && stream.active && recognition) {
              recognition.abort();
              recorder.stop();
              for (let track of stream.getTracks()) {
                track.stop();
              }
              console.log("stream.active:", stream.active);
            }
          }

      })
      .catch(err => {
        console.error(err)
      });
  </script>
</body>

</html>

plnkr https://plnkr.co/edit/4DVEg6mhFRR94M5gdaIp?p=preview

The simplest approach would be to use the .pause(), .resume() and .stop() methods of MediaRecorder() to allow the user to start, pause, and stop recording the audio captured via navigator.mediaDevices.getUserMedia(), and to convert the resulting Blob to an ArrayBuffer, if that is what the API expects to be POSTed to the server.

<!DOCTYPE html>
<html>

<head>
  <title>User Media Recording</title>
</head>

<body>
  <input type="button" value="Start/resume recording audio" id="start">
  <input type="button" value="Pause recording audio" id="pause">
  <input type="button" value="Stop recording audio" id="stop">
  <script>
    navigator.mediaDevices.getUserMedia({
        audio: true
      })
      .then(stream => {
        const recorder = new MediaRecorder(stream);

        recorder.ondataavailable = async(e) => {
          if (stream.active) {
            try {
              const blobURL = URL.createObjectURL(e.data);
              const request = await fetch(blobURL);
              const ab = await request.arrayBuffer();
              // do stuff with `ArrayBuffer` of recorded audio
              console.log(blobURL, ab);
              // we do not need the `Blob URL`, we can revoke the object
              // URL.revokeObjectURL(blobURL);
            } catch (err) {
              throw err
            }
          }
        }
        recorder.onpause = e => {
          console.log("recorder " + recorder.state);
          recorder.requestData();
        }

        stream.oninactive = () => {
          console.log("stream ended");
        }

        document.getElementById("start")
          .onclick = () => {

            if (recorder.state === "inactive") {
              recorder.start();
            } else {
              recorder.resume();
            }
            console.log("recorder.state:", recorder.state);
          }

        document.getElementById("pause")
          .onclick = () => {

            if (recorder.state === "recording") {
              recorder.pause();
            }
            console.log("recorder.state:", recorder.state);
          }

        document.getElementById("stop")
          .onclick = () => {

            if (recorder.state === "recording" || recorder.state === "paused") {
              recorder.stop();
            }

            for (let track of stream.getTracks()) {
              track.stop();
            }

            document.getElementById("start").onclick = null;
            document.getElementById("pause").onclick = null;
            console.log("recorder.state:", recorder.state
            , "stream.active", stream.active);
          }

      })
      .catch(err => {
        console.error(err)
      });
  </script>
</body>

</html>

plnkr https://plnkr.co/edit/7caWYMsvub90G6pwDdQp?p=preview
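
Since the question mentions a NodeJS back-end, here is a hedged sketch of the receiving side; the /speech-to-text route, the port, and the use of Express are illustrative assumptions, not part of either answer:

// Hypothetical NodeJS/Express receiver for the POSTed audio bytes.
const express = require('express');
const app = express();

// accept the raw body as a Buffer (adjust type/limit to the recorder output)
app.use(express.raw({ type: '*/*', limit: '10mb' }));

app.post('/speech-to-text', (req, res) => {
  const audioBuffer = req.body; // Node Buffer with the recorded audio
  console.log('received', audioBuffer.length, 'bytes');
  // hand audioBuffer to the Google Cloud Speech-to-Text client here
  res.sendStatus(200);
});

app.listen(3000, () => console.log('listening on port 3000'));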
