简体   繁体   English

Microsoft Speech-to-Text SDK JS 不接受包含长字节数组的文件

[英]Microsoft Speech-to-Text SDK JS won't accept a file with a long array of bytes

I'm using Microsoft's Azure speech-to-text SDK to get text from a .wav file, using JavaScript.我正在使用 JavaScript,通过 Microsoft 的 Azure 语音转文本 SDK 从 .wav 文件中获取文本。 The problem is, the recognizer won't accept the File object and returns the error "Uncaught rangeerror: source array is too long".问题是,识别器不接受 File 对象,并返回错误“Uncaught rangeerror: source array is too long”。 Calling .slice(0, 2248) on the blob that is used to make the File object works correctly, returning the correct first word of the .wav file.对用于创建 File 对象的 blob 调用 .slice(0, 2248) 可以正常工作,并返回 .wav 文件中正确的第一个单词。 But if I try to slice the blob into later chunks such as (2249, 4497), it returns the error "Uncaught rangeerror: offset is outside the bounds of the DataView".但是,如果我尝试将 blob 切成后续的块,例如 (2249, 4497),则会返回错误“Uncaught rangeerror: offset is outside the bounds of the DataView”。 I'm at a loss for how to either a) get the recognizer to accept a blob with a long source array or b) break the blob into chunks that aren't out of bounds.我不知道如何 a) 让识别器接受具有长源数组的 blob,或 b) 将 blob 分成不超出范围的块。 The .wav URL has been changed to dashes for anonymity and should be ignored.为了匿名,.wav 的 URL 已被替换为破折号,请忽略。 Any solutions are appreciated!任何解决方案都不胜感激!

JS:
<script>
//get wav file from url, create File object with it
/**
 * Downloads the sample .wav file, wraps it in a File object and runs
 * continuous speech recognition over it, logging every event to the console.
 *
 * Relies on the global `SpeechSDK` object exposed by the Speech SDK browser
 * bundle; all SDK enums are read from that namespace.
 * Download and setup failures are reported through console.error.
 */
function fromFile() {
  fetch("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav")
    .then((response) => {
      // fetch() only rejects on network failure; without this check an HTTP
      // error page would be handed to the recognizer as if it were WAV data.
      if (!response.ok) {
        throw new Error(`Failed to download wav file: HTTP ${response.status}`);
      }
      return response.blob();
    })
    .then((blob) => {
      const file = new File([blob], "http://www.---------.com/prod/wp-content/uploads/2020/12/cutafew.wav", {
        type: "audio/x-wav",
        lastModified: new Date().getTime(),
      });

      // NOTE(review): the subscription key is hard-coded in a client-side
      // script, so anyone who can view the page can read it — consider
      // rotating it and supplying it from configuration instead.
      const speechConfig = SpeechSDK.SpeechConfig.fromSubscription("f6abc3bfabc64f0d820d537c0d738788", "centralus");
      const audioConfig = SpeechSDK.AudioConfig.fromWavFileInput(file);
      const recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

      // Intermediate hypotheses while audio is still being processed.
      recognizer.recognizing = (s, e) => {
        console.log(e.result);
        console.log(`RECOGNIZING: Text=${e.result.text}`);
      };

      // Final result for a phrase. The enums live on the SpeechSDK namespace;
      // referencing them unqualified throws a ReferenceError in the browser.
      recognizer.recognized = (s, e) => {
        if (e.result.reason === SpeechSDK.ResultReason.RecognizedSpeech) {
          console.log(`RECOGNIZED: Text=${e.result.text}`);
        } else if (e.result.reason === SpeechSDK.ResultReason.NoMatch) {
          console.log("NOMATCH: Speech could not be recognized.");
        }
      };

      recognizer.canceled = (s, e) => {
        console.log(`CANCELED: Reason=${e.reason}`);

        if (e.reason === SpeechSDK.CancellationReason.Error) {
          console.log(`CANCELED: ErrorCode=${e.errorCode}`);
          console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
          console.log("CANCELED: Did you update the subscription info?");
        }

        recognizer.stopContinuousRecognitionAsync();
      };

      recognizer.sessionStopped = (s, e) => {
        console.log("\n    Session stopped event.");
        recognizer.stopContinuousRecognitionAsync();
      };

      recognizer.startContinuousRecognitionAsync();
    })
    // Reports download failures and any error thrown while setting up
    // the recognizer.
    .catch((err) => console.error(err));
}

// Start the download and recognition as soon as this script is evaluated.
fromFile();
</script>

You can use the "Recognize from in-memory stream" example:您可以使用“从内存流识别”示例:

const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
// Speech service configuration used by fromStream(); the key and region
// strings below are placeholders that must be replaced with real values.
const speechConfig = sdk.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "<paste-your-speech-location/region-here>");

/**
 * Recognizes a single utterance from "YourAudioFile.wav" by reading the file
 * as a stream and feeding each chunk into an SDK push stream.
 *
 * The recognized text is logged to the console. Read errors and recognition
 * errors are logged with console.error, and the push stream / recognizer are
 * closed in every outcome so nothing is left open.
 */
function fromStream() {
    const pushStream = sdk.AudioInputStream.createPushStream();

    fs.createReadStream("YourAudioFile.wav")
        .on('data', (chunk) => {
            // NOTE(review): Node's Buffer#slice() returns a view over the same
            // memory rather than a copy — confirm the SDK does not need an
            // independently owned buffer here.
            pushStream.write(chunk.slice());
        })
        .on('end', () => {
            pushStream.close();
        })
        .on('error', (err) => {
            // Without a handler a missing/unreadable file raises an unhandled
            // 'error' event; closing the push stream signals end of audio.
            console.error(`Failed to read audio file: ${err.message}`);
            pushStream.close();
        });

    const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
    const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
    recognizer.recognizeOnceAsync(
        (result) => {
            console.log(`RECOGNIZED: Text=${result.text}`);
            recognizer.close();
        },
        (err) => {
            // Release the recognizer on failure as well as on success.
            console.error(`ERROR: ${err}`);
            recognizer.close();
        }
    );
}
// Run the recognition once when the script is executed.
fromStream();

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

相关问题 用于语音转文本WebSocket的IBM Watson JavaScript SDK - IBM Watson JavaScript SDK for Speech-To-Text WebSocket Issue IBM Watson Speech-to-Text JavaScript SDK:如何获取消息? - IBM Watson Speech-to-Text JavaScript SDK: how to get messages? 在Microsoft BotFramework WebChat中集成用于文本到语音和语音到文本的认知语音服务 - Integrating Cognitive Speech Service for both Text-to-Speech and Speech-to-Text in Microsoft BotFramework WebChat 如何将本地html5录制的音频的float32Array格式转换为Google语音转文本服务的适当字节? - How to convert the float32Array format of native html5 recorded audio to proper bytes for Google Speech-to-Text service? 如何使用 AIFF 文件进行语音转文本 - How to get speech-to-text working with an AIFF file Chrome中使用javascript语音转换文字无法识别任何内容 - Speech-to-text with javascript in Chrome doesn't recognize anything 语音到文本识别不准确 - Speech-to-text Recognition is not accurate 针对盲人的JavaScript语音转文本 - JavaScript Speech-to-Text for blind people 显示Google Cloud语音转文字 - Displaying Google Cloud Speech-to-Text Node.js中的Google Cloud语音到文本api编码问题 - Google Cloud Speech-to-Text api encoding issues in Node.js
 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM