[英]Microsoft Speech-to-Text SDK JS won't accept a file with a long array of bytes
I'm using Microsoft's Azure speech-to-text SDK to get text from a .wav file, using JavaScript.我正在使用 Microsoft 的 Azure 语音到文本 SDK 从 a.wav 文件中获取文本,使用 JavaScript。 The problem is, the recognizer won't accept the File object and returns the error "Uncaught rangeerror: source array is too long".
问题是,识别器不会接受文件 object 并返回错误“Uncaught rangeerror: source array is too long”。 Calling .slice(0, 2248) on the blob that is used to make the File object works correctly, returning the correct first word of the .wav file.
在用于使文件 object 正常工作的 blob 上调用 .slice(0, 2248),返回 .wav 文件的正确第一个字。 But if I try to slice the blob into chunks like (2249, 4497) returns the error "Uncaught rangeerror: offset is outside the bounds of the DataView".
但是,如果我尝试将 blob 切成块,例如 (2249, 4497),则会返回错误“Uncaught rangeerror: offset is outside the bounds of the DataView”。 I'm at a loss for how to either a) get the recognizer to accept a blob with a long source array or b) break the blob into chunks that aren't out of bounds.
我不知道如何 a) 让识别器接受具有长源数组的 blob 或 b) 将 blob 分成不超出范围的块。 The .wav URL has been changed to dashes for anonymity and should be ignored.
.wav url 已更改为匿名破折号,应忽略。 Any solutions are appreciated!
任何解决方案表示赞赏!
JS:
<script>
//get wav file from url, create File object with it
// Fetch a remote .wav file and run continuous speech recognition on it
// with the Azure Speech SDK (browser bundle, exposed as `SpeechSDK`).
function fromFile() {
  fetch("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav")
    .then(response => response.blob())
    .then(blob => {
      // Wrap the blob in a File so AudioConfig.fromWavFileInput accepts it.
      var file = new File([blob], "http://www.---------.com/prod/wp-content/uploads/2020/12/cutafew.wav", {
        type: "audio/x-wav", lastModified: new Date().getTime()
      });
      // SECURITY: this subscription key is exposed to every visitor of the
      // page — rotate it and issue short-lived authorization tokens from a
      // backend (SpeechConfig.fromAuthorizationToken) instead.
      var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("f6abc3bfabc64f0d820d537c0d738788", "centralus");
      var audioConfig = SpeechSDK.AudioConfig.fromWavFileInput(file);
      var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);
      // Interim (in-progress) hypotheses while audio is still being processed.
      recognizer.recognizing = (s, e) => {
        console.log(e.result);
        console.log(`RECOGNIZING: Text=${e.result.text}`);
      };
      // Final result for each utterance. BUG FIX: ResultReason must be
      // qualified as SpeechSDK.ResultReason — the bare identifier threw an
      // "Uncaught ReferenceError" inside this handler.
      recognizer.recognized = (s, e) => {
        if (e.result.reason === SpeechSDK.ResultReason.RecognizedSpeech) {
          console.log(`RECOGNIZED: Text=${e.result.text}`);
        } else if (e.result.reason === SpeechSDK.ResultReason.NoMatch) {
          console.log("NOMATCH: Speech could not be recognized.");
        }
      };
      recognizer.canceled = (s, e) => {
        console.log(`CANCELED: Reason=${e.reason}`);
        // BUG FIX: CancellationReason likewise lives on the SpeechSDK namespace.
        if (e.reason === SpeechSDK.CancellationReason.Error) {
          console.log(`"CANCELED: ErrorCode=${e.errorCode}`);
          console.log(`"CANCELED: ErrorDetails=${e.errorDetails}`);
          console.log("CANCELED: Did you update the subscription info?");
        }
        recognizer.stopContinuousRecognitionAsync();
      };
      recognizer.sessionStopped = (s, e) => {
        console.log("\n Session stopped event.");
        recognizer.stopContinuousRecognitionAsync();
      };
      recognizer.startContinuousRecognitionAsync();
    })
    // Log the failure if the fetch or File construction failed.
    .catch(err => console.error(err));
}
fromFile();
</script>
You can use the "Recognize from in-memory stream" example:您可以使用从内存 stream 示例中识别
// Recognize speech from a local .wav file by pushing its bytes through an
// in-memory stream (Node.js, Azure Speech SDK).
const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");

const speechConfig = sdk.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "<paste-your-speech-location/region-here>");

function fromStream() {
  // Push stream the recognizer drains as audio chunks arrive.
  const pushStream = sdk.AudioInputStream.createPushStream();
  const fileStream = fs.createReadStream("YourAudioFile.wav");
  fileStream.on('data', (chunk) => {
    // .slice() copies the chunk before handing it to the SDK.
    pushStream.write(chunk.slice());
  });
  fileStream.on('end', () => {
    pushStream.close();
  });

  const audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
  const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
  // Single-utterance recognition; release the recognizer once done.
  recognizer.recognizeOnceAsync((result) => {
    console.log(`RECOGNIZED: Text=${result.text}`);
    recognizer.close();
  });
}

fromStream();
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.