
Microsoft Speech-to-Text SDK JS won't accept a file with a long array of bytes

I'm using Microsoft's Azure Speech-to-Text SDK for JavaScript to get text from a .wav file. The problem is that the recognizer won't accept the File object and returns the error "Uncaught RangeError: source array is too long". Calling .slice(0, 2248) on the blob that is used to make the File object works correctly and returns the correct first word of the .wav file, but if I try to slice the blob into further chunks, e.g. .slice(2249, 4497), I get the error "Uncaught RangeError: offset is outside the bounds of the DataView". I'm at a loss for how to either a) get the recognizer to accept a blob with a long source array, or b) break the blob into chunks that aren't out of bounds. The .wav URL has been changed to dashes for anonymity and should be ignored. Any solutions are appreciated!

JS:
<script>
//get wav file from url, create File object with it
function fromFile() {
  fetch("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav")
  .then(response => response.blob())
  .then(blob => {
    var file = new File([blob], "http://www.---------.com/prod/wp-content/uploads/2020/12/cutafew.wav", {
               type:"audio/x-wav", lastModified:new Date().getTime()
             });

//if file got successfully, do the following:
    var reader = new FileReader();
    var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("f6abc3bfabc64f0d820d537c0d738788", "centralus");
    var audioConfig = SpeechSDK.AudioConfig.fromWavFileInput(file);
    var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);


    //use recognizer to convert wav file to text
    recognizer.recognizing = (s, e) => {
        console.log(e.result);
        console.log(`RECOGNIZING: Text=${e.result.text}`);
    };

    recognizer.recognized = (s, e) => {
        if (e.result.reason == SpeechSDK.ResultReason.RecognizedSpeech) {
            console.log(`RECOGNIZED: Text=${e.result.text}`);
        }
        else if (e.result.reason == SpeechSDK.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        }
    };

    recognizer.canceled = (s, e) => {
        console.log(`CANCELED: Reason=${e.reason}`);

        if (e.reason == SpeechSDK.CancellationReason.Error) {
            console.log(`CANCELED: ErrorCode=${e.errorCode}`);
            console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
            console.log("CANCELED: Did you update the subscription info?");
        }

        recognizer.stopContinuousRecognitionAsync();
    };

    recognizer.sessionStopped = (s, e) => {
        console.log("\n    Session stopped event.");
        recognizer.stopContinuousRecognitionAsync();
    };

    recognizer.startContinuousRecognitionAsync();

})
//throw error if file wasn't created
.catch(err => console.error(err));

}

fromFile();
</script>

You can use the "Recognize from in-memory stream" example:

const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
const speechConfig = sdk.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "<paste-your-speech-location/region-here>");

function fromStream() {
    // Create a push stream and pipe the .wav file into it chunk by chunk
    let pushStream = sdk.AudioInputStream.createPushStream();

    fs.createReadStream("YourAudioFile.wav").on('data', function(arrayBuffer) {
        pushStream.write(arrayBuffer.slice());
    }).on('end', function() {
        // No more audio: close the stream so recognition can finish
        pushStream.close();
    });

    // Build the recognizer from the stream instead of a File object
    let audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
    let recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
    recognizer.recognizeOnceAsync(result => {
        console.log(`RECOGNIZED: Text=${result.text}`);
        recognizer.close();
    });
}
fromStream();
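
Since the question's code runs in the browser with the global SpeechSDK bundle, the same push-stream approach can be used there in place of fromWavFileInput. Below is a minimal sketch under those assumptions; the fromFetchedWav name, the placeholder key, and the dashed URL are illustrative, and it assumes the fetched .wav matches the push stream's default format (16 kHz, 16-bit, mono PCM):

// Sketch: feed the fetched bytes to the recognizer through a push stream
// instead of a File object. Assumes the global SpeechSDK browser bundle
// and a 16 kHz, 16-bit, mono PCM .wav (the push stream's default format).
function fromFetchedWav(url) {
    var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "centralus");
    var pushStream = SpeechSDK.AudioInputStream.createPushStream();

    fetch(url)
        .then(function (response) { return response.arrayBuffer(); })
        .then(function (arrayBuffer) {
            pushStream.write(arrayBuffer); // hand the whole buffer to the SDK
            pushStream.close();            // signal end of audio
        })
        .catch(function (err) { console.error(err); });

    var audioConfig = SpeechSDK.AudioConfig.fromStreamInput(pushStream);
    var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

    recognizer.recognizeOnceAsync(function (result) {
        console.log("RECOGNIZED: Text=" + result.text);
        recognizer.close();
    });
}

fromFetchedWav("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav");

Writing the whole ArrayBuffer and then closing the stream sidesteps the chunking question entirely, since the push stream consumes the bytes as one continuous audio sequence.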
