
Microsoft Speech-to-Text SDK JS won't accept a file with a long array of bytes

I'm using Microsoft's Azure Speech-to-Text SDK for JavaScript to get text from a .wav file. The problem is that the recognizer won't accept the File object and returns the error "Uncaught RangeError: source array is too long". Calling .slice(0, 2248) on the blob that is used to make the File object works correctly and returns the correct first word of the .wav file, but if I try to slice the blob into further chunks, e.g. .slice(2249, 4497), I get the error "Uncaught RangeError: offset is outside the bounds of the DataView". I'm at a loss for how to either a) get the recognizer to accept a blob with a long source array, or b) break the blob into chunks that aren't out of bounds. The .wav URL has been changed to dashes for anonymity and should be ignored. Any solutions are appreciated!

JS:
<script>
//get wav file from url, create File object with it
function fromFile() {
  fetch("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav")
  .then(response => response.blob())
  .then(blob => {
    var file = new File([blob], "http://www.---------.com/prod/wp-content/uploads/2020/12/cutafew.wav", {
               type:"audio/x-wav", lastModified:new Date().getTime()
             });

//if file got successfully, do the following:
    var reader = new FileReader();
    var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("f6abc3bfabc64f0d820d537c0d738788", "centralus");
    var audioConfig = SpeechSDK.AudioConfig.fromWavFileInput(file);
    var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);


    //use recognizer to convert wav file to text
    recognizer.recognizing = (s, e) => {
        console.log(e.result);
        console.log(`RECOGNIZING: Text=${e.result.text}`);
    };

    recognizer.recognized = (s, e) => {
        if (e.result.reason == SpeechSDK.ResultReason.RecognizedSpeech) {
            console.log(`RECOGNIZED: Text=${e.result.text}`);
        }
        else if (e.result.reason == SpeechSDK.ResultReason.NoMatch) {
            console.log("NOMATCH: Speech could not be recognized.");
        }
    };

    recognizer.canceled = (s, e) => {
        console.log(`CANCELED: Reason=${e.reason}`);

        if (e.reason == SpeechSDK.CancellationReason.Error) {
            console.log(`CANCELED: ErrorCode=${e.errorCode}`);
            console.log(`CANCELED: ErrorDetails=${e.errorDetails}`);
            console.log("CANCELED: Did you update the subscription info?");
        }

        recognizer.stopContinuousRecognitionAsync();
    };

    recognizer.sessionStopped = (s, e) => {
        console.log("\n    Session stopped event.");
        recognizer.stopContinuousRecognitionAsync();
    };

    recognizer.startContinuousRecognitionAsync();

})
//throw error if file wasn't created
.catch(err => console.error(err));

}

fromFile();
</script>

You can use the "Recognize from in-memory stream" example:

const fs = require('fs');
const sdk = require("microsoft-cognitiveservices-speech-sdk");
const speechConfig = sdk.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "<paste-your-speech-location/region-here>");

function fromStream() {
    // Create a push stream and pipe the .wav file into it chunk by chunk
    let pushStream = sdk.AudioInputStream.createPushStream();

    fs.createReadStream("YourAudioFile.wav").on('data', function(arrayBuffer) {
        pushStream.write(arrayBuffer.slice());
    }).on('end', function() {
        // No more audio: close the stream so recognition can finish
        pushStream.close();
    });

    // Build the recognizer from the stream instead of a File object
    let audioConfig = sdk.AudioConfig.fromStreamInput(pushStream);
    let recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);
    recognizer.recognizeOnceAsync(result => {
        console.log(`RECOGNIZED: Text=${result.text}`);
        recognizer.close();
    });
}
fromStream();
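
Since the question's code runs in the browser with the global SpeechSDK bundle, the same push-stream approach can be used there in place of fromWavFileInput. Below is a minimal sketch under those assumptions; the fromFetchedWav name, the placeholder key, and the dashed URL are illustrative, and it assumes the fetched .wav matches the push stream's default format (16 kHz, 16-bit, mono PCM):

// Sketch: feed the fetched bytes to the recognizer through a push stream
// instead of a File object. Assumes the global SpeechSDK browser bundle
// and a 16 kHz, 16-bit, mono PCM .wav (the push stream's default format).
function fromFetchedWav(url) {
    var speechConfig = SpeechSDK.SpeechConfig.fromSubscription("<paste-your-speech-key-here>", "centralus");
    var pushStream = SpeechSDK.AudioInputStream.createPushStream();

    fetch(url)
        .then(function (response) { return response.arrayBuffer(); })
        .then(function (arrayBuffer) {
            pushStream.write(arrayBuffer); // hand the whole buffer to the SDK
            pushStream.close();            // signal end of audio
        })
        .catch(function (err) { console.error(err); });

    var audioConfig = SpeechSDK.AudioConfig.fromStreamInput(pushStream);
    var recognizer = new SpeechSDK.SpeechRecognizer(speechConfig, audioConfig);

    recognizer.recognizeOnceAsync(function (result) {
        console.log("RECOGNIZED: Text=" + result.text);
        recognizer.close();
    });
}

fromFetchedWav("http://www.-----------.com/prod/wp-content/uploads/2020/12/cutafew.wav");

Writing the whole ArrayBuffer and then closing the stream sidesteps the chunking question entirely, since the push stream consumes the bytes as one continuous audio sequence.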
