簡體   English   中英

無法使用@google-cloud/speech 識別來自 GCS uri 的文本內容

[英]Unable to recognize text content from GCS uri using @google-cloud/speech

當我從本地緩沖區加載文件時它正在工作。 但是當我使用 GCS URI 加載同一個文件時,響應是 null。

    // Path of the local audio file (only needed for the `content` variant below).
    const fileName = './audio.wav';

    // Reads a local audio file and converts it to base64
    // NOTE(review): this read runs even when the GCS `uri` variant is active,
    // so it throws if ./audio.wav is missing — presumably left over from testing.
    const file = fs.readFileSync(fileName);
    const audioBytes = file.toString('base64');

    // Audio source: either a GCS URI or inline base64 content (mutually exclusive).
    const audio = {
        uri: 'gs://bucket-name/path-to-audio/audio.wav'
        // content: audioBytes
    };
    // Recognition settings — encoding/sample rate/language must match the actual file.
    const config = {
        audioChannelCount: 1,
        encoding: 'LINEAR16',
        sampleRateHertz: 16000,
        languageCode: 'ta-IN',
    };
    const request = {
        audio: audio,
        config: config,
    };

    // Detects speech in the audio file
    // NOTE(review): this only starts the long-running job; operation.name is the
    // job id. The transcript is available only after awaiting operation.promise().
    const [operation] = await client.longRunningRecognize(request);
    console.info('OPERATION STATUS', operation.name);

當我嘗試使用 GCS URI 加載它時,我收到 null 作為響應。 然而,當我嘗試將相同的文件作為緩沖區發送時,我得到了正確的響應。

# from GCS
TRANSLATION STATUS true
OPERATION COMPLETE STATUS  3489419937829075659 null undefined

# from local file
TRANSLATION STATUS true
OPERATION COMPLETE STATUS  390578141483807025 வணக்கம் வணக்கம் வணக்கம் SpeechRecognitionResult {
  alternatives: [
    SpeechRecognitionAlternative {
      words: [],
      transcript: 'வணக்கம் வணக்கம் வணக்கம்',
      confidence: 0.8997038006782532
    }
  ]
}

當我在控制台輸出名稱為 *3489419937829075659* 的操作時,得到如下輸出:

STATUS DATA Operation {
  _events: [Object: null prototype] {
    newListener: [Function],
    removeListener: [Function]
  },
  _eventsCount: 2,
  _maxListeners: undefined,
  completeListeners: 0,
  hasActiveListeners: false,
  latestResponse: {
    name: '3489419937829075659',
    metadata: {
      type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeMetadata',
      value: <Buffer 08 64 12 0c 08 d7 9e b7 fa 05 10 90 d6 a3 a5 02 1a 0c 08 dc 9e b7 fa 05 10 e0 b7 f0 c8 02 22 40 67 73 3a 2f 2f 73 74 61 67 69 6e 67 2e 63 65 72 74 69 ... 46 more bytes>
    },
    done: true,
    response: {
      type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeResponse',
      value: <Buffer >
    },
    result: 'response'
  },
  name: '3489419937829075659',
  done: true,
  error: undefined,
  longrunningDescriptor: LongRunningDescriptor {
    operationsClient: OperationsClient {
      auth: [GoogleAuth],
      innerApiCalls: [Object],
      descriptor: [Object]
    },
    responseDecoder: [Function: bound decode_setup],
    metadataDecoder: [Function: bound decode_setup]
  },
  result: LongRunningRecognizeResponse { results: [] },
  metadata: LongRunningRecognizeMetadata {
    progressPercent: 100,
    startTime: Timestamp { seconds: [Long], nanos: 615050000 },
    lastUpdateTime: Timestamp { seconds: [Long], nanos: 689708000 }
  },
  backoffSettings: {
    initialRetryDelayMillis: 100,
    retryDelayMultiplier: 1.3,
    maxRetryDelayMillis: 60000,
    initialRpcTimeoutMillis: null,
    rpcTimeoutMultiplier: null,
    maxRpcTimeoutMillis: null,
    totalTimeoutMillis: null
  },
  response: {
    type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeResponse',
    value: <Buffer >
  },
  _callOptions: undefined,
  [Symbol(kCapture)]: false
}
STATUS DATA undefined

當我在控制台輸出整個操作 object 時,我得到了這個:

STATUS DATA Operation {
  _events: [Object: null prototype] {
    newListener: [Function],
    removeListener: [Function]
  },
  _eventsCount: 2,
  _maxListeners: undefined,
  completeListeners: 0,
  hasActiveListeners: false,
  latestResponse: {
    name: '390578141483807025',
    metadata: {
      type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeMetadata',
      value: <Buffer 08 64 12 0c 08 f4 a3 b7 fa 05 10 88 e0 d4 ab 02 1a 0c 08 f7 a3 b7 fa 05 10 88 b0 ef d4 01>
    },
    done: true,
    response: {
      type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeResponse',
      value: <Buffer 12 4a 0a 48 0a 41 e0 ae b5 e0 ae a3 e0 ae 95 e0 af 8d e0 ae 95 e0 ae ae e0 af 8d 20 e0 ae b5 e0 ae a3 e0 ae 95 e0 af 8d e0 ae 95 e0 ae ae e0 af 8d 20 ... 26 more bytes>
    },
    result: 'response'
  },
  name: '390578141483807025',
  done: true,
  error: undefined,
  longrunningDescriptor: LongRunningDescriptor {
    operationsClient: OperationsClient {
      auth: [GoogleAuth],
      innerApiCalls: [Object],
      descriptor: [Object]
    },
    responseDecoder: [Function: bound decode_setup],
    metadataDecoder: [Function: bound decode_setup]
  },
  result: LongRunningRecognizeResponse {
    results: [ [SpeechRecognitionResult] ]
  },
  metadata: LongRunningRecognizeMetadata {
    progressPercent: 100,
    startTime: Timestamp { seconds: [Long], nanos: 628437000 },
    lastUpdateTime: Timestamp { seconds: [Long], nanos: 446421000 }
  },
  backoffSettings: {
    initialRetryDelayMillis: 100,
    retryDelayMultiplier: 1.3,
    maxRetryDelayMillis: 60000,
    initialRpcTimeoutMillis: null,
    rpcTimeoutMultiplier: null,
    maxRpcTimeoutMillis: null,
    totalTimeoutMillis: null
  },
  response: {
    type_url: 'type.googleapis.com/google.cloud.speech.v1.LongRunningRecognizeResponse',
    value: <Buffer 12 4a 0a 48 0a 41 e0 ae b5 e0 ae a3 e0 ae 95 e0 af 8d e0 ae 95 e0 ae ae e0 af 8d 20 e0 ae b5 e0 ae a3 e0 ae 95 e0 af 8d e0 ae 95 e0 ae ae e0 af 8d 20 ... 26 more bytes>
  },
  _callOptions: undefined,
  [Symbol(kCapture)]: false
}
STATUS DATA SpeechRecognitionResult {
  alternatives: [
    SpeechRecognitionAlternative {
      words: [],
      transcript: 'வணக்கம் வணக்கம் வணக்கம்',
      confidence: 0.8997038006782532
    }
  ]
}

此代碼適用於本地和 GCS:

    /**
     * Transcribes an audio file stored in Google Cloud Storage using the
     * Speech-to-Text long-running recognize API and prints the transcript.
     *
     * Works for both GCS and local input: the active path sends a `gs://` URI;
     * to send a local file instead, switch to the commented `content` variant.
     * Errors (missing bucket, bad credentials, ...) propagate to the caller.
     */
    async function main() {
      // Imports the Google Cloud client library
      const speech = require('@google-cloud/speech');
      const fs = require('fs');

      // Creates a client
      const client = new speech.SpeechClient();

      // Audio source: a GCS URI. To transcribe a local file instead, read it
      // and send inline base64 content (do NOT read the file unconditionally —
      // it would crash when only the GCS path is used):
      //   const audioBytes = fs.readFileSync('./audio.wav').toString('base64');
      //   const audio = { content: audioBytes };
      const audio = {
        uri: 'gs://BUCKET_NAME/audio.wav',
      };

      // These settings could differ from one audio file to another; the
      // encoding, sample rate (Hz) and BCP-47 language code must match the
      // actual recording.
      const config = {
        audioChannelCount: 1,
        encoding: 'LINEAR16',
        sampleRateHertz: 8000,
        languageCode: 'en-US',
      };
      const request = {
        audio: audio,
        config: config,
      };

      // Start the long-running job, then wait for it to complete:
      // operation.name alone is only the job id — the transcript exists only
      // after operation.promise() resolves.
      const [operation] = await client.longRunningRecognize(request);
      const [response] = await operation.promise();
      const transcription = response.results
        .map(result => result.alternatives[0].transcript)
        .join('\n');
      console.log(`Transcription: ${transcription}`);
    }

    main().catch(console.error);

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM