简体   繁体   中英

Android speech recognition without a time-out?

I am using Android speech recognition and have written some code to identify the spoken words. Please have a look at the code below.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

import android.speech.RecognitionListener;
import android.speech.RecognizerIntent;
import android.speech.SpeechRecognizer;
import android.app.Activity;
import android.content.Intent;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.text.Html;
import android.util.Log;
import android.view.View;
import android.widget.AdapterView;
import android.widget.ArrayAdapter;
import android.widget.CompoundButton;
import android.widget.CompoundButton.OnCheckedChangeListener;
import android.widget.ListView;
import android.widget.ProgressBar;
import android.widget.TextView;
import android.widget.Toast;
import android.widget.ToggleButton;

public class MainActivity extends Activity {

    private TextView returnedText;
    private ToggleButton toggleButton;
    private ProgressBar progressBar;
    private SpeechRecognizer speech = null;
    private Intent recognizerIntent;
    private ListView wordList;
    private String LOG_TAG = "VoiceRecognitionActivity";
    private List<String> previousInterim;
    private diff_match_patch diff;
    private String display = "test";

    private List<String>adapterList = new ArrayList<String>();
    ArrayAdapter<String>  adapter;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        returnedText = (TextView) findViewById(R.id.textView1);
        progressBar = (ProgressBar) findViewById(R.id.progressBar1);
        toggleButton = (ToggleButton) findViewById(R.id.toggleButton1);
        wordList = (ListView)findViewById(R.id.word_list);

        adapter = new ArrayAdapter<String>(this,android.R.layout.simple_list_item_checked, adapterList);
        adapter.setNotifyOnChange(true);
        wordList.setAdapter(adapter);
        progressBar.setVisibility(View.INVISIBLE);

       // createRecog();
        speech = SpeechRecognizer.createSpeechRecognizer(getApplicationContext());
        toggleButton.setOnCheckedChangeListener(new ButtonListener());



    }

    private class ButtonListener implements OnCheckedChangeListener, Runnable
    {
        boolean isChecked;

        @Override
        public void onCheckedChanged(CompoundButton buttonView, boolean isChecked) {

            this.isChecked = isChecked;
            this.run();
        }

        @Override
        public void run() {

            if (isChecked) {
                speech.stopListening();
                speech.cancel();
                speech.destroy();
                createRecog();
                progressBar.setVisibility(View.VISIBLE);
                progressBar.setIndeterminate(true);
                speech.startListening(recognizerIntent);
                adapter.clear();

                returnedText.setText("");

            } else {
                progressBar.setIndeterminate(false);
                progressBar.setVisibility(View.INVISIBLE);
                speech.stopListening();

            }


        }
    }

    private class RecognitionListenerClass implements RecognitionListener
    {


        @Override
        public void onBeginningOfSpeech() {
            Log.i(LOG_TAG, "onBeginningOfSpeech");
            progressBar.setIndeterminate(false);
            progressBar.setMax(10);
            wordList.computeScroll();
        }

        @Override
        public void onBufferReceived(byte[] buffer) {
            Log.i(LOG_TAG, "onBufferReceived: " + buffer);
        }

        @Override
        public void onEndOfSpeech() {
            Log.i(LOG_TAG, "onEndOfSpeech");
            progressBar.setIndeterminate(true);
            toggleButton.setChecked(false);
        }

        @Override
        public void onError(int errorCode) {
            String errorMessage = getErrorText(errorCode);
            Log.d(LOG_TAG, "FAILED " + errorMessage);
            returnedText.setText(errorMessage);
            toggleButton.setChecked(false);


          //  speech = null;
            toggleButton.performClick();
        }

        @Override
        public void onEvent(int arg0, Bundle arg1) {
            Log.i(LOG_TAG, "onEvent");
        }

        @Override
        public void onPartialResults(Bundle arg0) {
            Log.i(LOG_TAG, "onPartialResults");
            final ArrayList<String> matches = arg0.getStringArrayList(SpeechRecognizer.RESULTS_RECOGNITION);
            final float[] scores = arg0.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES);
            receiveWhatWasHeard(matches, scores);

        }

        @Override
        public void onReadyForSpeech(Bundle arg0) {
            Log.i(LOG_TAG, "onReadyForSpeech");
        }

        @Override
        public void onResults(Bundle results) {

        }

        @Override
        public void onRmsChanged(float rmsdB) {
            Log.i(LOG_TAG, "onRmsChanged: " + rmsdB);
            progressBar.setProgress((int) rmsdB);
        }
    }


    private void receiveWhatWasHeard(ArrayList<String> matches, float[] scores) {

        Log.i(LOG_TAG, matches.get(0));
    returnedText.setText(matches.get(0));        
    }

    private void createRecog()
    {
        speech = SpeechRecognizer.createSpeechRecognizer(this);
        speech.setRecognitionListener(new RecognitionListenerClass());
        recognizerIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_PREFERENCE,
                "en");
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_CALLING_PACKAGE,
                this.getPackageName());
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE_MODEL,
                RecognizerIntent.LANGUAGE_MODEL_WEB_SEARCH);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_MAX_RESULTS, 3);
        recognizerIntent.putExtra("android.speech.extra.DICTATION_MODE", true);
        recognizerIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, true);
    }

    @Override
    public void onResume() {
        super.onResume();
    }

    @Override
    protected void onPause() {
        super.onPause();
    }



    public  String getErrorText(int errorCode) {
        String message;
        switch (errorCode) {
            case SpeechRecognizer.ERROR_AUDIO:
                message = "Audio recording error";
                break;
            case SpeechRecognizer.ERROR_CLIENT:
                message = "Client side error";
                break;
            case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS:
                message = "Insufficient permissions";
                break;
            case SpeechRecognizer.ERROR_NETWORK:
                message = "Network error";
                break;
            case SpeechRecognizer.ERROR_NETWORK_TIMEOUT:
                message = "Network timeout";
                break;
            case SpeechRecognizer.ERROR_NO_MATCH:
                message = "No match";
                break;
            case SpeechRecognizer.ERROR_RECOGNIZER_BUSY:
                message = "RecognitionService busy";
                break;
            case SpeechRecognizer.ERROR_SERVER:
                message = "error from server";
                break;
            case SpeechRecognizer.ERROR_SPEECH_TIMEOUT:
                message = "No speech input";
                break;
            default:
                message = "Didn't understand, please try again.";
                break;
        }

        return message;
    }

}

However, I need to run this continuously, which I have so far failed to do. Restarting the recognition at the end of speech could be a good idea, but it produces a small noise and takes 1-3 seconds to load, which means some of the spoken words will be lost.

Yes, I know Google has said that their system is not meant for continuous recognition, but several people have built work-arounds before. Unfortunately, these work-arounds no longer seem to work, as they were written against fairly old APIs. If it was possible with older versions, then it should certainly be possible with newer ones.

So, any ideas about how to achieve this continuous recognition task? I am using API 15.

read here for background

this demo is continuous and uses the chromium source mentioned in the pultz blog comments stream

The full-duplex Google API works fine on Android. AFAIK, it is still rate-limited, making it useless in a production app.

google full duplex stdout sample using curl cli

IBM Watson has a full-duplex, production-ready API supporting continuous mode. You would have to dig into the details in the docs, but a general CLI sample is here

see continuous here on watson api

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM