content/browser/speech/speech_recognizer_impl.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
   6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
   7 #pragma once
   8
   9 #include "base/basictypes.h"
  10 #include "base/memory/ref_counted.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "content/browser/speech/endpointer/endpointer.h"
  13 #include "content/browser/speech/speech_recognition_engine.h"
  14 #include "content/public/common/speech_recognition_error.h"
  15 #include "content/public/common/speech_recognition_result.h"
  16 #include "media/audio/audio_input_controller.h"
  17 #include "net/url_request/url_request_context_getter.h"
  18
  19 namespace content {
     // Forward declaration is sufficient: this header only uses the type by pointer.
  20 class SpeechRecognitionEventListener;
  21 }  // namespace content
  22
  23 namespace media {
     // Forward declaration is sufficient: this header only uses the type by pointer.
  24 class AudioManager;
  25 }  // namespace media
  26
  27 namespace speech {
  28
  29 // TODO(primiano) Next CL: Remove the Impl suffix.
  30
  31 // Handles speech recognition for a session (identified by |session_id|), taking
  32 // care of audio capture, silence detection/endpointer and interaction with the
  33 // SpeechRecognitionEngine.
     //
     // The recognizer is organized as a finite state machine (FSM): external inputs
     // are described by FSMEvent/FSMEventArgs, pushed in through DispatchEvent(),
     // and mapped to a transition by ExecuteTransitionAndGetNextState(). Audio
     // notifications arrive through the media::AudioInputController::EventHandler
     // interface and engine notifications through SpeechRecognitionEngineDelegate.
     //
     // Instances are ref-counted via base::RefCountedThreadSafe; the destructor is
     // private, so they cannot be deleted directly by client code.
  34 class CONTENT_EXPORT SpeechRecognizerImpl
  35     : public base::RefCountedThreadSafe<SpeechRecognizerImpl>,
  36       public media::AudioInputController::EventHandler,
  37       public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
  38  public:
       // Audio capture and timing parameters. They are only declared here; the
       // values are defined out of line.
       // NOTE(review): the "Ms" suffix presumably denotes milliseconds -- confirm
       // against the definitions.
  39   static const int kAudioSampleRate;
  40   static const ChannelLayout kChannelLayout;
  41   static const int kNumBitsPerAudioSample;
  42   static const int kNoSpeechTimeoutMs;
  43   static const int kEndpointerEstimationTimeMs;
  44
       // Creates a recognizer for the session identified by |session_id|.
       // NOTE(review): |listener| is presumably the sink for recognition events and
       // is kept as a raw pointer (listener_); |engine| ends up in a scoped_ptr
       // (recognition_engine_), so ownership is presumably transferred to this
       // object -- confirm both in the .cc file.
  45   SpeechRecognizerImpl(
  46       content::SpeechRecognitionEventListener* listener,
  47       int session_id,
  48       SpeechRecognitionEngine* engine);
  49
       // Public control and query interface.
       // NOTE(review): the three void methods presumably feed EVENT_START,
       // EVENT_ABORT and EVENT_STOP_CAPTURE into the FSM -- confirm in the .cc file.
  50   void StartRecognition();
  51   void AbortRecognition();
  52   void StopAudioCapture();
  53   bool IsActive() const;
  54   bool IsCapturingAudio() const;
       // Read-only access to the recognition engine.
  55   const SpeechRecognitionEngine& recognition_engine() const;
  56
  57  private:
       // The ref-counting base needs access to the private destructor.
  58   friend class base::RefCountedThreadSafe<SpeechRecognizerImpl>;
       // Grants the test fixture access to private members.
  59   friend class SpeechRecognizerImplTest;
  60
       // States of the recognizer FSM. STATE_MAX_VALUE is an alias of the last
       // state, not an additional state.
  61   enum FSMState {
  62     STATE_IDLE = 0,
  63     STATE_STARTING,
  64     STATE_ESTIMATING_ENVIRONMENT,
  65     STATE_WAITING_FOR_SPEECH,
  66     STATE_RECOGNIZING,
  67     STATE_WAITING_FINAL_RESULT,
  68     STATE_MAX_VALUE = STATE_WAITING_FINAL_RESULT
  69   };
  70
       // Events accepted by the recognizer FSM. EVENT_MAX_VALUE is an alias of the
       // last event, not an additional event.
  71   enum FSMEvent {
  72     EVENT_ABORT = 0,
  73     EVENT_START,
  74     EVENT_STOP_CAPTURE,
  75     EVENT_AUDIO_DATA,
  76     EVENT_ENGINE_RESULT,
  77     EVENT_ENGINE_ERROR,
  78     EVENT_AUDIO_ERROR,
  79     EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
  80   };
  81
       // An FSM event together with its payload.
       // NOTE(review): which payload fields are meaningful presumably depends on
       // |event| (e.g. |audio_data| for EVENT_AUDIO_DATA, |engine_result| for
       // EVENT_ENGINE_RESULT) -- confirm against the .cc file.
  82   struct FSMEventArgs {
  83     explicit FSMEventArgs(FSMEvent event_value);
  84     ~FSMEventArgs();
  85
  86     FSMEvent event;
  87     int audio_error_code;
  88     scoped_refptr<AudioChunk> audio_data;
  89     content::SpeechRecognitionResult engine_result;
  90     content::SpeechRecognitionError engine_error;
  91   };
  92
       // Private so that only the befriended RefCountedThreadSafe base can destroy
       // an instance.
  93   virtual ~SpeechRecognizerImpl();
  94
  95   // Entry point for pushing any new external event into the recognizer FSM.
  96   void DispatchEvent(const FSMEventArgs& event_args);
  97
  98   // Defines the behavior of the recognizer FSM, selecting the appropriate
  99   // transition according to the current state and event.
 100   FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
 101
 102   // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
 103   void ProcessAudioPipeline(const AudioChunk& raw_audio);
 104
 105   // The methods below handle transitions of the recognizer FSM.
       // Each one returns an FSMState (presumably the state the FSM moves to --
       // confirm in ExecuteTransitionAndGetNextState()). All of them take the
       // triggering FSMEventArgs except Abort(), which takes the error directly.
       // DoNothing() is the only one declared const.
 106   FSMState StartRecording(const FSMEventArgs& event_args);
 107   FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
 108   FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
 109   FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
 110   FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
 111   FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
 112   FSMState ProcessFinalResult(const FSMEventArgs& event_args);
 113   FSMState AbortSilently(const FSMEventArgs& event_args);
 114   FSMState AbortWithError(const FSMEventArgs& event_args);
 115   FSMState Abort(const content::SpeechRecognitionError& error);
 116   FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
 117   FSMState DoNothing(const FSMEventArgs& event_args) const;
       // NOTE(review): presumably the handler for state/event combinations that
       // should never occur -- confirm in the .cc file.
 118   FSMState NotFeasible(const FSMEventArgs& event_args);
 119
 120   // Returns the time span of captured audio samples since the start of capture.
 121   int GetElapsedTimeMs() const;
 122
 123   // Calculates the input volume to be displayed in the UI, triggering the
 124   // OnAudioLevelsChange event accordingly.
       // NOTE(review): |rms| is a float passed by const reference; passing it by
       // value would be the conventional choice, but changing the signature also
       // requires updating the out-of-line definition.
 125   void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
 126
 127   void CloseAudioControllerAsynchronously();
       // NOTE(review): presumably stores |audio_manager| in testing_audio_manager_
       // -- confirm in the .cc file.
 128   void SetAudioManagerForTesting(media::AudioManager* audio_manager);
 129
 130   // Callback called on IO thread by audio_controller->Close().
 131   void OnAudioClosed(media::AudioInputController*);
 132
 133   // AudioInputController::EventHandler methods.
       // OnCreated() and OnRecording() have empty bodies: those notifications are
       // ignored by this class.
 134   virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
 135   virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
 136   virtual void OnError(media::AudioInputController* controller,
 137                        int error_code) OVERRIDE;
 138   virtual void OnData(media::AudioInputController* controller,
 139                       const uint8* data, uint32 size) OVERRIDE;
 140
 141   // SpeechRecognitionEngineDelegate methods.
 142   virtual void OnSpeechRecognitionEngineResult(
 143       const content::SpeechRecognitionResult& result) OVERRIDE;
 144   virtual void OnSpeechRecognitionEngineError(
 145       const content::SpeechRecognitionError& error) OVERRIDE;
 146
       // Data members. Keep the declaration order in sync with the constructor's
       // initializer list in the .cc file.
       // NOTE(review): the two raw pointers below are presumably not owned by this
       // object -- confirm.
 147   content::SpeechRecognitionEventListener* listener_;
 148   media::AudioManager* testing_audio_manager_;
       // Held in a scoped_ptr, i.e. owned by this object.
 149   scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
 150   Endpointer endpointer_;
       // Ref-counted handle to the audio input controller.
 151   scoped_refptr<media::AudioInputController> audio_controller_;
 152   int session_id_;
 153   int num_samples_recorded_;
 154   float audio_level_;
       // NOTE(review): presumably set while DispatchEvent() is running, to detect
       // re-entrant dispatching -- confirm in the .cc file.
 155   bool is_dispatching_event_;
       // Current state of the recognizer FSM.
 156   FSMState state_;
 157
 158   DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
 159 };
 160
 161 }  // namespace speech
 162
 163 #endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_