1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
8 #include "base/basictypes.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h"
16 #include "media/audio/audio_logging.h"
17 #include "net/url_request/url_request_context_getter.h"
// Forward declaration only: this header refers to the listener solely through
// a pointer (the constructor's |listener| parameter), so the full definition
// is not required here.
26 class SpeechRecognitionEventListener
;
28 // Handles speech recognition for a session (identified by |session_id|), taking
29 // care of audio capture, silence detection/endpointer and interaction with the
30 // SpeechRecognitionEngine.
31 class CONTENT_EXPORT SpeechRecognizerImpl
32 : public SpeechRecognizer
,
33 public media::AudioInputController::EventHandler
,
34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate
) {
// Audio-format and timing constants. They are only declared here (there is no
// in-class initializer), so their values come from out-of-line definitions
// that are not visible in this header.
// NOTE(review): units are inferred from the names only (sample rate presumably
// in Hz, the *Ms constants presumably in milliseconds) -- confirm in the .cc.
36 static const int kAudioSampleRate
;
37 static const media::ChannelLayout kChannelLayout
;
38 static const int kNumBitsPerAudioSample
;
39 static const int kNoSpeechTimeoutMs
;
40 static const int kEndpointerEstimationTimeMs
;
// Test-only hook that receives the media::AudioManager to use.
// NOTE(review): presumably stored in the static |audio_manager_for_tests_|
// member declared in this class; the pointer is raw, so who owns it and how
// long it must stay alive is not visible here -- confirm in the .cc.
42 static void SetAudioManagerForTesting(media::AudioManager
* audio_manager
);
// Constructs a recognizer from:
//   listener            - the SpeechRecognitionEventListener, passed as a raw
//                         pointer.
//   provisional_results - boolean flag; presumably copied into
//                         |provisional_results_| -- confirm.
//   engine              - the SpeechRecognitionEngine, passed as a raw pointer.
// NOTE(review): |recognition_engine_| is a scoped_ptr<SpeechRecognitionEngine>,
// so this constructor presumably takes ownership of |engine| -- confirm.
// NOTE(review): the class comment refers to a |session_id| that does not
// appear in this parameter list; the list may be incomplete in this view.
44 SpeechRecognizerImpl(SpeechRecognitionEventListener
* listener
,
47 bool provisional_results
,
48 SpeechRecognitionEngine
* engine
);
// Overrides (each is marked OVERRIDE) of virtual methods inherited from a base
// class; judging by the base list and method names these are the
// SpeechRecognizer interface -- confirm against speech_recognizer.h.
// All are defined out of line.

// Takes the id of the device to use as a string.
// NOTE(review): presumably an audio input device id that ends up in
// |device_id_| -- confirm.
50 virtual void StartRecognition(const std::string
& device_id
) OVERRIDE
;
51 virtual void AbortRecognition() OVERRIDE
;
52 virtual void StopAudioCapture() OVERRIDE
;
// The two queries below are const and return a bool.
53 virtual bool IsActive() const OVERRIDE
;
54 virtual bool IsCapturingAudio() const OVERRIDE
;
// Const accessor returning a const reference to a SpeechRecognitionEngine.
// NOTE(review): presumably dereferences |recognition_engine_|; if so, it must
// not be called while that scoped_ptr is empty -- confirm.
55 const SpeechRecognitionEngine
& recognition_engine() const;
// Gives the SpeechRecognizerTest class access to this class's non-public
// members.
58 friend class SpeechRecognizerTest
;
63 STATE_ESTIMATING_ENVIRONMENT
,
64 STATE_WAITING_FOR_SPEECH
,
66 STATE_WAITING_FINAL_RESULT
,
68 STATE_MAX_VALUE
= STATE_ENDED
79 EVENT_MAX_VALUE
= EVENT_AUDIO_ERROR
83 explicit FSMEventArgs(FSMEvent event_value
);
87 scoped_refptr
<AudioChunk
> audio_data
;
88 SpeechRecognitionResults engine_results
;
89 SpeechRecognitionError engine_error
;
// Virtual destructor, defined out of line.
92 virtual ~SpeechRecognizerImpl();
94 // Entry point for pushing any new external event into the recognizer FSM.
// |event_args| carries the event and its payload by const reference.
// NOTE(review): the |is_dispatching_event_| member suggests dispatch is
// guarded against re-entrancy -- confirm in the .cc.
95 void DispatchEvent(const FSMEventArgs
& event_args
);
97 // Defines the behavior of the recognizer FSM, selecting the appropriate
98 // transition according to the current state and event.
// Returns the FSMState produced by the selected transition.
99 FSMState
ExecuteTransitionAndGetNextState(const FSMEventArgs
& args
);
101 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
// |raw_audio| is taken by const reference and nothing is returned.
102 void ProcessAudioPipeline(const AudioChunk
& raw_audio
);
104 // The methods below handle transitions of the recognizer FSM.
// Each handler returns an FSMState. All but Abort() receive the triggering
// event as a const FSMEventArgs reference.
// NOTE(review): what each handler actually does is not visible in this
// header; the per-method notes below are inferred from names and signatures
// only -- confirm in the .cc.
105 FSMState
StartRecording(const FSMEventArgs
& event_args
);
106 FSMState
StartRecognitionEngine(const FSMEventArgs
& event_args
);
107 FSMState
WaitEnvironmentEstimationCompletion(const FSMEventArgs
& event_args
);
108 FSMState
DetectUserSpeechOrTimeout(const FSMEventArgs
& event_args
);
109 FSMState
StopCaptureAndWaitForResult(const FSMEventArgs
& event_args
);
110 FSMState
ProcessIntermediateResult(const FSMEventArgs
& event_args
);
111 FSMState
ProcessFinalResult(const FSMEventArgs
& event_args
);
112 FSMState
AbortSilently(const FSMEventArgs
& event_args
);
113 FSMState
AbortWithError(const FSMEventArgs
& event_args
);
// Unlike its siblings, Abort() takes a SpeechRecognitionError rather than
// FSMEventArgs; presumably the shared helper behind the two Abort* handlers
// above -- confirm.
114 FSMState
Abort(const SpeechRecognitionError
& error
);
115 FSMState
DetectEndOfSpeech(const FSMEventArgs
& event_args
);
// The only handler declared const, i.e. it cannot modify recognizer state.
116 FSMState
DoNothing(const FSMEventArgs
& event_args
) const;
// NOTE(review): presumably the handler for state/event combinations that
// should never occur -- confirm.
117 FSMState
NotFeasible(const FSMEventArgs
& event_args
);
119 // Returns the time span of captured audio samples since the start of capture.
// Const; the result is an int, in milliseconds per the method name.
// NOTE(review): presumably derived from |num_samples_recorded_| and the
// sample rate -- confirm.
120 int GetElapsedTimeMs() const;
122 // Calculates the input volume to be displayed in the UI, triggering the
123 // OnAudioLevelsChange event accordingly.
// |rms| is a float taken by const reference and |clip_detected| is a bool.
// NOTE(review): passing a float by const reference has no benefit over
// passing it by value; changing it requires updating the out-of-line
// definition at the same time.
124 void UpdateSignalAndNoiseLevels(const float& rms
, bool clip_detected
);
// Takes no arguments and returns nothing.
// NOTE(review): by its name this starts an asynchronous close of
// |audio_controller_|, with OnAudioClosed() as the completion callback --
// confirm in the .cc.
126 void CloseAudioControllerAsynchronously();
128 // Callback called on IO thread by audio_controller->Close().
// The media::AudioInputController* parameter is left unnamed in this
// declaration.
129 void OnAudioClosed(media::AudioInputController
*);
131 // AudioInputController::EventHandler methods.
// OnCreated, OnRecording and OnLog have empty inline bodies, so those
// notifications are ignored by this class. OnError and OnData are defined out
// of line.
132 virtual void OnCreated(media::AudioInputController
* controller
) OVERRIDE
{}
133 virtual void OnRecording(media::AudioInputController
* controller
) OVERRIDE
{}
// Receives the controller together with an AudioInputController::ErrorCode.
134 virtual void OnError(media::AudioInputController
* controller
,
135 media::AudioInputController::ErrorCode error_code
) OVERRIDE
;
// Receives the controller together with a pointer to a const media::AudioBus.
136 virtual void OnData(media::AudioInputController
* controller
,
137 const media::AudioBus
* data
) OVERRIDE
;
138 virtual void OnLog(media::AudioInputController
* controller
,
139 const std::string
& message
) OVERRIDE
{}
141 // SpeechRecognitionEngineDelegate methods.
// Both are overrides defined out of line. One receives results and the other
// receives an error, each by const reference.
142 virtual void OnSpeechRecognitionEngineResults(
143 const SpeechRecognitionResults
& results
) OVERRIDE
;
144 virtual void OnSpeechRecognitionEngineError(
145 const SpeechRecognitionError
& error
) OVERRIDE
;
// Static raw pointer to a media::AudioManager.
// NOTE(review): presumably set through SetAudioManagerForTesting() -- confirm.
147 static media::AudioManager
* audio_manager_for_tests_
;
// Engine held through scoped_ptr, i.e. owned by this object.
149 scoped_ptr
<SpeechRecognitionEngine
> recognition_engine_
;
// Endpointer held by value.
150 Endpointer endpointer_
;
// Audio input controller held through a scoped_refptr.
151 scoped_refptr
<media::AudioInputController
> audio_controller_
;
// Audio log held through scoped_ptr.
152 scoped_ptr
<media::AudioLog
> audio_log_
;
// NOTE(review): presumably the running sample count behind
// GetElapsedTimeMs() -- confirm.
153 int num_samples_recorded_
;
// NOTE(review): presumably true while DispatchEvent() is running -- confirm.
155 bool is_dispatching_event_
;
// NOTE(review): presumably the constructor's |provisional_results| argument
// -- confirm.
156 bool provisional_results_
;
// NOTE(review): presumably the |device_id| passed to StartRecognition() --
// confirm.
158 std::string device_id_
;
// Nested class, forward-declared only; its definition is not in this header.
160 class OnDataConverter
;
162 // Converts data between native input format and a WebSpeech specific
// NOTE(review): the sentence above is cut off in this copy of the file; the
// remainder (presumably naming the target format) should be restored from
// the original header.
// The converter is held through scoped_ptr, i.e. owned by this object.
164 scoped_ptr
<SpeechRecognizerImpl::OnDataConverter
> audio_converter_
;
// Chromium macro (from base/basictypes.h in this era of the code base) that
// makes SpeechRecognizerImpl non-copyable and non-assignable.
166 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl
);
169 } // namespace content
171 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_