Roll src/third_party/WebKit d9c6159:8139f33 (svn 201974:201975)
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl.h
blob3a26d335cacdfc66d5322e8fe2c55e1b5cafe10e
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
8 #include "base/basictypes.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h"
16 #include "media/audio/audio_logging.h"
17 #include "net/url_request/url_request_context_getter.h"
// Forward declarations of the media types this header only refers to through
// pointers (see SetAudioManagerForTesting() and OnData() below), so their full
// definitions are not needed here.
namespace media {
class AudioBus;
class AudioManager;
}  // namespace media
24 namespace content {
26 class SpeechRecognitionEventListener;
28 // Handles speech recognition for a session (identified by |session_id|), taking
29 // care of audio capture, silence detection/endpointer and interaction with the
30 // SpeechRecognitionEngine.
31 class CONTENT_EXPORT SpeechRecognizerImpl
32 : public SpeechRecognizer,
33 public media::AudioInputController::EventHandler,
34 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
35 public:
36 static const int kAudioSampleRate;
37 static const media::ChannelLayout kChannelLayout;
38 static const int kNumBitsPerAudioSample;
39 static const int kNoSpeechTimeoutMs;
40 static const int kEndpointerEstimationTimeMs;
42 static void SetAudioManagerForTesting(media::AudioManager* audio_manager);
44 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
45 int session_id,
46 bool continuous,
47 bool provisional_results,
48 SpeechRecognitionEngine* engine);
50 void StartRecognition(const std::string& device_id) override;
51 void AbortRecognition() override;
52 void StopAudioCapture() override;
53 bool IsActive() const override;
54 bool IsCapturingAudio() const override;
55 const SpeechRecognitionEngine& recognition_engine() const;
57 private:
58 friend class SpeechRecognizerTest;
60 enum FSMState {
61 STATE_IDLE = 0,
62 STATE_STARTING,
63 STATE_ESTIMATING_ENVIRONMENT,
64 STATE_WAITING_FOR_SPEECH,
65 STATE_RECOGNIZING,
66 STATE_WAITING_FINAL_RESULT,
67 STATE_ENDED,
68 STATE_MAX_VALUE = STATE_ENDED
71 enum FSMEvent {
72 EVENT_ABORT = 0,
73 EVENT_START,
74 EVENT_STOP_CAPTURE,
75 EVENT_AUDIO_DATA,
76 EVENT_ENGINE_RESULT,
77 EVENT_ENGINE_ERROR,
78 EVENT_AUDIO_ERROR,
79 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
82 struct FSMEventArgs {
83 explicit FSMEventArgs(FSMEvent event_value);
84 ~FSMEventArgs();
86 FSMEvent event;
87 scoped_refptr<AudioChunk> audio_data;
88 SpeechRecognitionResults engine_results;
89 SpeechRecognitionError engine_error;
92 ~SpeechRecognizerImpl() override;
94 // Entry point for pushing any new external event into the recognizer FSM.
95 void DispatchEvent(const FSMEventArgs& event_args);
97 // Defines the behavior of the recognizer FSM, selecting the appropriate
98 // transition according to the current state and event.
99 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
101 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
102 void ProcessAudioPipeline(const AudioChunk& raw_audio);
104 // The methods below handle transitions of the recognizer FSM.
105 FSMState StartRecording(const FSMEventArgs& event_args);
106 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
107 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
108 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
109 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
110 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
111 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
112 FSMState AbortSilently(const FSMEventArgs& event_args);
113 FSMState AbortWithError(const FSMEventArgs& event_args);
114 FSMState Abort(const SpeechRecognitionError& error);
115 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
116 FSMState DoNothing(const FSMEventArgs& event_args) const;
117 FSMState NotFeasible(const FSMEventArgs& event_args);
119 // Returns the time span of captured audio samples since the start of capture.
120 int GetElapsedTimeMs() const;
122 // Calculates the input volume to be displayed in the UI, triggering the
123 // OnAudioLevelsChange event accordingly.
124 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
126 void CloseAudioControllerAsynchronously();
128 // Callback called on IO thread by audio_controller->Close().
129 void OnAudioClosed(media::AudioInputController*);
131 // AudioInputController::EventHandler methods.
132 void OnCreated(media::AudioInputController* controller) override {}
133 void OnRecording(media::AudioInputController* controller) override {}
134 void OnError(media::AudioInputController* controller,
135 media::AudioInputController::ErrorCode error_code) override;
136 void OnData(media::AudioInputController* controller,
137 const media::AudioBus* data) override;
138 void OnLog(media::AudioInputController* controller,
139 const std::string& message) override {}
141 // SpeechRecognitionEngineDelegate methods.
142 void OnSpeechRecognitionEngineResults(
143 const SpeechRecognitionResults& results) override;
144 void OnSpeechRecognitionEngineError(
145 const SpeechRecognitionError& error) override;
147 static media::AudioManager* audio_manager_for_tests_;
149 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
150 Endpointer endpointer_;
151 scoped_refptr<media::AudioInputController> audio_controller_;
152 scoped_ptr<media::AudioLog> audio_log_;
153 int num_samples_recorded_;
154 float audio_level_;
155 bool is_dispatching_event_;
156 bool provisional_results_;
157 FSMState state_;
158 std::string device_id_;
160 class OnDataConverter;
162 // Converts data between native input format and a WebSpeech specific
163 // output format.
164 scoped_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
166 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
169 } // namespace content
171 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_