Update ASan/Android runtime and setup script to LLVM r200682.
[chromium-blink-merge.git] / content / browser / speech / speech_recognizer_impl.h
blob 7bacd1973b7ac4d05a1a23548074d08c19de06d1
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
6 #define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_
8 #include "base/basictypes.h"
9 #include "base/memory/scoped_ptr.h"
10 #include "content/browser/speech/endpointer/endpointer.h"
11 #include "content/browser/speech/speech_recognition_engine.h"
12 #include "content/browser/speech/speech_recognizer.h"
13 #include "content/public/common/speech_recognition_error.h"
14 #include "content/public/common/speech_recognition_result.h"
15 #include "media/audio/audio_input_controller.h"
16 #include "net/url_request/url_request_context_getter.h"
18 namespace media {
19 class AudioManager;
22 namespace content {
24 class SpeechRecognitionEventListener;
26 // Handles speech recognition for a session (identified by |session_id|), taking
27 // care of audio capture, silence detection/endpointer and interaction with the
28 // SpeechRecognitionEngine.
// NOTE(review): the integer at the start of each line is a line number left
// over from the web view this text was extracted from. The numbering has gaps
// (67-68, 78-79, 88-89, 161-162) where brace-only lines such as the closing
// `};` of the enums, the struct and this class appear to have been dropped.
29 class CONTENT_EXPORT SpeechRecognizerImpl
30 : public SpeechRecognizer,
31 public media::AudioInputController::EventHandler,
32 public NON_EXPORTED_BASE(SpeechRecognitionEngineDelegate) {
33 public:
// Class-wide constants. They are declared without initializers, so their
// values are defined outside this header.
34 static const int kAudioSampleRate;
35 static const media::ChannelLayout kChannelLayout;
36 static const int kNumBitsPerAudioSample;
37 static const int kNoSpeechTimeoutMs;
38 static const int kEndpointerEstimationTimeMs;
// Test-only hook taking an AudioManager to use instead of the default one.
// NOTE(review): presumably stored in |audio_manager_for_tests_| — confirm in
// the implementation file.
40 static void SetAudioManagerForTesting(media::AudioManager* audio_manager);
// |listener|: receiver of recognition events for this session.
// |session_id|: identifier of the session handled by this recognizer.
// |continuous|, |provisional_results|: recognition mode flags.
// |engine|: recognition engine to drive.
// NOTE(review): |engine| is passed as a raw pointer while
// |recognition_engine_| is a scoped_ptr, which suggests ownership is taken —
// confirm against the constructor definition.
42 SpeechRecognizerImpl(SpeechRecognitionEventListener* listener,
43 int session_id,
44 bool continuous,
45 bool provisional_results,
46 SpeechRecognitionEngine* engine);
// Overrides of base-class virtuals (presumably the SpeechRecognizer
// interface — the base declaration is not visible here).
48 virtual void StartRecognition(const std::string& device_id) OVERRIDE;
49 virtual void AbortRecognition() OVERRIDE;
50 virtual void StopAudioCapture() OVERRIDE;
51 virtual bool IsActive() const OVERRIDE;
52 virtual bool IsCapturingAudio() const OVERRIDE;
// Read-only access to the recognition engine.
53 const SpeechRecognitionEngine& recognition_engine() const;
55 private:
// Grants the test fixture access to the private members below.
56 friend class SpeechRecognizerTest;
// States of the recognizer FSM. STATE_MAX_VALUE aliases the last real state
// (STATE_ENDED).
58 enum FSMState {
59 STATE_IDLE = 0,
60 STATE_STARTING,
61 STATE_ESTIMATING_ENVIRONMENT,
62 STATE_WAITING_FOR_SPEECH,
63 STATE_RECOGNIZING,
64 STATE_WAITING_FINAL_RESULT,
65 STATE_ENDED,
66 STATE_MAX_VALUE = STATE_ENDED
// Events that can be fed into the recognizer FSM. EVENT_MAX_VALUE aliases the
// last real event (EVENT_AUDIO_ERROR).
69 enum FSMEvent {
70 EVENT_ABORT = 0,
71 EVENT_START,
72 EVENT_STOP_CAPTURE,
73 EVENT_AUDIO_DATA,
74 EVENT_ENGINE_RESULT,
75 EVENT_ENGINE_ERROR,
76 EVENT_AUDIO_ERROR,
77 EVENT_MAX_VALUE = EVENT_AUDIO_ERROR
// An FSM event together with the payloads that may accompany it.
// NOTE(review): presumably each payload is only meaningful for the matching
// event kind (audio chunk / engine results / engine error) — confirm where
// the events are built.
80 struct FSMEventArgs {
81 explicit FSMEventArgs(FSMEvent event_value);
82 ~FSMEventArgs();
84 FSMEvent event;
85 scoped_refptr<AudioChunk> audio_data;
86 SpeechRecognitionResults engine_results;
87 SpeechRecognitionError engine_error;
// The destructor is private, so code outside this class (and its friends)
// cannot delete or stack-allocate an instance directly.
// NOTE(review): presumably lifetime is managed through a ref-counted base —
// confirm in SpeechRecognizer.
90 virtual ~SpeechRecognizerImpl();
92 // Entry point for pushing any new external event into the recognizer FSM.
93 void DispatchEvent(const FSMEventArgs& event_args);
95 // Defines the behavior of the recognizer FSM, selecting the appropriate
96 // transition according to the current state and event.
97 FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& args);
99 // Process a new audio chunk in the audio pipeline (endpointer, vumeter, etc).
100 void ProcessAudioPipeline(const AudioChunk& raw_audio);
102 // The methods below handle transitions of the recognizer FSM.
// Each one returns an FSMState. All take the triggering FSMEventArgs except
// Abort(), which takes the error directly; DoNothing() is the only const one.
103 FSMState StartRecording(const FSMEventArgs& event_args);
104 FSMState StartRecognitionEngine(const FSMEventArgs& event_args);
105 FSMState WaitEnvironmentEstimationCompletion(const FSMEventArgs& event_args);
106 FSMState DetectUserSpeechOrTimeout(const FSMEventArgs& event_args);
107 FSMState StopCaptureAndWaitForResult(const FSMEventArgs& event_args);
108 FSMState ProcessIntermediateResult(const FSMEventArgs& event_args);
109 FSMState ProcessFinalResult(const FSMEventArgs& event_args);
110 FSMState AbortSilently(const FSMEventArgs& event_args);
111 FSMState AbortWithError(const FSMEventArgs& event_args);
112 FSMState Abort(const SpeechRecognitionError& error);
113 FSMState DetectEndOfSpeech(const FSMEventArgs& event_args);
114 FSMState DoNothing(const FSMEventArgs& event_args) const;
115 FSMState NotFeasible(const FSMEventArgs& event_args);
117 // Returns the time span of captured audio samples since the start of capture.
118 int GetElapsedTimeMs() const;
120 // Calculates the input volume to be displayed in the UI, triggering the
121 // OnAudioLevelsChange event accordingly.
122 void UpdateSignalAndNoiseLevels(const float& rms, bool clip_detected);
// NOTE(review): by its name this starts an asynchronous close of
// |audio_controller_|, with OnAudioClosed() below as the completion callback
// — confirm in the implementation file.
124 void CloseAudioControllerAsynchronously();
126 // Callback called on IO thread by audio_controller->Close().
127 void OnAudioClosed(media::AudioInputController*);
129 // AudioInputController::EventHandler methods.
// OnCreated() and OnRecording() are no-ops (empty inline bodies); OnError()
// and OnData() are defined out of line.
130 virtual void OnCreated(media::AudioInputController* controller) OVERRIDE {}
131 virtual void OnRecording(media::AudioInputController* controller) OVERRIDE {}
132 virtual void OnError(media::AudioInputController* controller) OVERRIDE;
133 virtual void OnData(media::AudioInputController* controller,
134 const uint8* data, uint32 size) OVERRIDE;
136 // SpeechRecognitionEngineDelegate methods.
137 virtual void OnSpeechRecognitionEngineResults(
138 const SpeechRecognitionResults& results) OVERRIDE;
139 virtual void OnSpeechRecognitionEngineError(
140 const SpeechRecognitionError& error) OVERRIDE;
// Static AudioManager pointer for tests (see SetAudioManagerForTesting()).
142 static media::AudioManager* audio_manager_for_tests_;
// Per-instance state: the owned recognition engine, the endpointer, the
// ref-counted audio input controller, and FSM bookkeeping.
// NOTE(review): |is_dispatching_event_| looks like a re-entrancy guard for
// DispatchEvent() — confirm in the implementation file.
144 scoped_ptr<SpeechRecognitionEngine> recognition_engine_;
145 Endpointer endpointer_;
146 scoped_refptr<media::AudioInputController> audio_controller_;
147 int num_samples_recorded_;
148 float audio_level_;
149 bool is_dispatching_event_;
150 bool provisional_results_;
151 FSMState state_;
152 std::string device_id_;
// Nested helper class, only forward-declared here; its definition is not in
// this header.
154 class OnDataConverter;
156 // Converts data between native input format and a WebSpeech specific
157 // output format.
158 scoped_ptr<SpeechRecognizerImpl::OnDataConverter> audio_converter_;
// NOTE(review): assumed to be the usual Chromium macro that makes the class
// non-copyable — the macro definition is not visible here.
160 DISALLOW_COPY_AND_ASSIGN(SpeechRecognizerImpl);
163 } // namespace content
165 #endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNIZER_IMPL_H_