Introduced SpeechRecognitionDispatcher(Host) classes, handling dispatch of IPC messag...
[chromium-blink-merge.git] / content / browser / speech / speech_recognition_manager_impl.cc
blob6bad70fce718915659a74eb690c063e9ebc5988b
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/speech_recognition_manager_impl.h"
7 #include "base/bind.h"
8 #include "content/browser/browser_main_loop.h"
9 #include "content/browser/speech/google_one_shot_remote_engine.h"
10 #include "content/browser/speech/speech_recognition_engine.h"
11 #include "content/browser/speech/speech_recognizer_impl.h"
12 #include "content/public/browser/browser_thread.h"
13 #include "content/public/browser/content_browser_client.h"
14 #include "content/public/browser/resource_context.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager_delegate.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/speech_recognition_session_context.h"
19 #include "content/public/common/speech_recognition_result.h"
20 #include "media/audio/audio_manager.h"
22 using base::Callback;
23 using content::BrowserMainLoop;
24 using content::BrowserThread;
25 using content::SpeechRecognitionError;
26 using content::SpeechRecognitionEventListener;
27 using content::SpeechRecognitionManager;
28 using content::SpeechRecognitionResult;
29 using content::SpeechRecognitionSessionContext;
30 using content::SpeechRecognitionSessionConfig;
32 namespace content {
33 const int SpeechRecognitionManager::kSessionIDInvalid = 0;
35 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() {
36 return speech::SpeechRecognitionManagerImpl::GetInstance();
38 } // namespace content
40 namespace {
41 speech::SpeechRecognitionManagerImpl* g_speech_recognition_manager_impl;
42 } // namespace
44 namespace speech {
46 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() {
47 return g_speech_recognition_manager_impl;
50 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl()
51 : session_id_capturing_audio_(kSessionIDInvalid),
52 last_session_id_(kSessionIDInvalid),
53 is_dispatching_event_(false),
54 delegate_(content::GetContentClient()->browser()->
55 GetSpeechRecognitionManagerDelegate()) {
56 DCHECK(!g_speech_recognition_manager_impl);
57 g_speech_recognition_manager_impl = this;
60 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() {
61 DCHECK(g_speech_recognition_manager_impl);
62 g_speech_recognition_manager_impl = NULL;
63 // Recognition sessions will be aborted by the corresponding destructors.
64 sessions_.clear();
67 int SpeechRecognitionManagerImpl::CreateSession(
68 const SpeechRecognitionSessionConfig& config) {
69 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
71 const int session_id = GetNextSessionID();
72 DCHECK(!SessionExists(session_id));
73 // Set-up the new session.
74 Session& session = sessions_[session_id];
75 session.id = session_id;
76 session.config = config;
77 session.context = config.initial_context;
79 std::string hardware_info;
80 bool can_report_metrics = false;
81 if (delegate_.get())
82 delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info);
84 SpeechRecognitionEngineConfig remote_engine_config;
85 remote_engine_config.language = config.language;
86 remote_engine_config.grammars = config.grammars;
87 remote_engine_config.audio_sample_rate =
88 SpeechRecognizerImpl::kAudioSampleRate;
89 remote_engine_config.audio_num_bits_per_sample =
90 SpeechRecognizerImpl::kNumBitsPerAudioSample;
91 remote_engine_config.filter_profanities = config.filter_profanities;
92 remote_engine_config.hardware_info = hardware_info;
93 remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
95 SpeechRecognitionEngine* google_remote_engine =
96 new GoogleOneShotRemoteEngine(config.url_request_context_getter);
97 google_remote_engine->SetConfig(remote_engine_config);
99 session.recognizer = new SpeechRecognizerImpl(this,
100 session_id,
101 google_remote_engine);
102 return session_id;
105 void SpeechRecognitionManagerImpl::StartSession(int session_id) {
106 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
107 if (!SessionExists(session_id))
108 return;
110 // If there is another active session, abort that.
111 if (session_id_capturing_audio_ != kSessionIDInvalid &&
112 session_id_capturing_audio_ != session_id) {
113 AbortSession(session_id_capturing_audio_);
116 if (delegate_.get())
117 delegate_->CheckRecognitionIsAllowed(
118 session_id,
119 base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback,
120 this->AsWeakPtr()));
123 void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
124 bool is_allowed) {
125 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
126 DCHECK(SessionExists(session_id));
127 if (is_allowed) {
128 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
129 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
130 this->AsWeakPtr(), session_id, EVENT_START));
131 } else {
132 sessions_.erase(session_id);
136 void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
137 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
138 if (!SessionExists(session_id))
139 return;
141 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
142 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
143 this->AsWeakPtr(), session_id, EVENT_ABORT));
146 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) {
147 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
148 if (!SessionExists(session_id))
149 return;
151 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
152 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
153 this->AsWeakPtr(), session_id, EVENT_STOP_CAPTURE));
156 // Here begins the SpeechRecognitionEventListener interface implementation,
157 // which will simply relay the events to the proper listener registered for the
158 // particular session (most likely InputTagSpeechDispatcherHost) and to the
159 // catch-all listener provided by the delegate (if any).
161 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
162 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
163 if (!SessionExists(session_id))
164 return;
166 DCHECK_EQ(session_id_capturing_audio_, session_id);
167 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
168 delegate_listener->OnRecognitionStart(session_id);
169 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
170 listener->OnRecognitionStart(session_id);
173 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
175 if (!SessionExists(session_id))
176 return;
178 DCHECK_EQ(session_id_capturing_audio_, session_id);
179 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
180 delegate_listener->OnAudioStart(session_id);
181 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
182 listener->OnAudioStart(session_id);
185 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
186 int session_id) {
187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
188 if (!SessionExists(session_id))
189 return;
191 DCHECK_EQ(session_id_capturing_audio_, session_id);
192 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
193 delegate_listener->OnEnvironmentEstimationComplete(session_id);
194 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
195 listener->OnEnvironmentEstimationComplete(session_id);
198 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
199 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
200 if (!SessionExists(session_id))
201 return;
203 DCHECK_EQ(session_id_capturing_audio_, session_id);
204 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
205 delegate_listener->OnSoundStart(session_id);
206 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
207 listener->OnSoundStart(session_id);
210 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
211 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
212 if (!SessionExists(session_id))
213 return;
215 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
216 delegate_listener->OnSoundEnd(session_id);
217 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
218 listener->OnSoundEnd(session_id);
221 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
222 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
223 if (!SessionExists(session_id))
224 return;
226 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
227 delegate_listener->OnAudioEnd(session_id);
228 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
229 listener->OnAudioEnd(session_id);
230 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
231 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
232 this->AsWeakPtr(), session_id, EVENT_AUDIO_ENDED));
235 void SpeechRecognitionManagerImpl::OnRecognitionResult(
236 int session_id, const content::SpeechRecognitionResult& result) {
237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
238 if (!SessionExists(session_id))
239 return;
241 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
242 delegate_listener->OnRecognitionResult(session_id, result);
243 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
244 listener->OnRecognitionResult(session_id, result);
247 void SpeechRecognitionManagerImpl::OnRecognitionError(
248 int session_id, const content::SpeechRecognitionError& error) {
249 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
250 if (!SessionExists(session_id))
251 return;
253 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
254 delegate_listener->OnRecognitionError(session_id, error);
255 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
256 listener->OnRecognitionError(session_id, error);
259 void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
260 int session_id, float volume, float noise_volume) {
261 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
262 if (!SessionExists(session_id))
263 return;
265 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
266 delegate_listener->OnAudioLevelsChange(session_id, volume, noise_volume);
267 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
268 listener->OnAudioLevelsChange(session_id, volume, noise_volume);
271 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) {
272 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
273 if (!SessionExists(session_id))
274 return;
276 if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
277 delegate_listener->OnRecognitionEnd(session_id);
278 if (SpeechRecognitionEventListener* listener = GetListener(session_id))
279 listener->OnRecognitionEnd(session_id);
280 BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
281 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
282 this->AsWeakPtr(), session_id, EVENT_RECOGNITION_ENDED));
285 int SpeechRecognitionManagerImpl::GetSession(
286 int render_process_id, int render_view_id, int request_id) const {
287 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
288 SessionsTable::const_iterator iter;
289 for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) {
290 const int session_id = iter->first;
291 const SpeechRecognitionSessionContext& context = iter->second.context;
292 if (context.render_process_id == render_process_id &&
293 context.render_view_id == render_view_id &&
294 context.request_id == request_id) {
295 return session_id;
298 return kSessionIDInvalid;
301 SpeechRecognitionSessionContext
302 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const {
303 return GetSession(session_id).context;
306 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener(
307 SpeechRecognitionEventListener* listener) {
308 // This method gracefully destroys sessions for the listener. However, since
309 // the listener itself is likely to be destroyed after this call, we avoid
310 // dispatching further events to it, marking the |listener_is_active| flag.
311 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
312 for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end();
313 ++it) {
314 Session& session = it->second;
315 if (session.config.event_listener == listener) {
316 AbortSession(session.id);
317 session.listener_is_active = false;
322 // ----------------------- Core FSM implementation ---------------------------
323 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
324 FSMEvent event) {
325 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
327 // There are some corner cases in which the session might be deleted (due to
328 // an EndRecognition event) between a request (e.g. Abort) and its dispatch.
329 if (!SessionExists(session_id))
330 return;
332 const Session& session = GetSession(session_id);
333 FSMState session_state = GetSessionState(session_id);
334 DCHECK_LE(session_state, SESSION_STATE_MAX_VALUE);
335 DCHECK_LE(event, EVENT_MAX_VALUE);
337 // Event dispatching must be sequential, otherwise it will break all the rules
338 // and the assumptions of the finite state automata model.
339 DCHECK(!is_dispatching_event_);
340 is_dispatching_event_ = true;
341 ExecuteTransitionAndGetNextState(session, session_state, event);
342 is_dispatching_event_ = false;
345 // This FSM handles the evolution of each session, from the viewpoint of the
346 // interaction with the user (that may be either the browser end-user which
347 // interacts with UI bubbles, or JS developer intracting with JS methods).
348 // All the events received by the SpeechRecognizerImpl instances (one for each
349 // session) are always routed to the SpeechRecognitionEventListener(s)
350 // regardless the choices taken in this FSM.
351 void SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
352 const Session& session, FSMState session_state, FSMEvent event) {
353 // Note: since we're not tracking the state of the recognizer object, rather
354 // we're directly retrieving it (through GetSessionState), we see its events
355 // (that are AUDIO_ENDED and RECOGNITION_ENDED) after its state evolution
356 // (e.g., when we receive the AUDIO_ENDED event, the recognizer has just
357 // completed the transition from CAPTURING_AUDIO to WAITING_FOR_RESULT, thus
358 // we perceive the AUDIO_ENDED event in WAITING_FOR_RESULT).
359 // This makes the code below a bit tricky but avoids a lot of code for
360 // tracking and reconstructing asynchronously the state of the recognizer.
361 switch (session_state) {
362 case SESSION_STATE_IDLE:
363 switch (event) {
364 case EVENT_START:
365 return SessionStart(session);
366 case EVENT_ABORT:
367 case EVENT_RECOGNITION_ENDED:
368 return SessionDelete(session);
369 case EVENT_STOP_CAPTURE:
370 case EVENT_AUDIO_ENDED:
371 return;
373 break;
374 case SESSION_STATE_CAPTURING_AUDIO:
375 switch (event) {
376 case EVENT_STOP_CAPTURE:
377 return SessionStopAudioCapture(session);
378 case EVENT_ABORT:
379 return SessionAbort(session);
380 case EVENT_START:
381 return;
382 case EVENT_AUDIO_ENDED:
383 case EVENT_RECOGNITION_ENDED:
384 return NotFeasible(session, event);
386 break;
387 case SESSION_STATE_WAITING_FOR_RESULT:
388 switch (event) {
389 case EVENT_ABORT:
390 return SessionAbort(session);
391 case EVENT_AUDIO_ENDED:
392 return ResetCapturingSessionId(session);
393 case EVENT_START:
394 case EVENT_STOP_CAPTURE:
395 return;
396 case EVENT_RECOGNITION_ENDED:
397 return NotFeasible(session, event);
399 break;
401 return NotFeasible(session, event);
404 SpeechRecognitionManagerImpl::FSMState
405 SpeechRecognitionManagerImpl::GetSessionState(int session_id) const {
406 const Session& session = GetSession(session_id);
407 if (!session.recognizer.get() || !session.recognizer->IsActive())
408 return SESSION_STATE_IDLE;
409 if (session.recognizer->IsCapturingAudio())
410 return SESSION_STATE_CAPTURING_AUDIO;
411 return SESSION_STATE_WAITING_FOR_RESULT;
414 // ----------- Contract for all the FSM evolution functions below -------------
415 // - Are guaranteed to be executed in the IO thread;
416 // - Are guaranteed to be not reentrant (themselves and each other);
418 void SpeechRecognitionManagerImpl::SessionStart(const Session& session) {
419 session_id_capturing_audio_ = session.id;
420 session.recognizer->StartRecognition();
423 void SpeechRecognitionManagerImpl::SessionAbort(const Session& session) {
424 if (session_id_capturing_audio_ == session.id)
425 session_id_capturing_audio_ = kSessionIDInvalid;
426 DCHECK(session.recognizer.get() && session.recognizer->IsActive());
427 session.recognizer->AbortRecognition();
430 void SpeechRecognitionManagerImpl::SessionStopAudioCapture(
431 const Session& session) {
432 DCHECK(session.recognizer.get() && session.recognizer->IsCapturingAudio());
433 session.recognizer->StopAudioCapture();
436 void SpeechRecognitionManagerImpl::ResetCapturingSessionId(
437 const Session& session) {
438 DCHECK_EQ(session_id_capturing_audio_, session.id);
439 session_id_capturing_audio_ = kSessionIDInvalid;
442 void SpeechRecognitionManagerImpl::SessionDelete(const Session& session) {
443 DCHECK(session.recognizer == NULL || !session.recognizer->IsActive());
444 if (session_id_capturing_audio_ == session.id)
445 session_id_capturing_audio_ = kSessionIDInvalid;
446 sessions_.erase(session.id);
449 void SpeechRecognitionManagerImpl::NotFeasible(const Session& session,
450 FSMEvent event) {
451 NOTREACHED() << "Unfeasible event " << event
452 << " in state " << GetSessionState(session.id)
453 << " for session " << session.id;
456 int SpeechRecognitionManagerImpl::GetNextSessionID() {
457 ++last_session_id_;
458 // Deal with wrapping of last_session_id_. (How civilized).
459 if (last_session_id_ <= 0)
460 last_session_id_ = 1;
461 return last_session_id_;
464 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const {
465 return sessions_.find(session_id) != sessions_.end();
468 const SpeechRecognitionManagerImpl::Session&
469 SpeechRecognitionManagerImpl::GetSession(int session_id) const {
470 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
471 SessionsTable::const_iterator iter = sessions_.find(session_id);
472 DCHECK(iter != sessions_.end());
473 return iter->second;
476 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener(
477 int session_id) const {
478 const Session& session = GetSession(session_id);
479 return session.listener_is_active ? session.config.event_listener : NULL;
482 SpeechRecognitionEventListener*
483 SpeechRecognitionManagerImpl::GetDelegateListener() const {
484 return delegate_.get() ? delegate_->GetEventListener() : NULL;
487 const SpeechRecognitionSessionConfig&
488 SpeechRecognitionManagerImpl::GetSessionConfig(int session_id) const {
489 return GetSession(session_id).config;
492 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
493 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
496 bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
497 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
500 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
501 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
504 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
505 // Since AudioManager::ShowAudioInputSettings can potentially launch external
506 // processes, do that in the FILE thread to not block the calling threads.
507 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
508 BrowserThread::PostTask(
509 BrowserThread::FILE, FROM_HERE,
510 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings,
511 this->AsWeakPtr()));
512 return;
515 media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager();
516 DCHECK(audio_manager->CanShowAudioInputSettings());
517 if (audio_manager->CanShowAudioInputSettings())
518 audio_manager->ShowAudioInputSettings();
521 SpeechRecognitionManagerImpl::Session::Session()
522 : id(kSessionIDInvalid),
523 listener_is_active(true) {
526 SpeechRecognitionManagerImpl::Session::~Session() {
529 } // namespace speech