1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/browser/speech/speech_recognition_manager_impl.h"
8 #include "content/browser/browser_main_loop.h"
9 #include "content/browser/speech/google_one_shot_remote_engine.h"
10 #include "content/browser/speech/speech_recognition_engine.h"
11 #include "content/browser/speech/speech_recognizer_impl.h"
12 #include "content/public/browser/browser_thread.h"
13 #include "content/public/browser/content_browser_client.h"
14 #include "content/public/browser/resource_context.h"
15 #include "content/public/browser/speech_recognition_event_listener.h"
16 #include "content/public/browser/speech_recognition_manager_delegate.h"
17 #include "content/public/browser/speech_recognition_session_config.h"
18 #include "content/public/browser/speech_recognition_session_context.h"
19 #include "content/public/common/speech_recognition_result.h"
20 #include "media/audio/audio_manager.h"
23 using content::BrowserMainLoop
;
24 using content::BrowserThread
;
25 using content::SpeechRecognitionError
;
26 using content::SpeechRecognitionEventListener
;
27 using content::SpeechRecognitionManager
;
28 using content::SpeechRecognitionResult
;
29 using content::SpeechRecognitionSessionContext
;
30 using content::SpeechRecognitionSessionConfig
;
// Sentinel session id (0) meaning "no session". In this file it initialises
// the session-id members and is returned by the GetSession() lookup when no
// session matches.
33 const int SpeechRecognitionManager::kSessionIDInvalid
= 0;
// Returns the manager instance by forwarding to the concrete
// speech::SpeechRecognitionManagerImpl accessor.
35 SpeechRecognitionManager
* SpeechRecognitionManager::GetInstance() {
36 return speech::SpeechRecognitionManagerImpl::GetInstance();
38 } // namespace content
// Pointer to the live SpeechRecognitionManagerImpl. It is set to |this| by the
// constructor and reset to NULL by the destructor (each DCHECKs the expected
// previous value), and is what SpeechRecognitionManagerImpl::GetInstance()
// returns.
41 speech::SpeechRecognitionManagerImpl
* g_speech_recognition_manager_impl
;
// Returns the global instance pointer as-is; the result is whatever the
// constructor/destructor last stored there (NULL after destruction).
46 SpeechRecognitionManagerImpl
* SpeechRecognitionManagerImpl::GetInstance() {
47 return g_speech_recognition_manager_impl
;
// Initialises both session-id trackers to kSessionIDInvalid, clears the
// event-dispatch flag, fetches the manager delegate from the content browser
// client, and publishes this object through the global instance pointer.
50 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl()
51 : session_id_capturing_audio_(kSessionIDInvalid
),
52 last_session_id_(kSessionIDInvalid
),
53 is_dispatching_event_(false),
54 delegate_(content::GetContentClient()->browser()->
55 GetSpeechRecognitionManagerDelegate()) {
// Only one instance is expected to exist at a time.
56 DCHECK(!g_speech_recognition_manager_impl
);
57 g_speech_recognition_manager_impl
= this;
// Unpublishes this object from the global instance pointer.
// NOTE(review): whatever follows the trailing comment below is not visible in
// this excerpt — confirm against the full file.
60 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() {
61 DCHECK(g_speech_recognition_manager_impl
);
62 g_speech_recognition_manager_impl
= NULL
;
63 // Recognition sessions will be aborted by the corresponding destructors.
// Creates a new recognition session from |config| and stores it in
// |sessions_| under a freshly generated id. Must be called on the IO thread.
// The session records the config and its initial context, and gets a
// SpeechRecognizerImpl backed by a GoogleOneShotRemoteEngine configured from
// |config|.
// NOTE(review): the return statement is not visible in this excerpt;
// presumably the new session id is returned — confirm.
67 int SpeechRecognitionManagerImpl::CreateSession(
68 const SpeechRecognitionSessionConfig
& config
) {
69 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
71 const int session_id
= GetNextSessionID();
72 DCHECK(!SessionExists(session_id
));
73 // Set-up the new session.
// operator[] default-constructs the Session entry, which is then filled in.
74 Session
& session
= sessions_
[session_id
];
75 session
.id
= session_id
;
76 session
.config
= config
;
77 session
.context
= config
.initial_context
;
79 std::string hardware_info
;
80 bool can_report_metrics
= false;
// NOTE(review): GetDelegateListener() treats |delegate_| as possibly null; the
// line preceding this call is not visible in this excerpt — confirm that a
// null check guards it.
82 delegate_
->GetDiagnosticInformation(&can_report_metrics
, &hardware_info
);
// Translate the session config into the remote engine's own config.
84 SpeechRecognitionEngineConfig remote_engine_config
;
85 remote_engine_config
.language
= config
.language
;
86 remote_engine_config
.grammars
= config
.grammars
;
87 remote_engine_config
.audio_sample_rate
=
88 SpeechRecognizerImpl::kAudioSampleRate
;
89 remote_engine_config
.audio_num_bits_per_sample
=
90 SpeechRecognizerImpl::kNumBitsPerAudioSample
;
91 remote_engine_config
.filter_profanities
= config
.filter_profanities
;
92 remote_engine_config
.hardware_info
= hardware_info
;
// The origin URL is forwarded only when the delegate reported that metrics
// may be sent; otherwise an empty string is used.
93 remote_engine_config
.origin_url
= can_report_metrics
? config
.origin_url
: "";
// NOTE(review): the engine is allocated with a raw new and handed to the
// recognizer; presumably the recognizer takes ownership — confirm.
95 SpeechRecognitionEngine
* google_remote_engine
=
96 new GoogleOneShotRemoteEngine(config
.url_request_context_getter
);
97 google_remote_engine
->SetConfig(remote_engine_config
);
// NOTE(review): one constructor-argument line is missing from this excerpt
// between |this| and |google_remote_engine|.
99 session
.recognizer
= new SpeechRecognizerImpl(this,
101 google_remote_engine
);
// Requests the start of session |session_id|. Must be called on the IO thread.
// Any other session currently capturing audio is aborted first, then the
// delegate is asked whether recognition is allowed; the answer arrives through
// RecognitionAllowedCallback.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return), nor are the remaining
// arguments of the delegate call — confirm against the full file.
105 void SpeechRecognitionManagerImpl::StartSession(int session_id
) {
106 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
107 if (!SessionExists(session_id
))
110 // If there is another active session, abort that.
111 if (session_id_capturing_audio_
!= kSessionIDInvalid
&&
112 session_id_capturing_audio_
!= session_id
) {
113 AbortSession(session_id_capturing_audio_
);
117 delegate_
->CheckRecognitionIsAllowed(
119 base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback
,
// Callback bound in StartSession for the delegate's permission check. Runs on
// the IO thread and expects the session to still exist. Two outcomes are
// visible: posting an EVENT_START dispatch for the session, or erasing the
// session from |sessions_|.
// NOTE(review): the second parameter and the branch choosing between the two
// outcomes are not visible in this excerpt; presumably the session is started
// when recognition is allowed and erased otherwise — confirm.
123 void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id
,
125 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
126 DCHECK(SessionExists(session_id
));
128 BrowserThread::PostTask(BrowserThread::IO
, FROM_HERE
,
129 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent
,
130 this->AsWeakPtr(), session_id
, EVENT_START
));
132 sessions_
.erase(session_id
);
// Requests the abort of session |session_id|. Must be called on the IO thread.
// Nothing is aborted synchronously: an EVENT_ABORT dispatch is posted to the
// IO thread, bound to a weak pointer to this object.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
136 void SpeechRecognitionManagerImpl::AbortSession(int session_id
) {
137 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
138 if (!SessionExists(session_id
))
141 BrowserThread::PostTask(BrowserThread::IO
, FROM_HERE
,
142 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent
,
143 this->AsWeakPtr(), session_id
, EVENT_ABORT
));
// Requests that session |session_id| stop capturing audio. Must be called on
// the IO thread. The request is queued as an EVENT_STOP_CAPTURE dispatch
// posted to the IO thread rather than executed synchronously.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
146 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id
) {
147 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
148 if (!SessionExists(session_id
))
151 BrowserThread::PostTask(BrowserThread::IO
, FROM_HERE
,
152 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent
,
153 this->AsWeakPtr(), session_id
, EVENT_STOP_CAPTURE
));
156 // Here begins the SpeechRecognitionEventListener interface implementation,
157 // which will simply relay the events to the proper listener registered for the
158 // particular session (most likely InputTagSpeechDispatcherHost) and to the
159 // catch-all listener provided by the delegate (if any).
// Relays the recognition-start notification for |session_id|, first to the
// delegate's listener and then to the session's own listener; each is skipped
// when its getter returns NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
161 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id
) {
162 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
163 if (!SessionExists(session_id
))
// This event is expected only from the session recorded as capturing audio.
166 DCHECK_EQ(session_id_capturing_audio_
, session_id
);
167 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
168 delegate_listener
->OnRecognitionStart(session_id
);
169 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
170 listener
->OnRecognitionStart(session_id
);
// Relays the audio-start notification for |session_id|, first to the
// delegate's listener and then to the session's own listener; each is skipped
// when its getter returns NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
173 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id
) {
174 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
175 if (!SessionExists(session_id
))
// This event is expected only from the session recorded as capturing audio.
178 DCHECK_EQ(session_id_capturing_audio_
, session_id
);
179 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
180 delegate_listener
->OnAudioStart(session_id
);
181 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
182 listener
->OnAudioStart(session_id
);
// Relays the environment-estimation-complete notification for |session_id|,
// first to the delegate's listener and then to the session's own listener;
// each is skipped when its getter returns NULL. Runs on the IO thread.
// NOTE(review): the parameter line and the statement guarded by the
// SessionExists() check are not visible in this excerpt (presumably
// "int session_id" and an early return) — confirm.
185 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
187 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
188 if (!SessionExists(session_id
))
// This event is expected only from the session recorded as capturing audio.
191 DCHECK_EQ(session_id_capturing_audio_
, session_id
);
192 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
193 delegate_listener
->OnEnvironmentEstimationComplete(session_id
);
194 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
195 listener
->OnEnvironmentEstimationComplete(session_id
);
// Relays the sound-start notification for |session_id|, first to the
// delegate's listener and then to the session's own listener; each is skipped
// when its getter returns NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
198 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id
) {
199 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
200 if (!SessionExists(session_id
))
// This event is expected only from the session recorded as capturing audio.
203 DCHECK_EQ(session_id_capturing_audio_
, session_id
);
204 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
205 delegate_listener
->OnSoundStart(session_id
);
206 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
207 listener
->OnSoundStart(session_id
);
// Relays the sound-end notification for |session_id|, first to the delegate's
// listener and then to the session's own listener; each is skipped when its
// getter returns NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
210 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id
) {
211 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
212 if (!SessionExists(session_id
))
215 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
216 delegate_listener
->OnSoundEnd(session_id
);
217 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
218 listener
->OnSoundEnd(session_id
);
// Relays the audio-end notification for |session_id| to the delegate's
// listener and the session's own listener (each skipped when NULL), then
// posts an EVENT_AUDIO_ENDED dispatch so the session FSM sees the event too.
// Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
221 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id
) {
222 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
223 if (!SessionExists(session_id
))
226 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
227 delegate_listener
->OnAudioEnd(session_id
);
228 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
229 listener
->OnAudioEnd(session_id
);
// Feed the event to the FSM asynchronously, after the listeners were notified.
230 BrowserThread::PostTask(BrowserThread::IO
, FROM_HERE
,
231 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent
,
232 this->AsWeakPtr(), session_id
, EVENT_AUDIO_ENDED
));
// Relays |result| for |session_id|, first to the delegate's listener and then
// to the session's own listener; each is skipped when its getter returns
// NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
235 void SpeechRecognitionManagerImpl::OnRecognitionResult(
236 int session_id
, const content::SpeechRecognitionResult
& result
) {
237 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
238 if (!SessionExists(session_id
))
241 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
242 delegate_listener
->OnRecognitionResult(session_id
, result
);
243 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
244 listener
->OnRecognitionResult(session_id
, result
);
// Relays |error| for |session_id|, first to the delegate's listener and then
// to the session's own listener; each is skipped when its getter returns
// NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
247 void SpeechRecognitionManagerImpl::OnRecognitionError(
248 int session_id
, const content::SpeechRecognitionError
& error
) {
249 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
250 if (!SessionExists(session_id
))
253 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
254 delegate_listener
->OnRecognitionError(session_id
, error
);
255 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
256 listener
->OnRecognitionError(session_id
, error
);
// Relays the |volume| / |noise_volume| update for |session_id|, first to the
// delegate's listener and then to the session's own listener; each is skipped
// when its getter returns NULL. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
259 void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
260 int session_id
, float volume
, float noise_volume
) {
261 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
262 if (!SessionExists(session_id
))
265 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
266 delegate_listener
->OnAudioLevelsChange(session_id
, volume
, noise_volume
);
267 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
268 listener
->OnAudioLevelsChange(session_id
, volume
, noise_volume
);
// Relays the recognition-end notification for |session_id| to the delegate's
// listener and the session's own listener (each skipped when NULL), then
// posts an EVENT_RECOGNITION_ENDED dispatch so the session FSM sees the event
// too. Runs on the IO thread.
// NOTE(review): the statement guarded by the SessionExists() check is not
// visible in this excerpt (presumably an early return) — confirm.
271 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id
) {
272 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
273 if (!SessionExists(session_id
))
276 if (SpeechRecognitionEventListener
* delegate_listener
= GetDelegateListener())
277 delegate_listener
->OnRecognitionEnd(session_id
);
278 if (SpeechRecognitionEventListener
* listener
= GetListener(session_id
))
279 listener
->OnRecognitionEnd(session_id
);
// Feed the event to the FSM asynchronously, after the listeners were notified.
280 BrowserThread::PostTask(BrowserThread::IO
, FROM_HERE
,
281 base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent
,
282 this->AsWeakPtr(), session_id
, EVENT_RECOGNITION_ENDED
));
// Looks up a session by the (render_process_id, render_view_id, request_id)
// triple stored in its context, scanning |sessions_| linearly. Must be called
// on the IO thread. Returns kSessionIDInvalid when no session matches.
// NOTE(review): the statement executed on a match is not visible in this
// excerpt; presumably it returns the matching |session_id| — confirm.
285 int SpeechRecognitionManagerImpl::GetSession(
286 int render_process_id
, int render_view_id
, int request_id
) const {
287 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
288 SessionsTable::const_iterator iter
;
289 for(iter
= sessions_
.begin(); iter
!= sessions_
.end(); ++iter
) {
290 const int session_id
= iter
->first
;
291 const SpeechRecognitionSessionContext
& context
= iter
->second
.context
;
// All three identifiers must match.
292 if (context
.render_process_id
== render_process_id
&&
293 context
.render_view_id
== render_view_id
&&
294 context
.request_id
== request_id
) {
298 return kSessionIDInvalid
;
// Returns, by value, the context stored for session |session_id|. The lookup
// goes through GetSession(int), which DCHECKs that the session exists.
301 SpeechRecognitionSessionContext
302 SpeechRecognitionManagerImpl::GetSessionContext(int session_id
) const {
303 return GetSession(session_id
).context
;
// Aborts every session whose configured event listener is |listener| and
// flags those sessions so that GetListener() stops returning the listener.
// Must be called on the IO thread.
// NOTE(review): the loop's increment clause is not visible in this excerpt —
// confirm against the full file.
306 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener(
307 SpeechRecognitionEventListener
* listener
) {
308 // This method gracefully destroys sessions for the listener. However, since
309 // the listener itself is likely to be destroyed after this call, we avoid
310 // dispatching further events to it, marking the |listener_is_active| flag.
311 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
312 for (SessionsTable::iterator it
= sessions_
.begin(); it
!= sessions_
.end();
314 Session
& session
= it
->second
;
315 if (session
.config
.event_listener
== listener
) {
// AbortSession() only posts a task, so |sessions_| is not modified while it
// is being iterated here.
316 AbortSession(session
.id
);
317 session
.listener_is_active
= false;
322 // ----------------------- Core FSM implementation ---------------------------
// Entry point of the per-session FSM: reads the current state of session
// |session_id| and executes the transition for |event|. Runs on the IO thread
// and is the target of the tasks posted by the Start/Abort/StopAudioCapture
// requests and by OnAudioEnd/OnRecognitionEnd. |is_dispatching_event_| is set
// for the duration of the transition, and a DCHECK on it catches a nested
// dispatch.
// NOTE(review): the second parameter line (the |event| argument) and the
// statement guarded by the SessionExists() check are not visible in this
// excerpt (presumably an early return) — confirm.
323 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id
,
325 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
327 // There are some corner cases in which the session might be deleted (due to
328 // an EndRecognition event) between a request (e.g. Abort) and its dispatch.
329 if (!SessionExists(session_id
))
332 const Session
& session
= GetSession(session_id
);
333 FSMState session_state
= GetSessionState(session_id
);
334 DCHECK_LE(session_state
, SESSION_STATE_MAX_VALUE
);
335 DCHECK_LE(event
, EVENT_MAX_VALUE
);
337 // Event dispatching must be sequential, otherwise it will break all the rules
338 // and the assumptions of the finite state automata model.
339 DCHECK(!is_dispatching_event_
);
340 is_dispatching_event_
= true;
// The transition may erase the session (see SessionDelete), so |session| must
// not be used after this call.
341 ExecuteTransitionAndGetNextState(session
, session_state
, event
);
342 is_dispatching_event_
= false;
345 // This FSM handles the evolution of each session, from the viewpoint of the
346 // interaction with the user (that may be either the browser end-user which
347 // interacts with UI bubbles, or JS developer interacting with JS methods).
348 // All the events received by the SpeechRecognizerImpl instances (one for each
349 // session) are always routed to the SpeechRecognitionEventListener(s)
350 // regardless of the choices taken in this FSM.
//
// Selects the action for |event| given |session_state| and executes it on
// |session|. The function returns void: each visible branch returns the
// result of a void helper.
// NOTE(review): the inner "switch (event)" statements and several case labels
// and statements are missing from this excerpt (the residual line numbers
// skip), so the complete transition table cannot be read from the lines below
// — confirm against the full file before relying on it.
351 void SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
352 const Session
& session
, FSMState session_state
, FSMEvent event
) {
353 // Note: since we're not tracking the state of the recognizer object, rather
354 // we're directly retrieving it (through GetSessionState), we see its events
355 // (that are AUDIO_ENDED and RECOGNITION_ENDED) after its state evolution
356 // (e.g., when we receive the AUDIO_ENDED event, the recognizer has just
357 // completed the transition from CAPTURING_AUDIO to WAITING_FOR_RESULT, thus
358 // we perceive the AUDIO_ENDED event in WAITING_FOR_RESULT).
359 // This makes the code below a bit tricky but avoids a lot of code for
360 // tracking and reconstructing asynchronously the state of the recognizer.
361 switch (session_state
) {
362 case SESSION_STATE_IDLE
:
365 return SessionStart(session
);
367 case EVENT_RECOGNITION_ENDED
:
368 return SessionDelete(session
);
369 case EVENT_STOP_CAPTURE
:
370 case EVENT_AUDIO_ENDED
:
374 case SESSION_STATE_CAPTURING_AUDIO
:
376 case EVENT_STOP_CAPTURE
:
377 return SessionStopAudioCapture(session
);
379 return SessionAbort(session
);
382 case EVENT_AUDIO_ENDED
:
383 case EVENT_RECOGNITION_ENDED
:
384 return NotFeasible(session
, event
);
387 case SESSION_STATE_WAITING_FOR_RESULT
:
390 return SessionAbort(session
);
391 case EVENT_AUDIO_ENDED
:
392 return ResetCapturingSessionId(session
);
394 case EVENT_STOP_CAPTURE
:
396 case EVENT_RECOGNITION_ENDED
:
397 return NotFeasible(session
, event
);
// Fallback for any state/event combination not handled above.
401 return NotFeasible(session
, event
);
// Derives the FSM state of session |session_id| from its recognizer instead
// of tracking it separately:
//   - no recognizer, or recognizer not active  -> SESSION_STATE_IDLE
//   - recognizer capturing audio               -> SESSION_STATE_CAPTURING_AUDIO
//   - otherwise (active, not capturing)        -> SESSION_STATE_WAITING_FOR_RESULT
// The lookup goes through GetSession(int), which DCHECKs that the session
// exists.
404 SpeechRecognitionManagerImpl::FSMState
405 SpeechRecognitionManagerImpl::GetSessionState(int session_id
) const {
406 const Session
& session
= GetSession(session_id
);
407 if (!session
.recognizer
.get() || !session
.recognizer
->IsActive())
408 return SESSION_STATE_IDLE
;
409 if (session
.recognizer
->IsCapturingAudio())
410 return SESSION_STATE_CAPTURING_AUDIO
;
411 return SESSION_STATE_WAITING_FOR_RESULT
;
414 // ----------- Contract for all the FSM evolution functions below -------------
415 // - Are guaranteed to be executed in the IO thread;
416 // - Are guaranteed to be not reentrant (themselves and each other);
// FSM action: records |session| as the one capturing audio and starts its
// recognizer. The recognizer is dereferenced without a null check here.
418 void SpeechRecognitionManagerImpl::SessionStart(const Session
& session
) {
419 session_id_capturing_audio_
= session
.id
;
420 session
.recognizer
->StartRecognition();
// FSM action: aborts recognition for |session|. If this session is the one
// recorded as capturing audio, that record is cleared first. The recognizer
// is expected to exist and be active (DCHECKed).
423 void SpeechRecognitionManagerImpl::SessionAbort(const Session
& session
) {
424 if (session_id_capturing_audio_
== session
.id
)
425 session_id_capturing_audio_
= kSessionIDInvalid
;
426 DCHECK(session
.recognizer
.get() && session
.recognizer
->IsActive());
427 session
.recognizer
->AbortRecognition();
// FSM action: asks the recognizer of |session| to stop capturing audio. The
// recognizer is expected to exist and to be capturing audio (DCHECKed).
// |session_id_capturing_audio_| is not modified here.
430 void SpeechRecognitionManagerImpl::SessionStopAudioCapture(
431 const Session
& session
) {
432 DCHECK(session
.recognizer
.get() && session
.recognizer
->IsCapturingAudio());
433 session
.recognizer
->StopAudioCapture();
// FSM action: clears the record of which session is capturing audio.
// |session| is expected to be the session currently recorded (DCHECKed).
436 void SpeechRecognitionManagerImpl::ResetCapturingSessionId(
437 const Session
& session
) {
438 DCHECK_EQ(session_id_capturing_audio_
, session
.id
);
439 session_id_capturing_audio_
= kSessionIDInvalid
;
// FSM action: removes |session| from |sessions_|. The recognizer must be
// absent or inactive (DCHECKed). If this session is the one recorded as
// capturing audio, that record is cleared first.
442 void SpeechRecognitionManagerImpl::SessionDelete(const Session
& session
) {
443 DCHECK(session
.recognizer
== NULL
|| !session
.recognizer
->IsActive());
444 if (session_id_capturing_audio_
== session
.id
)
445 session_id_capturing_audio_
= kSessionIDInvalid
;
// |session| refers to the map entry being erased and is invalid after this
// line.
446 sessions_
.erase(session
.id
);
// FSM action for state/event combinations that are not expected to occur:
// hits NOTREACHED() with a message naming the event, the session's current
// state and the session id.
// NOTE(review): the second parameter line (the |event| argument) is not
// visible in this excerpt — confirm against the full file.
449 void SpeechRecognitionManagerImpl::NotFeasible(const Session
& session
,
451 NOTREACHED() << "Unfeasible event " << event
452 << " in state " << GetSessionState(session
.id
)
453 << " for session " << session
.id
;
// Produces the id for a new session and returns the updated
// |last_session_id_|. A non-positive value is reset to 1, so the returned id
// is always positive and never equals kSessionIDInvalid (0).
// NOTE(review): the statement that advances |last_session_id_| is not visible
// in this excerpt (presumably an increment) — confirm.
456 int SpeechRecognitionManagerImpl::GetNextSessionID() {
458 // Deal with wrapping of last_session_id_. (How civilized).
459 if (last_session_id_
<= 0)
460 last_session_id_
= 1;
461 return last_session_id_
;
// Returns true when |sessions_| contains an entry keyed by |session_id|.
464 bool SpeechRecognitionManagerImpl::SessionExists(int session_id
) const {
465 return sessions_
.find(session_id
) != sessions_
.end();
// Looks up the session stored under |session_id|. Must be called on the IO
// thread; the session is required to exist (DCHECKed, so this is unchecked in
// builds where DCHECK is compiled out).
// NOTE(review): the return statement is not visible in this excerpt;
// presumably it returns the found entry's Session — confirm.
468 const SpeechRecognitionManagerImpl::Session
&
469 SpeechRecognitionManagerImpl::GetSession(int session_id
) const {
470 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO
));
471 SessionsTable::const_iterator iter
= sessions_
.find(session_id
);
472 DCHECK(iter
!= sessions_
.end());
// Returns the event listener configured for session |session_id|, or NULL
// once the session's |listener_is_active| flag has been cleared (see
// AbortAllSessionsForListener). The session must exist.
476 SpeechRecognitionEventListener
* SpeechRecognitionManagerImpl::GetListener(
477 int session_id
) const {
478 const Session
& session
= GetSession(session_id
);
479 return session
.listener_is_active
? session
.config
.event_listener
: NULL
;
// Returns the delegate's event listener, or NULL when there is no delegate.
482 SpeechRecognitionEventListener
*
483 SpeechRecognitionManagerImpl::GetDelegateListener() const {
484 return delegate_
.get() ? delegate_
->GetEventListener() : NULL
;
// Returns a reference to the config stored for session |session_id|. The
// reference points into |sessions_|, so it is valid only while that entry
// exists. The lookup goes through GetSession(int), which DCHECKs existence.
487 const SpeechRecognitionSessionConfig
&
488 SpeechRecognitionManagerImpl::GetSessionConfig(int session_id
) const {
489 return GetSession(session_id
).config
;
// Forwards to the audio manager obtained from BrowserMainLoop and returns
// its HasAudioInputDevices() result.
492 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
493 return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
// Forwards to the audio manager obtained from BrowserMainLoop and returns its
// IsRecordingInProcess() result. This does not consult
// |session_id_capturing_audio_|.
496 bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
497 return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
// Forwards to the audio manager obtained from BrowserMainLoop and returns
// its GetAudioInputDeviceModel() result.
500 string16
SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
501 return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
// Asks the audio manager to show the audio input settings. When called off
// the FILE thread, the method re-posts itself to the FILE thread. The audio
// manager is invoked only if it reports that the settings can be shown.
// NOTE(review): the remaining Bind arguments and the statement following the
// PostTask call are not visible in this excerpt (presumably an early return)
// — confirm against the full file.
504 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
505 // Since AudioManager::ShowAudioInputSettings can potentially launch external
506 // processes, do that in the FILE thread to not block the calling threads.
507 if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
508 BrowserThread::PostTask(
509 BrowserThread::FILE, FROM_HERE
,
510 base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings
,
515 media::AudioManager
* audio_manager
= BrowserMainLoop::GetAudioManager();
// The DCHECK flags an unexpected call in debug builds; the runtime check
// below keeps builds without DCHECKs from calling an unsupported operation.
516 DCHECK(audio_manager
->CanShowAudioInputSettings());
517 if (audio_manager
->CanShowAudioInputSettings())
518 audio_manager
->ShowAudioInputSettings();
// A new Session starts with an invalid id and with its listener marked as
// active; the remaining members are default-initialised.
521 SpeechRecognitionManagerImpl::Session::Session()
522 : id(kSessionIDInvalid
),
523 listener_is_active(true) {
// Out-of-line destructor.
// NOTE(review): the body is not visible in this excerpt; presumably empty —
// confirm.
526 SpeechRecognitionManagerImpl::Session::~Session() {
529 } // namespace speech