content/browser/speech/speech_recognition_manager_impl.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
#include "content/browser/speech/speech_recognition_manager_impl.h"

#include <limits>

#include "base/bind.h"
#include "content/browser/browser_main_loop.h"
#include "content/browser/speech/google_one_shot_remote_engine.h"
#include "content/browser/speech/speech_recognition_engine.h"
#include "content/browser/speech/speech_recognizer_impl.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/content_browser_client.h"
#include "content/public/browser/resource_context.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "content/public/browser/speech_recognition_manager_delegate.h"
#include "content/public/browser/speech_recognition_session_config.h"
#include "content/public/browser/speech_recognition_session_context.h"
#include "content/public/common/speech_recognition_result.h"
#include "media/audio/audio_manager.h"
  21
  22 using base::Callback;
  23 using content::BrowserMainLoop;
  24 using content::BrowserThread;
  25 using content::SpeechRecognitionError;
  26 using content::SpeechRecognitionEventListener;
  27 using content::SpeechRecognitionManager;
  28 using content::SpeechRecognitionResult;
  29 using content::SpeechRecognitionSessionContext;
  30 using content::SpeechRecognitionSessionConfig;
  31
  32 namespace content {
  33 const int SpeechRecognitionManager::kSessionIDInvalid = 0;
  34
  35 SpeechRecognitionManager* SpeechRecognitionManager::GetInstance() {
  36   return speech::SpeechRecognitionManagerImpl::GetInstance();
  37 }
  38 }  // namespace content
  39
  40 namespace {
  41 speech::SpeechRecognitionManagerImpl* g_speech_recognition_manager_impl;
  42 }  // namespace
  43
  44 namespace speech {
  45
  46 SpeechRecognitionManagerImpl* SpeechRecognitionManagerImpl::GetInstance() {
  47   return g_speech_recognition_manager_impl;
  48 }
  49
  50 SpeechRecognitionManagerImpl::SpeechRecognitionManagerImpl()
  51     : session_id_capturing_audio_(kSessionIDInvalid),
  52       last_session_id_(kSessionIDInvalid),
  53       is_dispatching_event_(false),
  54       delegate_(content::GetContentClient()->browser()->
  55                     GetSpeechRecognitionManagerDelegate()) {
  56   DCHECK(!g_speech_recognition_manager_impl);
  57   g_speech_recognition_manager_impl = this;
  58 }
  59
  60 SpeechRecognitionManagerImpl::~SpeechRecognitionManagerImpl() {
  61   DCHECK(g_speech_recognition_manager_impl);
  62   g_speech_recognition_manager_impl = NULL;
  63   // Recognition sessions will be aborted by the corresponding destructors.
  64   sessions_.clear();
  65 }
  66
  67 int SpeechRecognitionManagerImpl::CreateSession(
  68     const SpeechRecognitionSessionConfig& config) {
  69   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  70
  71   const int session_id = GetNextSessionID();
  72   DCHECK(!SessionExists(session_id));
  73   // Set-up the new session.
  74   Session& session = sessions_[session_id];
  75   session.id = session_id;
  76   session.config = config;
  77   session.context = config.initial_context;
  78
  79   std::string hardware_info;
  80   bool can_report_metrics = false;
  81   if (delegate_.get())
  82     delegate_->GetDiagnosticInformation(&can_report_metrics, &hardware_info);
  83
  84   SpeechRecognitionEngineConfig remote_engine_config;
  85   remote_engine_config.language = config.language;
  86   remote_engine_config.grammars = config.grammars;
  87   remote_engine_config.audio_sample_rate =
  88       SpeechRecognizerImpl::kAudioSampleRate;
  89   remote_engine_config.audio_num_bits_per_sample =
  90      SpeechRecognizerImpl::kNumBitsPerAudioSample;
  91   remote_engine_config.filter_profanities = config.filter_profanities;
  92   remote_engine_config.hardware_info = hardware_info;
  93   remote_engine_config.origin_url = can_report_metrics ? config.origin_url : "";
  94
  95   SpeechRecognitionEngine* google_remote_engine =
  96         new GoogleOneShotRemoteEngine(config.url_request_context_getter);
  97   google_remote_engine->SetConfig(remote_engine_config);
  98
  99   session.recognizer = new SpeechRecognizerImpl(this,
 100                                                 session_id,
 101                                                 google_remote_engine);
 102   return session_id;
 103 }
 104
 105 void SpeechRecognitionManagerImpl::StartSession(int session_id) {
 106   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 107   if (!SessionExists(session_id))
 108     return;
 109
 110   // If there is another active session, abort that.
 111   if (session_id_capturing_audio_ != kSessionIDInvalid &&
 112       session_id_capturing_audio_ != session_id) {
 113     AbortSession(session_id_capturing_audio_);
 114   }
 115
 116   if (delegate_.get())
 117     delegate_->CheckRecognitionIsAllowed(
 118         session_id,
 119         base::Bind(&SpeechRecognitionManagerImpl::RecognitionAllowedCallback,
 120                    this->AsWeakPtr()));
 121 }
 122
 123 void SpeechRecognitionManagerImpl::RecognitionAllowedCallback(int session_id,
 124                                                               bool is_allowed) {
 125   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 126   DCHECK(SessionExists(session_id));
 127   if (is_allowed) {
 128     BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
 129         base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
 130                    this->AsWeakPtr(), session_id, EVENT_START));
 131   } else {
 132     sessions_.erase(session_id);
 133   }
 134 }
 135
 136 void SpeechRecognitionManagerImpl::AbortSession(int session_id) {
 137   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 138   if (!SessionExists(session_id))
 139     return;
 140
 141   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
 142       base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
 143                  this->AsWeakPtr(), session_id, EVENT_ABORT));
 144 }
 145
 146 void SpeechRecognitionManagerImpl::StopAudioCaptureForSession(int session_id) {
 147   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 148   if (!SessionExists(session_id))
 149     return;
 150
 151   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
 152       base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
 153                  this->AsWeakPtr(), session_id, EVENT_STOP_CAPTURE));
 154 }
 155
 156 // Here begins the SpeechRecognitionEventListener interface implementation,
 157 // which will simply relay the events to the proper listener registered for the
 158 // particular session (most likely InputTagSpeechDispatcherHost) and to the
 159 // catch-all listener provided by the delegate (if any).
 160
 161 void SpeechRecognitionManagerImpl::OnRecognitionStart(int session_id) {
 162   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 163   if (!SessionExists(session_id))
 164     return;
 165
 166   DCHECK_EQ(session_id_capturing_audio_, session_id);
 167   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 168     delegate_listener->OnRecognitionStart(session_id);
 169   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 170     listener->OnRecognitionStart(session_id);
 171 }
 172
 173 void SpeechRecognitionManagerImpl::OnAudioStart(int session_id) {
 174   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 175   if (!SessionExists(session_id))
 176     return;
 177
 178   DCHECK_EQ(session_id_capturing_audio_, session_id);
 179   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 180     delegate_listener->OnAudioStart(session_id);
 181   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 182     listener->OnAudioStart(session_id);
 183 }
 184
 185 void SpeechRecognitionManagerImpl::OnEnvironmentEstimationComplete(
 186     int session_id) {
 187   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 188   if (!SessionExists(session_id))
 189     return;
 190
 191   DCHECK_EQ(session_id_capturing_audio_, session_id);
 192   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 193     delegate_listener->OnEnvironmentEstimationComplete(session_id);
 194   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 195     listener->OnEnvironmentEstimationComplete(session_id);
 196 }
 197
 198 void SpeechRecognitionManagerImpl::OnSoundStart(int session_id) {
 199   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 200   if (!SessionExists(session_id))
 201     return;
 202
 203   DCHECK_EQ(session_id_capturing_audio_, session_id);
 204   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 205     delegate_listener->OnSoundStart(session_id);
 206   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 207     listener->OnSoundStart(session_id);
 208 }
 209
 210 void SpeechRecognitionManagerImpl::OnSoundEnd(int session_id) {
 211   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 212   if (!SessionExists(session_id))
 213     return;
 214
 215   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 216     delegate_listener->OnSoundEnd(session_id);
 217   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 218     listener->OnSoundEnd(session_id);
 219 }
 220
 221 void SpeechRecognitionManagerImpl::OnAudioEnd(int session_id) {
 222   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 223   if (!SessionExists(session_id))
 224     return;
 225
 226   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 227     delegate_listener->OnAudioEnd(session_id);
 228   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 229     listener->OnAudioEnd(session_id);
 230   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
 231       base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
 232                  this->AsWeakPtr(), session_id, EVENT_AUDIO_ENDED));
 233 }
 234
 235 void SpeechRecognitionManagerImpl::OnRecognitionResult(
 236     int session_id, const content::SpeechRecognitionResult& result) {
 237   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 238   if (!SessionExists(session_id))
 239     return;
 240
 241   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 242     delegate_listener->OnRecognitionResult(session_id, result);
 243   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 244     listener->OnRecognitionResult(session_id, result);
 245 }
 246
 247 void SpeechRecognitionManagerImpl::OnRecognitionError(
 248     int session_id, const content::SpeechRecognitionError& error) {
 249   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 250   if (!SessionExists(session_id))
 251     return;
 252
 253   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 254     delegate_listener->OnRecognitionError(session_id, error);
 255   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 256     listener->OnRecognitionError(session_id, error);
 257 }
 258
 259 void SpeechRecognitionManagerImpl::OnAudioLevelsChange(
 260     int session_id, float volume, float noise_volume) {
 261   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 262   if (!SessionExists(session_id))
 263     return;
 264
 265   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 266     delegate_listener->OnAudioLevelsChange(session_id, volume, noise_volume);
 267   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 268     listener->OnAudioLevelsChange(session_id, volume, noise_volume);
 269 }
 270
 271 void SpeechRecognitionManagerImpl::OnRecognitionEnd(int session_id) {
 272   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 273   if (!SessionExists(session_id))
 274     return;
 275
 276   if (SpeechRecognitionEventListener* delegate_listener = GetDelegateListener())
 277     delegate_listener->OnRecognitionEnd(session_id);
 278   if (SpeechRecognitionEventListener* listener = GetListener(session_id))
 279     listener->OnRecognitionEnd(session_id);
 280   BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
 281       base::Bind(&SpeechRecognitionManagerImpl::DispatchEvent,
 282                  this->AsWeakPtr(), session_id, EVENT_RECOGNITION_ENDED));
 283 }
 284
 285 int SpeechRecognitionManagerImpl::GetSession(
 286     int render_process_id, int render_view_id, int request_id) const {
 287   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 288   SessionsTable::const_iterator iter;
 289   for(iter = sessions_.begin(); iter != sessions_.end(); ++iter) {
 290     const int session_id = iter->first;
 291     const SpeechRecognitionSessionContext& context = iter->second.context;
 292     if (context.render_process_id == render_process_id &&
 293         context.render_view_id == render_view_id &&
 294         context.request_id == request_id) {
 295       return session_id;
 296     }
 297   }
 298   return kSessionIDInvalid;
 299 }
 300
 301 SpeechRecognitionSessionContext
 302 SpeechRecognitionManagerImpl::GetSessionContext(int session_id) const {
 303   return GetSession(session_id).context;
 304 }
 305
 306 void SpeechRecognitionManagerImpl::AbortAllSessionsForListener(
 307     SpeechRecognitionEventListener* listener) {
 308   // This method gracefully destroys sessions for the listener. However, since
 309   // the listener itself is likely to be destroyed after this call, we avoid
 310   // dispatching further events to it, marking the |listener_is_active| flag.
 311   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 312   for (SessionsTable::iterator it = sessions_.begin(); it != sessions_.end();
 313        ++it) {
 314     Session& session = it->second;
 315     if (session.config.event_listener == listener) {
 316       AbortSession(session.id);
 317       session.listener_is_active = false;
 318     }
 319   }
 320 }
 321
 322 // -----------------------  Core FSM implementation ---------------------------
 323 void SpeechRecognitionManagerImpl::DispatchEvent(int session_id,
 324                                                  FSMEvent event) {
 325   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 326
 327   // There are some corner cases in which the session might be deleted (due to
 328   // an EndRecognition event) between a request (e.g. Abort) and its dispatch.
 329   if (!SessionExists(session_id))
 330     return;
 331
 332   const Session& session = GetSession(session_id);
 333   FSMState session_state = GetSessionState(session_id);
 334   DCHECK_LE(session_state, SESSION_STATE_MAX_VALUE);
 335   DCHECK_LE(event, EVENT_MAX_VALUE);
 336
 337   // Event dispatching must be sequential, otherwise it will break all the rules
 338   // and the assumptions of the finite state automata model.
 339   DCHECK(!is_dispatching_event_);
 340   is_dispatching_event_ = true;
 341   ExecuteTransitionAndGetNextState(session, session_state, event);
 342   is_dispatching_event_ = false;
 343 }
 344
 345 // This FSM handles the evolution of each session, from the viewpoint of the
 346 // interaction with the user (that may be either the browser end-user which
 347 // interacts with UI bubbles, or JS developer intracting with JS methods).
 348 // All the events received by the SpeechRecognizerImpl instances (one for each
 349 // session) are always routed to the SpeechRecognitionEventListener(s)
 350 // regardless the choices taken in this FSM.
 351 void SpeechRecognitionManagerImpl::ExecuteTransitionAndGetNextState(
 352     const Session& session, FSMState session_state, FSMEvent event) {
 353   // Note: since we're not tracking the state of the recognizer object, rather
 354   // we're directly retrieving it (through GetSessionState), we see its events
 355   // (that are AUDIO_ENDED and RECOGNITION_ENDED) after its state evolution
 356   // (e.g., when we receive the AUDIO_ENDED event, the recognizer has just
 357   // completed the transition from CAPTURING_AUDIO to WAITING_FOR_RESULT, thus
 358   // we perceive the AUDIO_ENDED event in WAITING_FOR_RESULT).
 359   // This makes the code below a bit tricky but avoids a lot of code for
 360   // tracking and reconstructing asynchronously the state of the recognizer.
 361   switch (session_state) {
 362     case SESSION_STATE_IDLE:
 363       switch (event) {
 364         case EVENT_START:
 365           return SessionStart(session);
 366         case EVENT_ABORT:
 367         case EVENT_RECOGNITION_ENDED:
 368           return SessionDelete(session);
 369         case EVENT_STOP_CAPTURE:
 370         case EVENT_AUDIO_ENDED:
 371           return;
 372       }
 373       break;
 374     case SESSION_STATE_CAPTURING_AUDIO:
 375       switch (event) {
 376         case EVENT_STOP_CAPTURE:
 377           return SessionStopAudioCapture(session);
 378         case EVENT_ABORT:
 379           return SessionAbort(session);
 380         case EVENT_START:
 381           return;
 382         case EVENT_AUDIO_ENDED:
 383         case EVENT_RECOGNITION_ENDED:
 384           return NotFeasible(session, event);
 385       }
 386       break;
 387     case SESSION_STATE_WAITING_FOR_RESULT:
 388       switch (event) {
 389         case EVENT_ABORT:
 390           return SessionAbort(session);
 391         case EVENT_AUDIO_ENDED:
 392           return ResetCapturingSessionId(session);
 393         case EVENT_START:
 394         case EVENT_STOP_CAPTURE:
 395           return;
 396         case EVENT_RECOGNITION_ENDED:
 397           return NotFeasible(session, event);
 398       }
 399       break;
 400   }
 401   return NotFeasible(session, event);
 402 }
 403
 404 SpeechRecognitionManagerImpl::FSMState
 405 SpeechRecognitionManagerImpl::GetSessionState(int session_id) const {
 406   const Session& session = GetSession(session_id);
 407   if (!session.recognizer.get() || !session.recognizer->IsActive())
 408     return SESSION_STATE_IDLE;
 409   if (session.recognizer->IsCapturingAudio())
 410     return SESSION_STATE_CAPTURING_AUDIO;
 411   return SESSION_STATE_WAITING_FOR_RESULT;
 412 }
 413
 414 // ----------- Contract for all the FSM evolution functions below -------------
 415 //  - Are guaranteed to be executed in the IO thread;
 416 //  - Are guaranteed to be not reentrant (themselves and each other);
 417
 418 void SpeechRecognitionManagerImpl::SessionStart(const Session& session) {
 419   session_id_capturing_audio_ = session.id;
 420   session.recognizer->StartRecognition();
 421 }
 422
 423 void SpeechRecognitionManagerImpl::SessionAbort(const Session& session) {
 424   if (session_id_capturing_audio_ == session.id)
 425     session_id_capturing_audio_ = kSessionIDInvalid;
 426   DCHECK(session.recognizer.get() && session.recognizer->IsActive());
 427   session.recognizer->AbortRecognition();
 428 }
 429
 430 void SpeechRecognitionManagerImpl::SessionStopAudioCapture(
 431     const Session& session) {
 432   DCHECK(session.recognizer.get() && session.recognizer->IsCapturingAudio());
 433   session.recognizer->StopAudioCapture();
 434 }
 435
 436 void SpeechRecognitionManagerImpl::ResetCapturingSessionId(
 437     const Session& session) {
 438   DCHECK_EQ(session_id_capturing_audio_, session.id);
 439   session_id_capturing_audio_ = kSessionIDInvalid;
 440 }
 441
 442 void SpeechRecognitionManagerImpl::SessionDelete(const Session& session) {
 443   DCHECK(session.recognizer == NULL || !session.recognizer->IsActive());
 444   if (session_id_capturing_audio_ == session.id)
 445     session_id_capturing_audio_ = kSessionIDInvalid;
 446   sessions_.erase(session.id);
 447 }
 448
 449 void SpeechRecognitionManagerImpl::NotFeasible(const Session& session,
 450                                                FSMEvent event) {
 451   NOTREACHED() << "Unfeasible event " << event
 452                << " in state " << GetSessionState(session.id)
 453                << " for session " << session.id;
 454 }
 455
 456 int SpeechRecognitionManagerImpl::GetNextSessionID() {
 457   ++last_session_id_;
 458   // Deal with wrapping of last_session_id_. (How civilized).
 459   if (last_session_id_ <= 0)
 460     last_session_id_ = 1;
 461   return last_session_id_;
 462 }
 463
 464 bool SpeechRecognitionManagerImpl::SessionExists(int session_id) const {
 465   return sessions_.find(session_id) != sessions_.end();
 466 }
 467
 468 const SpeechRecognitionManagerImpl::Session&
 469 SpeechRecognitionManagerImpl::GetSession(int session_id) const {
 470   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
 471   SessionsTable::const_iterator iter = sessions_.find(session_id);
 472   DCHECK(iter != sessions_.end());
 473   return iter->second;
 474 }
 475
 476 SpeechRecognitionEventListener* SpeechRecognitionManagerImpl::GetListener(
 477     int session_id) const {
 478   const Session& session = GetSession(session_id);
 479   return session.listener_is_active ? session.config.event_listener : NULL;
 480 }
 481
 482 SpeechRecognitionEventListener*
 483 SpeechRecognitionManagerImpl::GetDelegateListener() const {
 484   return delegate_.get() ? delegate_->GetEventListener() : NULL;
 485 }
 486
 487 const SpeechRecognitionSessionConfig&
 488 SpeechRecognitionManagerImpl::GetSessionConfig(int session_id) const {
 489   return GetSession(session_id).config;
 490 }
 491
 492 bool SpeechRecognitionManagerImpl::HasAudioInputDevices() {
 493   return BrowserMainLoop::GetAudioManager()->HasAudioInputDevices();
 494 }
 495
 496 bool SpeechRecognitionManagerImpl::IsCapturingAudio() {
 497   return BrowserMainLoop::GetAudioManager()->IsRecordingInProcess();
 498 }
 499
 500 string16 SpeechRecognitionManagerImpl::GetAudioInputDeviceModel() {
 501   return BrowserMainLoop::GetAudioManager()->GetAudioInputDeviceModel();
 502 }
 503
 504 void SpeechRecognitionManagerImpl::ShowAudioInputSettings() {
 505   // Since AudioManager::ShowAudioInputSettings can potentially launch external
 506   // processes, do that in the FILE thread to not block the calling threads.
 507   if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
 508     BrowserThread::PostTask(
 509         BrowserThread::FILE, FROM_HERE,
 510         base::Bind(&SpeechRecognitionManagerImpl::ShowAudioInputSettings,
 511                    this->AsWeakPtr()));
 512     return;
 513   }
 514
 515   media::AudioManager* audio_manager = BrowserMainLoop::GetAudioManager();
 516   DCHECK(audio_manager->CanShowAudioInputSettings());
 517   if (audio_manager->CanShowAudioInputSettings())
 518     audio_manager->ShowAudioInputSettings();
 519 }
 520
 521 SpeechRecognitionManagerImpl::Session::Session()
 522   : id(kSessionIDInvalid),
 523     listener_is_active(true) {
 524 }
 525
 526 SpeechRecognitionManagerImpl::Session::~Session() {
 527 }
 528
 529 }  // namespace speech