chrome/browser/ui/app_list/speech_recognizer.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/ui/app_list/speech_recognizer.h"
   6
   7 #include <algorithm>
   8
   9 #include "base/bind.h"
  10 #include "base/strings/string16.h"
  11 #include "base/timer/timer.h"
  12 #include "chrome/browser/ui/app_list/speech_recognizer_delegate.h"
  13 #include "content/public/browser/browser_thread.h"
  14 #include "content/public/browser/render_process_host.h"
  15 #include "content/public/browser/speech_recognition_event_listener.h"
  16 #include "content/public/browser/speech_recognition_manager.h"
  17 #include "content/public/browser/speech_recognition_session_config.h"
  18 #include "content/public/browser/speech_recognition_session_preamble.h"
  19 #include "content/public/common/child_process_host.h"
  20 #include "content/public/common/speech_recognition_error.h"
  21 #include "net/url_request/url_request_context_getter.h"
  22 #include "ui/app_list/speech_ui_model_observer.h"
  23
  24 namespace app_list {
  25
  26 // Length of timeout to cancel recognition if there's no speech heard.
  27 static const int kNoSpeechTimeoutInSeconds = 5;
  28
  29 // Length of timeout to cancel recognition if no different results are received.
  30 static const int kNoNewSpeechTimeoutInSeconds = 3;
  31
  32 // Invalid speech session.
  33 static const int kInvalidSessionId = -1;
  34
  35 // Speech recognizer listener. This is separate from SpeechRecognizer because
  36 // the speech recognition engine must function from the IO thread. Because of
  37 // this, the lifecycle of this class must be decoupled from the lifecycle of
  38 // SpeechRecognizer. To avoid circular references, this class has no reference
  39 // to SpeechRecognizer. Instead, it has a reference to the
  40 // SpeechRecognizerDelegate via a weak pointer that is only ever referenced from
  41 // the UI thread.
  42 class SpeechRecognizer::EventListener
  43     : public base::RefCountedThreadSafe<SpeechRecognizer::EventListener>,
  44       public content::SpeechRecognitionEventListener {
  45  public:
  46   EventListener(const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
  47                 net::URLRequestContextGetter* url_request_context_getter,
  48                 const std::string& locale);
  49
  50   void StartOnIOThread(
  51       const std::string& auth_scope,
  52       const std::string& auth_token,
  53       const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble);
  54   void StopOnIOThread();
  55
  56  private:
  57   friend class base::RefCountedThreadSafe<SpeechRecognizer::EventListener>;
  58   ~EventListener() override;
  59
  60   void NotifyRecognitionStateChanged(SpeechRecognitionState new_state);
  61
  62   // Starts a timer for |timeout_seconds|. When the timer expires, will stop
  63   // capturing audio and get a final utterance from the recognition manager.
  64   void StartSpeechTimeout(int timeout_seconds);
  65   void StopSpeechTimeout();
  66   void SpeechTimeout();
  67
  68   // Overidden from content::SpeechRecognitionEventListener:
  69   // These are always called on the IO thread.
  70   void OnRecognitionStart(int session_id) override;
  71   void OnRecognitionEnd(int session_id) override;
  72   void OnRecognitionResults(
  73       int session_id,
  74       const content::SpeechRecognitionResults& results) override;
  75   void OnRecognitionError(
  76       int session_id, const content::SpeechRecognitionError& error) override;
  77   void OnSoundStart(int session_id) override;
  78   void OnSoundEnd(int session_id) override;
  79   void OnAudioLevelsChange(
  80       int session_id, float volume, float noise_volume) override;
  81   void OnEnvironmentEstimationComplete(int session_id) override;
  82   void OnAudioStart(int session_id) override;
  83   void OnAudioEnd(int session_id) override;
  84
  85   // Only dereferenced from the UI thread, but copied on IO thread.
  86   base::WeakPtr<SpeechRecognizerDelegate> delegate_;
  87
  88   // All remaining members only accessed from the IO thread.
  89   scoped_refptr<net::URLRequestContextGetter> url_request_context_getter_;
  90   std::string locale_;
  91   base::Timer speech_timeout_;
  92   int session_;
  93   base::string16 last_result_str_;
  94
  95   base::WeakPtrFactory<EventListener> weak_factory_;
  96
  97   DISALLOW_COPY_AND_ASSIGN(EventListener);
  98 };
  99
 100 SpeechRecognizer::EventListener::EventListener(
 101     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
 102     net::URLRequestContextGetter* url_request_context_getter,
 103     const std::string& locale)
 104     : delegate_(delegate),
 105       url_request_context_getter_(url_request_context_getter),
 106       locale_(locale),
 107       speech_timeout_(false, false),
 108       session_(kInvalidSessionId),
 109       weak_factory_(this) {
 110   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 111 }
 112
 113 SpeechRecognizer::EventListener::~EventListener() {
 114   DCHECK(!speech_timeout_.IsRunning());
 115 }
 116
 117 void SpeechRecognizer::EventListener::StartOnIOThread(
 118     const std::string& auth_scope,
 119     const std::string& auth_token,
 120     const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble) {
 121   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 122   if (session_ != kInvalidSessionId)
 123     StopOnIOThread();
 124
 125   content::SpeechRecognitionSessionConfig config;
 126   config.language = locale_;
 127   config.is_legacy_api = false;
 128   config.continuous = true;
 129   config.interim_results = true;
 130   config.max_hypotheses = 1;
 131   config.filter_profanities = true;
 132   config.url_request_context_getter = url_request_context_getter_;
 133   config.event_listener = weak_factory_.GetWeakPtr();
 134   // kInvalidUniqueID is not a valid render process, so the speech permission
 135   // check allows the request through.
 136   config.initial_context.render_process_id =
 137       content::ChildProcessHost::kInvalidUniqueID;
 138   config.auth_scope = auth_scope;
 139   config.auth_token = auth_token;
 140   config.preamble = preamble;
 141
 142   auto speech_instance = content::SpeechRecognitionManager::GetInstance();
 143   session_ = speech_instance->CreateSession(config);
 144   speech_instance->StartSession(session_);
 145 }
 146
 147 void SpeechRecognizer::EventListener::StopOnIOThread() {
 148   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 149   if (session_ == kInvalidSessionId)
 150     return;
 151
 152   // Prevent recursion.
 153   int session = session_;
 154   session_ = kInvalidSessionId;
 155   StopSpeechTimeout();
 156   content::SpeechRecognitionManager::GetInstance()->StopAudioCaptureForSession(
 157       session);
 158 }
 159
 160 void SpeechRecognizer::EventListener::NotifyRecognitionStateChanged(
 161     SpeechRecognitionState new_state) {
 162   content::BrowserThread::PostTask(
 163       content::BrowserThread::UI,
 164       FROM_HERE,
 165       base::Bind(&SpeechRecognizerDelegate::OnSpeechRecognitionStateChanged,
 166                  delegate_,
 167                  new_state));
 168 }
 169
 170 void SpeechRecognizer::EventListener::StartSpeechTimeout(int timeout_seconds) {
 171   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 172   speech_timeout_.Start(
 173       FROM_HERE,
 174       base::TimeDelta::FromSeconds(timeout_seconds),
 175       base::Bind(&SpeechRecognizer::EventListener::SpeechTimeout, this));
 176 }
 177
 178 void SpeechRecognizer::EventListener::StopSpeechTimeout() {
 179   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 180   speech_timeout_.Stop();
 181 }
 182
 183 void SpeechRecognizer::EventListener::SpeechTimeout() {
 184   DCHECK_CURRENTLY_ON(content::BrowserThread::IO);
 185   StopOnIOThread();
 186 }
 187
 188 void SpeechRecognizer::EventListener::OnRecognitionStart(int session_id) {
 189   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
 190 }
 191
 192 void SpeechRecognizer::EventListener::OnRecognitionEnd(int session_id) {
 193   StopOnIOThread();
 194   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
 195 }
 196
 197 void SpeechRecognizer::EventListener::OnRecognitionResults(
 198     int session_id, const content::SpeechRecognitionResults& results) {
 199   base::string16 result_str;
 200   size_t final_count = 0;
 201   // The number of results with |is_provisional| false. If |final_count| ==
 202   // results.size(), then all results are non-provisional and the recognition is
 203   // complete.
 204   for (const auto& result : results) {
 205     if (!result.is_provisional)
 206       final_count++;
 207     result_str += result.hypotheses[0].utterance;
 208   }
 209   content::BrowserThread::PostTask(
 210       content::BrowserThread::UI,
 211       FROM_HERE,
 212       base::Bind(&SpeechRecognizerDelegate::OnSpeechResult,
 213                  delegate_,
 214                  result_str,
 215                  final_count == results.size()));
 216
 217   // Stop the moment we have a final result. If we receive any new or changed
 218   // text, restart the timer to give the user more time to speak. (The timer is
 219   // recording the amount of time since the most recent utterance.)
 220   if (final_count == results.size())
 221     StopOnIOThread();
 222   else if (result_str != last_result_str_)
 223     StartSpeechTimeout(kNoNewSpeechTimeoutInSeconds);
 224
 225   last_result_str_ = result_str;
 226 }
 227
 228 void SpeechRecognizer::EventListener::OnRecognitionError(
 229     int session_id, const content::SpeechRecognitionError& error) {
 230   StopOnIOThread();
 231   if (error.code == content::SPEECH_RECOGNITION_ERROR_NETWORK) {
 232     NotifyRecognitionStateChanged(SPEECH_RECOGNITION_NETWORK_ERROR);
 233   }
 234   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_READY);
 235 }
 236
 237 void SpeechRecognizer::EventListener::OnSoundStart(int session_id) {
 238   StartSpeechTimeout(kNoSpeechTimeoutInSeconds);
 239   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_IN_SPEECH);
 240 }
 241
 242 void SpeechRecognizer::EventListener::OnSoundEnd(int session_id) {
 243   StopOnIOThread();
 244   NotifyRecognitionStateChanged(SPEECH_RECOGNITION_RECOGNIZING);
 245 }
 246
 247 void SpeechRecognizer::EventListener::OnAudioLevelsChange(
 248     int session_id, float volume, float noise_volume) {
 249   DCHECK_LE(0.0, volume);
 250   DCHECK_GE(1.0, volume);
 251   DCHECK_LE(0.0, noise_volume);
 252   DCHECK_GE(1.0, noise_volume);
 253   volume = std::max(0.0f, volume - noise_volume);
 254   // Both |volume| and |noise_volume| are defined to be in the range [0.0, 1.0].
 255   // See: content/public/browser/speech_recognition_event_listener.h
 256   int16_t sound_level = static_cast<int16_t>(INT16_MAX * volume);
 257   content::BrowserThread::PostTask(
 258       content::BrowserThread::UI,
 259       FROM_HERE,
 260       base::Bind(&SpeechRecognizerDelegate::OnSpeechSoundLevelChanged,
 261                  delegate_,
 262                  sound_level));
 263 }
 264
 265 void SpeechRecognizer::EventListener::OnEnvironmentEstimationComplete(
 266     int session_id) {
 267 }
 268
 269 void SpeechRecognizer::EventListener::OnAudioStart(int session_id) {
 270 }
 271
 272 void SpeechRecognizer::EventListener::OnAudioEnd(int session_id) {
 273 }
 274
 275 SpeechRecognizer::SpeechRecognizer(
 276     const base::WeakPtr<SpeechRecognizerDelegate>& delegate,
 277     net::URLRequestContextGetter* url_request_context_getter,
 278     const std::string& locale)
 279     : delegate_(delegate),
 280       speech_event_listener_(new EventListener(
 281           delegate, url_request_context_getter, locale)) {
 282   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 283 }
 284
 285 SpeechRecognizer::~SpeechRecognizer() {
 286   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 287   Stop();
 288 }
 289
 290 void SpeechRecognizer::Start(
 291     const scoped_refptr<content::SpeechRecognitionSessionPreamble>& preamble) {
 292   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 293   std::string auth_scope;
 294   std::string auth_token;
 295   delegate_->GetSpeechAuthParameters(&auth_scope, &auth_token);
 296
 297   content::BrowserThread::PostTask(
 298       content::BrowserThread::IO,
 299       FROM_HERE,
 300       base::Bind(&SpeechRecognizer::EventListener::StartOnIOThread,
 301                  speech_event_listener_,
 302                  auth_scope,
 303                  auth_token,
 304                  preamble));
 305 }
 306
 307 void SpeechRecognizer::Stop() {
 308   DCHECK_CURRENTLY_ON(content::BrowserThread::UI);
 309   content::BrowserThread::PostTask(
 310       content::BrowserThread::IO,
 311       FROM_HERE,
 312       base::Bind(&SpeechRecognizer::EventListener::StopOnIOThread,
 313                  speech_event_listener_));
 314 }
 315
 316 }  // namespace app_list