chrome/browser/speech/tts_controller_impl.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/speech/tts_controller_impl.h"
   6
   7 #include <string>
   8 #include <vector>
   9
  10 #include "base/float_util.h"
  11 #include "base/values.h"
  12 #include "chrome/browser/browser_process.h"
  13 #include "chrome/browser/speech/tts_platform.h"
  14
  15 namespace {
  16 // A value to be used to indicate that there is no char index available.
  17 const int kInvalidCharIndex = -1;
  18
  19 // Given a language/region code of the form 'fr-FR', returns just the basic
  20 // language portion, e.g. 'fr'.
  21 std::string TrimLanguageCode(std::string lang) {
  22   if (lang.size() >= 5 && lang[2] == '-')
  23     return lang.substr(0, 2);
  24   else
  25     return lang;
  26 }
  27
  28 }  // namespace
  29
  30 bool IsFinalTtsEventType(TtsEventType event_type) {
  31   return (event_type == TTS_EVENT_END ||
  32           event_type == TTS_EVENT_INTERRUPTED ||
  33           event_type == TTS_EVENT_CANCELLED ||
  34           event_type == TTS_EVENT_ERROR);
  35 }
  36
  37 //
  38 // UtteranceContinuousParameters
  39 //
  40
  41
  42 UtteranceContinuousParameters::UtteranceContinuousParameters()
  43     : rate(-1),
  44       pitch(-1),
  45       volume(-1) {}
  46
  47
  48 //
  49 // VoiceData
  50 //
  51
  52
  53 VoiceData::VoiceData()
  54     : gender(TTS_GENDER_NONE),
  55       remote(false),
  56       native(false) {}
  57
  58 VoiceData::~VoiceData() {}
  59
  60
  61 //
  62 // Utterance
  63 //
  64
  65 // static
  66 int Utterance::next_utterance_id_ = 0;
  67
  68 Utterance::Utterance(content::BrowserContext* browser_context)
  69     : browser_context_(browser_context),
  70       id_(next_utterance_id_++),
  71       src_id_(-1),
  72       gender_(TTS_GENDER_NONE),
  73       can_enqueue_(false),
  74       char_index_(0),
  75       finished_(false) {
  76   options_.reset(new base::DictionaryValue());
  77 }
  78
  79 Utterance::~Utterance() {
  80   DCHECK(finished_);
  81 }
  82
  83 void Utterance::OnTtsEvent(TtsEventType event_type,
  84                            int char_index,
  85                            const std::string& error_message) {
  86   if (char_index >= 0)
  87     char_index_ = char_index;
  88   if (IsFinalTtsEventType(event_type))
  89     finished_ = true;
  90
  91   if (event_delegate_)
  92     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  93   if (finished_)
  94     event_delegate_ = NULL;
  95 }
  96
  97 void Utterance::Finish() {
  98   finished_ = true;
  99 }
 100
 101 void Utterance::set_options(const base::Value* options) {
 102   options_.reset(options->DeepCopy());
 103 }
 104
 105 TtsController* TtsController::GetInstance() {
 106   return TtsControllerImpl::GetInstance();
 107 }
 108
 109 //
 110 // TtsControllerImpl
 111 //
 112
 113 // static
 114 TtsControllerImpl* TtsControllerImpl::GetInstance() {
 115   return Singleton<TtsControllerImpl>::get();
 116 }
 117
 118 TtsControllerImpl::TtsControllerImpl()
 119     : current_utterance_(NULL),
 120       paused_(false),
 121       platform_impl_(NULL),
 122       tts_engine_delegate_(NULL) {
 123 }
 124
 125 TtsControllerImpl::~TtsControllerImpl() {
 126   if (current_utterance_) {
 127     current_utterance_->Finish();
 128     delete current_utterance_;
 129   }
 130
 131   // Clear any queued utterances too.
 132   ClearUtteranceQueue(false);  // Don't sent events.
 133 }
 134
 135 void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
 136   // If we're paused and we get an utterance that can't be queued,
 137   // flush the queue but stay in the paused state.
 138   if (paused_ && !utterance->can_enqueue()) {
 139     Stop();
 140     paused_ = true;
 141     delete utterance;
 142     return;
 143   }
 144
 145   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
 146     utterance_queue_.push(utterance);
 147   } else {
 148     Stop();
 149     SpeakNow(utterance);
 150   }
 151 }
 152
 153 void TtsControllerImpl::SpeakNow(Utterance* utterance) {
 154   // Ensure we have all built-in voices loaded. This is a no-op if already
 155   // loaded.
 156   bool loaded_built_in =
 157       GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
 158
 159   // Get all available voices and try to find a matching voice.
 160   std::vector<VoiceData> voices;
 161   GetVoices(utterance->browser_context(), &voices);
 162   int index = GetMatchingVoice(utterance, voices);
 163
 164   VoiceData voice;
 165   if (index != -1) {
 166     // Select the matching voice.
 167     voice = voices[index];
 168   } else {
 169     // However, if no match was found on a platform without native tts voices,
 170     // attempt to get a voice based only on the current locale without respect
 171     // to any supplied voice names.
 172     std::vector<VoiceData> native_voices;
 173
 174     if (GetPlatformImpl()->PlatformImplAvailable())
 175       GetPlatformImpl()->GetVoices(&native_voices);
 176
 177     if (native_voices.empty() && !voices.empty()) {
 178       // TODO(dtseng): Notify extension caller of an error.
 179       utterance->set_voice_name("");
 180       // TODO(gaochun): Replace the global variable g_browser_process with
 181       // GetContentClient()->browser() to eliminate the dependency of browser
 182       // once TTS implementation was moved to content.
 183       utterance->set_lang(g_browser_process->GetApplicationLocale());
 184       index = GetMatchingVoice(utterance, voices);
 185
 186       // If even that fails, just take the first available voice.
 187       if (index == -1)
 188         index = 0;
 189       voice = voices[index];
 190     } else {
 191       // Otherwise, simply give native voices a chance to handle this utterance.
 192       voice.native = true;
 193     }
 194   }
 195
 196   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
 197
 198   if (!voice.native) {
 199 #if !defined(OS_ANDROID)
 200     DCHECK(!voice.extension_id.empty());
 201     current_utterance_ = utterance;
 202     utterance->set_extension_id(voice.extension_id);
 203     if (tts_engine_delegate_)
 204       tts_engine_delegate_->Speak(utterance, voice);
 205     bool sends_end_event =
 206         voice.events.find(TTS_EVENT_END) != voice.events.end();
 207     if (!sends_end_event) {
 208       utterance->Finish();
 209       delete utterance;
 210       current_utterance_ = NULL;
 211       SpeakNextUtterance();
 212     }
 213 #endif
 214   } else {
 215     // It's possible for certain platforms to send start events immediately
 216     // during |speak|.
 217     current_utterance_ = utterance;
 218     GetPlatformImpl()->clear_error();
 219     bool success = GetPlatformImpl()->Speak(
 220         utterance->id(),
 221         utterance->text(),
 222         utterance->lang(),
 223         voice,
 224         utterance->continuous_parameters());
 225     if (!success)
 226       current_utterance_ = NULL;
 227
 228     // If the native voice wasn't able to process this speech, see if
 229     // the browser has built-in TTS that isn't loaded yet.
 230     if (!success && loaded_built_in) {
 231       utterance_queue_.push(utterance);
 232       return;
 233     }
 234
 235     if (!success) {
 236       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
 237                             GetPlatformImpl()->error());
 238       delete utterance;
 239       return;
 240     }
 241   }
 242 }
 243
 244 void TtsControllerImpl::Stop() {
 245   paused_ = false;
 246   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 247     if (tts_engine_delegate_)
 248       tts_engine_delegate_->Stop(current_utterance_);
 249   } else {
 250     GetPlatformImpl()->clear_error();
 251     GetPlatformImpl()->StopSpeaking();
 252   }
 253
 254   if (current_utterance_)
 255     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 256                                    std::string());
 257   FinishCurrentUtterance();
 258   ClearUtteranceQueue(true);  // Send events.
 259 }
 260
 261 void TtsControllerImpl::Pause() {
 262   paused_ = true;
 263   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 264     if (tts_engine_delegate_)
 265       tts_engine_delegate_->Pause(current_utterance_);
 266   } else if (current_utterance_) {
 267     GetPlatformImpl()->clear_error();
 268     GetPlatformImpl()->Pause();
 269   }
 270 }
 271
 272 void TtsControllerImpl::Resume() {
 273   paused_ = false;
 274   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 275     if (tts_engine_delegate_)
 276       tts_engine_delegate_->Resume(current_utterance_);
 277   } else if (current_utterance_) {
 278     GetPlatformImpl()->clear_error();
 279     GetPlatformImpl()->Resume();
 280   } else {
 281     SpeakNextUtterance();
 282   }
 283 }
 284
 285 void TtsControllerImpl::OnTtsEvent(int utterance_id,
 286                                         TtsEventType event_type,
 287                                         int char_index,
 288                                         const std::string& error_message) {
 289   // We may sometimes receive completion callbacks "late", after we've
 290   // already finished the utterance (for example because another utterance
 291   // interrupted or we got a call to Stop). This is normal and we can
 292   // safely just ignore these events.
 293   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
 294     return;
 295   }
 296   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
 297   if (current_utterance_->finished()) {
 298     FinishCurrentUtterance();
 299     SpeakNextUtterance();
 300   }
 301 }
 302
 303 void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
 304                               std::vector<VoiceData>* out_voices) {
 305   if (browser_context && tts_engine_delegate_)
 306     tts_engine_delegate_->GetVoices(browser_context, out_voices);
 307
 308   TtsPlatformImpl* platform_impl = GetPlatformImpl();
 309   if (platform_impl) {
 310     // Ensure we have all built-in voices loaded. This is a no-op if already
 311     // loaded.
 312     platform_impl->LoadBuiltInTtsExtension(browser_context);
 313     if (platform_impl->PlatformImplAvailable())
 314       platform_impl->GetVoices(out_voices);
 315   }
 316 }
 317
 318 bool TtsControllerImpl::IsSpeaking() {
 319   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
 320 }
 321
 322 void TtsControllerImpl::FinishCurrentUtterance() {
 323   if (current_utterance_) {
 324     if (!current_utterance_->finished())
 325       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 326                                      std::string());
 327     delete current_utterance_;
 328     current_utterance_ = NULL;
 329   }
 330 }
 331
 332 void TtsControllerImpl::SpeakNextUtterance() {
 333   if (paused_)
 334     return;
 335
 336   // Start speaking the next utterance in the queue.  Keep trying in case
 337   // one fails but there are still more in the queue to try.
 338   while (!utterance_queue_.empty() && !current_utterance_) {
 339     Utterance* utterance = utterance_queue_.front();
 340     utterance_queue_.pop();
 341     SpeakNow(utterance);
 342   }
 343 }
 344
 345 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
 346   while (!utterance_queue_.empty()) {
 347     Utterance* utterance = utterance_queue_.front();
 348     utterance_queue_.pop();
 349     if (send_events)
 350       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
 351                             std::string());
 352     else
 353       utterance->Finish();
 354     delete utterance;
 355   }
 356 }
 357
 358 void TtsControllerImpl::SetPlatformImpl(
 359     TtsPlatformImpl* platform_impl) {
 360   platform_impl_ = platform_impl;
 361 }
 362
 363 int TtsControllerImpl::QueueSize() {
 364   return static_cast<int>(utterance_queue_.size());
 365 }
 366
 367 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
 368   if (!platform_impl_)
 369     platform_impl_ = TtsPlatformImpl::GetInstance();
 370   return platform_impl_;
 371 }
 372
 373 int TtsControllerImpl::GetMatchingVoice(
 374     const Utterance* utterance, std::vector<VoiceData>& voices) {
 375   // Make two passes: the first time, do strict language matching
 376   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
 377   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
 378   for (int pass = 0; pass < 2; ++pass) {
 379     for (size_t i = 0; i < voices.size(); ++i) {
 380       const VoiceData& voice = voices[i];
 381
 382       if (!utterance->extension_id().empty() &&
 383           utterance->extension_id() != voice.extension_id) {
 384         continue;
 385       }
 386
 387       if (!voice.name.empty() &&
 388           !utterance->voice_name().empty() &&
 389           voice.name != utterance->voice_name()) {
 390         continue;
 391       }
 392       if (!voice.lang.empty() && !utterance->lang().empty()) {
 393         std::string voice_lang = voice.lang;
 394         std::string utterance_lang = utterance->lang();
 395         if (pass == 1) {
 396           voice_lang = TrimLanguageCode(voice_lang);
 397           utterance_lang = TrimLanguageCode(utterance_lang);
 398         }
 399         if (voice_lang != utterance_lang) {
 400           continue;
 401         }
 402       }
 403       if (voice.gender != TTS_GENDER_NONE &&
 404           utterance->gender() != TTS_GENDER_NONE &&
 405           voice.gender != utterance->gender()) {
 406         continue;
 407       }
 408
 409       if (utterance->required_event_types().size() > 0) {
 410         bool has_all_required_event_types = true;
 411         for (std::set<TtsEventType>::const_iterator iter =
 412                  utterance->required_event_types().begin();
 413              iter != utterance->required_event_types().end();
 414              ++iter) {
 415           if (voice.events.find(*iter) == voice.events.end()) {
 416             has_all_required_event_types = false;
 417             break;
 418           }
 419         }
 420         if (!has_all_required_event_types)
 421           continue;
 422       }
 423
 424       return static_cast<int>(i);
 425     }
 426   }
 427
 428   return -1;
 429 }
 430
 431 void TtsControllerImpl::VoicesChanged() {
 432   // Existence of platform tts indicates explicit requests to tts. Since
 433   // |VoicesChanged| can occur implicitly, only send if needed.
 434   if (!platform_impl_)
 435     return;
 436
 437   for (std::set<VoicesChangedDelegate*>::iterator iter =
 438            voices_changed_delegates_.begin();
 439        iter != voices_changed_delegates_.end(); ++iter) {
 440     (*iter)->OnVoicesChanged();
 441   }
 442 }
 443
 444 void TtsControllerImpl::AddVoicesChangedDelegate(
 445     VoicesChangedDelegate* delegate) {
 446   voices_changed_delegates_.insert(delegate);
 447 }
 448
 449 void TtsControllerImpl::RemoveVoicesChangedDelegate(
 450     VoicesChangedDelegate* delegate) {
 451   voices_changed_delegates_.erase(delegate);
 452 }
 453
 454 void TtsControllerImpl::RemoveUtteranceEventDelegate(
 455     UtteranceEventDelegate* delegate) {
 456   // First clear any pending utterances with this delegate.
 457   std::queue<Utterance*> old_queue = utterance_queue_;
 458   utterance_queue_ = std::queue<Utterance*>();
 459   while (!old_queue.empty()) {
 460     Utterance* utterance = old_queue.front();
 461     old_queue.pop();
 462     if (utterance->event_delegate() != delegate)
 463       utterance_queue_.push(utterance);
 464     else
 465       delete utterance;
 466   }
 467
 468   if (current_utterance_ && current_utterance_->event_delegate() == delegate) {
 469     current_utterance_->set_event_delegate(NULL);
 470     if (!current_utterance_->extension_id().empty()) {
 471       if (tts_engine_delegate_)
 472         tts_engine_delegate_->Stop(current_utterance_);
 473     } else {
 474       GetPlatformImpl()->clear_error();
 475       GetPlatformImpl()->StopSpeaking();
 476     }
 477
 478     FinishCurrentUtterance();
 479     if (!paused_)
 480       SpeakNextUtterance();
 481   }
 482 }
 483
 484 void TtsControllerImpl::SetTtsEngineDelegate(
 485     TtsEngineDelegate* delegate) {
 486   tts_engine_delegate_ = delegate;
 487 }
 488
 489 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
 490   return tts_engine_delegate_;
 491 }