chrome/browser/speech/tts_controller_impl.cc

   1 // Copyright 2014 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/speech/tts_controller_impl.h"
   6
   7 #include <string>
   8 #include <vector>
   9
  10 #include "base/values.h"
  11 #include "chrome/browser/browser_process.h"
  12 #include "chrome/browser/speech/tts_platform.h"
  13
  14 namespace {
  15 // A value to be used to indicate that there is no char index available.
  16 const int kInvalidCharIndex = -1;
  17
  18 // Given a language/region code of the form 'fr-FR', returns just the basic
  19 // language portion, e.g. 'fr'.
  20 std::string TrimLanguageCode(std::string lang) {
  21   if (lang.size() >= 5 && lang[2] == '-')
  22     return lang.substr(0, 2);
  23   else
  24     return lang;
  25 }
  26
  27 }  // namespace
  28
  29 bool IsFinalTtsEventType(TtsEventType event_type) {
  30   return (event_type == TTS_EVENT_END ||
  31           event_type == TTS_EVENT_INTERRUPTED ||
  32           event_type == TTS_EVENT_CANCELLED ||
  33           event_type == TTS_EVENT_ERROR);
  34 }
  35
  36 //
  37 // UtteranceContinuousParameters
  38 //
  39
  40
  41 UtteranceContinuousParameters::UtteranceContinuousParameters()
  42     : rate(-1),
  43       pitch(-1),
  44       volume(-1) {}
  45
  46
  47 //
  48 // VoiceData
  49 //
  50
  51
  52 VoiceData::VoiceData()
  53     : gender(TTS_GENDER_NONE),
  54       remote(false),
  55       native(false) {}
  56
  57 VoiceData::~VoiceData() {}
  58
  59
  60 //
  61 // Utterance
  62 //
  63
  64 // static
  65 int Utterance::next_utterance_id_ = 0;
  66
  67 Utterance::Utterance(content::BrowserContext* browser_context)
  68     : browser_context_(browser_context),
  69       id_(next_utterance_id_++),
  70       src_id_(-1),
  71       gender_(TTS_GENDER_NONE),
  72       can_enqueue_(false),
  73       char_index_(0),
  74       finished_(false) {
  75   options_.reset(new base::DictionaryValue());
  76 }
  77
  78 Utterance::~Utterance() {
  79   DCHECK(finished_);
  80 }
  81
  82 void Utterance::OnTtsEvent(TtsEventType event_type,
  83                            int char_index,
  84                            const std::string& error_message) {
  85   if (char_index >= 0)
  86     char_index_ = char_index;
  87   if (IsFinalTtsEventType(event_type))
  88     finished_ = true;
  89
  90   if (event_delegate_)
  91     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  92   if (finished_)
  93     event_delegate_ = NULL;
  94 }
  95
  96 void Utterance::Finish() {
  97   finished_ = true;
  98 }
  99
 100 void Utterance::set_options(const base::Value* options) {
 101   options_.reset(options->DeepCopy());
 102 }
 103
 104 TtsController* TtsController::GetInstance() {
 105   return TtsControllerImpl::GetInstance();
 106 }
 107
 108 //
 109 // TtsControllerImpl
 110 //
 111
 112 // static
 113 TtsControllerImpl* TtsControllerImpl::GetInstance() {
 114   return Singleton<TtsControllerImpl>::get();
 115 }
 116
 117 TtsControllerImpl::TtsControllerImpl()
 118     : current_utterance_(NULL),
 119       paused_(false),
 120       platform_impl_(NULL),
 121       tts_engine_delegate_(NULL) {
 122 }
 123
 124 TtsControllerImpl::~TtsControllerImpl() {
 125   if (current_utterance_) {
 126     current_utterance_->Finish();
 127     delete current_utterance_;
 128   }
 129
 130   // Clear any queued utterances too.
 131   ClearUtteranceQueue(false);  // Don't sent events.
 132 }
 133
 134 void TtsControllerImpl::SpeakOrEnqueue(Utterance* utterance) {
 135   // If we're paused and we get an utterance that can't be queued,
 136   // flush the queue but stay in the paused state.
 137   if (paused_ && !utterance->can_enqueue()) {
 138     utterance_queue_.push(utterance);
 139     Stop();
 140     paused_ = true;
 141     return;
 142   }
 143
 144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
 145     utterance_queue_.push(utterance);
 146   } else {
 147     Stop();
 148     SpeakNow(utterance);
 149   }
 150 }
 151
 152 void TtsControllerImpl::SpeakNow(Utterance* utterance) {
 153   // Ensure we have all built-in voices loaded. This is a no-op if already
 154   // loaded.
 155   bool loaded_built_in =
 156       GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->browser_context());
 157
 158   // Get all available voices and try to find a matching voice.
 159   std::vector<VoiceData> voices;
 160   GetVoices(utterance->browser_context(), &voices);
 161   int index = GetMatchingVoice(utterance, voices);
 162
 163   VoiceData voice;
 164   if (index != -1) {
 165     // Select the matching voice.
 166     voice = voices[index];
 167   } else {
 168     // However, if no match was found on a platform without native tts voices,
 169     // attempt to get a voice based only on the current locale without respect
 170     // to any supplied voice names.
 171     std::vector<VoiceData> native_voices;
 172
 173     if (GetPlatformImpl()->PlatformImplAvailable())
 174       GetPlatformImpl()->GetVoices(&native_voices);
 175
 176     if (native_voices.empty() && !voices.empty()) {
 177       // TODO(dtseng): Notify extension caller of an error.
 178       utterance->set_voice_name("");
 179       // TODO(gaochun): Replace the global variable g_browser_process with
 180       // GetContentClient()->browser() to eliminate the dependency of browser
 181       // once TTS implementation was moved to content.
 182       utterance->set_lang(g_browser_process->GetApplicationLocale());
 183       index = GetMatchingVoice(utterance, voices);
 184
 185       // If even that fails, just take the first available voice.
 186       if (index == -1)
 187         index = 0;
 188       voice = voices[index];
 189     } else {
 190       // Otherwise, simply give native voices a chance to handle this utterance.
 191       voice.native = true;
 192     }
 193   }
 194
 195   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
 196
 197   if (!voice.native) {
 198 #if !defined(OS_ANDROID)
 199     DCHECK(!voice.extension_id.empty());
 200     current_utterance_ = utterance;
 201     utterance->set_extension_id(voice.extension_id);
 202     if (tts_engine_delegate_)
 203       tts_engine_delegate_->Speak(utterance, voice);
 204     bool sends_end_event =
 205         voice.events.find(TTS_EVENT_END) != voice.events.end();
 206     if (!sends_end_event) {
 207       utterance->Finish();
 208       delete utterance;
 209       current_utterance_ = NULL;
 210       SpeakNextUtterance();
 211     }
 212 #endif
 213   } else {
 214     // It's possible for certain platforms to send start events immediately
 215     // during |speak|.
 216     current_utterance_ = utterance;
 217     GetPlatformImpl()->clear_error();
 218     bool success = GetPlatformImpl()->Speak(
 219         utterance->id(),
 220         utterance->text(),
 221         utterance->lang(),
 222         voice,
 223         utterance->continuous_parameters());
 224     if (!success)
 225       current_utterance_ = NULL;
 226
 227     // If the native voice wasn't able to process this speech, see if
 228     // the browser has built-in TTS that isn't loaded yet.
 229     if (!success && loaded_built_in) {
 230       utterance_queue_.push(utterance);
 231       return;
 232     }
 233
 234     if (!success) {
 235       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
 236                             GetPlatformImpl()->error());
 237       delete utterance;
 238       return;
 239     }
 240   }
 241 }
 242
 243 void TtsControllerImpl::Stop() {
 244   paused_ = false;
 245   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 246     if (tts_engine_delegate_)
 247       tts_engine_delegate_->Stop(current_utterance_);
 248   } else {
 249     GetPlatformImpl()->clear_error();
 250     GetPlatformImpl()->StopSpeaking();
 251   }
 252
 253   if (current_utterance_)
 254     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 255                                    std::string());
 256   FinishCurrentUtterance();
 257   ClearUtteranceQueue(true);  // Send events.
 258 }
 259
 260 void TtsControllerImpl::Pause() {
 261   paused_ = true;
 262   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 263     if (tts_engine_delegate_)
 264       tts_engine_delegate_->Pause(current_utterance_);
 265   } else if (current_utterance_) {
 266     GetPlatformImpl()->clear_error();
 267     GetPlatformImpl()->Pause();
 268   }
 269 }
 270
 271 void TtsControllerImpl::Resume() {
 272   paused_ = false;
 273   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 274     if (tts_engine_delegate_)
 275       tts_engine_delegate_->Resume(current_utterance_);
 276   } else if (current_utterance_) {
 277     GetPlatformImpl()->clear_error();
 278     GetPlatformImpl()->Resume();
 279   } else {
 280     SpeakNextUtterance();
 281   }
 282 }
 283
 284 void TtsControllerImpl::OnTtsEvent(int utterance_id,
 285                                         TtsEventType event_type,
 286                                         int char_index,
 287                                         const std::string& error_message) {
 288   // We may sometimes receive completion callbacks "late", after we've
 289   // already finished the utterance (for example because another utterance
 290   // interrupted or we got a call to Stop). This is normal and we can
 291   // safely just ignore these events.
 292   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
 293     return;
 294   }
 295   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
 296   if (current_utterance_->finished()) {
 297     FinishCurrentUtterance();
 298     SpeakNextUtterance();
 299   }
 300 }
 301
 302 void TtsControllerImpl::GetVoices(content::BrowserContext* browser_context,
 303                               std::vector<VoiceData>* out_voices) {
 304   if (browser_context && tts_engine_delegate_)
 305     tts_engine_delegate_->GetVoices(browser_context, out_voices);
 306
 307   TtsPlatformImpl* platform_impl = GetPlatformImpl();
 308   if (platform_impl) {
 309     // Ensure we have all built-in voices loaded. This is a no-op if already
 310     // loaded.
 311     platform_impl->LoadBuiltInTtsExtension(browser_context);
 312     if (platform_impl->PlatformImplAvailable())
 313       platform_impl->GetVoices(out_voices);
 314   }
 315 }
 316
 317 bool TtsControllerImpl::IsSpeaking() {
 318   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
 319 }
 320
 321 void TtsControllerImpl::FinishCurrentUtterance() {
 322   if (current_utterance_) {
 323     if (!current_utterance_->finished())
 324       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 325                                      std::string());
 326     delete current_utterance_;
 327     current_utterance_ = NULL;
 328   }
 329 }
 330
 331 void TtsControllerImpl::SpeakNextUtterance() {
 332   if (paused_)
 333     return;
 334
 335   // Start speaking the next utterance in the queue.  Keep trying in case
 336   // one fails but there are still more in the queue to try.
 337   while (!utterance_queue_.empty() && !current_utterance_) {
 338     Utterance* utterance = utterance_queue_.front();
 339     utterance_queue_.pop();
 340     SpeakNow(utterance);
 341   }
 342 }
 343
 344 void TtsControllerImpl::ClearUtteranceQueue(bool send_events) {
 345   while (!utterance_queue_.empty()) {
 346     Utterance* utterance = utterance_queue_.front();
 347     utterance_queue_.pop();
 348     if (send_events)
 349       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
 350                             std::string());
 351     else
 352       utterance->Finish();
 353     delete utterance;
 354   }
 355 }
 356
 357 void TtsControllerImpl::SetPlatformImpl(
 358     TtsPlatformImpl* platform_impl) {
 359   platform_impl_ = platform_impl;
 360 }
 361
 362 int TtsControllerImpl::QueueSize() {
 363   return static_cast<int>(utterance_queue_.size());
 364 }
 365
 366 TtsPlatformImpl* TtsControllerImpl::GetPlatformImpl() {
 367   if (!platform_impl_)
 368     platform_impl_ = TtsPlatformImpl::GetInstance();
 369   return platform_impl_;
 370 }
 371
 372 int TtsControllerImpl::GetMatchingVoice(
 373     const Utterance* utterance, std::vector<VoiceData>& voices) {
 374   // Make two passes: the first time, do strict language matching
 375   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
 376   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
 377   for (int pass = 0; pass < 2; ++pass) {
 378     for (size_t i = 0; i < voices.size(); ++i) {
 379       const VoiceData& voice = voices[i];
 380
 381       if (!utterance->extension_id().empty() &&
 382           utterance->extension_id() != voice.extension_id) {
 383         continue;
 384       }
 385
 386       if (!voice.name.empty() &&
 387           !utterance->voice_name().empty() &&
 388           voice.name != utterance->voice_name()) {
 389         continue;
 390       }
 391       if (!voice.lang.empty() && !utterance->lang().empty()) {
 392         std::string voice_lang = voice.lang;
 393         std::string utterance_lang = utterance->lang();
 394         if (pass == 1) {
 395           voice_lang = TrimLanguageCode(voice_lang);
 396           utterance_lang = TrimLanguageCode(utterance_lang);
 397         }
 398         if (voice_lang != utterance_lang) {
 399           continue;
 400         }
 401       }
 402       if (voice.gender != TTS_GENDER_NONE &&
 403           utterance->gender() != TTS_GENDER_NONE &&
 404           voice.gender != utterance->gender()) {
 405         continue;
 406       }
 407
 408       if (utterance->required_event_types().size() > 0) {
 409         bool has_all_required_event_types = true;
 410         for (std::set<TtsEventType>::const_iterator iter =
 411                  utterance->required_event_types().begin();
 412              iter != utterance->required_event_types().end();
 413              ++iter) {
 414           if (voice.events.find(*iter) == voice.events.end()) {
 415             has_all_required_event_types = false;
 416             break;
 417           }
 418         }
 419         if (!has_all_required_event_types)
 420           continue;
 421       }
 422
 423       return static_cast<int>(i);
 424     }
 425   }
 426
 427   return -1;
 428 }
 429
 430 void TtsControllerImpl::VoicesChanged() {
 431   // Existence of platform tts indicates explicit requests to tts. Since
 432   // |VoicesChanged| can occur implicitly, only send if needed.
 433   if (!platform_impl_)
 434     return;
 435
 436   for (std::set<VoicesChangedDelegate*>::iterator iter =
 437            voices_changed_delegates_.begin();
 438        iter != voices_changed_delegates_.end(); ++iter) {
 439     (*iter)->OnVoicesChanged();
 440   }
 441 }
 442
 443 void TtsControllerImpl::AddVoicesChangedDelegate(
 444     VoicesChangedDelegate* delegate) {
 445   voices_changed_delegates_.insert(delegate);
 446 }
 447
 448 void TtsControllerImpl::RemoveVoicesChangedDelegate(
 449     VoicesChangedDelegate* delegate) {
 450   voices_changed_delegates_.erase(delegate);
 451 }
 452
 453 void TtsControllerImpl::RemoveUtteranceEventDelegate(
 454     UtteranceEventDelegate* delegate) {
 455   // First clear any pending utterances with this delegate.
 456   std::queue<Utterance*> old_queue = utterance_queue_;
 457   utterance_queue_ = std::queue<Utterance*>();
 458   while (!old_queue.empty()) {
 459     Utterance* utterance = old_queue.front();
 460     old_queue.pop();
 461     if (utterance->event_delegate() != delegate)
 462       utterance_queue_.push(utterance);
 463     else
 464       delete utterance;
 465   }
 466
 467   if (current_utterance_ && current_utterance_->event_delegate() == delegate) {
 468     current_utterance_->set_event_delegate(NULL);
 469     if (!current_utterance_->extension_id().empty()) {
 470       if (tts_engine_delegate_)
 471         tts_engine_delegate_->Stop(current_utterance_);
 472     } else {
 473       GetPlatformImpl()->clear_error();
 474       GetPlatformImpl()->StopSpeaking();
 475     }
 476
 477     FinishCurrentUtterance();
 478     if (!paused_)
 479       SpeakNextUtterance();
 480   }
 481 }
 482
 483 void TtsControllerImpl::SetTtsEngineDelegate(
 484     TtsEngineDelegate* delegate) {
 485   tts_engine_delegate_ = delegate;
 486 }
 487
 488 TtsEngineDelegate* TtsControllerImpl::GetTtsEngineDelegate() {
 489   return tts_engine_delegate_;
 490 }