chrome/browser/speech/tts_controller.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/speech/tts_controller.h"
   6
   7 #include <string>
   8 #include <vector>
   9
  10 #include "base/float_util.h"
  11 #include "base/values.h"
  12 #include "chrome/browser/extensions/extension_system.h"
  13 #include "chrome/browser/profiles/profile.h"
  14 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
  15 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
  16 #include "chrome/browser/speech/tts_platform.h"
  17 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
  18 #include "extensions/common/extension.h"
  19
  20 namespace {
  21 // A value to be used to indicate that there is no char index available.
  22 const int kInvalidCharIndex = -1;
  23
  24 // Given a language/region code of the form 'fr-FR', returns just the basic
  25 // language portion, e.g. 'fr'.
  26 std::string TrimLanguageCode(std::string lang) {
  27   if (lang.size() >= 5 && lang[2] == '-')
  28     return lang.substr(0, 2);
  29   else
  30     return lang;
  31 }
  32
  33 }  // namespace
  34
  35 bool IsFinalTtsEventType(TtsEventType event_type) {
  36   return (event_type == TTS_EVENT_END ||
  37           event_type == TTS_EVENT_INTERRUPTED ||
  38           event_type == TTS_EVENT_CANCELLED ||
  39           event_type == TTS_EVENT_ERROR);
  40 }
  41
  42 //
  43 // UtteranceContinuousParameters
  44 //
  45
  46
  47 UtteranceContinuousParameters::UtteranceContinuousParameters()
  48     : rate(-1),
  49       pitch(-1),
  50       volume(-1) {}
  51
  52
  53 //
  54 // VoiceData
  55 //
  56
  57
  58 VoiceData::VoiceData()
  59     : gender(TTS_GENDER_NONE),
  60       remote(false),
  61       native(false) {}
  62
  63 VoiceData::~VoiceData() {}
  64
  65
  66 //
  67 // Utterance
  68 //
  69
  70 // static
  71 int Utterance::next_utterance_id_ = 0;
  72
  73 Utterance::Utterance(Profile* profile)
  74     : profile_(profile),
  75       id_(next_utterance_id_++),
  76       src_id_(-1),
  77       gender_(TTS_GENDER_NONE),
  78       can_enqueue_(false),
  79       char_index_(0),
  80       finished_(false) {
  81   options_.reset(new base::DictionaryValue());
  82 }
  83
  84 Utterance::~Utterance() {
  85   DCHECK(finished_);
  86 }
  87
  88 void Utterance::OnTtsEvent(TtsEventType event_type,
  89                            int char_index,
  90                            const std::string& error_message) {
  91   if (char_index >= 0)
  92     char_index_ = char_index;
  93   if (IsFinalTtsEventType(event_type))
  94     finished_ = true;
  95
  96   if (event_delegate_)
  97     event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
  98   if (finished_)
  99     event_delegate_.reset();
 100 }
 101
 102 void Utterance::Finish() {
 103   finished_ = true;
 104 }
 105
 106 void Utterance::set_options(const base::Value* options) {
 107   options_.reset(options->DeepCopy());
 108 }
 109
 110 //
 111 // TtsController
 112 //
 113
 114 // static
 115 TtsController* TtsController::GetInstance() {
 116   return Singleton<TtsController>::get();
 117 }
 118
 119 TtsController::TtsController()
 120     : current_utterance_(NULL),
 121       paused_(false),
 122       platform_impl_(NULL) {
 123 }
 124
 125 TtsController::~TtsController() {
 126   if (current_utterance_) {
 127     current_utterance_->Finish();
 128     delete current_utterance_;
 129   }
 130
 131   // Clear any queued utterances too.
 132   ClearUtteranceQueue(false);  // Don't sent events.
 133 }
 134
 135 void TtsController::SpeakOrEnqueue(Utterance* utterance) {
 136   // If we're paused and we get an utterance that can't be queued,
 137   // flush the queue but stay in the paused state.
 138   if (paused_ && !utterance->can_enqueue()) {
 139     Stop();
 140     paused_ = true;
 141     return;
 142   }
 143
 144   if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
 145     utterance_queue_.push(utterance);
 146   } else {
 147     Stop();
 148     SpeakNow(utterance);
 149   }
 150 }
 151
 152 void TtsController::SpeakNow(Utterance* utterance) {
 153   // Get all available voices and try to find a matching voice.
 154   std::vector<VoiceData> voices;
 155   GetVoices(utterance->profile(), &voices);
 156   int index = GetMatchingVoice(utterance, voices);
 157
 158   // Select the matching voice, but if none was found, initialize an
 159   // empty VoiceData with native = true, which will give the native
 160   // speech synthesizer a chance to try to synthesize the utterance
 161   // anyway.
 162   VoiceData voice;
 163   if (index >= 0 && index < static_cast<int>(voices.size()))
 164     voice = voices[index];
 165   else
 166     voice.native = true;
 167
 168   GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
 169
 170   if (!voice.native) {
 171 #if !defined(OS_ANDROID)
 172     DCHECK(!voice.extension_id.empty());
 173     current_utterance_ = utterance;
 174     utterance->set_extension_id(voice.extension_id);
 175     ExtensionTtsEngineSpeak(utterance, voice);
 176     bool sends_end_event =
 177         voice.events.find(TTS_EVENT_END) != voice.events.end();
 178     if (!sends_end_event) {
 179       utterance->Finish();
 180       delete utterance;
 181       current_utterance_ = NULL;
 182       SpeakNextUtterance();
 183     }
 184 #endif
 185   } else {
 186     // It's possible for certain platforms to send start events immediately
 187     // during |speak|.
 188     current_utterance_ = utterance;
 189     GetPlatformImpl()->clear_error();
 190     bool success = GetPlatformImpl()->Speak(
 191         utterance->id(),
 192         utterance->text(),
 193         utterance->lang(),
 194         voice,
 195         utterance->continuous_parameters());
 196     if (!success)
 197       current_utterance_ = NULL;
 198
 199     if (!success) {
 200       utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
 201                             GetPlatformImpl()->error());
 202       delete utterance;
 203       return;
 204     }
 205   }
 206 }
 207
 208 void TtsController::Stop() {
 209   paused_ = false;
 210   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 211 #if !defined(OS_ANDROID)
 212     ExtensionTtsEngineStop(current_utterance_);
 213 #endif
 214   } else {
 215     GetPlatformImpl()->clear_error();
 216     GetPlatformImpl()->StopSpeaking();
 217   }
 218
 219   if (current_utterance_)
 220     current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 221                                    std::string());
 222   FinishCurrentUtterance();
 223   ClearUtteranceQueue(true);  // Send events.
 224 }
 225
 226 void TtsController::Pause() {
 227   paused_ = true;
 228   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 229 #if !defined(OS_ANDROID)
 230     ExtensionTtsEnginePause(current_utterance_);
 231 #endif
 232   } else if (current_utterance_) {
 233     GetPlatformImpl()->clear_error();
 234     GetPlatformImpl()->Pause();
 235   }
 236 }
 237
 238 void TtsController::Resume() {
 239   paused_ = false;
 240   if (current_utterance_ && !current_utterance_->extension_id().empty()) {
 241 #if !defined(OS_ANDROID)
 242     ExtensionTtsEngineResume(current_utterance_);
 243 #endif
 244   } else if (current_utterance_) {
 245     GetPlatformImpl()->clear_error();
 246     GetPlatformImpl()->Resume();
 247   } else {
 248     SpeakNextUtterance();
 249   }
 250 }
 251
 252 void TtsController::OnTtsEvent(int utterance_id,
 253                                         TtsEventType event_type,
 254                                         int char_index,
 255                                         const std::string& error_message) {
 256   // We may sometimes receive completion callbacks "late", after we've
 257   // already finished the utterance (for example because another utterance
 258   // interrupted or we got a call to Stop). This is normal and we can
 259   // safely just ignore these events.
 260   if (!current_utterance_ || utterance_id != current_utterance_->id()) {
 261     return;
 262   }
 263   current_utterance_->OnTtsEvent(event_type, char_index, error_message);
 264   if (current_utterance_->finished()) {
 265     FinishCurrentUtterance();
 266     SpeakNextUtterance();
 267   }
 268 }
 269
 270 void TtsController::GetVoices(Profile* profile,
 271                               std::vector<VoiceData>* out_voices) {
 272 #if !defined(OS_ANDROID)
 273   if (profile)
 274     GetExtensionVoices(profile, out_voices);
 275 #endif
 276
 277   TtsPlatformImpl* platform_impl = GetPlatformImpl();
 278   if (platform_impl && platform_impl->PlatformImplAvailable())
 279     platform_impl->GetVoices(out_voices);
 280 }
 281
 282 bool TtsController::IsSpeaking() {
 283   return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
 284 }
 285
 286 void TtsController::FinishCurrentUtterance() {
 287   if (current_utterance_) {
 288     if (!current_utterance_->finished())
 289       current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
 290                                      std::string());
 291     delete current_utterance_;
 292     current_utterance_ = NULL;
 293   }
 294 }
 295
 296 void TtsController::SpeakNextUtterance() {
 297   if (paused_)
 298     return;
 299
 300   // Start speaking the next utterance in the queue.  Keep trying in case
 301   // one fails but there are still more in the queue to try.
 302   while (!utterance_queue_.empty() && !current_utterance_) {
 303     Utterance* utterance = utterance_queue_.front();
 304     utterance_queue_.pop();
 305     SpeakNow(utterance);
 306   }
 307 }
 308
 309 void TtsController::ClearUtteranceQueue(bool send_events) {
 310   while (!utterance_queue_.empty()) {
 311     Utterance* utterance = utterance_queue_.front();
 312     utterance_queue_.pop();
 313     if (send_events)
 314       utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
 315                             std::string());
 316     else
 317       utterance->Finish();
 318     delete utterance;
 319   }
 320 }
 321
 322 void TtsController::SetPlatformImpl(
 323     TtsPlatformImpl* platform_impl) {
 324   platform_impl_ = platform_impl;
 325 }
 326
 327 int TtsController::QueueSize() {
 328   return static_cast<int>(utterance_queue_.size());
 329 }
 330
 331 TtsPlatformImpl* TtsController::GetPlatformImpl() {
 332   if (!platform_impl_)
 333     platform_impl_ = TtsPlatformImpl::GetInstance();
 334   return platform_impl_;
 335 }
 336
 337 int TtsController::GetMatchingVoice(
 338     const Utterance* utterance, std::vector<VoiceData>& voices) {
 339   // Make two passes: the first time, do strict language matching
 340   // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
 341   // language matching ('fr-FR' matches 'fr' and 'fr-CA')
 342   for (int pass = 0; pass < 2; ++pass) {
 343     for (size_t i = 0; i < voices.size(); ++i) {
 344       const VoiceData& voice = voices[i];
 345
 346       if (!utterance->extension_id().empty() &&
 347           utterance->extension_id() != voice.extension_id) {
 348         continue;
 349       }
 350
 351       if (!voice.name.empty() &&
 352           !utterance->voice_name().empty() &&
 353           voice.name != utterance->voice_name()) {
 354         continue;
 355       }
 356       if (!voice.lang.empty() && !utterance->lang().empty()) {
 357         std::string voice_lang = voice.lang;
 358         std::string utterance_lang = utterance->lang();
 359         if (pass == 1) {
 360           voice_lang = TrimLanguageCode(voice_lang);
 361           utterance_lang = TrimLanguageCode(utterance_lang);
 362         }
 363         if (voice_lang != utterance_lang) {
 364           continue;
 365         }
 366       }
 367       if (voice.gender != TTS_GENDER_NONE &&
 368           utterance->gender() != TTS_GENDER_NONE &&
 369           voice.gender != utterance->gender()) {
 370         continue;
 371       }
 372
 373       if (utterance->required_event_types().size() > 0) {
 374         bool has_all_required_event_types = true;
 375         for (std::set<TtsEventType>::const_iterator iter =
 376                  utterance->required_event_types().begin();
 377              iter != utterance->required_event_types().end();
 378              ++iter) {
 379           if (voice.events.find(*iter) == voice.events.end()) {
 380             has_all_required_event_types = false;
 381             break;
 382           }
 383         }
 384         if (!has_all_required_event_types)
 385           continue;
 386       }
 387
 388       return static_cast<int>(i);
 389     }
 390   }
 391
 392   return -1;
 393 }
 394
 395 void TtsController::VoicesChanged() {
 396   for (std::set<VoicesChangedDelegate*>::iterator iter =
 397            voices_changed_delegates_.begin();
 398        iter != voices_changed_delegates_.end(); ++iter) {
 399     (*iter)->OnVoicesChanged();
 400   }
 401 }
 402
 403 void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
 404   voices_changed_delegates_.insert(delegate);
 405 }
 406
 407 void TtsController::RemoveVoicesChangedDelegate(
 408     VoicesChangedDelegate* delegate) {
 409   voices_changed_delegates_.erase(delegate);
 410 }