chrome/browser/speech/tts_controller.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   7
   8 #include <queue>
   9 #include <set>
  10 #include <string>
  11 #include <vector>
  12
  13 #include "base/memory/scoped_ptr.h"
  14 #include "base/memory/singleton.h"
  15 #include "base/memory/weak_ptr.h"
  16 #include "url/gurl.h"
  17
  18 class Utterance;
  19 class TtsPlatformImpl;
  20 class Profile;
  21
  22 namespace base {
  23 class Value;
  24 }
  25
  26 // Progress events sent back from the TTS engine for an utterance.
  27 enum TtsEventType {  // Unscoped; implicit values 0..9 in this order.
  28   TTS_EVENT_START,
  29   TTS_EVENT_END,          // NOTE(review): presumably final; see
  30   TTS_EVENT_WORD,         //   IsFinalTtsEventType() for the actual set.
  31   TTS_EVENT_SENTENCE,
  32   TTS_EVENT_MARKER,
  33   TTS_EVENT_INTERRUPTED,
  34   TTS_EVENT_CANCELLED,    // See TtsController::ClearUtteranceQueue().
  35   TTS_EVENT_ERROR,        // Accompanied by an |error_message| string.
  36   TTS_EVENT_PAUSE,
  37   TTS_EVENT_RESUME
  38 };
  39
  40 enum TtsGenderType {  // Voice gender, used by VoiceData and Utterance.
  41   TTS_GENDER_NONE,    // NOTE(review): presumably "unspecified" -- confirm.
  42   TTS_GENDER_MALE,
  43   TTS_GENDER_FEMALE
  44 };
  45
  46 // Returns true if |event_type| is one that indicates an utterance is
  47 // finished and can be destroyed. Declared only; the set is not listed here.
  48 bool IsFinalTtsEventType(TtsEventType event_type);
  49
  50 // The continuous (double-valued) parameters that apply to one utterance.
  51 struct UtteranceContinuousParameters {
  52   UtteranceContinuousParameters();  // Out of line; presumably sets defaults.
  53
  54   double rate;    // NOTE(review): units/valid range not visible here.
  55   double pitch;   // NOTE(review): units/valid range not visible here.
  56   double volume;  // NOTE(review): units/valid range not visible here.
  57 };
  58
  59 // Information about one voice (element type of TtsController::GetVoices).
  60 struct VoiceData {
  61   VoiceData();   // Defined out of line.
  62   ~VoiceData();  // Defined out of line.
  63
  64   std::string name;
  65   std::string lang;  // NOTE(review): language-tag format not visible here.
  66   TtsGenderType gender;
  67   std::string extension_id;  // Presumably the provider when !native.
  68   std::set<TtsEventType> events;  // Presumably the events the voice emits.
  69
  70   // If true, the synthesis engine is a remote network resource.
  71   // It may be higher latency and may incur bandwidth costs.
  72   bool remote;
  73
  74   // If true, this is implemented by this platform's subclass of
  75   // TtsPlatformImpl. If false, this is implemented by an extension.
  76   bool native;
  77   std::string native_voice_identifier;  // Presumably used only if |native|.
  78 };
  79
  80 // Interface for receiving utterance events; Utterance holds it via WeakPtr.
  81 class UtteranceEventDelegate {
  82  public:
  83   virtual ~UtteranceEventDelegate() {}
  84   virtual void OnTtsEvent(Utterance* utterance,  // Utterance the event is for.
  85                           TtsEventType event_type,
  86                           int char_index,  // Cf. Utterance::OnTtsEvent().
  87                           const std::string& error_message) = 0;
  88 };
  89
  90 // Interface for objects that want to be notified when the set of voices
  91 // has changed. Register via TtsController::AddVoicesChangedDelegate().
  92 class VoicesChangedDelegate {
  93  public:
  94   virtual ~VoicesChangedDelegate() {}
  95   virtual void OnVoicesChanged() = 0;  // TtsController stores raw pointers.
  96 };
  97
  98 // One speech utterance: the text to speak plus its parsed speech options.
  99 class Utterance {
 100  public:
 101   // Construct an utterance for the given profile. Before speaking this
 102   // utterance, its other parameters like text, rate, pitch, etc. should all
 103   // be set through the setters below.
 104   explicit Utterance(Profile* profile);
 105   ~Utterance();
 106
 107   // Sends an event to the delegate. If |char_index| is -1, uses the last
 108   // good value. Said to delete the utterance on TTS_EVENT_END or
 109   // TTS_EVENT_ERROR -- NOTE(review): confirm in the .cc (cf. finished()).
 110   void OnTtsEvent(TtsEventType event_type,
 111                   int char_index,
 112                   const std::string& error_message);
 113
 114   // Finish an utterance without sending an event to the delegate.
 115   void Finish();
 116
 117   // Getters and setters for the text to speak and other speech options.
 118   void set_text(const std::string& text) { text_ = text; }
 119   const std::string& text() const { return text_; }
 120
 121   void set_options(const base::Value* options);
 122   const base::Value* options() const { return options_.get(); }
 123
 124   void set_src_extension_id(const std::string& src_extension_id) {
 125     src_extension_id_ = src_extension_id;
 126   }
 127   const std::string& src_extension_id() const { return src_extension_id_; }
 128
 129   void set_src_id(int src_id) { src_id_ = src_id; }
 130   int src_id() const { return src_id_; }
 131
 132   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
 133   const GURL& src_url() const { return src_url_; }
 134
 135   void set_voice_name(const std::string& voice_name) {
 136     voice_name_ = voice_name;
 137   }
 138   const std::string& voice_name() const { return voice_name_; }
 139
 140   void set_lang(const std::string& lang) {
 141     lang_ = lang;
 142   }
 143   const std::string& lang() const { return lang_; }
 144
 145   void set_gender(TtsGenderType gender) {
 146     gender_ = gender;
 147   }
 148   TtsGenderType gender() const { return gender_; }
 149
 150   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
 151     continuous_parameters_ = params;
 152   }
 153   const UtteranceContinuousParameters& continuous_parameters() const {
 154     return continuous_parameters_;
 155   }
 156
 157   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
 158   bool can_enqueue() const { return can_enqueue_; }
 159
 160   void set_required_event_types(const std::set<TtsEventType>& types) {
 161     required_event_types_ = types;
 162   }
 163   const std::set<TtsEventType>& required_event_types() const {
 164     return required_event_types_;
 165   }
 166
 167   void set_desired_event_types(const std::set<TtsEventType>& types) {
 168     desired_event_types_ = types;
 169   }
 170   const std::set<TtsEventType>& desired_event_types() const {
 171     return desired_event_types_;
 172   }
 173
 174   const std::string& extension_id() const { return extension_id_; }
 175   void set_extension_id(const std::string& extension_id) {
 176     extension_id_ = extension_id;
 177   }
 178
 179   UtteranceEventDelegate* event_delegate() const {
 180     return event_delegate_.get();  // Null once the weak pointer is invalid.
 181   }
 182   void set_event_delegate(
 183       base::WeakPtr<UtteranceEventDelegate> event_delegate) {
 184     event_delegate_ = event_delegate;
 185   }
 186
 187   // Read-only accessors for internal state (no public setters).
 188   Profile* profile() const { return profile_; }
 189   int id() const { return id_; }
 190   bool finished() const { return finished_; }
 191
 192  private:
 193   // The profile that initiated this utterance.
 194   Profile* profile_;
 195
 196   // The extension ID of the extension providing TTS for this utterance, or
 197   // empty if native TTS is being used.
 198   std::string extension_id_;
 199
 200   // The unique ID of this utterance, used to associate callback functions
 201   // with utterances.
 202   int id_;
 203
 204   // The id of the next utterance, so we can associate requests with
 205   // responses. Shared by all utterances (static).
 206   static int next_utterance_id_;
 207
 208   // The text to speak.
 209   std::string text_;
 210
 211   // The full options arg passed to tts.speak, which may include fields
 212   // other than the ones we explicitly parse, below. Owned by this object.
 213   scoped_ptr<base::Value> options_;
 214
 215   // The extension ID of the extension that called speak() and should
 216   // receive events.
 217   std::string src_extension_id_;
 218
 219   // The source extension's ID of this utterance, so that it can associate
 220   // events with the appropriate callback.
 221   int src_id_;
 222
 223   // The URL of the page where the source extension called speak.
 224   GURL src_url_;
 225
 226   // The delegate to be called when an utterance event is fired (held weakly).
 227   base::WeakPtr<UtteranceEventDelegate> event_delegate_;
 228
 229   // The parsed options.
 230   std::string voice_name_;
 231   std::string lang_;
 232   TtsGenderType gender_;
 233   UtteranceContinuousParameters continuous_parameters_;
 234   bool can_enqueue_;
 235   std::set<TtsEventType> required_event_types_;
 236   std::set<TtsEventType> desired_event_types_;
 237
 238   // The index of the current char being spoken.
 239   int char_index_;
 240
 241   // True if this utterance received an event indicating it's done.
 242   bool finished_;
 243 };
 244
 245 // Singleton class that manages text-to-speech for the TTS and TTS engine
 246 // extension APIs, maintaining a queue of pending utterances and keeping
 247 // track of all state.
 248 class TtsController {
 249  public:
 250   // Get the single instance of this class.
 251   static TtsController* GetInstance();
 252
 253   // Returns true if we're currently speaking an utterance.
 254   bool IsSpeaking();
 255
 256   // Speak the given utterance. If the utterance's can_enqueue flag is true
 257   // and another utterance is in progress, adds it to the end of the queue.
 258   // Otherwise, interrupts any current utterance and speaks this one
 259   // immediately.
 260   void SpeakOrEnqueue(Utterance* utterance);  // Ownership: cf. SpeakNow().
 261
 262   // Stop all utterances and flush the queue. Implies leaving pause mode
 263   // as well.
 264   void Stop();
 265
 266   // Pause the speech queue. Some engines may support pausing in the middle
 267   // of an utterance.
 268   void Pause();
 269
 270   // Resume speaking.
 271   void Resume();
 272
 273   // Handle events received from the speech engine. Events are forwarded to
 274   // the callback function, and in addition, completion and error events
 275   // trigger finishing the current utterance and starting the next one, if
 276   // any. |utterance_id| presumably matches Utterance::id() -- confirm.
 277   void OnTtsEvent(int utterance_id,
 278                   TtsEventType event_type,
 279                   int char_index,
 280                   const std::string& error_message);
 281
 282   // Return, through |out_voices|, a list of all available voices, including
 283   // the native voice, if supported, and all voices registered by extensions.
 284   void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);
 285
 286   // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
 287   // finishes loading the built-in TTS component extension.
 288   void RetrySpeakingQueuedUtterances();
 289
 290   // Called by the extension system or platform implementation when the
 291   // list of voices may have changed and should be re-queried.
 292   void VoicesChanged();
 293
 294   // Add a delegate that wants to be notified when the set of voices changes.
 295   void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);
 296
 297   // Remove delegate that wants to be notified when the set of voices changes.
 298   void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);
 299
 300   // For unit testing. SetPlatformImpl() injects a mock platform impl.
 301   void SetPlatformImpl(TtsPlatformImpl* platform_impl);
 302   int QueueSize();
 303
 304  protected:
 305   TtsController();
 306   virtual ~TtsController();
 307
 308  private:
 309   // Get the platform TTS implementation (or injected mock).
 310   TtsPlatformImpl* GetPlatformImpl();
 311
 312   // Start speaking the given utterance. Will either take ownership of
 313   // |utterance| or delete it if there's an error. No status is returned.
 314   void SpeakNow(Utterance* utterance);
 315
 316   // Clear the utterance queue. If send_events is true, will send
 317   // TTS_EVENT_CANCELLED events on each one.
 318   void ClearUtteranceQueue(bool send_events);
 319
 320   // Finalize and delete the current utterance.
 321   void FinishCurrentUtterance();
 322
 323   // Start speaking the next utterance in the queue.
 324   void SpeakNextUtterance();
 325
 326   // Given an utterance and a vector of voices, return the index of the
 327   // voice that best matches. NOTE(review): |voices| is a non-const ref.
 328   int GetMatchingVoice(const Utterance* utterance,
 329                        std::vector<VoiceData>& voices);
 330
 331   friend struct DefaultSingletonTraits<TtsController>;
 332
 333   // The utterance being spoken; deleted by FinishCurrentUtterance().
 334   Utterance* current_utterance_;
 335
 336   // Whether the queue is paused or not.
 337   bool paused_;
 338
 339   // A queue of utterances to speak after the current one finishes.
 340   std::queue<Utterance*> utterance_queue_;
 341
 342   // A set of delegates that want to be notified when the voices change.
 343   std::set<VoicesChangedDelegate*> voices_changed_delegates_;
 344
 345   // A pointer to the platform implementation of text-to-speech, for
 346   // dependency injection.
 347   TtsPlatformImpl* platform_impl_;
 348
 349   DISALLOW_COPY_AND_ASSIGN(TtsController);
 350 };
 351
 352 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_