chrome/browser/speech/tts_controller.h

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
   7
   8 #include <queue>
   9 #include <set>
  10 #include <string>
  11
  12 #include "base/memory/scoped_ptr.h"
  13 #include "base/memory/singleton.h"
  14 #include "googleurl/src/gurl.h"
  15
  16 class TtsPlatformImpl;
  17 class Profile;
  18
  19 namespace base {
  20 class ListValue;
  21 class Value;
  22 }
  23
  24 // Events sent back from the TTS engine indicating the progress.
  25 enum TtsEventType {
  26   TTS_EVENT_START,
  27   TTS_EVENT_END,
  28   TTS_EVENT_WORD,
  29   TTS_EVENT_SENTENCE,
  30   TTS_EVENT_MARKER,
  31   TTS_EVENT_INTERRUPTED,
  32   TTS_EVENT_CANCELLED,
  33   TTS_EVENT_ERROR
  34 };
  35
  36
  37 // The continuous parameters that apply to a given utterance.
  38 struct UtteranceContinuousParameters {
  39   UtteranceContinuousParameters();
  40
  41   double rate;
  42   double pitch;
  43   double volume;
  44 };
  45
  46
  47 // One speech utterance.
  48 class Utterance {
  49  public:
  50   // Construct an utterance given a profile and a completion task to call
  51   // when the utterance is done speaking. Before speaking this utterance,
  52   // its other parameters like text, rate, pitch, etc. should all be set.
  53   explicit Utterance(Profile* profile);
  54   ~Utterance();
  55
  56   // Sends an event to the delegate. If the event type is TTS_EVENT_END
  57   // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
  58   // uses the last good value.
  59   void OnTtsEvent(TtsEventType event_type,
  60                   int char_index,
  61                   const std::string& error_message);
  62
  63   // Finish an utterance without sending an event to the delegate.
  64   void Finish();
  65
  66   // Getters and setters for the text to speak and other speech options.
  67   void set_text(const std::string& text) { text_ = text; }
  68   const std::string& text() const { return text_; }
  69
  70   void set_options(const base::Value* options);
  71   const base::Value* options() const { return options_.get(); }
  72
  73   void set_src_extension_id(const std::string& src_extension_id) {
  74     src_extension_id_ = src_extension_id;
  75   }
  76   const std::string& src_extension_id() { return src_extension_id_; }
  77
  78   void set_src_id(int src_id) { src_id_ = src_id; }
  79   int src_id() { return src_id_; }
  80
  81   void set_src_url(const GURL& src_url) { src_url_ = src_url; }
  82   const GURL& src_url() { return src_url_; }
  83
  84   void set_voice_name(const std::string& voice_name) {
  85     voice_name_ = voice_name;
  86   }
  87   const std::string& voice_name() const { return voice_name_; }
  88
  89   void set_lang(const std::string& lang) {
  90     lang_ = lang;
  91   }
  92   const std::string& lang() const { return lang_; }
  93
  94   void set_gender(const std::string& gender) {
  95     gender_ = gender;
  96   }
  97   const std::string& gender() const { return gender_; }
  98
  99   void set_continuous_parameters(const UtteranceContinuousParameters& params) {
 100     continuous_parameters_ = params;
 101   }
 102   const UtteranceContinuousParameters& continuous_parameters() {
 103     return continuous_parameters_;
 104   }
 105
 106   void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
 107   bool can_enqueue() const { return can_enqueue_; }
 108
 109   void set_required_event_types(const std::set<std::string>& types) {
 110     required_event_types_ = types;
 111   }
 112   const std::set<std::string>& required_event_types() const {
 113     return required_event_types_;
 114   }
 115
 116   void set_desired_event_types(const std::set<std::string>& types) {
 117     desired_event_types_ = types;
 118   }
 119   const std::set<std::string>& desired_event_types() const {
 120     return desired_event_types_;
 121   }
 122
 123   const std::string& extension_id() const { return extension_id_; }
 124   void set_extension_id(const std::string& extension_id) {
 125     extension_id_ = extension_id;
 126   }
 127
 128   // Getters and setters for internal state.
 129   Profile* profile() const { return profile_; }
 130   int id() const { return id_; }
 131   bool finished() const { return finished_; }
 132
 133  private:
 134   // The profile that initiated this utterance.
 135   Profile* profile_;
 136
 137   // The extension ID of the extension providing TTS for this utterance, or
 138   // empty if native TTS is being used.
 139   std::string extension_id_;
 140
 141   // The unique ID of this utterance, used to associate callback functions
 142   // with utterances.
 143   int id_;
 144
 145   // The id of the next utterance, so we can associate requests with
 146   // responses.
 147   static int next_utterance_id_;
 148
 149   // The text to speak.
 150   std::string text_;
 151
 152   // The full options arg passed to tts.speak, which may include fields
 153   // other than the ones we explicitly parse, below.
 154   scoped_ptr<base::Value> options_;
 155
 156   // The extension ID of the extension that called speak() and should
 157   // receive events.
 158   std::string src_extension_id_;
 159
 160   // The source extension's ID of this utterance, so that it can associate
 161   // events with the appropriate callback.
 162   int src_id_;
 163
 164   // The URL of the page where the source extension called speak.
 165   GURL src_url_;
 166
 167   // The parsed options.
 168   std::string voice_name_;
 169   std::string lang_;
 170   std::string gender_;
 171   UtteranceContinuousParameters continuous_parameters_;
 172   bool can_enqueue_;
 173   std::set<std::string> required_event_types_;
 174   std::set<std::string> desired_event_types_;
 175
 176   // The index of the current char being spoken.
 177   int char_index_;
 178
 179   // True if this utterance received an event indicating it's done.
 180   bool finished_;
 181 };
 182
 183
 184 // Singleton class that manages text-to-speech for the TTS and TTS engine
 185 // extension APIs, maintaining a queue of pending utterances and keeping
 186 // track of all state.
 187 class TtsController {
 188  public:
 189   // Get the single instance of this class.
 190   static TtsController* GetInstance();
 191
 192   // Returns true if we're currently speaking an utterance.
 193   bool IsSpeaking();
 194
 195   // Speak the given utterance. If the utterance's can_enqueue flag is true
 196   // and another utterance is in progress, adds it to the end of the queue.
 197   // Otherwise, interrupts any current utterance and speaks this one
 198   // immediately.
 199   void SpeakOrEnqueue(Utterance* utterance);
 200
 201   // Stop all utterances and flush the queue.
 202   void Stop();
 203
 204   // Handle events received from the speech engine. Events are forwarded to
 205   // the callback function, and in addition, completion and error events
 206   // trigger finishing the current utterance and starting the next one, if
 207   // any.
 208   void OnTtsEvent(int utterance_id,
 209                   TtsEventType event_type,
 210                   int char_index,
 211                   const std::string& error_message);
 212
 213   // Return a list of all available voices, including the native voice,
 214   // if supported, and all voices registered by extensions.
 215   base::ListValue* GetVoices(Profile* profile);
 216
 217   // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
 218   // finishes loading the built-in TTS component extension.
 219   void RetrySpeakingQueuedUtterances();
 220
 221   // For unit testing.
 222   void SetPlatformImpl(TtsPlatformImpl* platform_impl);
 223   int QueueSize();
 224
 225  protected:
 226   TtsController();
 227   virtual ~TtsController();
 228
 229  private:
 230   // Get the platform TTS implementation (or injected mock).
 231   TtsPlatformImpl* GetPlatformImpl();
 232
 233   // Start speaking the given utterance. Will either take ownership of
 234   // |utterance| or delete it if there's an error. Returns true on success.
 235   void SpeakNow(Utterance* utterance);
 236
 237   // Clear the utterance queue. If send_events is true, will send
 238   // TTS_EVENT_CANCELLED events on each one.
 239   void ClearUtteranceQueue(bool send_events);
 240
 241   // Finalize and delete the current utterance.
 242   void FinishCurrentUtterance();
 243
 244   // Start speaking the next utterance in the queue.
 245   void SpeakNextUtterance();
 246
 247   friend struct DefaultSingletonTraits<TtsController>;
 248
 249   // The current utterance being spoken.
 250   Utterance* current_utterance_;
 251
 252   // A queue of utterances to speak after the current one finishes.
 253   std::queue<Utterance*> utterance_queue_;
 254
 255   // A pointer to the platform implementation of text-to-speech, for
 256   // dependency injection.
 257   TtsPlatformImpl* platform_impl_;
 258
 259   DISALLOW_COPY_AND_ASSIGN(TtsController);
 260 };
 261
 262 #endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_