NaCl: Update revision in DEPS, r12770 -> r12773
[chromium-blink-merge.git] / chrome / browser / speech / tts_controller.h
blobc8d1c10a4cf456421169892bce4b179ce896a20a
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
6 #define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
8 #include <queue>
9 #include <set>
10 #include <string>
11 #include <vector>
13 #include "base/memory/scoped_ptr.h"
14 #include "base/memory/singleton.h"
15 #include "base/memory/weak_ptr.h"
16 #include "url/gurl.h"
// Forward declarations of types that this header only uses through
// pointers, references or other declarations.
class Utterance;
class TtsPlatformImpl;
class Profile;

namespace base {
class Value;
}
// Events sent back from the TTS engine indicating the progress.
// The enumerators are unscoped and implicitly numbered from 0 in
// declaration order.
enum TtsEventType {
  TTS_EVENT_START,
  TTS_EVENT_END,
  TTS_EVENT_WORD,
  TTS_EVENT_SENTENCE,
  TTS_EVENT_MARKER,
  TTS_EVENT_INTERRUPTED,
  TTS_EVENT_CANCELLED,
  TTS_EVENT_ERROR,
  TTS_EVENT_PAUSE,
  TTS_EVENT_RESUME
};
// Gender associated with a voice (see VoiceData::gender) or requested for
// an utterance (see Utterance::gender()). TTS_GENDER_NONE is the first
// enumerator and therefore has the value 0.
enum TtsGenderType {
  TTS_GENDER_NONE,
  TTS_GENDER_MALE,
  TTS_GENDER_FEMALE
};
46 // Returns true if this event type is one that indicates an utterance
47 // is finished and can be destroyed.
48 bool IsFinalTtsEventType(TtsEventType event_type);
// The continuous parameters that apply to a given utterance.
struct UtteranceContinuousParameters {
  // Defined out of line; the initial values of the fields below are set
  // there and are not visible from this header.
  UtteranceContinuousParameters();

  double rate;
  double pitch;
  double volume;
};
59 // Information about one voice.
60 struct VoiceData {
61 VoiceData();
62 ~VoiceData();
64 std::string name;
65 std::string lang;
66 TtsGenderType gender;
67 std::string extension_id;
68 std::set<TtsEventType> events;
70 // If true, the synthesis engine is a remote network resource.
71 // It may be higher latency and may incur bandwidth costs.
72 bool remote;
74 // If true, this is implemented by this platform's subclass of
75 // TtsPlatformImpl. If false, this is implemented by an extension.
76 bool native;
77 std::string native_voice_identifier;
80 // Class that wants to receive events on utterances.
81 class UtteranceEventDelegate {
82 public:
83 virtual ~UtteranceEventDelegate() {}
84 virtual void OnTtsEvent(Utterance* utterance,
85 TtsEventType event_type,
86 int char_index,
87 const std::string& error_message) = 0;
// Class that wants to be notified when the set of
// voices has changed.
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification hook; carries no arguments, so implementers that need the
  // new voice list have to query it themselves.
  virtual void OnVoicesChanged() = 0;
};
98 // One speech utterance.
99 class Utterance {
100 public:
101 // Construct an utterance given a profile and a completion task to call
102 // when the utterance is done speaking. Before speaking this utterance,
103 // its other parameters like text, rate, pitch, etc. should all be set.
104 explicit Utterance(Profile* profile);
105 ~Utterance();
107 // Sends an event to the delegate. If the event type is TTS_EVENT_END
108 // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
109 // uses the last good value.
110 void OnTtsEvent(TtsEventType event_type,
111 int char_index,
112 const std::string& error_message);
114 // Finish an utterance without sending an event to the delegate.
115 void Finish();
117 // Getters and setters for the text to speak and other speech options.
118 void set_text(const std::string& text) { text_ = text; }
119 const std::string& text() const { return text_; }
121 void set_options(const base::Value* options);
122 const base::Value* options() const { return options_.get(); }
124 void set_src_extension_id(const std::string& src_extension_id) {
125 src_extension_id_ = src_extension_id;
127 const std::string& src_extension_id() { return src_extension_id_; }
129 void set_src_id(int src_id) { src_id_ = src_id; }
130 int src_id() { return src_id_; }
132 void set_src_url(const GURL& src_url) { src_url_ = src_url; }
133 const GURL& src_url() { return src_url_; }
135 void set_voice_name(const std::string& voice_name) {
136 voice_name_ = voice_name;
138 const std::string& voice_name() const { return voice_name_; }
140 void set_lang(const std::string& lang) {
141 lang_ = lang;
143 const std::string& lang() const { return lang_; }
145 void set_gender(TtsGenderType gender) {
146 gender_ = gender;
148 TtsGenderType gender() const { return gender_; }
150 void set_continuous_parameters(const UtteranceContinuousParameters& params) {
151 continuous_parameters_ = params;
153 const UtteranceContinuousParameters& continuous_parameters() {
154 return continuous_parameters_;
157 void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
158 bool can_enqueue() const { return can_enqueue_; }
160 void set_required_event_types(const std::set<TtsEventType>& types) {
161 required_event_types_ = types;
163 const std::set<TtsEventType>& required_event_types() const {
164 return required_event_types_;
167 void set_desired_event_types(const std::set<TtsEventType>& types) {
168 desired_event_types_ = types;
170 const std::set<TtsEventType>& desired_event_types() const {
171 return desired_event_types_;
174 const std::string& extension_id() const { return extension_id_; }
175 void set_extension_id(const std::string& extension_id) {
176 extension_id_ = extension_id;
179 UtteranceEventDelegate* event_delegate() const {
180 return event_delegate_.get();
182 void set_event_delegate(
183 base::WeakPtr<UtteranceEventDelegate> event_delegate) {
184 event_delegate_ = event_delegate;
187 // Getters and setters for internal state.
188 Profile* profile() const { return profile_; }
189 int id() const { return id_; }
190 bool finished() const { return finished_; }
192 private:
193 // The profile that initiated this utterance.
194 Profile* profile_;
196 // The extension ID of the extension providing TTS for this utterance, or
197 // empty if native TTS is being used.
198 std::string extension_id_;
200 // The unique ID of this utterance, used to associate callback functions
201 // with utterances.
202 int id_;
204 // The id of the next utterance, so we can associate requests with
205 // responses.
206 static int next_utterance_id_;
208 // The text to speak.
209 std::string text_;
211 // The full options arg passed to tts.speak, which may include fields
212 // other than the ones we explicitly parse, below.
213 scoped_ptr<base::Value> options_;
215 // The extension ID of the extension that called speak() and should
216 // receive events.
217 std::string src_extension_id_;
219 // The source extension's ID of this utterance, so that it can associate
220 // events with the appropriate callback.
221 int src_id_;
223 // The URL of the page where the source extension called speak.
224 GURL src_url_;
226 // The delegate to be called when an utterance event is fired.
227 base::WeakPtr<UtteranceEventDelegate> event_delegate_;
229 // The parsed options.
230 std::string voice_name_;
231 std::string lang_;
232 TtsGenderType gender_;
233 UtteranceContinuousParameters continuous_parameters_;
234 bool can_enqueue_;
235 std::set<TtsEventType> required_event_types_;
236 std::set<TtsEventType> desired_event_types_;
238 // The index of the current char being spoken.
239 int char_index_;
241 // True if this utterance received an event indicating it's done.
242 bool finished_;
245 // Singleton class that manages text-to-speech for the TTS and TTS engine
246 // extension APIs, maintaining a queue of pending utterances and keeping
247 // track of all state.
248 class TtsController {
249 public:
250 // Get the single instance of this class.
251 static TtsController* GetInstance();
253 // Returns true if we're currently speaking an utterance.
254 bool IsSpeaking();
256 // Speak the given utterance. If the utterance's can_enqueue flag is true
257 // and another utterance is in progress, adds it to the end of the queue.
258 // Otherwise, interrupts any current utterance and speaks this one
259 // immediately.
260 void SpeakOrEnqueue(Utterance* utterance);
262 // Stop all utterances and flush the queue. Implies leaving pause mode
263 // as well.
264 void Stop();
266 // Pause the speech queue. Some engines may support pausing in the middle
267 // of an utterance.
268 void Pause();
270 // Resume speaking.
271 void Resume();
273 // Handle events received from the speech engine. Events are forwarded to
274 // the callback function, and in addition, completion and error events
275 // trigger finishing the current utterance and starting the next one, if
276 // any.
277 void OnTtsEvent(int utterance_id,
278 TtsEventType event_type,
279 int char_index,
280 const std::string& error_message);
282 // Return a list of all available voices, including the native voice,
283 // if supported, and all voices registered by extensions.
284 void GetVoices(Profile* profile, std::vector<VoiceData>* out_voices);
286 // Called by TtsExtensionLoaderChromeOs::LoadTtsExtension when it
287 // finishes loading the built-in TTS component extension.
288 void RetrySpeakingQueuedUtterances();
290 // Called by the extension system or platform implementation when the
291 // list of voices may have changed and should be re-queried.
292 void VoicesChanged();
294 // Add a delegate that wants to be notified when the set of voices changes.
295 void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate);
297 // Remove delegate that wants to be notified when the set of voices changes.
298 void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate);
300 // For unit testing.
301 void SetPlatformImpl(TtsPlatformImpl* platform_impl);
302 int QueueSize();
304 protected:
305 TtsController();
306 virtual ~TtsController();
308 private:
309 // Get the platform TTS implementation (or injected mock).
310 TtsPlatformImpl* GetPlatformImpl();
312 // Start speaking the given utterance. Will either take ownership of
313 // |utterance| or delete it if there's an error. Returns true on success.
314 void SpeakNow(Utterance* utterance);
316 // Clear the utterance queue. If send_events is true, will send
317 // TTS_EVENT_CANCELLED events on each one.
318 void ClearUtteranceQueue(bool send_events);
320 // Finalize and delete the current utterance.
321 void FinishCurrentUtterance();
323 // Start speaking the next utterance in the queue.
324 void SpeakNextUtterance();
326 // Given an utterance and a vector of voices, return the
327 // index of the voice that best matches the utterance.
328 int GetMatchingVoice(const Utterance* utterance,
329 std::vector<VoiceData>& voices);
331 friend struct DefaultSingletonTraits<TtsController>;
333 // The current utterance being spoken.
334 Utterance* current_utterance_;
336 // Whether the queue is paused or not.
337 bool paused_;
339 // A queue of utterances to speak after the current one finishes.
340 std::queue<Utterance*> utterance_queue_;
342 // A set of delegates that want to be notified when the voices change.
343 std::set<VoicesChangedDelegate*> voices_changed_delegates_;
345 // A pointer to the platform implementation of text-to-speech, for
346 // dependency injection.
347 TtsPlatformImpl* platform_impl_;
349 DISALLOW_COPY_AND_ASSIGN(TtsController);
352 #endif // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_