1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 * This is a component extension that implements a text-to-speech (TTS)
8 * engine powered by Google's speech synthesis API.
10 * This is an "event page", so it's not loaded when the API isn't being used,
11 * and doesn't waste resources. When a web page or web app makes a speech
12 * request and the parameters match one of the voices in this extension's
13 * manifest, it makes a request to Google's API using Chrome's private key
14 * and plays the resulting speech using HTML5 audio.
18 * The main class for this extension. Adds listeners to
19 * chrome.ttsEngine.onSpeak and chrome.ttsEngine.onStop and implements
20 * them using Google's speech synthesis API.
23 function TtsExtension() {}
25 TtsExtension
.prototype = {
27 * The url prefix of the speech server, including static query
28 * parameters that don't change.
34 'https://www.google.com/speech-api/v2/synthesize?' +
35 'enc=mpeg&client=chromium',
38 * A mapping from language and gender to voice name, hardcoded for now
39 * until the speech synthesis server capabilities response provides this.
40 * The key of this map is of the form '<lang>-<gender>'.
41 * @type {Object<string>}
44 LANG_AND_GENDER_TO_VOICE_NAME_
: {
46 'en-gb-female': 'fis',
50 * The arguments passed to the onSpeak event handler for the utterance
51 * that's currently being spoken. Should be null when no object is
54 * @type {?{utterance: string, options: Object, callback: Function}}
57 currentUtterance_
: null,
60 * The HTML5 audio element we use for playing the sound served by the
62 * @type {HTMLAudioElement}
68 * A mapping from voice name to language and gender, derived from the
69 * manifest file. This is used in case the speech synthesis request
70 * specifies a voice name but doesn't specify a language code or gender.
71 * @type {Object<{lang: string, gender: string}>}
74 voiceNameToLangAndGender_
: {},
77 * This is the main function called to initialize this extension.
78 * Initializes data structures and adds event listeners.
81 // Get voices from manifest.
82 var voices
= chrome
.app
.getDetails().tts_engine
.voices
;
83 for (var i
= 0; i
< voices
.length
; i
++) {
84 this.voiceNameToLangAndGender_
[voices
[i
].voice_name
] = {
86 gender
: voices
[i
].gender
90 // Initialize the audio element and event listeners on it.
91 this.audioElement_
= document
.createElement('audio');
92 document
.body
.appendChild(this.audioElement_
);
93 this.audioElement_
.addEventListener(
94 'ended', this.onStop_
.bind(this), false);
95 this.audioElement_
.addEventListener(
96 'canplaythrough', this.onStart_
.bind(this), false);
98 // Install event listeners for the ttsEngine API.
99 chrome
.ttsEngine
.onSpeak
.addListener(this.onSpeak_
.bind(this));
100 chrome
.ttsEngine
.onStop
.addListener(this.onStop_
.bind(this));
101 chrome
.ttsEngine
.onPause
.addListener(this.onPause_
.bind(this));
102 chrome
.ttsEngine
.onResume
.addListener(this.onResume_
.bind(this));
106 * Handler for the chrome.ttsEngine.onSpeak interface.
107 * Gets Chrome's Google API key and then uses it to generate a request
108 * url for the requested speech utterance. Sets that url as the source
109 * of the HTML5 audio element.
110 * @param {string} utterance The text to be spoken.
111 * @param {Object} options Options to control the speech, as defined
112 * in the Chrome ttsEngine extension API.
115 onSpeak_: function(utterance
, options
, callback
) {
116 // Truncate the utterance if it's too long. Both Chrome's tts
117 // extension api and the web speech api specify 32k as the
118 // maximum limit for an utterance.
119 if (utterance
.length
> 32768)
120 utterance
= utterance
.substr(0, 32768);
123 // First, stop any pending audio.
126 this.currentUtterance_
= {
127 utterance
: utterance
,
132 var lang
= options
.lang
;
133 var gender
= options
.gender
;
134 if (options
.voiceName
) {
135 lang
= this.voiceNameToLangAndGender_
[options
.voiceName
].lang
;
136 gender
= this.voiceNameToLangAndGender_
[options
.voiceName
].gender
;
140 lang
= navigator
.language
;
142 // Look up the specific voice name for this language and gender.
143 // If it's not in the map, it doesn't matter - the language will
144 // be used directly. This is only used for languages where more
145 // than one gender is actually available.
146 var key
= lang
.toLowerCase() + '-' + gender
;
147 var voiceName
= this.LANG_AND_GENDER_TO_VOICE_NAME_
[key
];
149 var url
= this.SPEECH_SERVER_URL_
;
150 chrome
.systemPrivate
.getApiKey((function(key
) {
151 url
+= '&key=' + key
;
152 url
+= '&text=' + encodeURIComponent(utterance
);
153 url
+= '&lang=' + lang
.toLowerCase();
156 url
+= '&name=' + voiceName
;
159 // Input rate is between 0.1 and 10.0 with a default of 1.0.
160 // Output speed is between 0.0 and 1.0 with a default of 0.5.
161 url
+= '&speed=' + (options
.rate
/ 2.0);
165 // Input pitch is between 0.0 and 2.0 with a default of 1.0.
166 // Output pitch is between 0.0 and 1.0 with a default of 0.5.
167 url
+= '&pitch=' + (options
.pitch
/ 2.0);
170 // This begins loading the audio but does not play it.
171 // When enough of the audio has loaded to begin playback,
172 // the 'canplaythrough' handler will call this.onStart_,
173 // which sends a start event to the ttsEngine callback and
174 // then begins playing audio.
175 this.audioElement_
.src
= url
;
178 console
.error(String(err
));
181 'errorMessage': String(err
)
183 this.currentUtterance_
= null;
188 * Handler for the chrome.ttsEngine.onStop interface.
189 * Called either when the ttsEngine API requests us to stop, or when
190 * we reach the end of the audio stream. Pause the audio element to
191 * silence it, and send a callback to the ttsEngine API to let it know
192 * that we've completed. Note that the ttsEngine API manages callback
193 * messages and will automatically replace the 'end' event with a
194 * more specific callback like 'interrupted' when sending it to the
198 onStop_: function() {
199 if (this.currentUtterance_
) {
200 this.audioElement_
.pause();
201 this.currentUtterance_
.callback({
203 'charIndex': this.currentUtterance_
.utterance
.length
206 this.currentUtterance_
= null;
210 * Handler for the canplaythrough event on the audio element.
211 * Called when the audio element has buffered enough audio to begin
212 * playback. Send the 'start' event to the ttsEngine callback and
213 * then begin playing the audio element.
216 onStart_: function() {
217 if (this.currentUtterance_
) {
218 if (this.currentUtterance_
.options
.volume
!== undefined) {
219 // Both APIs use the same range for volume, between 0.0 and 1.0.
220 this.audioElement_
.volume
= this.currentUtterance_
.options
.volume
;
222 this.audioElement_
.play();
223 this.currentUtterance_
.callback({
231 * Handler for the chrome.ttsEngine.onPause interface.
232 * Pauses audio if we're in the middle of an utterance.
235 onPause_: function() {
236 if (this.currentUtterance_
) {
237 this.audioElement_
.pause();
242 * Handler for the chrome.ttsEngine.onResume interface.
243 * Resumes audio if we're in the middle of an utterance.
246 onResume_: function() {
247 if (this.currentUtterance_
) {
248 this.audioElement_
.play();
254 (new TtsExtension()).init();