chrome/browser/resources/network_speech_synthesis/tts_extension.js

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 /**
   6  * @fileoverview
   7  * This is a component extension that implements a text-to-speech (TTS)
   8  * engine powered by Google's speech synthesis API.
   9  *
  10  * This is an "event page", so it's not loaded when the API isn't being used,
  11  * and doesn't waste resources. When a web page or web app makes a speech
  12  * request and the parameters match one of the voices in this extension's
  13  * manifest, it makes a request to Google's API using Chrome's private key
  14  * and plays the resulting speech using HTML5 audio.
  15  */
  16
  17 /**
  18  * The main class for this extension. Adds listeners to
  19  * chrome.ttsEngine.onSpeak and chrome.ttsEngine.onStop and implements
  20  * them using Google's speech synthesis API.
  21  * @constructor
  22  */
  23 function TtsExtension() {}
  24
  25 TtsExtension.prototype = {
  26   /**
  27    * The url prefix of the speech server, including static query
  28    * parameters that don't change.
  29    * @type {string}
  30    * @const
  31    * @private
  32    */
  33   SPEECH_SERVER_URL_:
  34       'https://www.google.com/speech-api/v2/synthesize?' +
  35       'enc=mpeg&client=chromium',
  36
  37   /**
  38    * A mapping from language and gender to voice name, hardcoded for now
  39    * until the speech synthesis server capabilities response provides this.
  40    * The key of this map is of the form '<lang>-<gender>'.
  41    * @type {Object<string>}
  42    * @private
  43    */
  44   LANG_AND_GENDER_TO_VOICE_NAME_: {
  45     'en-gb-male': 'rjs',
  46     'en-gb-female': 'fis',
  47   },
  48
  49   /**
  50    * The arguments passed to the onSpeak event handler for the utterance
  51    * that's currently being spoken. Should be null when no object is
  52    * pending.
  53    *
  54    * @type {?{utterance: string, options: Object, callback: Function}}
  55    * @private
  56    */
  57   currentUtterance_: null,
  58
  59   /**
  60    * The HTML5 audio element we use for playing the sound served by the
  61    * speech server.
  62    * @type {HTMLAudioElement}
  63    * @private
  64    */
  65   audioElement_: null,
  66
  67   /**
  68    * A mapping from voice name to language and gender, derived from the
  69    * manifest file.  This is used in case the speech synthesis request
  70    * specifies a voice name but doesn't specify a language code or gender.
  71    * @type {Object<{lang: string, gender: string}>}
  72    * @private
  73    */
  74   voiceNameToLangAndGender_: {},
  75
  76   /**
  77    * This is the main function called to initialize this extension.
  78    * Initializes data structures and adds event listeners.
  79    */
  80   init: function() {
  81     // Get voices from manifest.
  82     var voices = chrome.app.getDetails().tts_engine.voices;
  83     for (var i = 0; i < voices.length; i++) {
  84       this.voiceNameToLangAndGender_[voices[i].voice_name] = {
  85         lang: voices[i].lang,
  86         gender: voices[i].gender
  87       };
  88     }
  89
  90     // Initialize the audio element and event listeners on it.
  91     this.audioElement_ = document.createElement('audio');
  92     document.body.appendChild(this.audioElement_);
  93     this.audioElement_.addEventListener(
  94         'ended', this.onStop_.bind(this), false);
  95     this.audioElement_.addEventListener(
  96         'canplaythrough', this.onStart_.bind(this), false);
  97
  98     // Install event listeners for the ttsEngine API.
  99     chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
 100     chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
 101     chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
 102     chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
 103   },
 104
 105   /**
 106    * Handler for the chrome.ttsEngine.onSpeak interface.
 107    * Gets Chrome's Google API key and then uses it to generate a request
 108    * url for the requested speech utterance. Sets that url as the source
 109    * of the HTML5 audio element.
 110    * @param {string} utterance The text to be spoken.
 111    * @param {Object} options Options to control the speech, as defined
 112    *     in the Chrome ttsEngine extension API.
 113    * @private
 114    */
 115   onSpeak_: function(utterance, options, callback) {
 116     // Truncate the utterance if it's too long. Both Chrome's tts
 117     // extension api and the web speech api specify 32k as the
 118     // maximum limit for an utterance.
 119     if (utterance.length > 32768)
 120       utterance = utterance.substr(0, 32768);
 121
 122     try {
 123       // First, stop any pending audio.
 124       this.onStop_();
 125
 126       this.currentUtterance_ = {
 127         utterance: utterance,
 128         options: options,
 129         callback: callback
 130       };
 131
 132       var lang = options.lang;
 133       var gender = options.gender;
 134       if (options.voiceName) {
 135         lang = this.voiceNameToLangAndGender_[options.voiceName].lang;
 136         gender = this.voiceNameToLangAndGender_[options.voiceName].gender;
 137       }
 138
 139       if (!lang)
 140         lang = navigator.language;
 141
 142       // Look up the specific voice name for this language and gender.
 143       // If it's not in the map, it doesn't matter - the language will
 144       // be used directly. This is only used for languages where more
 145       // than one gender is actually available.
 146       var key = lang.toLowerCase() + '-' + gender;
 147       var voiceName = this.LANG_AND_GENDER_TO_VOICE_NAME_[key];
 148
 149       var url = this.SPEECH_SERVER_URL_;
 150       chrome.systemPrivate.getApiKey((function(key) {
 151         url += '&key=' + key;
 152         url += '&text=' + encodeURIComponent(utterance);
 153         url += '&lang=' + lang.toLowerCase();
 154
 155         if (voiceName)
 156           url += '&name=' + voiceName;
 157
 158         if (options.rate) {
 159           // Input rate is between 0.1 and 10.0 with a default of 1.0.
 160           // Output speed is between 0.0 and 1.0 with a default of 0.5.
 161           url += '&speed=' + (options.rate / 2.0);
 162         }
 163
 164         if (options.pitch) {
 165           // Input pitch is between 0.0 and 2.0 with a default of 1.0.
 166           // Output pitch is between 0.0 and 1.0 with a default of 0.5.
 167           url += '&pitch=' + (options.pitch / 2.0);
 168         }
 169
 170         // This begins loading the audio but does not play it.
 171         // When enough of the audio has loaded to begin playback,
 172         // the 'canplaythrough' handler will call this.onStart_,
 173         // which sends a start event to the ttsEngine callback and
 174         // then begins playing audio.
 175         this.audioElement_.src = url;
 176       }).bind(this));
 177     } catch (err) {
 178       console.error(String(err));
 179       callback({
 180         'type': 'error',
 181         'errorMessage': String(err)
 182       });
 183       this.currentUtterance_ = null;
 184     }
 185   },
 186
 187   /**
 188    * Handler for the chrome.ttsEngine.onStop interface.
 189    * Called either when the ttsEngine API requests us to stop, or when
 190    * we reach the end of the audio stream. Pause the audio element to
 191    * silence it, and send a callback to the ttsEngine API to let it know
 192    * that we've completed. Note that the ttsEngine API manages callback
 193    * messages and will automatically replace the 'end' event with a
 194    * more specific callback like 'interrupted' when sending it to the
 195    * TTS client.
 196    * @private
 197    */
 198   onStop_: function() {
 199     if (this.currentUtterance_) {
 200       this.audioElement_.pause();
 201       this.currentUtterance_.callback({
 202         'type': 'end',
 203         'charIndex': this.currentUtterance_.utterance.length
 204       });
 205     }
 206     this.currentUtterance_ = null;
 207   },
 208
 209   /**
 210    * Handler for the canplaythrough event on the audio element.
 211    * Called when the audio element has buffered enough audio to begin
 212    * playback. Send the 'start' event to the ttsEngine callback and
 213    * then begin playing the audio element.
 214    * @private
 215    */
 216   onStart_: function() {
 217     if (this.currentUtterance_) {
 218       if (this.currentUtterance_.options.volume !== undefined) {
 219         // Both APIs use the same range for volume, between 0.0 and 1.0.
 220         this.audioElement_.volume = this.currentUtterance_.options.volume;
 221       }
 222       this.audioElement_.play();
 223       this.currentUtterance_.callback({
 224           'type': 'start',
 225           'charIndex': 0
 226       });
 227     }
 228   },
 229
 230   /**
 231    * Handler for the chrome.ttsEngine.onPause interface.
 232    * Pauses audio if we're in the middle of an utterance.
 233    * @private
 234    */
 235   onPause_: function() {
 236     if (this.currentUtterance_) {
 237       this.audioElement_.pause();
 238     }
 239   },
 240
 241   /**
 242    * Handler for the chrome.ttsEngine.onPause interface.
 243    * Resumes audio if we're in the middle of an utterance.
 244    * @private
 245    */
 246   onResume_: function() {
 247     if (this.currentUtterance_) {
 248       this.audioElement_.play();
 249     }
 250   }
 251
 252 };
 253
 254 (new TtsExtension()).init();