chrome/browser/speech/tts_linux.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6
   7 #include <map>
   8
   9 #include "base/command_line.h"
  10 #include "base/debug/leak_annotations.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/memory/singleton.h"
  13 #include "base/synchronization/lock.h"
  14 #include "chrome/browser/speech/tts_platform.h"
  15 #include "content/public/browser/browser_thread.h"
  16 #include "content/public/common/content_switches.h"
  17
  18 #include "library_loaders/libspeechd.h"
  19
  20 using content::BrowserThread;
  21
  22 namespace {
  23
  24 const char kNotSupportedError[] =
  25     "Native speech synthesis not supported on this platform.";
  26
  27 struct SPDChromeVoice {
  28   std::string name;
  29   std::string module;
  30 };
  31
  32 }  // namespace
  33
  34 class TtsPlatformImplLinux : public TtsPlatformImpl {
  35  public:
  36   virtual bool PlatformImplAvailable() OVERRIDE;
  37   virtual bool Speak(
  38       int utterance_id,
  39       const std::string& utterance,
  40       const std::string& lang,
  41       const VoiceData& voice,
  42       const UtteranceContinuousParameters& params) OVERRIDE;
  43   virtual bool StopSpeaking() OVERRIDE;
  44   virtual void Pause() OVERRIDE;
  45   virtual void Resume() OVERRIDE;
  46   virtual bool IsSpeaking() OVERRIDE;
  47   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
  48
  49   void OnSpeechEvent(SPDNotificationType type);
  50
  51   // Get the single instance of this class.
  52   static TtsPlatformImplLinux* GetInstance();
  53
  54  private:
  55   TtsPlatformImplLinux();
  56   virtual ~TtsPlatformImplLinux();
  57
  58   // Initiate the connection with the speech dispatcher.
  59   void Initialize();
  60
  61   // Resets the connection with speech dispatcher.
  62   void Reset();
  63
  64   static void NotificationCallback(size_t msg_id,
  65                                    size_t client_id,
  66                                    SPDNotificationType type);
  67
  68   static void IndexMarkCallback(size_t msg_id,
  69                                 size_t client_id,
  70                                 SPDNotificationType state,
  71                                 char* index_mark);
  72
  73   static SPDNotificationType current_notification_;
  74
  75   base::Lock initialization_lock_;
  76   LibSpeechdLoader libspeechd_loader_;
  77   SPDConnection* conn_;
  78
  79   // These apply to the current utterance only.
  80   std::string utterance_;
  81   int utterance_id_;
  82
  83   // Map a string composed of a voicename and module to the voicename. Used to
  84   // uniquely identify a voice across all available modules.
  85   scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
  86
  87   friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;
  88
  89   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
  90 };
  91
// static
// Last notification type recorded by the Speech Dispatcher callbacks.
// IsSpeaking() reports true only while this equals SPD_EVENT_BEGIN, so the
// SPD_EVENT_END initial value means "not speaking" until a callback fires.
SPDNotificationType TtsPlatformImplLinux::current_notification_ =
    SPD_EVENT_END;
  95
  96 TtsPlatformImplLinux::TtsPlatformImplLinux()
  97     : utterance_id_(0) {
  98   const CommandLine& command_line = *CommandLine::ForCurrentProcess();
  99   if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
 100     return;
 101
 102   BrowserThread::PostTask(BrowserThread::FILE,
 103                           FROM_HERE,
 104                           base::Bind(&TtsPlatformImplLinux::Initialize,
 105                                      base::Unretained(this)));
 106 }
 107
 108 void TtsPlatformImplLinux::Initialize() {
 109   base::AutoLock lock(initialization_lock_);
 110
 111   if (!libspeechd_loader_.Load("libspeechd.so.2"))
 112     return;
 113
 114   {
 115     // spd_open has memory leaks which are hard to suppress.
 116     // http://crbug.com/317360
 117     ANNOTATE_SCOPED_MEMORY_LEAK;
 118     conn_ = libspeechd_loader_.spd_open(
 119         "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
 120   }
 121   if (!conn_)
 122     return;
 123
 124   // Register callbacks for all events.
 125   conn_->callback_begin =
 126     conn_->callback_end =
 127     conn_->callback_cancel =
 128     conn_->callback_pause =
 129     conn_->callback_resume =
 130     &NotificationCallback;
 131
 132   conn_->callback_im = &IndexMarkCallback;
 133
 134   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
 135   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
 136   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
 137   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
 138   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
 139 }
 140
 141 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
 142   base::AutoLock lock(initialization_lock_);
 143   if (conn_) {
 144     libspeechd_loader_.spd_close(conn_);
 145     conn_ = NULL;
 146   }
 147 }
 148
 149 void TtsPlatformImplLinux::Reset() {
 150   base::AutoLock lock(initialization_lock_);
 151   if (conn_)
 152     libspeechd_loader_.spd_close(conn_);
 153   conn_ = libspeechd_loader_.spd_open(
 154       "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
 155 }
 156
 157 bool TtsPlatformImplLinux::PlatformImplAvailable() {
 158   if (!initialization_lock_.Try())
 159     return false;
 160   bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
 161   initialization_lock_.Release();
 162   return result;
 163 }
 164
 165 bool TtsPlatformImplLinux::Speak(
 166     int utterance_id,
 167     const std::string& utterance,
 168     const std::string& lang,
 169     const VoiceData& voice,
 170     const UtteranceContinuousParameters& params) {
 171   if (!PlatformImplAvailable()) {
 172     error_ = kNotSupportedError;
 173     return false;
 174   }
 175
 176   // Speech dispatcher's speech params are around 3x at either limit.
 177   float rate = params.rate > 3 ? 3 : params.rate;
 178   rate = params.rate < 0.334 ? 0.334 : rate;
 179   float pitch = params.pitch > 3 ? 3 : params.pitch;
 180   pitch = params.pitch < 0.334 ? 0.334 : pitch;
 181
 182   std::map<std::string, SPDChromeVoice>::iterator it =
 183       all_native_voices_->find(voice.name);
 184   if (it != all_native_voices_->end()) {
 185     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
 186     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
 187   }
 188
 189   // Map our multiplicative range to Speech Dispatcher's linear range.
 190   // .334 = -100.
 191   // 3 = 100.
 192   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
 193   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
 194
 195   utterance_ = utterance;
 196   utterance_id_ = utterance_id;
 197
 198   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
 199     Reset();
 200     return false;
 201   }
 202   return true;
 203 }
 204
 205 bool TtsPlatformImplLinux::StopSpeaking() {
 206   if (!PlatformImplAvailable())
 207     return false;
 208   if (libspeechd_loader_.spd_stop(conn_) == -1) {
 209     Reset();
 210     return false;
 211   }
 212   return true;
 213 }
 214
 215 void TtsPlatformImplLinux::Pause() {
 216   if (!PlatformImplAvailable())
 217     return;
 218   libspeechd_loader_.spd_pause(conn_);
 219 }
 220
 221 void TtsPlatformImplLinux::Resume() {
 222   if (!PlatformImplAvailable())
 223     return;
 224   libspeechd_loader_.spd_resume(conn_);
 225 }
 226
 227 bool TtsPlatformImplLinux::IsSpeaking() {
 228   return current_notification_ == SPD_EVENT_BEGIN;
 229 }
 230
 231 void TtsPlatformImplLinux::GetVoices(
 232     std::vector<VoiceData>* out_voices) {
 233   if (!all_native_voices_.get()) {
 234     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
 235     char** modules = libspeechd_loader_.spd_list_modules(conn_);
 236     if (!modules)
 237       return;
 238     for (int i = 0; modules[i]; i++) {
 239       char* module = modules[i];
 240       libspeechd_loader_.spd_set_output_module(conn_, module);
 241       SPDVoice** native_voices =
 242           libspeechd_loader_.spd_list_synthesis_voices(conn_);
 243       if (!native_voices) {
 244         free(module);
 245         continue;
 246       }
 247       for (int j = 0; native_voices[j]; j++) {
 248         SPDVoice* native_voice = native_voices[j];
 249         SPDChromeVoice native_data;
 250         native_data.name = native_voice->name;
 251         native_data.module = module;
 252         std::string key;
 253         key.append(native_data.name);
 254         key.append(" ");
 255         key.append(native_data.module);
 256         all_native_voices_->insert(
 257             std::pair<std::string, SPDChromeVoice>(key, native_data));
 258         free(native_voices[j]);
 259       }
 260       free(modules[i]);
 261     }
 262   }
 263
 264   for (std::map<std::string, SPDChromeVoice>::iterator it =
 265            all_native_voices_->begin();
 266        it != all_native_voices_->end();
 267        it++) {
 268     out_voices->push_back(VoiceData());
 269     VoiceData& voice = out_voices->back();
 270     voice.native = true;
 271     voice.name = it->first;
 272     voice.events.insert(TTS_EVENT_START);
 273     voice.events.insert(TTS_EVENT_END);
 274     voice.events.insert(TTS_EVENT_CANCELLED);
 275     voice.events.insert(TTS_EVENT_MARKER);
 276     voice.events.insert(TTS_EVENT_PAUSE);
 277     voice.events.insert(TTS_EVENT_RESUME);
 278   }
 279 }
 280
 281 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
 282   TtsController* controller = TtsController::GetInstance();
 283   switch (type) {
 284   case SPD_EVENT_BEGIN:
 285     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
 286     break;
 287   case SPD_EVENT_RESUME:
 288     controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
 289     break;
 290   case SPD_EVENT_END:
 291     controller->OnTtsEvent(
 292         utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
 293     break;
 294   case SPD_EVENT_PAUSE:
 295     controller->OnTtsEvent(
 296         utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
 297     break;
 298   case SPD_EVENT_CANCEL:
 299     controller->OnTtsEvent(
 300         utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
 301     break;
 302   case SPD_EVENT_INDEX_MARK:
 303     controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
 304     break;
 305   }
 306 }
 307
 308 // static
 309 void TtsPlatformImplLinux::NotificationCallback(
 310     size_t msg_id, size_t client_id, SPDNotificationType type) {
 311   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 312   // be in a separate thread.
 313   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 314     current_notification_ = type;
 315     BrowserThread::PostTask(
 316         BrowserThread::UI,
 317         FROM_HERE,
 318         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 319                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
 320                    type));
 321   }
 322 }
 323
 324 // static
 325 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
 326                                                       size_t client_id,
 327                                                       SPDNotificationType state,
 328                                                       char* index_mark) {
 329   // TODO(dtseng): index_mark appears to specify an index type supplied by a
 330   // client. Need to explore how this is used before hooking it up with existing
 331   // word, sentence events.
 332   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 333   // be in a separate thread.
 334   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 335     current_notification_ = state;
 336     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
 337         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 338         base::Unretained(TtsPlatformImplLinux::GetInstance()),
 339         state));
 340   }
 341 }
 342
 343 // static
 344 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
 345   return Singleton<TtsPlatformImplLinux,
 346                    LeakySingletonTraits<TtsPlatformImplLinux> >::get();
 347 }
 348
 349 // static
 350 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 351   return TtsPlatformImplLinux::GetInstance();
 352 }