chrome/browser/speech/tts_linux.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6
   7 #include <map>
   8
   9 #include "base/command_line.h"
  10 #include "base/debug/leak_annotations.h"
  11 #include "base/memory/scoped_ptr.h"
  12 #include "base/memory/singleton.h"
  13 #include "base/synchronization/lock.h"
  14 #include "chrome/browser/speech/tts_platform.h"
  15 #include "content/public/browser/browser_thread.h"
  16 #include "content/public/common/content_switches.h"
  17
  18 #include "library_loaders/libspeechd.h"
  19
  20 using content::BrowserThread;
  21
  22 namespace {
  23
  24 const char kNotSupportedError[] =
  25     "Native speech synthesis not supported on this platform.";
  26
  27 struct SPDChromeVoice {
  28   std::string name;
  29   std::string module;
  30 };
  31
  32 }  // namespace
  33
  34 class TtsPlatformImplLinux : public TtsPlatformImpl {
  35  public:
  36   bool PlatformImplAvailable() override;
  37   bool Speak(int utterance_id,
  38              const std::string& utterance,
  39              const std::string& lang,
  40              const VoiceData& voice,
  41              const UtteranceContinuousParameters& params) override;
  42   bool StopSpeaking() override;
  43   void Pause() override;
  44   void Resume() override;
  45   bool IsSpeaking() override;
  46   void GetVoices(std::vector<VoiceData>* out_voices) override;
  47
  48   void OnSpeechEvent(SPDNotificationType type);
  49
  50   // Get the single instance of this class.
  51   static TtsPlatformImplLinux* GetInstance();
  52
  53  private:
  54   TtsPlatformImplLinux();
  55   ~TtsPlatformImplLinux() override;
  56
  57   // Initiate the connection with the speech dispatcher.
  58   void Initialize();
  59
  60   // Resets the connection with speech dispatcher.
  61   void Reset();
  62
  63   static void NotificationCallback(size_t msg_id,
  64                                    size_t client_id,
  65                                    SPDNotificationType type);
  66
  67   static void IndexMarkCallback(size_t msg_id,
  68                                 size_t client_id,
  69                                 SPDNotificationType state,
  70                                 char* index_mark);
  71
  72   static SPDNotificationType current_notification_;
  73
  74   base::Lock initialization_lock_;
  75   LibSpeechdLoader libspeechd_loader_;
  76   SPDConnection* conn_;
  77
  78   // These apply to the current utterance only.
  79   std::string utterance_;
  80   int utterance_id_;
  81
  82   // Map a string composed of a voicename and module to the voicename. Used to
  83   // uniquely identify a voice across all available modules.
  84   scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
  85
  86   friend struct DefaultSingletonTraits<TtsPlatformImplLinux>;
  87
  88   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
  89 };
  90
  91 // static
  92 SPDNotificationType TtsPlatformImplLinux::current_notification_ =
  93     SPD_EVENT_END;
  94
  95 TtsPlatformImplLinux::TtsPlatformImplLinux()
  96     : utterance_id_(0) {
  97   const CommandLine& command_line = *CommandLine::ForCurrentProcess();
  98   if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
  99     return;
 100
 101   BrowserThread::PostTask(BrowserThread::FILE,
 102                           FROM_HERE,
 103                           base::Bind(&TtsPlatformImplLinux::Initialize,
 104                                      base::Unretained(this)));
 105 }
 106
 107 void TtsPlatformImplLinux::Initialize() {
 108   base::AutoLock lock(initialization_lock_);
 109
 110   if (!libspeechd_loader_.Load("libspeechd.so.2"))
 111     return;
 112
 113   {
 114     // spd_open has memory leaks which are hard to suppress.
 115     // http://crbug.com/317360
 116     ANNOTATE_SCOPED_MEMORY_LEAK;
 117     conn_ = libspeechd_loader_.spd_open(
 118         "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
 119   }
 120   if (!conn_)
 121     return;
 122
 123   // Register callbacks for all events.
 124   conn_->callback_begin =
 125     conn_->callback_end =
 126     conn_->callback_cancel =
 127     conn_->callback_pause =
 128     conn_->callback_resume =
 129     &NotificationCallback;
 130
 131   conn_->callback_im = &IndexMarkCallback;
 132
 133   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
 134   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
 135   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
 136   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
 137   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
 138 }
 139
 140 TtsPlatformImplLinux::~TtsPlatformImplLinux() {
 141   base::AutoLock lock(initialization_lock_);
 142   if (conn_) {
 143     libspeechd_loader_.spd_close(conn_);
 144     conn_ = NULL;
 145   }
 146 }
 147
 148 void TtsPlatformImplLinux::Reset() {
 149   base::AutoLock lock(initialization_lock_);
 150   if (conn_)
 151     libspeechd_loader_.spd_close(conn_);
 152   conn_ = libspeechd_loader_.spd_open(
 153       "chrome", "extension_api", NULL, SPD_MODE_SINGLE);
 154 }
 155
 156 bool TtsPlatformImplLinux::PlatformImplAvailable() {
 157   if (!initialization_lock_.Try())
 158     return false;
 159   bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
 160   initialization_lock_.Release();
 161   return result;
 162 }
 163
 164 bool TtsPlatformImplLinux::Speak(
 165     int utterance_id,
 166     const std::string& utterance,
 167     const std::string& lang,
 168     const VoiceData& voice,
 169     const UtteranceContinuousParameters& params) {
 170   if (!PlatformImplAvailable()) {
 171     error_ = kNotSupportedError;
 172     return false;
 173   }
 174
 175   // Speech dispatcher's speech params are around 3x at either limit.
 176   float rate = params.rate > 3 ? 3 : params.rate;
 177   rate = params.rate < 0.334 ? 0.334 : rate;
 178   float pitch = params.pitch > 3 ? 3 : params.pitch;
 179   pitch = params.pitch < 0.334 ? 0.334 : pitch;
 180
 181   std::map<std::string, SPDChromeVoice>::iterator it =
 182       all_native_voices_->find(voice.name);
 183   if (it != all_native_voices_->end()) {
 184     libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
 185     libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
 186   }
 187
 188   // Map our multiplicative range to Speech Dispatcher's linear range.
 189   // .334 = -100.
 190   // 3 = 100.
 191   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
 192   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
 193
 194   utterance_ = utterance;
 195   utterance_id_ = utterance_id;
 196
 197   if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
 198     Reset();
 199     return false;
 200   }
 201   return true;
 202 }
 203
 204 bool TtsPlatformImplLinux::StopSpeaking() {
 205   if (!PlatformImplAvailable())
 206     return false;
 207   if (libspeechd_loader_.spd_stop(conn_) == -1) {
 208     Reset();
 209     return false;
 210   }
 211   return true;
 212 }
 213
 214 void TtsPlatformImplLinux::Pause() {
 215   if (!PlatformImplAvailable())
 216     return;
 217   libspeechd_loader_.spd_pause(conn_);
 218 }
 219
 220 void TtsPlatformImplLinux::Resume() {
 221   if (!PlatformImplAvailable())
 222     return;
 223   libspeechd_loader_.spd_resume(conn_);
 224 }
 225
 226 bool TtsPlatformImplLinux::IsSpeaking() {
 227   return current_notification_ == SPD_EVENT_BEGIN;
 228 }
 229
 230 void TtsPlatformImplLinux::GetVoices(
 231     std::vector<VoiceData>* out_voices) {
 232   if (!all_native_voices_.get()) {
 233     all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
 234     char** modules = libspeechd_loader_.spd_list_modules(conn_);
 235     if (!modules)
 236       return;
 237     for (int i = 0; modules[i]; i++) {
 238       char* module = modules[i];
 239       libspeechd_loader_.spd_set_output_module(conn_, module);
 240       SPDVoice** native_voices =
 241           libspeechd_loader_.spd_list_synthesis_voices(conn_);
 242       if (!native_voices) {
 243         free(module);
 244         continue;
 245       }
 246       for (int j = 0; native_voices[j]; j++) {
 247         SPDVoice* native_voice = native_voices[j];
 248         SPDChromeVoice native_data;
 249         native_data.name = native_voice->name;
 250         native_data.module = module;
 251         std::string key;
 252         key.append(native_data.name);
 253         key.append(" ");
 254         key.append(native_data.module);
 255         all_native_voices_->insert(
 256             std::pair<std::string, SPDChromeVoice>(key, native_data));
 257         free(native_voices[j]);
 258       }
 259       free(modules[i]);
 260     }
 261   }
 262
 263   for (std::map<std::string, SPDChromeVoice>::iterator it =
 264            all_native_voices_->begin();
 265        it != all_native_voices_->end();
 266        it++) {
 267     out_voices->push_back(VoiceData());
 268     VoiceData& voice = out_voices->back();
 269     voice.native = true;
 270     voice.name = it->first;
 271     voice.events.insert(TTS_EVENT_START);
 272     voice.events.insert(TTS_EVENT_END);
 273     voice.events.insert(TTS_EVENT_CANCELLED);
 274     voice.events.insert(TTS_EVENT_MARKER);
 275     voice.events.insert(TTS_EVENT_PAUSE);
 276     voice.events.insert(TTS_EVENT_RESUME);
 277   }
 278 }
 279
 280 void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
 281   TtsController* controller = TtsController::GetInstance();
 282   switch (type) {
 283   case SPD_EVENT_BEGIN:
 284     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
 285     break;
 286   case SPD_EVENT_RESUME:
 287     controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
 288     break;
 289   case SPD_EVENT_END:
 290     controller->OnTtsEvent(
 291         utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
 292     break;
 293   case SPD_EVENT_PAUSE:
 294     controller->OnTtsEvent(
 295         utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
 296     break;
 297   case SPD_EVENT_CANCEL:
 298     controller->OnTtsEvent(
 299         utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
 300     break;
 301   case SPD_EVENT_INDEX_MARK:
 302     controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
 303     break;
 304   }
 305 }
 306
 307 // static
 308 void TtsPlatformImplLinux::NotificationCallback(
 309     size_t msg_id, size_t client_id, SPDNotificationType type) {
 310   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 311   // be in a separate thread.
 312   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 313     current_notification_ = type;
 314     BrowserThread::PostTask(
 315         BrowserThread::UI,
 316         FROM_HERE,
 317         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 318                    base::Unretained(TtsPlatformImplLinux::GetInstance()),
 319                    type));
 320   }
 321 }
 322
 323 // static
 324 void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
 325                                                       size_t client_id,
 326                                                       SPDNotificationType state,
 327                                                       char* index_mark) {
 328   // TODO(dtseng): index_mark appears to specify an index type supplied by a
 329   // client. Need to explore how this is used before hooking it up with existing
 330   // word, sentence events.
 331   // We run Speech Dispatcher in threaded mode, so these callbacks should always
 332   // be in a separate thread.
 333   if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
 334     current_notification_ = state;
 335     BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
 336         base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
 337         base::Unretained(TtsPlatformImplLinux::GetInstance()),
 338         state));
 339   }
 340 }
 341
 342 // static
 343 TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
 344   return Singleton<TtsPlatformImplLinux,
 345                    LeakySingletonTraits<TtsPlatformImplLinux> >::get();
 346 }
 347
 348 // static
 349 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 350   return TtsPlatformImplLinux::GetInstance();
 351 }