chrome/browser/speech/tts_win.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <sapi.h>
   7
   8 #include "base/memory/singleton.h"
   9 #include "base/strings/string_number_conversions.h"
  10 #include "base/strings/utf_string_conversions.h"
  11 #include "base/values.h"
  12 #include "base/win/scoped_comptr.h"
  13 #include "chrome/browser/speech/tts_controller.h"
  14 #include "chrome/browser/speech/tts_platform.h"
  15
  16 class TtsPlatformImplWin : public TtsPlatformImpl {
  17  public:
  18   bool PlatformImplAvailable() override {
  19     return true;
  20   }
  21
  22   bool Speak(
  23       int utterance_id,
  24       const std::string& utterance,
  25       const std::string& lang,
  26       const VoiceData& voice,
  27       const UtteranceContinuousParameters& params) override;
  28
  29   bool StopSpeaking() override;
  30
  31   void Pause() override;
  32
  33   void Resume() override;
  34
  35   bool IsSpeaking() override;
  36
  37   void GetVoices(std::vector<VoiceData>* out_voices) override;
  38
  39   // Get the single instance of this class.
  40   static TtsPlatformImplWin* GetInstance();
  41
  42   static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
  43
  44  private:
  45   TtsPlatformImplWin();
  46   ~TtsPlatformImplWin() override {}
  47
  48   void OnSpeechEvent();
  49
  50   base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
  51
  52   // These apply to the current utterance only.
  53   std::wstring utterance_;
  54   int utterance_id_;
  55   int prefix_len_;
  56   ULONG stream_number_;
  57   int char_position_;
  58   bool paused_;
  59
  60   friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
  61
  62   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
  63 };
  64
  65 // static
  66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  67   return TtsPlatformImplWin::GetInstance();
  68 }
  69
  70 bool TtsPlatformImplWin::Speak(
  71     int utterance_id,
  72     const std::string& src_utterance,
  73     const std::string& lang,
  74     const VoiceData& voice,
  75     const UtteranceContinuousParameters& params) {
  76   std::wstring prefix;
  77   std::wstring suffix;
  78
  79   if (!speech_synthesizer_.get())
  80     return false;
  81
  82   // TODO(dmazzoni): support languages other than the default: crbug.com/88059
  83
  84   if (params.rate >= 0.0) {
  85     // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
  86     // linear range of -10 to 10:
  87     //   0.1 -> -10
  88     //   1.0 -> 0
  89     //  10.0 -> 10
  90     speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
  91   }
  92
  93   if (params.pitch >= 0.0) {
  94     // The TTS api allows a range of -10 to 10 for speech pitch.
  95     // TODO(dtseng): cleanup if we ever use any other properties that
  96     // require xml.
  97     std::wstring pitch_value =
  98         base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
  99     prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
 100     suffix = L"</pitch>";
 101   }
 102
 103   if (params.volume >= 0.0) {
 104     // The TTS api allows a range of 0 to 100 for speech volume.
 105     speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
 106   }
 107
 108   // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
 109
 110   utterance_ = base::UTF8ToWide(src_utterance);
 111   utterance_id_ = utterance_id;
 112   char_position_ = 0;
 113   std::wstring merged_utterance = prefix + utterance_ + suffix;
 114   prefix_len_ = prefix.size();
 115
 116   HRESULT result = speech_synthesizer_->Speak(
 117       merged_utterance.c_str(),
 118       SPF_ASYNC,
 119       &stream_number_);
 120   return (result == S_OK);
 121 }
 122
 123 bool TtsPlatformImplWin::StopSpeaking() {
 124   if (speech_synthesizer_.get()) {
 125     // Clear the stream number so that any further events relating to this
 126     // utterance are ignored.
 127     stream_number_ = 0;
 128
 129     if (IsSpeaking()) {
 130       // Stop speech by speaking the empty string with the purge flag.
 131       speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
 132     }
 133     if (paused_) {
 134       speech_synthesizer_->Resume();
 135       paused_ = false;
 136     }
 137   }
 138   return true;
 139 }
 140
 141 void TtsPlatformImplWin::Pause() {
 142   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
 143     speech_synthesizer_->Pause();
 144     paused_ = true;
 145     TtsController::GetInstance()->OnTtsEvent(
 146         utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
 147   }
 148 }
 149
 150 void TtsPlatformImplWin::Resume() {
 151   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
 152     speech_synthesizer_->Resume();
 153     paused_ = false;
 154     TtsController::GetInstance()->OnTtsEvent(
 155         utterance_id_, TTS_EVENT_RESUME, char_position_, "");
 156   }
 157 }
 158
 159 bool TtsPlatformImplWin::IsSpeaking() {
 160   if (speech_synthesizer_.get()) {
 161     SPVOICESTATUS status;
 162     HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
 163     if (result == S_OK) {
 164       if (status.dwRunningState == 0 ||  // 0 == waiting to speak
 165           status.dwRunningState == SPRS_IS_SPEAKING) {
 166         return true;
 167       }
 168     }
 169   }
 170   return false;
 171 }
 172
 173 void TtsPlatformImplWin::GetVoices(
 174     std::vector<VoiceData>* out_voices) {
 175   // TODO: get all voices, not just default voice.
 176   // http://crbug.com/88059
 177   out_voices->push_back(VoiceData());
 178   VoiceData& voice = out_voices->back();
 179   voice.native = true;
 180   voice.name = "native";
 181   voice.events.insert(TTS_EVENT_START);
 182   voice.events.insert(TTS_EVENT_END);
 183   voice.events.insert(TTS_EVENT_MARKER);
 184   voice.events.insert(TTS_EVENT_WORD);
 185   voice.events.insert(TTS_EVENT_SENTENCE);
 186   voice.events.insert(TTS_EVENT_PAUSE);
 187   voice.events.insert(TTS_EVENT_RESUME);
 188 }
 189
 190 void TtsPlatformImplWin::OnSpeechEvent() {
 191   TtsController* controller = TtsController::GetInstance();
 192   SPEVENT event;
 193   while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
 194     if (event.ulStreamNum != stream_number_)
 195       continue;
 196
 197     switch (event.eEventId) {
 198     case SPEI_START_INPUT_STREAM:
 199       controller->OnTtsEvent(
 200           utterance_id_, TTS_EVENT_START, 0, std::string());
 201       break;
 202     case SPEI_END_INPUT_STREAM:
 203       char_position_ = utterance_.size();
 204       controller->OnTtsEvent(
 205           utterance_id_, TTS_EVENT_END, char_position_, std::string());
 206       break;
 207     case SPEI_TTS_BOOKMARK:
 208       controller->OnTtsEvent(
 209           utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
 210       break;
 211     case SPEI_WORD_BOUNDARY:
 212       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 213       controller->OnTtsEvent(
 214           utterance_id_, TTS_EVENT_WORD, char_position_,
 215           std::string());
 216       break;
 217     case SPEI_SENTENCE_BOUNDARY:
 218       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 219       controller->OnTtsEvent(
 220           utterance_id_, TTS_EVENT_SENTENCE, char_position_,
 221           std::string());
 222       break;
 223     default:
 224       break;
 225     }
 226   }
 227 }
 228
 229 TtsPlatformImplWin::TtsPlatformImplWin()
 230   : utterance_id_(0),
 231     prefix_len_(0),
 232     stream_number_(0),
 233     char_position_(0),
 234     paused_(false) {
 235   speech_synthesizer_.CreateInstance(CLSID_SpVoice);
 236   if (speech_synthesizer_.get()) {
 237     ULONGLONG event_mask =
 238         SPFEI(SPEI_START_INPUT_STREAM) |
 239         SPFEI(SPEI_TTS_BOOKMARK) |
 240         SPFEI(SPEI_WORD_BOUNDARY) |
 241         SPFEI(SPEI_SENTENCE_BOUNDARY) |
 242         SPFEI(SPEI_END_INPUT_STREAM);
 243     speech_synthesizer_->SetInterest(event_mask, event_mask);
 244     speech_synthesizer_->SetNotifyCallbackFunction(
 245         TtsPlatformImplWin::SpeechEventCallback, 0, 0);
 246   }
 247 }
 248
 249 // static
 250 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
 251   return Singleton<TtsPlatformImplWin,
 252                    LeakySingletonTraits<TtsPlatformImplWin> >::get();
 253 }
 254
 255 // static
 256 void TtsPlatformImplWin::SpeechEventCallback(
 257     WPARAM w_param, LPARAM l_param) {
 258   GetInstance()->OnSpeechEvent();
 259 }