chrome/browser/speech/tts_win.cc

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <math.h>
   6 #include <sapi.h>
   7
   8 #include "base/memory/singleton.h"
   9 #include "base/strings/string_number_conversions.h"
  10 #include "base/utf_string_conversions.h"
  11 #include "base/values.h"
  12 #include "base/win/scoped_comptr.h"
  13 #include "chrome/browser/speech/tts_controller.h"
  14 #include "chrome/browser/speech/tts_platform.h"
  15
  16 class TtsPlatformImplWin : public TtsPlatformImpl {
  17  public:
  18   virtual bool PlatformImplAvailable() {
  19     return true;
  20   }
  21
  22   virtual bool Speak(
  23       int utterance_id,
  24       const std::string& utterance,
  25       const std::string& lang,
  26       const UtteranceContinuousParameters& params);
  27
  28   virtual bool StopSpeaking();
  29
  30   virtual bool IsSpeaking();
  31
  32   virtual bool SendsEvent(TtsEventType event_type);
  33
  34   // Get the single instance of this class.
  35   static TtsPlatformImplWin* GetInstance();
  36
  37   static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
  38
  39  private:
  40   TtsPlatformImplWin();
  41   virtual ~TtsPlatformImplWin() {}
  42
  43   void OnSpeechEvent();
  44
  45   base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
  46
  47   // These apply to the current utterance only.
  48   std::wstring utterance_;
  49   int utterance_id_;
  50   int prefix_len_;
  51   ULONG stream_number_;
  52   int char_position_;
  53
  54   friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
  55
  56   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
  57 };
  58
  59 // static
  60 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  61   return TtsPlatformImplWin::GetInstance();
  62 }
  63
  64 bool TtsPlatformImplWin::Speak(
  65     int utterance_id,
  66     const std::string& src_utterance,
  67     const std::string& lang,
  68     const UtteranceContinuousParameters& params) {
  69   std::wstring prefix;
  70   std::wstring suffix;
  71
  72   if (!speech_synthesizer_.get())
  73     return false;
  74
  75   // TODO(dmazzoni): support languages other than the default: crbug.com/88059
  76
  77   if (params.rate >= 0.0) {
  78     // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
  79     // linear range of -10 to 10:
  80     //   0.1 -> -10
  81     //   1.0 -> 0
  82     //  10.0 -> 10
  83     speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
  84   }
  85
  86   if (params.pitch >= 0.0) {
  87     // The TTS api allows a range of -10 to 10 for speech pitch.
  88     // TODO(dtseng): cleanup if we ever use any other properties that
  89     // require xml.
  90     std::wstring pitch_value =
  91         base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
  92     prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
  93     suffix = L"</pitch>";
  94   }
  95
  96   if (params.volume >= 0.0) {
  97     // The TTS api allows a range of 0 to 100 for speech volume.
  98     speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
  99   }
 100
 101   // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
 102
 103   utterance_ = UTF8ToWide(src_utterance);
 104   utterance_id_ = utterance_id;
 105   char_position_ = 0;
 106   std::wstring merged_utterance = prefix + utterance_ + suffix;
 107   prefix_len_ = prefix.size();
 108
 109   HRESULT result = speech_synthesizer_->Speak(
 110       merged_utterance.c_str(),
 111       SPF_ASYNC,
 112       &stream_number_);
 113   return (result == S_OK);
 114 }
 115
 116 bool TtsPlatformImplWin::StopSpeaking() {
 117   if (speech_synthesizer_.get()) {
 118     // Clear the stream number so that any further events relating to this
 119     // utterance are ignored.
 120     stream_number_ = 0;
 121
 122     if (IsSpeaking()) {
 123       // Stop speech by speaking the empty string with the purge flag.
 124       speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
 125     }
 126   }
 127   return true;
 128 }
 129
 130 bool TtsPlatformImplWin::IsSpeaking() {
 131   if (speech_synthesizer_.get()) {
 132     SPVOICESTATUS status;
 133     HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
 134     if (result == S_OK) {
 135       if (status.dwRunningState == 0 ||  // 0 == waiting to speak
 136           status.dwRunningState == SPRS_IS_SPEAKING) {
 137         return true;
 138       }
 139     }
 140   }
 141   return false;
 142 }
 143
 144 bool TtsPlatformImplWin::SendsEvent(TtsEventType event_type) {
 145   return (event_type == TTS_EVENT_START ||
 146           event_type == TTS_EVENT_END ||
 147           event_type == TTS_EVENT_MARKER ||
 148           event_type == TTS_EVENT_WORD ||
 149           event_type == TTS_EVENT_SENTENCE);
 150 }
 151
 152 void TtsPlatformImplWin::OnSpeechEvent() {
 153   TtsController* controller = TtsController::GetInstance();
 154   SPEVENT event;
 155   while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
 156     if (event.ulStreamNum != stream_number_)
 157       continue;
 158
 159     switch (event.eEventId) {
 160     case SPEI_START_INPUT_STREAM:
 161       controller->OnTtsEvent(
 162           utterance_id_, TTS_EVENT_START, 0, std::string());
 163       break;
 164     case SPEI_END_INPUT_STREAM:
 165       char_position_ = utterance_.size();
 166       controller->OnTtsEvent(
 167           utterance_id_, TTS_EVENT_END, char_position_, std::string());
 168       break;
 169     case SPEI_TTS_BOOKMARK:
 170       controller->OnTtsEvent(
 171           utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
 172       break;
 173     case SPEI_WORD_BOUNDARY:
 174       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 175       controller->OnTtsEvent(
 176           utterance_id_, TTS_EVENT_WORD, char_position_,
 177           std::string());
 178       break;
 179     case SPEI_SENTENCE_BOUNDARY:
 180       char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 181       controller->OnTtsEvent(
 182           utterance_id_, TTS_EVENT_SENTENCE, char_position_,
 183           std::string());
 184       break;
 185     }
 186   }
 187 }
 188
 189 TtsPlatformImplWin::TtsPlatformImplWin()
 190   : utterance_id_(0),
 191     prefix_len_(0),
 192     stream_number_(0),
 193     char_position_(0) {
 194   speech_synthesizer_.CreateInstance(CLSID_SpVoice);
 195   if (speech_synthesizer_.get()) {
 196     ULONGLONG event_mask =
 197         SPFEI(SPEI_START_INPUT_STREAM) |
 198         SPFEI(SPEI_TTS_BOOKMARK) |
 199         SPFEI(SPEI_WORD_BOUNDARY) |
 200         SPFEI(SPEI_SENTENCE_BOUNDARY) |
 201         SPFEI(SPEI_END_INPUT_STREAM);
 202     speech_synthesizer_->SetInterest(event_mask, event_mask);
 203     speech_synthesizer_->SetNotifyCallbackFunction(
 204         TtsPlatformImplWin::SpeechEventCallback, 0, 0);
 205   }
 206 }
 207
 208 // static
 209 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
 210   return Singleton<TtsPlatformImplWin,
 211                    LeakySingletonTraits<TtsPlatformImplWin> >::get();
 212 }
 213
 214 // static
 215 void TtsPlatformImplWin::SpeechEventCallback(
 216     WPARAM w_param, LPARAM l_param) {
 217   GetInstance()->OnSpeechEvent();
 218 }