Elim cr-checkbox
[chromium-blink-merge.git] / chrome / browser / speech / tts_win.cc
blobac258205889449b9658ac9df78d4ee35313bd9d5
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <math.h>
6 #include <sapi.h>
8 #include "base/memory/singleton.h"
9 #include "base/strings/string_number_conversions.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/values.h"
12 #include "base/win/scoped_comptr.h"
13 #include "chrome/browser/speech/tts_controller.h"
14 #include "chrome/browser/speech/tts_platform.h"
16 class TtsPlatformImplWin : public TtsPlatformImpl {
17 public:
18 bool PlatformImplAvailable() override {
19 return true;
22 bool Speak(
23 int utterance_id,
24 const std::string& utterance,
25 const std::string& lang,
26 const VoiceData& voice,
27 const UtteranceContinuousParameters& params) override;
29 bool StopSpeaking() override;
31 void Pause() override;
33 void Resume() override;
35 bool IsSpeaking() override;
37 void GetVoices(std::vector<VoiceData>* out_voices) override;
39 // Get the single instance of this class.
40 static TtsPlatformImplWin* GetInstance();
42 static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
44 private:
45 TtsPlatformImplWin();
46 ~TtsPlatformImplWin() override {}
48 void OnSpeechEvent();
50 base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
52 // These apply to the current utterance only.
53 std::wstring utterance_;
54 int utterance_id_;
55 int prefix_len_;
56 ULONG stream_number_;
57 int char_position_;
58 bool paused_;
60 friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;
62 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
65 // static
66 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
67 return TtsPlatformImplWin::GetInstance();
70 bool TtsPlatformImplWin::Speak(
71 int utterance_id,
72 const std::string& src_utterance,
73 const std::string& lang,
74 const VoiceData& voice,
75 const UtteranceContinuousParameters& params) {
76 std::wstring prefix;
77 std::wstring suffix;
79 if (!speech_synthesizer_.get())
80 return false;
82 // TODO(dmazzoni): support languages other than the default: crbug.com/88059
84 if (params.rate >= 0.0) {
85 // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
86 // linear range of -10 to 10:
87 // 0.1 -> -10
88 // 1.0 -> 0
89 // 10.0 -> 10
90 speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
93 if (params.pitch >= 0.0) {
94 // The TTS api allows a range of -10 to 10 for speech pitch.
95 // TODO(dtseng): cleanup if we ever use any other properties that
96 // require xml.
97 std::wstring pitch_value =
98 base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
99 prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
100 suffix = L"</pitch>";
103 if (params.volume >= 0.0) {
104 // The TTS api allows a range of 0 to 100 for speech volume.
105 speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
108 // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
110 utterance_ = base::UTF8ToWide(src_utterance);
111 utterance_id_ = utterance_id;
112 char_position_ = 0;
113 std::wstring merged_utterance = prefix + utterance_ + suffix;
114 prefix_len_ = prefix.size();
116 HRESULT result = speech_synthesizer_->Speak(
117 merged_utterance.c_str(),
118 SPF_ASYNC,
119 &stream_number_);
120 return (result == S_OK);
123 bool TtsPlatformImplWin::StopSpeaking() {
124 if (speech_synthesizer_.get()) {
125 // Clear the stream number so that any further events relating to this
126 // utterance are ignored.
127 stream_number_ = 0;
129 if (IsSpeaking()) {
130 // Stop speech by speaking the empty string with the purge flag.
131 speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
133 if (paused_) {
134 speech_synthesizer_->Resume();
135 paused_ = false;
138 return true;
141 void TtsPlatformImplWin::Pause() {
142 if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
143 speech_synthesizer_->Pause();
144 paused_ = true;
145 TtsController::GetInstance()->OnTtsEvent(
146 utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
150 void TtsPlatformImplWin::Resume() {
151 if (speech_synthesizer_.get() && utterance_id_ && paused_) {
152 speech_synthesizer_->Resume();
153 paused_ = false;
154 TtsController::GetInstance()->OnTtsEvent(
155 utterance_id_, TTS_EVENT_RESUME, char_position_, "");
159 bool TtsPlatformImplWin::IsSpeaking() {
160 if (speech_synthesizer_.get()) {
161 SPVOICESTATUS status;
162 HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
163 if (result == S_OK) {
164 if (status.dwRunningState == 0 || // 0 == waiting to speak
165 status.dwRunningState == SPRS_IS_SPEAKING) {
166 return true;
170 return false;
173 void TtsPlatformImplWin::GetVoices(
174 std::vector<VoiceData>* out_voices) {
175 // TODO: get all voices, not just default voice.
176 // http://crbug.com/88059
177 out_voices->push_back(VoiceData());
178 VoiceData& voice = out_voices->back();
179 voice.native = true;
180 voice.name = "native";
181 voice.events.insert(TTS_EVENT_START);
182 voice.events.insert(TTS_EVENT_END);
183 voice.events.insert(TTS_EVENT_MARKER);
184 voice.events.insert(TTS_EVENT_WORD);
185 voice.events.insert(TTS_EVENT_SENTENCE);
186 voice.events.insert(TTS_EVENT_PAUSE);
187 voice.events.insert(TTS_EVENT_RESUME);
190 void TtsPlatformImplWin::OnSpeechEvent() {
191 TtsController* controller = TtsController::GetInstance();
192 SPEVENT event;
193 while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
194 if (event.ulStreamNum != stream_number_)
195 continue;
197 switch (event.eEventId) {
198 case SPEI_START_INPUT_STREAM:
199 controller->OnTtsEvent(
200 utterance_id_, TTS_EVENT_START, 0, std::string());
201 break;
202 case SPEI_END_INPUT_STREAM:
203 char_position_ = utterance_.size();
204 controller->OnTtsEvent(
205 utterance_id_, TTS_EVENT_END, char_position_, std::string());
206 break;
207 case SPEI_TTS_BOOKMARK:
208 controller->OnTtsEvent(
209 utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
210 break;
211 case SPEI_WORD_BOUNDARY:
212 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
213 controller->OnTtsEvent(
214 utterance_id_, TTS_EVENT_WORD, char_position_,
215 std::string());
216 break;
217 case SPEI_SENTENCE_BOUNDARY:
218 char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
219 controller->OnTtsEvent(
220 utterance_id_, TTS_EVENT_SENTENCE, char_position_,
221 std::string());
222 break;
223 default:
224 break;
229 TtsPlatformImplWin::TtsPlatformImplWin()
230 : utterance_id_(0),
231 prefix_len_(0),
232 stream_number_(0),
233 char_position_(0),
234 paused_(false) {
235 speech_synthesizer_.CreateInstance(CLSID_SpVoice);
236 if (speech_synthesizer_.get()) {
237 ULONGLONG event_mask =
238 SPFEI(SPEI_START_INPUT_STREAM) |
239 SPFEI(SPEI_TTS_BOOKMARK) |
240 SPFEI(SPEI_WORD_BOUNDARY) |
241 SPFEI(SPEI_SENTENCE_BOUNDARY) |
242 SPFEI(SPEI_END_INPUT_STREAM);
243 speech_synthesizer_->SetInterest(event_mask, event_mask);
244 speech_synthesizer_->SetNotifyCallbackFunction(
245 TtsPlatformImplWin::SpeechEventCallback, 0, 0);
249 // static
250 TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
251 return base::Singleton<TtsPlatformImplWin,
252 base::LeakySingletonTraits<TtsPlatformImplWin>>::get();
255 // static
256 void TtsPlatformImplWin::SpeechEventCallback(
257 WPARAM w_param, LPARAM l_param) {
258 GetInstance()->OnSpeechEvent();