1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/mac/scoped_nsobject.h"
8 #include "base/memory/singleton.h"
9 #include "base/strings/sys_string_conversions.h"
10 #include "base/values.h"
11 #include "chrome/browser/speech/tts_controller.h"
12 #include "chrome/browser/speech/tts_platform.h"
13 #include "extensions/browser/extension_function.h"
15 #import <Cocoa/Cocoa.h>
// Forward declaration so the delegate interface below can hold a pointer to
// the C++ platform implementation before that class is declared.
17 class TtsPlatformImplMac;
// Objective-C object that adopts NSSpeechSynthesizerDelegate and relays the
// synthesizer callbacks it receives to a TtsPlatformImplMac through
// TtsPlatformImplMac::OnSpeechEvent().
19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
// Raw pointer to the platform implementation that receives the callbacks;
// assigned once in the initializer.
21 TtsPlatformImplMac* ttsImplMac_; // weak.
// Initializes the delegate and stores |ttsImplMac| in ttsImplMac_.
24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
28 // Subclass of NSSpeechSynthesizer that takes an utterance
29 // string on initialization, retains it and only allows it
32 // We construct a new NSSpeechSynthesizer for each utterance, for
34 // 1. To associate delegate callbacks with a particular utterance,
35 // without assuming anything undocumented about the protocol.
36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
37 // where Nuance voices don't retain the utterance string and
38 // crash when trying to call willSpeakWord.
39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
// Holds the retained utterance string handed to -initWithUtterance:.
41 base::scoped_nsobject<NSString> utterance_;
// Initializes the synthesizer and retains |utterance| into utterance_.
45 - (id)initWithUtterance:(NSString*)utterance;
// Starts speaking the string stored in utterance_ by calling the superclass
// -startSpeakingString: with it.
46 - (bool)startSpeakingRetainedUtterance;
// Same selector as the superclass method that -startSpeakingRetainedUtterance
// invokes via super.
// NOTE(review): presumably overridden so callers cannot speak an arbitrary
// string on this object -- confirm against the implementation.
47 - (bool)startSpeakingString:(NSString*)utterance;
// TtsPlatformImpl subclass for macOS. It drives a SingleUseSpeechSynthesizer
// (an NSSpeechSynthesizer subclass) and reports speech events to
// TtsController. The instance is obtained through GetInstance(), which is
// backed by base::Singleton.
51 class TtsPlatformImplMac : public TtsPlatformImpl {
// Always reports the platform implementation as available.
53 bool PlatformImplAvailable() override { return true; }
// Starts speaking |utterance| with a newly created synthesizer configured
// from |voice| and |params|, and records |utterance_id| for later events.
55 bool Speak(int utterance_id,
56 const std::string& utterance,
57 const std::string& lang,
58 const VoiceData& voice,
59 const UtteranceContinuousParameters& params) override;
// Stops the current synthesizer, if any, and drops the reference to it.
61 bool StopSpeaking() override;
// Pauses speech immediately and notifies TtsController with TTS_EVENT_PAUSE.
63 void Pause() override;
// Continues paused speech and notifies TtsController with TTS_EVENT_RESUME.
65 void Resume() override;
// Queries the current synthesizer's isSpeaking state when one exists.
67 bool IsSpeaking() override;
// Appends a VoiceData entry for each available voice to |out_voices|.
69 void GetVoices(std::vector<VoiceData>* out_voices) override;
71 // Called by ChromeTtsDelegate when we get a callback from the
72 // native speech engine.
73 void OnSpeechEvent(NSSpeechSynthesizer* sender,
74 TtsEventType event_type,
76 const std::string& error_message);
78 // Get the single instance of this class.
79 static TtsPlatformImplMac* GetInstance();
83 ~TtsPlatformImplMac() override;
// Synthesizer for the current utterance; replaced by every Speak() call and
// cleared by StopSpeaking().
85 base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
// Delegate created in the constructor and installed on each synthesizer.
86 base::scoped_nsobject<ChromeTtsDelegate> delegate_;
// Copy of the text passed to the most recent Speak() call; its size is used
// as the character index reported for TTS_EVENT_END.
88 std::string utterance_;
// Grants the singleton traits access to this class's non-public members.
92 friend struct base::DefaultSingletonTraits<TtsPlatformImplMac>;
94 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
// Returns the platform TTS implementation; in this file that is the
// TtsPlatformImplMac instance.
98 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
99 return TtsPlatformImplMac::GetInstance();
// Speaks |utterance| using a freshly created SingleUseSpeechSynthesizer.
// The synthesizer gets delegate_ as its delegate, optionally the voice named
// by |voice.native_voice_identifier|, and rate/pitch/volume taken from
// |params| when those values are non-negative. |lang| does not appear to be
// consulted here (see the TODO about non-default languages).
102 bool TtsPlatformImplMac::Speak(
104 const std::string& utterance,
105 const std::string& lang,
106 const VoiceData& voice,
107 const UtteranceContinuousParameters& params) {
108 // TODO: convert SSML to SAPI xml. http://crbug.com/88072
// Keep a copy of the text: OnSpeechEvent() uses its size as the character
// index for TTS_EVENT_END.
109 utterance_ = utterance;
// NOTE(review): confirm what stringWithUTF8String: yields when |utterance|
// is not valid UTF-8, and that the synthesizer tolerates that result.
112 NSString* utterance_nsstring =
113 [NSString stringWithUTF8String:utterance_.c_str()];
115 // Deliberately construct a new speech synthesizer every time Speak is
116 // called, otherwise there's no way to know whether calls to the delegate
117 // apply to the current utterance or a previous utterance. In
118 // experimentation, the overhead of constructing and destructing a
119 // NSSpeechSynthesizer is minimal.
120 speech_synthesizer_.reset(
121 [[SingleUseSpeechSynthesizer alloc]
122 initWithUtterance:utterance_nsstring]);
// Route this synthesizer's callbacks to the shared delegate object.
123 [speech_synthesizer_ setDelegate:delegate_];
// Only select a specific voice when the caller supplied a native identifier.
125 if (!voice.native_voice_identifier.empty()) {
126 NSString* native_voice_identifier =
127 [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
128 [speech_synthesizer_ setVoice:native_voice_identifier];
// Remember which utterance the events reported from now on belong to.
131 utterance_id_ = utterance_id;
133 // TODO: support languages other than the default: crbug.com/88059
// A negative rate is skipped; otherwise the rate is multiplied by 200 and
// passed to the synthesizer as an int-valued NSNumber.
135 if (params.rate >= 0.0) {
136 // The TTS api defines rate via words per minute. Let 200 be the default.
138 setObject:[NSNumber numberWithInt:params.rate * 200]
139 forProperty:NSSpeechRateProperty error:nil];
// A negative pitch is skipped; otherwise the voice's base pitch is scaled by
// (0.5 * pitch + 0.5), i.e. 0.0 -> 50%, 1.0 -> 100%, 2.0 -> 150%.
142 if (params.pitch >= 0.0) {
143 // The input is a float from 0.0 to 2.0, with 1.0 being the default.
144 // Get the default pitch for this voice and modulate it by 50% - 150%.
146 NSNumber* defaultPitchObj =
147 [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
// Fall back to 48 when the synthesizer returns no base pitch object.
149 int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
150 int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
152 setObject:[NSNumber numberWithInt:newPitch]
153 forProperty:NSSpeechPitchBaseProperty error:nil];
// A negative volume is skipped; otherwise the value is passed through
// unchanged as a float-valued NSNumber.
156 if (params.volume >= 0.0) {
158 setObject: [NSNumber numberWithFloat:params.volume]
159 forProperty:NSSpeechVolumeProperty error:nil];
// Start speaking the string the synthesizer retained at construction.
162 bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
// Report TTS_EVENT_START for this utterance at character index 0.
164 TtsController* controller = TtsController::GetInstance();
165 controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
// Stops the current synthesizer, if there is one, and releases this object's
// reference to it. Once speech_synthesizer_ is cleared, the sender comparison
// in OnSpeechEvent() no longer matches the old synthesizer.
170 bool TtsPlatformImplMac::StopSpeaking() {
171 if (speech_synthesizer_.get()) {
172 [speech_synthesizer_ stopSpeaking];
173 speech_synthesizer_.reset(nil);
// Pauses speech at NSSpeechImmediateBoundary and notifies TtsController with
// TTS_EVENT_PAUSE at last_char_index_. Does nothing unless a synthesizer
// exists, utterance_id_ is nonzero, and paused_ is false.
179 void TtsPlatformImplMac::Pause() {
180 if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
181 [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
183 TtsController::GetInstance()->OnTtsEvent(
184 utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
// Continues paused speech and notifies TtsController with TTS_EVENT_RESUME at
// last_char_index_. Does nothing unless a synthesizer exists, utterance_id_
// is nonzero, and paused_ is true.
188 void TtsPlatformImplMac::Resume() {
189 if (speech_synthesizer_.get() && utterance_id_ && paused_) {
190 [speech_synthesizer_ continueSpeaking];
192 TtsController::GetInstance()->OnTtsEvent(
193 utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
// Reports whether speech is in progress: when a synthesizer exists, the
// answer comes from its isSpeaking method.
197 bool TtsPlatformImplMac::IsSpeaking() {
198 if (speech_synthesizer_)
199 return [speech_synthesizer_ isSpeaking];
// Appends one VoiceData entry to |outVoices| for each voice reported by
// NSSpeechSynthesizer, listing the default voice ahead of the others. Each
// entry carries the native identifier, display name, language, gender and a
// set of event types.
203 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
204 NSArray* voices = [NSSpeechSynthesizer availableVoices];
206 // Create a new temporary array of the available voices with
207 // the default voice first.
208 NSMutableArray* orderedVoices =
209 [NSMutableArray arrayWithCapacity:[voices count]];
210 NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
212 [orderedVoices addObject:defaultVoice];
// Add the remaining voices, skipping the default one handled above.
214 for (NSString* voiceIdentifier in voices) {
215 if (![voiceIdentifier isEqualToString:defaultVoice])
216 [orderedVoices addObject:voiceIdentifier];
219 for (NSString* voiceIdentifier in orderedVoices) {
// Append an empty entry and fill it in place through the reference.
220 outVoices->push_back(VoiceData());
221 VoiceData& data = outVoices->back();
// Look up the attribute dictionary for this voice and pull out the fields
// used below.
223 NSDictionary* attributes =
224 [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
225 NSString* name = [attributes objectForKey:NSVoiceName];
226 NSString* gender = [attributes objectForKey:NSVoiceGender];
227 NSString* localeIdentifier =
228 [attributes objectForKey:NSVoiceLocaleIdentifier];
231 data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
232 data.name = base::SysNSStringToUTF8(name);
// Split the locale identifier into components to derive the language value.
234 NSDictionary* localeComponents =
235 [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
236 NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
237 NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
// With both a language and a country, a "language-country" string is built;
// the language-only assignment follows.
238 if (language && country) {
240 [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
242 data.lang = base::SysNSStringToUTF8(language);
// Translate the Cocoa gender constant into the TTS gender enumeration.
244 if ([gender isEqualToString:NSVoiceGenderMale])
245 data.gender = TTS_GENDER_MALE;
246 else if ([gender isEqualToString:NSVoiceGenderFemale])
247 data.gender = TTS_GENDER_FEMALE;
249 data.gender = TTS_GENDER_NONE;
// Record the event types for this voice (presumably the events this
// platform implementation can report -- confirm against VoiceData).
250 data.events.insert(TTS_EVENT_START);
251 data.events.insert(TTS_EVENT_END);
252 data.events.insert(TTS_EVENT_WORD);
253 data.events.insert(TTS_EVENT_ERROR);
254 data.events.insert(TTS_EVENT_CANCELLED);
255 data.events.insert(TTS_EVENT_INTERRUPTED);
256 data.events.insert(TTS_EVENT_PAUSE);
257 data.events.insert(TTS_EVENT_RESUME);
// Relays an event from the native synthesizer to TtsController for the
// current utterance_id_. |sender| is compared against the current
// synthesizer so that callbacks from an earlier utterance can be told apart.
261 void TtsPlatformImplMac::OnSpeechEvent(
262 NSSpeechSynthesizer* sender,
263 TtsEventType event_type,
265 const std::string& error_message) {
266 // Don't send events from an utterance that's already completed.
267 // This depends on the fact that we construct a new NSSpeechSynthesizer
268 // each time we call Speak.
269 if (sender != speech_synthesizer_.get())
// For an END event, report the full size of the stored utterance as the
// character index, replacing the value that was passed in.
272 if (event_type == TTS_EVENT_END)
273 char_index = utterance_.size();
274 TtsController* controller = TtsController::GetInstance();
275 controller->OnTtsEvent(
276 utterance_id_, event_type, char_index, error_message);
// Remember the index just reported; Pause() and Resume() send it with their
// notifications.
277 last_char_index_ = char_index;
// Constructor: creates the ChromeTtsDelegate that relays synthesizer
// callbacks to this object and stores it in delegate_.
280 TtsPlatformImplMac::TtsPlatformImplMac() {
284 delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
// Destructor for the macOS TTS platform implementation.
287 TtsPlatformImplMac::~TtsPlatformImplMac() {
// Returns the instance managed by base::Singleton<TtsPlatformImplMac>.
291 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
292 return base::Singleton<TtsPlatformImplMac>::get();
// Delegate implementation: each NSSpeechSynthesizerDelegate callback below is
// translated into a TtsEventType and passed to
// TtsPlatformImplMac::OnSpeechEvent() together with the sending synthesizer.
295 @implementation ChromeTtsDelegate
// Stores |ttsImplMac| as a plain pointer; no ownership is taken here.
297 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
298 if ((self = [super init])) {
299 ttsImplMac_ = ttsImplMac;
// Reports TTS_EVENT_END. The 0 passed as the character index is replaced
// with the utterance size inside OnSpeechEvent(). |finished_speaking| is not
// consulted.
304 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
305 didFinishSpeaking:(BOOL)finished_speaking {
306 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
// Reports TTS_EVENT_WORD using the start location of |character_range| as
// the character index. |string| is not used.
309 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
310 willSpeakWord:(NSRange)character_range
311 ofString:(NSString*)string {
312 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
313 character_range.location, "");
// Reports TTS_EVENT_ERROR at |character_index| after converting |message| to
// a UTF-8 std::string for the event.
316 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
317 didEncounterErrorAtIndex:(NSUInteger)character_index
318 ofString:(NSString*)string
319 message:(NSString*)message {
320 std::string message_utf8 = base::SysNSStringToUTF8(message);
321 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
327 @implementation SingleUseSpeechSynthesizer
// Initializer: retains |utterance| and stores that retained reference in
// utterance_ for -startSpeakingRetainedUtterance to use.
329 - (id)initWithUtterance:(NSString*)utterance {
332 utterance_.reset([utterance retain]);
// Speaks the string held in utterance_. The call goes to the superclass
// -startSpeakingString: directly rather than to this class's own method of
// the same name, and its result is returned to the caller.
338 - (bool)startSpeakingRetainedUtterance {
342 return [super startSpeakingString:utterance_];
345 - (bool)startSpeakingString:(NSString*)utterance {