1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/mac/scoped_nsobject.h"
8 #include "base/memory/singleton.h"
9 #include "base/strings/sys_string_conversions.h"
10 #include "base/values.h"
11 #include "chrome/browser/extensions/extension_function.h"
12 #include "chrome/browser/speech/tts_controller.h"
13 #include "chrome/browser/speech/tts_platform.h"
15 #import <Cocoa/Cocoa.h>
// Forward declaration so the delegate interface below can hold a pointer to
// the C++ platform implementation that is declared later in this file.
17 class TtsPlatformImplMac;
// Objective-C delegate for NSSpeechSynthesizer. Its implementation forwards
// each delegate callback to TtsPlatformImplMac::OnSpeechEvent().
19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
// Back-pointer to the platform implementation; stored as a plain pointer by
// the initializer and never released by this class.
21 TtsPlatformImplMac* ttsImplMac_; // weak.
// Initializer. Records |ttsImplMac| as the target for forwarded speech events.
24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
28 // Subclass of NSSpeechSynthesizer that takes an utterance
29 // string on initialization, retains it and only allows it to be spoken once.
32 // We construct a new NSSpeechSynthesizer for each utterance, for two reasons:
34 // 1. To associate delegate callbacks with a particular utterance,
35 // without assuming anything undocumented about the protocol.
36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
37 // where Nuance voices don't retain the utterance string and
38 // crash when trying to call willSpeakWord.
// NSSpeechSynthesizer subclass that holds its own reference to the utterance
// string it was initialized with and speaks that stored string.
39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
// The utterance passed to -initWithUtterance:; the implementation retains it
// into this member so the string stays alive while it is being spoken.
41 base::scoped_nsobject<NSString> utterance_;
// Initializer. Stores |utterance| for a later call to
// -startSpeakingRetainedUtterance.
45 - (id)initWithUtterance:(NSString*)utterance;
// Starts speaking the stored utterance by calling the superclass's
// -startSpeakingString: with it; returns that call's result.
46 - (bool)startSpeakingRetainedUtterance;
// Redeclaration of the inherited NSSpeechSynthesizer method.
// NOTE(review): the override's body is not visible here; presumably it
// prevents callers from speaking an arbitrary string directly - confirm.
47 - (bool)startSpeakingString:(NSString*)utterance;
// Mac implementation of TtsPlatformImpl, backed by NSSpeechSynthesizer.
// The instance is obtained through GetInstance(), which returns the object
// managed by Singleton<TtsPlatformImplMac>.
51 class TtsPlatformImplMac : public TtsPlatformImpl {
53 virtual bool PlatformImplAvailable() OVERRIDE {
// Speak() parameters: the utterance text, the requested language, the voice
// to use, and the rate / pitch / volume settings. The definition creates a
// new synthesizer for every call.
59 const std::string& utterance,
60 const std::string& lang,
61 const VoiceData& voice,
62 const UtteranceContinuousParameters& params) OVERRIDE;
// Stops the current utterance (if any) and drops its synthesizer.
64 virtual bool StopSpeaking() OVERRIDE;
// Pauses the current utterance and reports TTS_EVENT_PAUSE.
66 virtual void Pause() OVERRIDE;
// Continues a paused utterance and reports TTS_EVENT_RESUME.
68 virtual void Resume() OVERRIDE;
// Answers with +[NSSpeechSynthesizer isAnyApplicationSpeaking], i.e. the
// query is not limited to this object's own synthesizer.
70 virtual bool IsSpeaking() OVERRIDE;
// Appends one VoiceData per available voice to |out_voices|, default voice
// first.
72 virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
74 // Called by ChromeTtsDelegate when we get a callback from the
75 // native speech engine.
76 void OnSpeechEvent(NSSpeechSynthesizer* sender,
77 TtsEventType event_type,
79 const std::string& error_message);
81 // Get the single instance of this class.
82 static TtsPlatformImplMac* GetInstance();
86 virtual ~TtsPlatformImplMac();
// Synthesizer for the current utterance. Replaced on every Speak() and
// cleared by StopSpeaking(); OnSpeechEvent() compares callback senders
// against it.
88 base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
// Delegate created once in the constructor and installed on each new
// synthesizer in Speak().
89 base::scoped_nsobject<ChromeTtsDelegate> delegate_;
// Copy of the text passed to the most recent Speak(); its size is used as
// the character index reported with TTS_EVENT_END.
91 std::string utterance_;
// True once TTS_EVENT_START has been reported for the current utterance.
// Speak() clears it; OnSpeechEvent() sets it when the first word event
// arrives. The constructor initializes it to true.
92 bool sent_start_event_;
// Gives the Singleton traits access to this class's constructor and
// destructor (presumably non-public - the access specifiers are not
// visible here).
96 friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
98 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
// Definition of the generic TtsPlatformImpl accessor for this platform:
// it hands back the TtsPlatformImplMac singleton.
102 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
103 return TtsPlatformImplMac::GetInstance();
// Starts speaking |utterance| on a freshly created synthesizer.
//   utterance - UTF-8 text to speak; a copy is kept in utterance_.
//   lang      - not read by the visible code (see the language TODO below).
//   voice     - when native_voice_identifier is non-empty it is passed to
//               -setVoice:.
//   params    - rate, pitch and volume; each is applied only when >= 0.0.
// Returns the result of -startSpeakingRetainedUtterance.
106 bool TtsPlatformImplMac::Speak(
108 const std::string& utterance,
109 const std::string& lang,
110 const VoiceData& voice,
111 const UtteranceContinuousParameters& params) {
112 // TODO: convert SSML to SAPI xml. http://crbug.com/88072
113 utterance_ = utterance;
// NOTE(review): +stringWithUTF8String: can return nil when the bytes are not
// valid UTF-8, in which case the synthesizer below would be initialized with
// a nil utterance - confirm callers always pass valid UTF-8.
116 NSString* utterance_nsstring =
117 [NSString stringWithUTF8String:utterance_.c_str()];
119 // Deliberately construct a new speech synthesizer every time Speak is
120 // called, otherwise there's no way to know whether calls to the delegate
121 // apply to the current utterance or a previous utterance. In
122 // experimentation, the overhead of constructing and destructing a
123 // NSSpeechSynthesizer is minimal.
124 speech_synthesizer_.reset(
125 [[SingleUseSpeechSynthesizer alloc]
126 initWithUtterance:utterance_nsstring]);
127 [speech_synthesizer_ setDelegate:delegate_];
// Select the requested native voice; with an empty identifier the
// synthesizer's voice is left untouched.
129 if (!voice.native_voice_identifier.empty()) {
130 NSString* native_voice_identifier =
131 [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
132 [speech_synthesizer_ setVoice:native_voice_identifier];
// Record the utterance id used for all events reported to TtsController,
// and arm the synthetic START event (see OnSpeechEvent()).
135 utterance_id_ = utterance_id;
136 sent_start_event_ = false;
138 // TODO: support languages other than the default: crbug.com/88059
// A negative rate means "leave the synthesizer's rate alone". Otherwise the
// rate is a multiplier of 200, and the product is converted to int by
// -numberWithInt: (fractional part dropped, assuming rate is floating-point).
140 if (params.rate >= 0.0) {
141 // The TTS api defines rate via words per minute. Let 200 be the default.
143 setObject:[NSNumber numberWithInt:params.rate * 200]
144 forProperty:NSSpeechRateProperty error:nil];
// A negative pitch means "leave the synthesizer's pitch alone".
147 if (params.pitch >= 0.0) {
148 // The input is a float from 0.0 to 2.0, with 1.0 being the default.
149 // Get the default pitch for this voice and modulate it by 50% - 150%.
151 NSNumber* defaultPitchObj =
152 [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
// 48 is the fallback base pitch used when the synthesizer does not report
// one. The scale factor 0.5 * pitch + 0.5 maps 0.0 -> 0.5, 1.0 -> 1.0 and
// 2.0 -> 1.5.
154 int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
155 int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
157 setObject:[NSNumber numberWithInt:newPitch]
158 forProperty:NSSpeechPitchBaseProperty error:nil];
// A negative volume means "leave the synthesizer's volume alone"; otherwise
// the value is passed through unchanged.
161 if (params.volume >= 0.0) {
163 setObject: [NSNumber numberWithFloat:params.volume]
164 forProperty:NSSpeechVolumeProperty error:nil];
// Errors from the property setters above are ignored (error:nil); only the
// result of starting speech is reported to the caller.
167 return [speech_synthesizer_ startSpeakingRetainedUtterance];
// Stops the current utterance, if there is one, and releases its
// synthesizer. Once speech_synthesizer_ is cleared, a later delegate callback
// from the old synthesizer no longer matches the sender check in
// OnSpeechEvent().
170 bool TtsPlatformImplMac::StopSpeaking() {
171 if (speech_synthesizer_.get()) {
172 [speech_synthesizer_ stopSpeaking];
173 speech_synthesizer_.reset(nil);
// Pauses the current utterance immediately (no waiting for a word or
// sentence boundary) and reports TTS_EVENT_PAUSE at the last character index
// seen by OnSpeechEvent(). Does nothing unless there is a synthesizer, a
// non-zero utterance id, and speech is not already paused.
179 void TtsPlatformImplMac::Pause() {
180 if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
181 [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
183 TtsController::GetInstance()->OnTtsEvent(
184 utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
// Continues a paused utterance and reports TTS_EVENT_RESUME at the last
// character index seen by OnSpeechEvent(). Does nothing unless there is a
// synthesizer, a non-zero utterance id, and speech is currently paused.
188 void TtsPlatformImplMac::Resume() {
189 if (speech_synthesizer_.get() && utterance_id_ && paused_) {
190 [speech_synthesizer_ continueSpeaking];
192 TtsController::GetInstance()->OnTtsEvent(
193 utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
// Returns +[NSSpeechSynthesizer isAnyApplicationSpeaking]. This is a
// class-level query, so the answer is not specific to the synthesizer held
// in speech_synthesizer_.
197 bool TtsPlatformImplMac::IsSpeaking() {
198 return [NSSpeechSynthesizer isAnyApplicationSpeaking];
// Appends one VoiceData entry to |outVoices| for every voice reported by
// NSSpeechSynthesizer, with the system default voice listed first.
201 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
202 NSArray* voices = [NSSpeechSynthesizer availableVoices];
204 // Create a new temporary array of the available voices with
205 // the default voice first.
206 NSMutableArray* orderedVoices =
207 [NSMutableArray arrayWithCapacity:[voices count]];
208 NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
// NOTE(review): -addObject: raises NSInvalidArgumentException when given
// nil - confirm +defaultVoice cannot return nil here, or guard this call.
209 [orderedVoices addObject:defaultVoice];
// Add every other voice after the default, skipping the default itself so
// that it is not listed twice.
210 for (NSString* voiceIdentifier in voices) {
211 if (![voiceIdentifier isEqualToString:defaultVoice])
212 [orderedVoices addObject:voiceIdentifier];
// Fill in one VoiceData, in place at the back of the output vector, per
// voice identifier.
215 for (NSString* voiceIdentifier in orderedVoices) {
216 outVoices->push_back(VoiceData());
217 VoiceData& data = outVoices->back();
219 NSDictionary* attributes =
220 [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
221 NSString* name = [attributes objectForKey:NSVoiceName];
222 NSString* gender = [attributes objectForKey:NSVoiceGender];
223 NSString* localeIdentifier =
224 [attributes objectForKey:NSVoiceLocaleIdentifier];
// The native identifier is what Speak() later passes to -setVoice:.
227 data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
228 data.name = base::SysNSStringToUTF8(name);
// Split the voice's locale identifier into language and country codes.
230 NSDictionary* localeComponents =
231 [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
232 NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
233 NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
// When both codes are present they are joined as "<language>-<country>".
234 if (language && country) {
236 [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
// Language code alone (presumably the fallback when the country code is
// missing - confirm).
238 data.lang = base::SysNSStringToUTF8(language);
// Translate the voice's gender attribute into the TTS gender enum.
240 if ([gender isEqualToString:NSVoiceGenderMale])
241 data.gender = TTS_GENDER_MALE;
242 else if ([gender isEqualToString:NSVoiceGenderFemale])
243 data.gender = TTS_GENDER_FEMALE;
245 data.gender = TTS_GENDER_NONE;
// Every voice is advertised with the same set of supported event types.
246 data.events.insert(TTS_EVENT_START);
247 data.events.insert(TTS_EVENT_END);
248 data.events.insert(TTS_EVENT_WORD);
249 data.events.insert(TTS_EVENT_ERROR);
250 data.events.insert(TTS_EVENT_CANCELLED);
251 data.events.insert(TTS_EVENT_INTERRUPTED);
252 data.events.insert(TTS_EVENT_PAUSE);
253 data.events.insert(TTS_EVENT_RESUME);
// Receives a delegate callback from the native speech engine and relays it
// to TtsController for the current utterance.
//   sender        - the synthesizer that produced the callback.
//   event_type    - the TTS event to report.
//   error_message - passed through to TtsController unchanged.
// A character index is also supplied by the delegate; for TTS_EVENT_END it
// is replaced by the utterance length.
257 void TtsPlatformImplMac::OnSpeechEvent(
258 NSSpeechSynthesizer* sender,
259 TtsEventType event_type,
261 const std::string& error_message) {
262 // Don't send events from an utterance that's already completed.
263 // This depends on the fact that we construct a new NSSpeechSynthesizer
264 // each time we call Speak.
265 if (sender != speech_synthesizer_.get())
// An END event is always reported at the end of the utterance text,
// whatever index the delegate passed in.
268 if (event_type == TTS_EVENT_END)
269 char_index = utterance_.size();
270 TtsController* controller = TtsController::GetInstance();
// The delegate has no dedicated "started" callback, so TTS_EVENT_START is
// synthesized once, immediately before the first word event.
271 if (event_type == TTS_EVENT_WORD && !sent_start_event_) {
272 controller->OnTtsEvent(
273 utterance_id_, TTS_EVENT_START, 0, "");
274 sent_start_event_ = true;
276 controller->OnTtsEvent(
277 utterance_id_, event_type, char_index, error_message);
// Remembered so Pause() and Resume() can report the current position.
278 last_char_index_ = char_index;
// Constructor. Creates the delegate that will be attached to every
// synthesizer made by Speak().
281 TtsPlatformImplMac::TtsPlatformImplMac() {
// Starts out true so no synthetic TTS_EVENT_START is emitted before the
// first Speak() call, which resets this flag to false.
283 sent_start_event_ = true;
// The delegate stores |this| as a plain back-pointer; this object holds the
// delegate in delegate_.
286 delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
// Destructor. No explicit cleanup statements are visible here.
289 TtsPlatformImplMac::~TtsPlatformImplMac() {
// Returns the instance managed by Singleton<TtsPlatformImplMac>.
293 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
294 return Singleton<TtsPlatformImplMac>::get();
// NSSpeechSynthesizerDelegate implementation that translates each native
// callback into a TtsPlatformImplMac::OnSpeechEvent() call.
297 @implementation ChromeTtsDelegate
// Stores |ttsImplMac| as the target for all forwarded events; the pointer is
// not retained or otherwise owned.
299 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
300 if ((self = [super init])) {
301 ttsImplMac_ = ttsImplMac;
// Speech ended: report TTS_EVENT_END. |finished_speaking| is not consulted,
// and the 0 index passed here is replaced by the utterance length inside
// OnSpeechEvent().
306 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
307 didFinishSpeaking:(BOOL)finished_speaking {
308 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
// About to speak a word: report TTS_EVENT_WORD at the start of the word's
// range. |string| and the range length are not used.
311 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
312 willSpeakWord:(NSRange)character_range
313 ofString:(NSString*)string {
314 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
315 character_range.location, "");
// Synthesis error: report TTS_EVENT_ERROR at |character_index|, with the
// native message converted to UTF-8.
318 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
319 didEncounterErrorAtIndex:(NSUInteger)character_index
320 ofString:(NSString*)string
321 message:(NSString*)message {
322 std::string message_utf8 = base::SysNSStringToUTF8(message);
323 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
329 @implementation SingleUseSpeechSynthesizer
// Initializer. Keeps |utterance| alive for the lifetime of this synthesizer:
// the explicit -retain gives utterance_ its own reference
// (scoped_nsobject::reset() presumably takes over that reference without
// retaining again - confirm).
331 - (id)initWithUtterance:(NSString*)utterance {
334 utterance_.reset([utterance retain]);
// Speaks the stored utterance. The message is sent to super, so it reaches
// NSSpeechSynthesizer's -startSpeakingString: rather than this class's own
// declaration of that method. Returns the superclass call's result.
340 - (bool)startSpeakingRetainedUtterance {
344 return [super startSpeakingString:utterance_];
347 - (bool)startSpeakingString:(NSString*)utterance {