1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
7 #include "base/mac/scoped_nsobject.h"
8 #include "base/memory/singleton.h"
9 #include "base/strings/sys_string_conversions.h"
10 #include "base/values.h"
11 #include "chrome/browser/speech/tts_controller.h"
12 #include "chrome/browser/speech/tts_platform.h"
13 #include "extensions/browser/extension_function.h"
15 #import <Cocoa/Cocoa.h>
// Forward declaration: the delegate interface below stores a pointer to the
// C++ platform implementation before that class is declared.
17 class TtsPlatformImplMac;
// Objective-C object that adopts NSSpeechSynthesizerDelegate and forwards
// speech callbacks to a TtsPlatformImplMac.
// NOTE(review): this excerpt appears to omit some lines of the interface
// (for example its closing brace and @end) - confirm against the full file.
19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
// Non-owning back-pointer to the platform implementation; assigned in
// initWithPlatformImplMac:.
21 TtsPlatformImplMac* ttsImplMac_; // weak.
// Initializes the delegate with the platform implementation that will
// receive the forwarded speech events.
24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
28 // Subclass of NSSpeechSynthesizer that takes an utterance
29 // string on initialization, retains it and only allows it
// NOTE(review): the sentence above looks truncated in this excerpt; judging
// by the class name it presumably ended along the lines of "to be spoken
// once" - confirm against the full file.
32 // We construct a new NSSpeechSynthesizer for each utterance, for
// NOTE(review): the lead-in above also looks cut off (presumably "for two
// reasons:", given the numbered list that follows) - confirm.
34 // 1. To associate delegate callbacks with a particular utterance,
35 // without assuming anything undocumented about the protocol.
36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
37 // where Nuance voices don't retain the utterance string and
38 // crash when trying to call willSpeakWord.
39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
// Holds the utterance string handed to initWithUtterance: (which retains it
// before storing it here).
41 base::scoped_nsobject<NSString> utterance_;
// Stores |utterance| so it can be spoken later by
// startSpeakingRetainedUtterance.
45 - (id)initWithUtterance:(NSString*)utterance;
// Speaks the stored utterance by calling the superclass's
// startSpeakingString: with it, and returns that call's result.
46 - (bool)startSpeakingRetainedUtterance;
// Redeclaration of startSpeakingString: with a bool return type.
// NOTE(review): the override's body is not visible in this excerpt, so its
// behaviour is not described here.
47 - (bool)startSpeakingString:(NSString*)utterance;
// TtsPlatformImpl subclass that drives speech through a
// SingleUseSpeechSynthesizer (an NSSpeechSynthesizer subclass) and reports
// events to TtsController.
// NOTE(review): several lines of this declaration (access specifiers, some
// members, the constructor declaration, the closing brace) are not visible in
// this excerpt; the comments below describe only what the visible
// declarations and their visible definitions show.
51 class TtsPlatformImplMac : public TtsPlatformImpl {
// Body not visible here - presumably reports that this platform
// implementation is usable; confirm.
53 virtual bool PlatformImplAvailable() OVERRIDE {
// Trailing parameters of the Speak() override (its opening line is not
// visible in this excerpt): the text to speak, a language tag, the chosen
// voice and the rate/pitch/volume parameters.
59 const std::string& utterance,
60 const std::string& lang,
61 const VoiceData& voice,
62 const UtteranceContinuousParameters& params) OVERRIDE;
// Stops the current synthesizer, if any, and releases it.
64 virtual bool StopSpeaking() OVERRIDE;
// Pauses the current utterance immediately and reports TTS_EVENT_PAUSE, but
// only when a synthesizer and an utterance id exist and speech is not
// already paused.
66 virtual void Pause() OVERRIDE;
// Continues a paused utterance and reports TTS_EVENT_RESUME, but only when a
// synthesizer and an utterance id exist and speech is currently paused.
68 virtual void Resume() OVERRIDE;
// Forwards to the current synthesizer's isSpeaking when one exists.
70 virtual bool IsSpeaking() OVERRIDE;
// Appends one VoiceData entry per available system voice to |out_voices|,
// with the system default voice first.
72 virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
74 // Called by ChromeTtsDelegate when we get a callback from the
75 // native speech engine.
// Events whose |sender| is not the current synthesizer are filtered out.
76 void OnSpeechEvent(NSSpeechSynthesizer* sender,
77 TtsEventType event_type,
79 const std::string& error_message);
81 // Get the single instance of this class.
82 static TtsPlatformImplMac* GetInstance();
86 virtual ~TtsPlatformImplMac();
// Synthesizer for the current utterance; Speak() replaces it on every call
// and StopSpeaking() resets it to nil.
88 base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
// Delegate created in the constructor and attached to each new synthesizer.
89 base::scoped_nsobject<ChromeTtsDelegate> delegate_;
// Copy of the text most recently passed to Speak(); its size is reported as
// the character index of TTS_EVENT_END.
91 std::string utterance_;
// Lets the Singleton machinery used by GetInstance() construct the instance.
95 friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
97 DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
// Platform hook for the generic TtsPlatformImpl interface: here it returns
// the TtsPlatformImplMac singleton.
101 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
102 return TtsPlatformImplMac::GetInstance();
// Speaks |utterance| with a freshly created SingleUseSpeechSynthesizer.
// |voice.native_voice_identifier|, when non-empty, selects the voice.
// |params.rate|, |params.pitch| and |params.volume| are each applied only
// when they are >= 0.0. |lang| is not used in the visible code (see the TODO
// about non-default languages below).
// NOTE(review): parts of this definition are not visible in this excerpt,
// including the leading parameter that supplies |utterance_id| and the tail
// that consumes |success|; confirm details against the full file.
105 bool TtsPlatformImplMac::Speak(
107 const std::string& utterance,
108 const std::string& lang,
109 const VoiceData& voice,
110 const UtteranceContinuousParameters& params) {
111 // TODO: convert SSML to SAPI xml. http://crbug.com/88072
// Keep a copy of the text; OnSpeechEvent() uses its size for END events.
112 utterance_ = utterance;
// NOTE(review): stringWithUTF8String: is documented to expect UTF-8 input and
// in practice yields nil for malformed byte sequences; no nil check on the
// result is visible here before it is handed to initWithUtterance: - confirm
// whether callers guarantee valid UTF-8.
115 NSString* utterance_nsstring =
116 [NSString stringWithUTF8String:utterance_.c_str()];
118 // Deliberately construct a new speech synthesizer every time Speak is
119 // called, otherwise there's no way to know whether calls to the delegate
120 // apply to the current utterance or a previous utterance. In
121 // experimentation, the overhead of constructing and destructing a
122 // NSSpeechSynthesizer is minimal.
123 speech_synthesizer_.reset(
124 [[SingleUseSpeechSynthesizer alloc]
125 initWithUtterance:utterance_nsstring]);
// Route this synthesizer's callbacks through the shared delegate.
126 [speech_synthesizer_ setDelegate:delegate_];
// Only override the voice when the caller supplied a native identifier.
128 if (!voice.native_voice_identifier.empty()) {
129 NSString* native_voice_identifier =
130 [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
131 [speech_synthesizer_ setVoice:native_voice_identifier];
// Remember which utterance subsequent events belong to.
134 utterance_id_ = utterance_id;
136 // TODO: support languages other than the default: crbug.com/88059
138 if (params.rate >= 0.0) {
139 // The TTS api defines rate via words per minute. Let 200 be the default.
// numberWithInt: takes an int, so the scaled rate is converted to an integer
// before being stored as the speech-rate property.
141 setObject:[NSNumber numberWithInt:params.rate * 200]
142 forProperty:NSSpeechRateProperty error:nil];
145 if (params.pitch >= 0.0) {
146 // The input is a float from 0.0 to 2.0, with 1.0 being the default.
147 // Get the default pitch for this voice and modulate it by 50% - 150%.
149 NSNumber* defaultPitchObj =
150 [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
// Fall back to a base pitch of 48 when the property lookup yields nil.
152 int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
// Maps pitch 0.0 -> 50%, 1.0 -> 100%, 2.0 -> 150% of the base pitch.
153 int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
155 setObject:[NSNumber numberWithInt:newPitch]
156 forProperty:NSSpeechPitchBaseProperty error:nil];
// Volume is passed through unchanged as a float.
159 if (params.volume >= 0.0) {
161 setObject: [NSNumber numberWithFloat:params.volume]
162 forProperty:NSSpeechVolumeProperty error:nil];
// Start speaking the utterance the synthesizer retained at construction.
165 bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
// Report the start of speech to the controller at character index 0.
// NOTE(review): a check of |success| guarding this notification is
// presumably on a line not visible in this excerpt - confirm.
167 TtsController* controller = TtsController::GetInstance();
168 controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
// Stops any in-progress speech. When a synthesizer exists it is told to stop
// and is then released, so later delegate callbacks from it no longer match
// the current synthesizer in OnSpeechEvent().
// NOTE(review): the remainder of this function (including its return value)
// is not visible in this excerpt.
173 bool TtsPlatformImplMac::StopSpeaking() {
174 if (speech_synthesizer_.get()) {
175 [speech_synthesizer_ stopSpeaking];
176 speech_synthesizer_.reset(nil);
// Pauses the current utterance immediately and notifies the controller with
// TTS_EVENT_PAUSE at the last reported character index. Does nothing unless
// a synthesizer exists, an utterance id is set and speech is not already
// paused.
// NOTE(review): the statement that updates |paused_| is presumably on a line
// not visible in this excerpt - confirm.
182 void TtsPlatformImplMac::Pause() {
183 if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
// NSSpeechImmediateBoundary: pause right away rather than at a word or
// sentence boundary.
184 [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
186 TtsController::GetInstance()->OnTtsEvent(
187 utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
// Continues a paused utterance and notifies the controller with
// TTS_EVENT_RESUME at the last reported character index. Does nothing unless
// a synthesizer exists, an utterance id is set and speech is currently
// paused.
// NOTE(review): the statement that clears |paused_| is presumably on a line
// not visible in this excerpt - confirm.
191 void TtsPlatformImplMac::Resume() {
192 if (speech_synthesizer_.get() && utterance_id_ && paused_) {
193 [speech_synthesizer_ continueSpeaking];
195 TtsController::GetInstance()->OnTtsEvent(
196 utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
// Reports whether the current synthesizer is speaking, by forwarding to its
// isSpeaking method when a synthesizer exists.
// NOTE(review): the fall-through return for the no-synthesizer case is not
// visible in this excerpt.
200 bool TtsPlatformImplMac::IsSpeaking() {
201 if (speech_synthesizer_)
202 return [speech_synthesizer_ isSpeaking];
// Appends one VoiceData entry to |outVoices| for every voice reported by
// NSSpeechSynthesizer, listing the system default voice first. Each entry
// records the native identifier, display name, language tag, gender and the
// set of event types supported.
// NOTE(review): some lines of this definition (closing braces, at least one
// assignment target, possibly other statements) are not visible in this
// excerpt.
206 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
207 NSArray* voices = [NSSpeechSynthesizer availableVoices];
209 // Create a new temporary array of the available voices with
210 // the default voice first.
211 NSMutableArray* orderedVoices =
212 [NSMutableArray arrayWithCapacity:[voices count]];
213 NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
// NOTE(review): -addObject: raises NSInvalidArgumentException when passed
// nil, and no nil check on |defaultVoice| is visible here - confirm that
// +defaultVoice can never return nil, or guard this call.
214 [orderedVoices addObject:defaultVoice];
// Add every other voice, skipping the default that is already first.
215 for (NSString* voiceIdentifier in voices) {
216 if (![voiceIdentifier isEqualToString:defaultVoice])
217 [orderedVoices addObject:voiceIdentifier];
220 for (NSString* voiceIdentifier in orderedVoices) {
// Append an empty entry and fill it in place.
221 outVoices->push_back(VoiceData());
222 VoiceData& data = outVoices->back();
224 NSDictionary* attributes =
225 [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
226 NSString* name = [attributes objectForKey:NSVoiceName];
227 NSString* gender = [attributes objectForKey:NSVoiceGender];
228 NSString* localeIdentifier =
229 [attributes objectForKey:NSVoiceLocaleIdentifier];
232 data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
233 data.name = base::SysNSStringToUTF8(name);
// Split the voice's locale identifier into language and country components.
235 NSDictionary* localeComponents =
236 [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
237 NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
238 NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
// With both components present a "language-country" string is built.
// NOTE(review): the assignment target of the formatted string and the else
// branch marker are not visible here; the language-only assignment below is
// presumably the fallback when the country is missing - confirm.
239 if (language && country) {
241 [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
243 data.lang = base::SysNSStringToUTF8(language);
// Map the system gender attribute onto the TTS gender enum; anything that is
// neither male nor female is recorded as TTS_GENDER_NONE.
245 if ([gender isEqualToString:NSVoiceGenderMale])
246 data.gender = TTS_GENDER_MALE;
247 else if ([gender isEqualToString:NSVoiceGenderFemale])
248 data.gender = TTS_GENDER_FEMALE;
250 data.gender = TTS_GENDER_NONE;
// Every native voice advertises the same set of supported event types.
251 data.events.insert(TTS_EVENT_START);
252 data.events.insert(TTS_EVENT_END);
253 data.events.insert(TTS_EVENT_WORD);
254 data.events.insert(TTS_EVENT_ERROR);
255 data.events.insert(TTS_EVENT_CANCELLED);
256 data.events.insert(TTS_EVENT_INTERRUPTED);
257 data.events.insert(TTS_EVENT_PAUSE);
258 data.events.insert(TTS_EVENT_RESUME);
// Forwards a native speech callback to TtsController for the current
// utterance and remembers the character index that was reported.
// |sender| is the synthesizer that produced the callback, |event_type| the
// TTS event to report and |error_message| the text passed along with it.
// NOTE(review): the |char_index| parameter and the body of the sender guard
// are on lines not visible in this excerpt.
262 void TtsPlatformImplMac::OnSpeechEvent(
263 NSSpeechSynthesizer* sender,
264 TtsEventType event_type,
266 const std::string& error_message) {
267 // Don't send events from an utterance that's already completed.
268 // This depends on the fact that we construct a new NSSpeechSynthesizer
269 // each time we call Speak.
270 if (sender != speech_synthesizer_.get())
// END events always report the full length of the stored utterance,
// overriding whatever index the caller supplied.
273 if (event_type == TTS_EVENT_END)
274 char_index = utterance_.size();
275 TtsController* controller = TtsController::GetInstance();
276 controller->OnTtsEvent(
277 utterance_id_, event_type, char_index, error_message);
// Saved so Pause() and Resume() can report the most recent position.
278 last_char_index_ = char_index;
// Constructor: creates the ChromeTtsDelegate that forwards native speech
// callbacks back to this object.
// NOTE(review): other initialization statements may be on lines not visible
// in this excerpt.
281 TtsPlatformImplMac::TtsPlatformImplMac() {
// The delegate keeps |this| as a weak (non-owning) pointer.
285 delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
// Destructor. NOTE(review): its body is not visible in this excerpt.
288 TtsPlatformImplMac::~TtsPlatformImplMac() {
// Returns the process-wide instance, obtained from
// Singleton<TtsPlatformImplMac>.
292 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
293 return Singleton<TtsPlatformImplMac>::get();
// NSSpeechSynthesizerDelegate implementation that translates each native
// callback into a TtsPlatformImplMac::OnSpeechEvent() call.
// NOTE(review): closing braces, return statements and @end are on lines not
// visible in this excerpt.
296 @implementation ChromeTtsDelegate
// Stores the (non-owning) pointer to the platform implementation.
298 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
299 if ((self = [super init])) {
300 ttsImplMac_ = ttsImplMac;
// Speech finished: report TTS_EVENT_END. |finished_speaking| is not consulted
// in the visible code. The index 0 passed here is replaced by the utterance
// length inside OnSpeechEvent() for END events.
305 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
306 didFinishSpeaking:(BOOL)finished_speaking {
307 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
// About to speak a word: report TTS_EVENT_WORD with the word's start offset.
// |string| is not used in the visible code.
310 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
311 willSpeakWord:(NSRange)character_range
312 ofString:(NSString*)string {
313 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
314 character_range.location, "");
// Synthesis error: convert the message to UTF-8 and report TTS_EVENT_ERROR at
// the failing character index.
// NOTE(review): the final argument of the call below is on a line not visible
// in this excerpt - presumably |message_utf8|; confirm.
317 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
318 didEncounterErrorAtIndex:(NSUInteger)character_index
319 ofString:(NSString*)string
320 message:(NSString*)message {
321 std::string message_utf8 = base::SysNSStringToUTF8(message);
322 ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
328 @implementation SingleUseSpeechSynthesizer
// Initializer that keeps |utterance| for later speaking.
// NOTE(review): the super-init call, any other setup and the return are on
// lines not visible in this excerpt.
330 - (id)initWithUtterance:(NSString*)utterance {
// Explicit retain: the string is retained here and the retained reference is
// handed to the scoped_nsobject.
333 utterance_.reset([utterance retain]);
// Speaks the utterance stored at initialization and returns the result of
// the superclass's startSpeakingString:.
// NOTE(review): statements preceding the return (for example a guard against
// speaking more than once) may be on lines not visible in this excerpt.
339 - (bool)startSpeakingRetainedUtterance {
// Calls the superclass directly, bypassing this class's own
// startSpeakingString: override.
343 return [super startSpeakingString:utterance_];
346 - (bool)startSpeakingString:(NSString*)utterance {