Elim cr-checkbox
[chromium-blink-merge.git] / chrome / browser / speech / tts_mac.mm
bloba492dd54c4772a28af521f86c517d3a9b4ea0a6e
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include <string>
7 #include "base/mac/scoped_nsobject.h"
8 #include "base/memory/singleton.h"
9 #include "base/strings/sys_string_conversions.h"
10 #include "base/values.h"
11 #include "chrome/browser/speech/tts_controller.h"
12 #include "chrome/browser/speech/tts_platform.h"
13 #include "extensions/browser/extension_function.h"
15 #import <Cocoa/Cocoa.h>
class TtsPlatformImplMac;

// Delegate object for NSSpeechSynthesizer. Each delegate callback it
// implements is forwarded to the TtsPlatformImplMac given at initialization.
@interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
 @private
  // Target of the forwarded callbacks. Stored as a plain pointer; this
  // object takes no ownership of it.
  TtsPlatformImplMac* ttsImplMac_;  // weak.
}

// Initializes the delegate so that it forwards events to |ttsImplMac|.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;

@end
// An NSSpeechSynthesizer subclass bound to exactly one utterance: the
// string is handed over at initialization, retained by the synthesizer,
// and may be spoken only once.
//
// A fresh NSSpeechSynthesizer is constructed for every utterance, for
// two reasons:
// 1. So delegate callbacks can be associated with a particular utterance
//    without assuming anything undocumented about the protocol.
// 2. To work around http://openradar.appspot.com/radar?id=2854403,
//    where Nuance voices don't retain the utterance string and
//    crash when trying to call willSpeakWord.
@interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
 @private
  // The utterance this synthesizer was created for.
  base::scoped_nsobject<NSString> utterance_;
  // Becomes true once -startSpeakingRetainedUtterance has been called.
  bool didSpeak_;
}

// Initializes the synthesizer and retains |utterance| for later speaking.
- (id)initWithUtterance:(NSString*)utterance;

// Starts speaking the retained utterance and returns the result of the
// superclass's -startSpeakingString:. CHECK-fails on a second call.
- (bool)startSpeakingRetainedUtterance;

// Always CHECK-fails: callers must use -startSpeakingRetainedUtterance.
- (bool)startSpeakingString:(NSString*)utterance;

@end
// TtsPlatformImpl for Mac, implemented on top of NSSpeechSynthesizer.
// Obtained through GetInstance(); construction is private.
class TtsPlatformImplMac : public TtsPlatformImpl {
 public:
  // This implementation always reports itself as available.
  bool PlatformImplAvailable() override { return true; }

  // Starts speaking |utterance| on a newly created synthesizer, applying
  // the voice, rate, pitch and volume requested. Returns whether speech
  // started.
  bool Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const VoiceData& voice,
             const UtteranceContinuousParameters& params) override;

  // Stops and discards the current synthesizer, if there is one.
  bool StopSpeaking() override;

  // Pauses the current utterance; does nothing when already paused or when
  // there is no synthesizer.
  void Pause() override;

  // Continues a paused utterance; does nothing when not paused.
  void Resume() override;

  // Whether the current synthesizer, if any, reports that it is speaking.
  bool IsSpeaking() override;

  // Appends one VoiceData per available system voice to |out_voices|,
  // default voice first.
  void GetVoices(std::vector<VoiceData>* out_voices) override;

  // Called by ChromeTtsDelegate when we get a callback from the
  // native speech engine.
  void OnSpeechEvent(NSSpeechSynthesizer* sender,
                     TtsEventType event_type,
                     int char_index,
                     const std::string& error_message);

  // Get the single instance of this class.
  static TtsPlatformImplMac* GetInstance();

 private:
  TtsPlatformImplMac();
  ~TtsPlatformImplMac() override;

  // Synthesizer for the utterance in progress; replaced on every Speak().
  base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  // Delegate shared by all synthesizers; forwards callbacks to |this|.
  base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  // Id of the utterance most recently passed to Speak().
  int utterance_id_;
  // Text of the utterance most recently passed to Speak().
  std::string utterance_;
  // Character index carried by the last event sent from OnSpeechEvent().
  int last_char_index_;
  // True between a successful Pause() and the next Resume()/Speak()/Stop.
  bool paused_;

  friend struct base::DefaultSingletonTraits<TtsPlatformImplMac>;

  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
};
// static
// On Mac the platform TTS implementation is the TtsPlatformImplMac
// singleton.
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  TtsPlatformImplMac* mac_impl = TtsPlatformImplMac::GetInstance();
  return mac_impl;
}
// Starts speaking |utterance| on a freshly constructed synthesizer.
//
// |utterance_id| identifies the utterance in events sent to TtsController.
// |voice| selects a native voice when its native_voice_identifier is
// non-empty. |params| supplies rate, pitch and volume; each is applied only
// when it is >= 0. |lang| is currently unused (see the TODO below).
//
// Returns true if the synthesizer started speaking, in which case
// TTS_EVENT_START has been sent to the controller. Returns false if
// |utterance| cannot be converted to an NSString or if the synthesizer
// failed to start.
bool TtsPlatformImplMac::Speak(
    int utterance_id,
    const std::string& utterance,
    const std::string& lang,
    const VoiceData& voice,
    const UtteranceContinuousParameters& params) {
  // TODO: convert SSML to SAPI xml. http://crbug.com/88072

  // +stringWithUTF8String: yields nil for bytes that are not valid UTF-8.
  // Fail before touching any state: a nil utterance would otherwise trip
  // CHECK(utterance_) in -startSpeakingRetainedUtterance.
  NSString* utterance_nsstring =
      [NSString stringWithUTF8String:utterance.c_str()];
  if (!utterance_nsstring)
    return false;

  utterance_ = utterance;
  paused_ = false;
  // A new utterance starts from the beginning, so Pause()/Resume() must not
  // report a character index left over from an earlier utterance.
  last_char_index_ = 0;

  // Deliberately construct a new speech synthesizer every time Speak is
  // called, otherwise there's no way to know whether calls to the delegate
  // apply to the current utterance or a previous utterance. In
  // experimentation, the overhead of constructing and destructing a
  // NSSpeechSynthesizer is minimal.
  speech_synthesizer_.reset(
      [[SingleUseSpeechSynthesizer alloc]
        initWithUtterance:utterance_nsstring]);
  [speech_synthesizer_ setDelegate:delegate_];

  if (!voice.native_voice_identifier.empty()) {
    NSString* native_voice_identifier =
        [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
    [speech_synthesizer_ setVoice:native_voice_identifier];
  }

  utterance_id_ = utterance_id;

  // TODO: support languages other than the default: crbug.com/88059

  if (params.rate >= 0.0) {
    // The TTS api defines rate via words per minute. Let 200 be the default.
    const int words_per_minute = static_cast<int>(params.rate * 200);
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:words_per_minute]
        forProperty:NSSpeechRateProperty error:nil];
  }

  if (params.pitch >= 0.0) {
    // The input is a float from 0.0 to 2.0, with 1.0 being the default.
    // Get the default pitch for this voice and modulate it by 50% - 150%.
    // The error is not inspected; a missing value falls back to 48 below.
    NSNumber* defaultPitchObj =
        [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
                                         error:nil];
    int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
    int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
    [speech_synthesizer_
        setObject:[NSNumber numberWithInt:newPitch]
        forProperty:NSSpeechPitchBaseProperty error:nil];
  }

  if (params.volume >= 0.0) {
    [speech_synthesizer_
        setObject:[NSNumber numberWithFloat:params.volume]
        forProperty:NSSpeechVolumeProperty error:nil];
  }

  bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
  if (success) {
    TtsController* controller = TtsController::GetInstance();
    controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
  }
  return success;
}
// Stops the synthesizer for the current utterance, if one exists, and drops
// it. Clears the paused flag and always returns true.
bool TtsPlatformImplMac::StopSpeaking() {
  SingleUseSpeechSynthesizer* active_synthesizer = speech_synthesizer_.get();
  if (active_synthesizer) {
    [active_synthesizer stopSpeaking];
    speech_synthesizer_.reset(nil);
  }

  paused_ = false;
  return true;
}
// Pauses speech immediately and sends TTS_EVENT_PAUSE, carrying the last
// reported character index, to the controller. Does nothing when there is
// no synthesizer, when |utterance_id_| is zero, or when already paused.
void TtsPlatformImplMac::Pause() {
  // NOTE(review): this tests |utterance_id_| for non-zero, while the
  // constructor's "no utterance" value is -1 -- confirm 0 is never a valid
  // utterance id.
  if (!speech_synthesizer_.get() || !utterance_id_ || paused_)
    return;

  [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
  paused_ = true;

  TtsController* controller = TtsController::GetInstance();
  controller->OnTtsEvent(utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
}
// Continues paused speech and sends TTS_EVENT_RESUME, carrying the last
// reported character index, to the controller. Does nothing when there is
// no synthesizer, when |utterance_id_| is zero, or when not paused.
void TtsPlatformImplMac::Resume() {
  // NOTE(review): this tests |utterance_id_| for non-zero, while the
  // constructor's "no utterance" value is -1 -- confirm 0 is never a valid
  // utterance id.
  if (!speech_synthesizer_.get() || !utterance_id_ || !paused_)
    return;

  [speech_synthesizer_ continueSpeaking];
  paused_ = false;

  TtsController* controller = TtsController::GetInstance();
  controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
}
// Returns the synthesizer's own -isSpeaking state, or false when no
// synthesizer currently exists.
bool TtsPlatformImplMac::IsSpeaking() {
  if (!speech_synthesizer_.get())
    return false;

  return [speech_synthesizer_ isSpeaking];
}
// Appends a VoiceData entry to |outVoices| for every voice reported by
// NSSpeechSynthesizer. The system default voice, when there is one, comes
// first; the remaining voices keep the order in which they were reported.
void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
  NSArray* availableVoices = [NSSpeechSynthesizer availableVoices];
  NSString* systemDefaultVoice = [NSSpeechSynthesizer defaultVoice];

  // Build a temporary list of voice identifiers with the default up front.
  NSMutableArray* sortedIdentifiers =
      [NSMutableArray arrayWithCapacity:[availableVoices count]];
  if (systemDefaultVoice)
    [sortedIdentifiers addObject:systemDefaultVoice];
  for (NSString* identifier in availableVoices) {
    if ([identifier isEqualToString:systemDefaultVoice])
      continue;  // Already placed at the front.
    [sortedIdentifiers addObject:identifier];
  }

  // Every native voice is advertised with the same set of event types.
  static const TtsEventType kSupportedEvents[] = {
      TTS_EVENT_START,     TTS_EVENT_END,         TTS_EVENT_WORD,
      TTS_EVENT_ERROR,     TTS_EVENT_CANCELLED,   TTS_EVENT_INTERRUPTED,
      TTS_EVENT_PAUSE,     TTS_EVENT_RESUME,
  };

  for (NSString* identifier in sortedIdentifiers) {
    outVoices->push_back(VoiceData());
    VoiceData& entry = outVoices->back();

    NSDictionary* voiceAttributes =
        [NSSpeechSynthesizer attributesForVoice:identifier];
    NSString* voiceName = [voiceAttributes objectForKey:NSVoiceName];
    NSString* voiceGender = [voiceAttributes objectForKey:NSVoiceGender];
    NSString* voiceLocale =
        [voiceAttributes objectForKey:NSVoiceLocaleIdentifier];

    entry.native = true;
    entry.native_voice_identifier = base::SysNSStringToUTF8(identifier);
    entry.name = base::SysNSStringToUTF8(voiceName);

    // Report the language as "language-country" when both components are
    // present in the voice's locale, otherwise as the language alone.
    NSDictionary* localeParts =
        [NSLocale componentsFromLocaleIdentifier:voiceLocale];
    NSString* languageCode = [localeParts objectForKey:NSLocaleLanguageCode];
    NSString* countryCode = [localeParts objectForKey:NSLocaleCountryCode];
    if (languageCode && countryCode) {
      NSString* languageTag =
          [NSString stringWithFormat:@"%@-%@", languageCode, countryCode];
      entry.lang = [languageTag UTF8String];
    } else {
      entry.lang = base::SysNSStringToUTF8(languageCode);
    }

    if ([voiceGender isEqualToString:NSVoiceGenderMale]) {
      entry.gender = TTS_GENDER_MALE;
    } else if ([voiceGender isEqualToString:NSVoiceGenderFemale]) {
      entry.gender = TTS_GENDER_FEMALE;
    } else {
      entry.gender = TTS_GENDER_NONE;
    }

    for (TtsEventType supported_event : kSupportedEvents)
      entry.events.insert(supported_event);
  }
}
// Relays a native speech callback to the TtsController for the current
// utterance and records the character index that was reported.
// For TTS_EVENT_END the reported index is the utterance length rather than
// |char_index|.
void TtsPlatformImplMac::OnSpeechEvent(
    NSSpeechSynthesizer* sender,
    TtsEventType event_type,
    int char_index,
    const std::string& error_message) {
  // Don't send events from an utterance that's already completed.
  // This depends on the fact that we construct a new NSSpeechSynthesizer
  // each time we call Speak.
  if (speech_synthesizer_.get() != sender)
    return;

  int reported_index = char_index;
  if (event_type == TTS_EVENT_END)
    reported_index = static_cast<int>(utterance_.size());

  TtsController::GetInstance()->OnTtsEvent(
      utterance_id_, event_type, reported_index, error_message);
  last_char_index_ = reported_index;
}
// Starts with no utterance (-1), not paused, and a character index of 0,
// then creates the delegate that forwards synthesizer callbacks to |this|.
//
// |last_char_index_| must be initialized here: Pause() and Resume() read it
// when sending their events, and nothing else is guaranteed to have written
// it before they run.
TtsPlatformImplMac::TtsPlatformImplMac()
    : utterance_id_(-1), last_char_index_(0), paused_(false) {
  delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
}
// Nothing to do explicitly; the scoped_nsobject members release what they
// hold.
TtsPlatformImplMac::~TtsPlatformImplMac() {}
// static
// Returns the instance managed by base::Singleton.
TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
  typedef base::Singleton<TtsPlatformImplMac> InstanceHolder;
  return InstanceHolder::get();
}
@implementation ChromeTtsDelegate

// Remembers |ttsImplMac| as the recipient of every forwarded callback.
- (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
  self = [super init];
  if (self)
    ttsImplMac_ = ttsImplMac;
  return self;
}

// Forwarded as TTS_EVENT_END. |finished_speaking| is not consulted, and the
// index passed here is 0.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
        didFinishSpeaking:(BOOL)finished_speaking {
  const std::string no_error;
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, no_error);
}

// Forwarded as TTS_EVENT_WORD at the start of the word's character range.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
            willSpeakWord:(NSRange)character_range
                 ofString:(NSString*)string {
  const std::string no_error;
  const int word_start = static_cast<int>(character_range.location);
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD, word_start, no_error);
}

// Forwarded as TTS_EVENT_ERROR together with the engine's message converted
// to UTF-8.
- (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 didEncounterErrorAtIndex:(NSUInteger)character_index
                 ofString:(NSString*)string
                  message:(NSString*)message {
  const int error_index = static_cast<int>(character_index);
  ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, error_index,
                             base::SysNSStringToUTF8(message));
}

@end
@implementation SingleUseSpeechSynthesizer

// Retains |utterance| and marks the synthesizer as not yet used.
- (id)initWithUtterance:(NSString*)utterance {
  if ((self = [super init])) {
    didSpeak_ = false;
    // scoped_nsobject takes over the reference added by -retain.
    utterance_.reset([utterance retain]);
  }
  return self;
}

// Speaks the retained utterance through the superclass. May only be called
// once per instance, and only when an utterance was supplied.
- (bool)startSpeakingRetainedUtterance {
  CHECK(!didSpeak_);
  CHECK(utterance_);
  didSpeak_ = true;
  NSString* retained_text = utterance_;
  return [super startSpeakingString:retained_text];
}

// Speaking an arbitrary string is not allowed on this subclass; any call
// fails the CHECK.
- (bool)startSpeakingString:(NSString*)utterance {
  CHECK(false);
  return false;
}

@end