chrome/browser/speech/tts_mac.mm

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <string>
   6
   7 #include "base/mac/scoped_nsobject.h"
   8 #include "base/memory/singleton.h"
   9 #include "base/strings/sys_string_conversions.h"
  10 #include "base/values.h"
  11 #include "chrome/browser/speech/tts_controller.h"
  12 #include "chrome/browser/speech/tts_platform.h"
  13 #include "extensions/browser/extension_function.h"
  14
  15 #import <Cocoa/Cocoa.h>
  16
  17 class TtsPlatformImplMac;
  18
  19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
  20  @private
  21   TtsPlatformImplMac* ttsImplMac_;  // weak.
  22 }
  23
  24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
  25
  26 @end
  27
  28 // Subclass of NSSpeechSynthesizer that takes an utterance
  29 // string on initialization, retains it and only allows it
  30 // to be spoken once.
  31 //
  32 // We construct a new NSSpeechSynthesizer for each utterance, for
  33 // two reasons:
  34 // 1. To associate delegate callbacks with a particular utterance,
  35 //    without assuming anything undocumented about the protocol.
  36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
  37 //    where Nuance voices don't retain the utterance string and
  38 //    crash when trying to call willSpeakWord.
  39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
  40  @private
  41   base::scoped_nsobject<NSString> utterance_;
  42   bool didSpeak_;
  43 }
  44
  45 - (id)initWithUtterance:(NSString*)utterance;
  46 - (bool)startSpeakingRetainedUtterance;
  47 - (bool)startSpeakingString:(NSString*)utterance;
  48
  49 @end
  50
  51 class TtsPlatformImplMac : public TtsPlatformImpl {
  52  public:
  53   bool PlatformImplAvailable() override { return true; }
  54
  55   bool Speak(int utterance_id,
  56              const std::string& utterance,
  57              const std::string& lang,
  58              const VoiceData& voice,
  59              const UtteranceContinuousParameters& params) override;
  60
  61   bool StopSpeaking() override;
  62
  63   void Pause() override;
  64
  65   void Resume() override;
  66
  67   bool IsSpeaking() override;
  68
  69   void GetVoices(std::vector<VoiceData>* out_voices) override;
  70
  71   // Called by ChromeTtsDelegate when we get a callback from the
  72   // native speech engine.
  73   void OnSpeechEvent(NSSpeechSynthesizer* sender,
  74                      TtsEventType event_type,
  75                      int char_index,
  76                      const std::string& error_message);
  77
  78   // Get the single instance of this class.
  79   static TtsPlatformImplMac* GetInstance();
  80
  81  private:
  82   TtsPlatformImplMac();
  83   ~TtsPlatformImplMac() override;
  84
  85   base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  86   base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  87   int utterance_id_;
  88   std::string utterance_;
  89   int last_char_index_;
  90   bool paused_;
  91
  92   friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
  93
  94   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
  95 };
  96
  97 // static
  98 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  99   return TtsPlatformImplMac::GetInstance();
 100 }
 101
 102 bool TtsPlatformImplMac::Speak(
 103     int utterance_id,
 104     const std::string& utterance,
 105     const std::string& lang,
 106     const VoiceData& voice,
 107     const UtteranceContinuousParameters& params) {
 108   // TODO: convert SSML to SAPI xml. http://crbug.com/88072
 109   utterance_ = utterance;
 110   paused_ = false;
 111
 112   NSString* utterance_nsstring =
 113       [NSString stringWithUTF8String:utterance_.c_str()];
 114
 115   // Deliberately construct a new speech synthesizer every time Speak is
 116   // called, otherwise there's no way to know whether calls to the delegate
 117   // apply to the current utterance or a previous utterance. In
 118   // experimentation, the overhead of constructing and destructing a
 119   // NSSpeechSynthesizer is minimal.
 120   speech_synthesizer_.reset(
 121       [[SingleUseSpeechSynthesizer alloc]
 122         initWithUtterance:utterance_nsstring]);
 123   [speech_synthesizer_ setDelegate:delegate_];
 124
 125   if (!voice.native_voice_identifier.empty()) {
 126     NSString* native_voice_identifier =
 127         [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
 128     [speech_synthesizer_ setVoice:native_voice_identifier];
 129   }
 130
 131   utterance_id_ = utterance_id;
 132
 133   // TODO: support languages other than the default: crbug.com/88059
 134
 135   if (params.rate >= 0.0) {
 136     // The TTS api defines rate via words per minute. Let 200 be the default.
 137     [speech_synthesizer_
 138         setObject:[NSNumber numberWithInt:params.rate * 200]
 139         forProperty:NSSpeechRateProperty error:nil];
 140   }
 141
 142   if (params.pitch >= 0.0) {
 143     // The input is a float from 0.0 to 2.0, with 1.0 being the default.
 144     // Get the default pitch for this voice and modulate it by 50% - 150%.
 145     NSError* errorCode;
 146     NSNumber* defaultPitchObj =
 147         [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
 148                                          error:&errorCode];
 149     int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
 150     int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
 151     [speech_synthesizer_
 152         setObject:[NSNumber numberWithInt:newPitch]
 153         forProperty:NSSpeechPitchBaseProperty error:nil];
 154   }
 155
 156   if (params.volume >= 0.0) {
 157     [speech_synthesizer_
 158         setObject: [NSNumber numberWithFloat:params.volume]
 159         forProperty:NSSpeechVolumeProperty error:nil];
 160   }
 161
 162   bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
 163   if (success) {
 164     TtsController* controller = TtsController::GetInstance();
 165     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
 166   }
 167   return success;
 168 }
 169
 170 bool TtsPlatformImplMac::StopSpeaking() {
 171   if (speech_synthesizer_.get()) {
 172     [speech_synthesizer_ stopSpeaking];
 173     speech_synthesizer_.reset(nil);
 174   }
 175   paused_ = false;
 176   return true;
 177 }
 178
 179 void TtsPlatformImplMac::Pause() {
 180   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
 181     [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
 182     paused_ = true;
 183     TtsController::GetInstance()->OnTtsEvent(
 184         utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
 185   }
 186 }
 187
 188 void TtsPlatformImplMac::Resume() {
 189   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
 190     [speech_synthesizer_ continueSpeaking];
 191     paused_ = false;
 192     TtsController::GetInstance()->OnTtsEvent(
 193         utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
 194   }
 195 }
 196
 197 bool TtsPlatformImplMac::IsSpeaking() {
 198   if (speech_synthesizer_)
 199     return [speech_synthesizer_ isSpeaking];
 200   return false;
 201 }
 202
 203 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
 204   NSArray* voices = [NSSpeechSynthesizer availableVoices];
 205
 206   // Create a new temporary array of the available voices with
 207   // the default voice first.
 208   NSMutableArray* orderedVoices =
 209       [NSMutableArray arrayWithCapacity:[voices count]];
 210   NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
 211   if (defaultVoice) {
 212     [orderedVoices addObject:defaultVoice];
 213   }
 214   for (NSString* voiceIdentifier in voices) {
 215     if (![voiceIdentifier isEqualToString:defaultVoice])
 216       [orderedVoices addObject:voiceIdentifier];
 217   }
 218
 219   for (NSString* voiceIdentifier in orderedVoices) {
 220     outVoices->push_back(VoiceData());
 221     VoiceData& data = outVoices->back();
 222
 223     NSDictionary* attributes =
 224         [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
 225     NSString* name = [attributes objectForKey:NSVoiceName];
 226     NSString* gender = [attributes objectForKey:NSVoiceGender];
 227     NSString* localeIdentifier =
 228         [attributes objectForKey:NSVoiceLocaleIdentifier];
 229
 230     data.native = true;
 231     data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
 232     data.name = base::SysNSStringToUTF8(name);
 233
 234     NSDictionary* localeComponents =
 235         [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
 236     NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
 237     NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
 238     if (language && country) {
 239       data.lang =
 240           [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
 241     } else {
 242       data.lang = base::SysNSStringToUTF8(language);
 243     }
 244     if ([gender isEqualToString:NSVoiceGenderMale])
 245       data.gender = TTS_GENDER_MALE;
 246     else if ([gender isEqualToString:NSVoiceGenderFemale])
 247       data.gender = TTS_GENDER_FEMALE;
 248     else
 249       data.gender = TTS_GENDER_NONE;
 250     data.events.insert(TTS_EVENT_START);
 251     data.events.insert(TTS_EVENT_END);
 252     data.events.insert(TTS_EVENT_WORD);
 253     data.events.insert(TTS_EVENT_ERROR);
 254     data.events.insert(TTS_EVENT_CANCELLED);
 255     data.events.insert(TTS_EVENT_INTERRUPTED);
 256     data.events.insert(TTS_EVENT_PAUSE);
 257     data.events.insert(TTS_EVENT_RESUME);
 258   }
 259 }
 260
 261 void TtsPlatformImplMac::OnSpeechEvent(
 262     NSSpeechSynthesizer* sender,
 263     TtsEventType event_type,
 264     int char_index,
 265     const std::string& error_message) {
 266   // Don't send events from an utterance that's already completed.
 267   // This depends on the fact that we construct a new NSSpeechSynthesizer
 268   // each time we call Speak.
 269   if (sender != speech_synthesizer_.get())
 270     return;
 271
 272   if (event_type == TTS_EVENT_END)
 273     char_index = utterance_.size();
 274   TtsController* controller = TtsController::GetInstance();
 275 controller->OnTtsEvent(
 276       utterance_id_, event_type, char_index, error_message);
 277   last_char_index_ = char_index;
 278 }
 279
 280 TtsPlatformImplMac::TtsPlatformImplMac() {
 281   utterance_id_ = -1;
 282   paused_ = false;
 283
 284   delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
 285 }
 286
 287 TtsPlatformImplMac::~TtsPlatformImplMac() {
 288 }
 289
 290 // static
 291 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
 292   return Singleton<TtsPlatformImplMac>::get();
 293 }
 294
 295 @implementation ChromeTtsDelegate
 296
 297 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
 298   if ((self = [super init])) {
 299     ttsImplMac_ = ttsImplMac;
 300   }
 301   return self;
 302 }
 303
 304 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 305         didFinishSpeaking:(BOOL)finished_speaking {
 306   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
 307 }
 308
 309 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 310             willSpeakWord:(NSRange)character_range
 311                  ofString:(NSString*)string {
 312   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
 313       character_range.location, "");
 314 }
 315
 316 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 317  didEncounterErrorAtIndex:(NSUInteger)character_index
 318                  ofString:(NSString*)string
 319                   message:(NSString*)message {
 320   std::string message_utf8 = base::SysNSStringToUTF8(message);
 321   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
 322       message_utf8);
 323 }
 324
 325 @end
 326
 327 @implementation SingleUseSpeechSynthesizer
 328
 329 - (id)initWithUtterance:(NSString*)utterance {
 330   self = [super init];
 331   if (self) {
 332     utterance_.reset([utterance retain]);
 333     didSpeak_ = false;
 334   }
 335   return self;
 336 }
 337
 338 - (bool)startSpeakingRetainedUtterance {
 339   CHECK(!didSpeak_);
 340   CHECK(utterance_);
 341   didSpeak_ = true;
 342   return [super startSpeakingString:utterance_];
 343 }
 344
 345 - (bool)startSpeakingString:(NSString*)utterance {
 346   CHECK(false);
 347   return false;
 348 }
 349
 350 @end