chrome/browser/speech/tts_mac.mm

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <string>
   6
   7 #include "base/mac/scoped_nsobject.h"
   8 #include "base/memory/singleton.h"
   9 #include "base/strings/sys_string_conversions.h"
  10 #include "base/values.h"
  11 #include "chrome/browser/speech/tts_controller.h"
  12 #include "chrome/browser/speech/tts_platform.h"
  13 #include "extensions/browser/extension_function.h"
  14
  15 #import <Cocoa/Cocoa.h>
  16
  17 class TtsPlatformImplMac;
  18
  19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
  20  @private
  21   TtsPlatformImplMac* ttsImplMac_;  // weak.
  22 }
  23
  24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
  25
  26 @end
  27
  28 // Subclass of NSSpeechSynthesizer that takes an utterance
  29 // string on initialization, retains it and only allows it
  30 // to be spoken once.
  31 //
  32 // We construct a new NSSpeechSynthesizer for each utterance, for
  33 // two reasons:
  34 // 1. To associate delegate callbacks with a particular utterance,
  35 //    without assuming anything undocumented about the protocol.
  36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
  37 //    where Nuance voices don't retain the utterance string and
  38 //    crash when trying to call willSpeakWord.
  39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
  40  @private
  41   base::scoped_nsobject<NSString> utterance_;
  42   bool didSpeak_;
  43 }
  44
  45 - (id)initWithUtterance:(NSString*)utterance;
  46 - (bool)startSpeakingRetainedUtterance;
  47 - (bool)startSpeakingString:(NSString*)utterance;
  48
  49 @end
  50
  51 class TtsPlatformImplMac : public TtsPlatformImpl {
  52  public:
  53   virtual bool PlatformImplAvailable() OVERRIDE {
  54     return true;
  55   }
  56
  57   virtual bool Speak(
  58       int utterance_id,
  59       const std::string& utterance,
  60       const std::string& lang,
  61       const VoiceData& voice,
  62       const UtteranceContinuousParameters& params) OVERRIDE;
  63
  64   virtual bool StopSpeaking() OVERRIDE;
  65
  66   virtual void Pause() OVERRIDE;
  67
  68   virtual void Resume() OVERRIDE;
  69
  70   virtual bool IsSpeaking() OVERRIDE;
  71
  72   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
  73
  74   // Called by ChromeTtsDelegate when we get a callback from the
  75   // native speech engine.
  76   void OnSpeechEvent(NSSpeechSynthesizer* sender,
  77                      TtsEventType event_type,
  78                      int char_index,
  79                      const std::string& error_message);
  80
  81   // Get the single instance of this class.
  82   static TtsPlatformImplMac* GetInstance();
  83
  84  private:
  85   TtsPlatformImplMac();
  86   virtual ~TtsPlatformImplMac();
  87
  88   base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  89   base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  90   int utterance_id_;
  91   std::string utterance_;
  92   int last_char_index_;
  93   bool paused_;
  94
  95   friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
  96
  97   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
  98 };
  99
 100 // static
 101 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 102   return TtsPlatformImplMac::GetInstance();
 103 }
 104
 105 bool TtsPlatformImplMac::Speak(
 106     int utterance_id,
 107     const std::string& utterance,
 108     const std::string& lang,
 109     const VoiceData& voice,
 110     const UtteranceContinuousParameters& params) {
 111   // TODO: convert SSML to SAPI xml. http://crbug.com/88072
 112   utterance_ = utterance;
 113   paused_ = false;
 114
 115   NSString* utterance_nsstring =
 116       [NSString stringWithUTF8String:utterance_.c_str()];
 117
 118   // Deliberately construct a new speech synthesizer every time Speak is
 119   // called, otherwise there's no way to know whether calls to the delegate
 120   // apply to the current utterance or a previous utterance. In
 121   // experimentation, the overhead of constructing and destructing a
 122   // NSSpeechSynthesizer is minimal.
 123   speech_synthesizer_.reset(
 124       [[SingleUseSpeechSynthesizer alloc]
 125         initWithUtterance:utterance_nsstring]);
 126   [speech_synthesizer_ setDelegate:delegate_];
 127
 128   if (!voice.native_voice_identifier.empty()) {
 129     NSString* native_voice_identifier =
 130         [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
 131     [speech_synthesizer_ setVoice:native_voice_identifier];
 132   }
 133
 134   utterance_id_ = utterance_id;
 135
 136   // TODO: support languages other than the default: crbug.com/88059
 137
 138   if (params.rate >= 0.0) {
 139     // The TTS api defines rate via words per minute. Let 200 be the default.
 140     [speech_synthesizer_
 141         setObject:[NSNumber numberWithInt:params.rate * 200]
 142         forProperty:NSSpeechRateProperty error:nil];
 143   }
 144
 145   if (params.pitch >= 0.0) {
 146     // The input is a float from 0.0 to 2.0, with 1.0 being the default.
 147     // Get the default pitch for this voice and modulate it by 50% - 150%.
 148     NSError* errorCode;
 149     NSNumber* defaultPitchObj =
 150         [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
 151                                          error:&errorCode];
 152     int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
 153     int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
 154     [speech_synthesizer_
 155         setObject:[NSNumber numberWithInt:newPitch]
 156         forProperty:NSSpeechPitchBaseProperty error:nil];
 157   }
 158
 159   if (params.volume >= 0.0) {
 160     [speech_synthesizer_
 161         setObject: [NSNumber numberWithFloat:params.volume]
 162         forProperty:NSSpeechVolumeProperty error:nil];
 163   }
 164
 165   bool success = [speech_synthesizer_ startSpeakingRetainedUtterance];
 166   if (success) {
 167     TtsController* controller = TtsController::GetInstance();
 168     controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, "");
 169   }
 170   return success;
 171 }
 172
 173 bool TtsPlatformImplMac::StopSpeaking() {
 174   if (speech_synthesizer_.get()) {
 175     [speech_synthesizer_ stopSpeaking];
 176     speech_synthesizer_.reset(nil);
 177   }
 178   paused_ = false;
 179   return true;
 180 }
 181
 182 void TtsPlatformImplMac::Pause() {
 183   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
 184     [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
 185     paused_ = true;
 186     TtsController::GetInstance()->OnTtsEvent(
 187         utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
 188   }
 189 }
 190
 191 void TtsPlatformImplMac::Resume() {
 192   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
 193     [speech_synthesizer_ continueSpeaking];
 194     paused_ = false;
 195     TtsController::GetInstance()->OnTtsEvent(
 196         utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
 197   }
 198 }
 199
 200 bool TtsPlatformImplMac::IsSpeaking() {
 201   if (speech_synthesizer_)
 202     return [speech_synthesizer_ isSpeaking];
 203   return false;
 204 }
 205
 206 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
 207   NSArray* voices = [NSSpeechSynthesizer availableVoices];
 208
 209   // Create a new temporary array of the available voices with
 210   // the default voice first.
 211   NSMutableArray* orderedVoices =
 212       [NSMutableArray arrayWithCapacity:[voices count]];
 213   NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
 214   if (defaultVoice) {
 215     [orderedVoices addObject:defaultVoice];
 216   }
 217   for (NSString* voiceIdentifier in voices) {
 218     if (![voiceIdentifier isEqualToString:defaultVoice])
 219       [orderedVoices addObject:voiceIdentifier];
 220   }
 221
 222   for (NSString* voiceIdentifier in orderedVoices) {
 223     outVoices->push_back(VoiceData());
 224     VoiceData& data = outVoices->back();
 225
 226     NSDictionary* attributes =
 227         [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
 228     NSString* name = [attributes objectForKey:NSVoiceName];
 229     NSString* gender = [attributes objectForKey:NSVoiceGender];
 230     NSString* localeIdentifier =
 231         [attributes objectForKey:NSVoiceLocaleIdentifier];
 232
 233     data.native = true;
 234     data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
 235     data.name = base::SysNSStringToUTF8(name);
 236
 237     NSDictionary* localeComponents =
 238         [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
 239     NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
 240     NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
 241     if (language && country) {
 242       data.lang =
 243           [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
 244     } else {
 245       data.lang = base::SysNSStringToUTF8(language);
 246     }
 247     if ([gender isEqualToString:NSVoiceGenderMale])
 248       data.gender = TTS_GENDER_MALE;
 249     else if ([gender isEqualToString:NSVoiceGenderFemale])
 250       data.gender = TTS_GENDER_FEMALE;
 251     else
 252       data.gender = TTS_GENDER_NONE;
 253     data.events.insert(TTS_EVENT_START);
 254     data.events.insert(TTS_EVENT_END);
 255     data.events.insert(TTS_EVENT_WORD);
 256     data.events.insert(TTS_EVENT_ERROR);
 257     data.events.insert(TTS_EVENT_CANCELLED);
 258     data.events.insert(TTS_EVENT_INTERRUPTED);
 259     data.events.insert(TTS_EVENT_PAUSE);
 260     data.events.insert(TTS_EVENT_RESUME);
 261   }
 262 }
 263
 264 void TtsPlatformImplMac::OnSpeechEvent(
 265     NSSpeechSynthesizer* sender,
 266     TtsEventType event_type,
 267     int char_index,
 268     const std::string& error_message) {
 269   // Don't send events from an utterance that's already completed.
 270   // This depends on the fact that we construct a new NSSpeechSynthesizer
 271   // each time we call Speak.
 272   if (sender != speech_synthesizer_.get())
 273     return;
 274
 275   if (event_type == TTS_EVENT_END)
 276     char_index = utterance_.size();
 277   TtsController* controller = TtsController::GetInstance();
 278 controller->OnTtsEvent(
 279       utterance_id_, event_type, char_index, error_message);
 280   last_char_index_ = char_index;
 281 }
 282
 283 TtsPlatformImplMac::TtsPlatformImplMac() {
 284   utterance_id_ = -1;
 285   paused_ = false;
 286
 287   delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
 288 }
 289
 290 TtsPlatformImplMac::~TtsPlatformImplMac() {
 291 }
 292
 293 // static
 294 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
 295   return Singleton<TtsPlatformImplMac>::get();
 296 }
 297
 298 @implementation ChromeTtsDelegate
 299
 300 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
 301   if ((self = [super init])) {
 302     ttsImplMac_ = ttsImplMac;
 303   }
 304   return self;
 305 }
 306
 307 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 308         didFinishSpeaking:(BOOL)finished_speaking {
 309   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
 310 }
 311
 312 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 313             willSpeakWord:(NSRange)character_range
 314                  ofString:(NSString*)string {
 315   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
 316       character_range.location, "");
 317 }
 318
 319 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 320  didEncounterErrorAtIndex:(NSUInteger)character_index
 321                  ofString:(NSString*)string
 322                   message:(NSString*)message {
 323   std::string message_utf8 = base::SysNSStringToUTF8(message);
 324   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
 325       message_utf8);
 326 }
 327
 328 @end
 329
 330 @implementation SingleUseSpeechSynthesizer
 331
 332 - (id)initWithUtterance:(NSString*)utterance {
 333   self = [super init];
 334   if (self) {
 335     utterance_.reset([utterance retain]);
 336     didSpeak_ = false;
 337   }
 338   return self;
 339 }
 340
 341 - (bool)startSpeakingRetainedUtterance {
 342   CHECK(!didSpeak_);
 343   CHECK(utterance_);
 344   didSpeak_ = true;
 345   return [super startSpeakingString:utterance_];
 346 }
 347
 348 - (bool)startSpeakingString:(NSString*)utterance {
 349   CHECK(false);
 350   return false;
 351 }
 352
 353 @end