chrome/browser/speech/tts_mac.mm

   1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include <string>
   6
   7 #include "base/mac/scoped_nsobject.h"
   8 #include "base/memory/singleton.h"
   9 #include "base/strings/sys_string_conversions.h"
  10 #include "base/values.h"
  11 #include "chrome/browser/extensions/extension_function.h"
  12 #include "chrome/browser/speech/tts_controller.h"
  13 #include "chrome/browser/speech/tts_platform.h"
  14
  15 #import <Cocoa/Cocoa.h>
  16
  17 class TtsPlatformImplMac;
  18
  19 @interface ChromeTtsDelegate : NSObject <NSSpeechSynthesizerDelegate> {
  20  @private
  21   TtsPlatformImplMac* ttsImplMac_;  // weak.
  22 }
  23
  24 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac;
  25
  26 @end
  27
  28 // Subclass of NSSpeechSynthesizer that takes an utterance
  29 // string on initialization, retains it and only allows it
  30 // to be spoken once.
  31 //
  32 // We construct a new NSSpeechSynthesizer for each utterance, for
  33 // two reasons:
  34 // 1. To associate delegate callbacks with a particular utterance,
  35 //    without assuming anything undocumented about the protocol.
  36 // 2. To work around http://openradar.appspot.com/radar?id=2854403,
  37 //    where Nuance voices don't retain the utterance string and
  38 //    crash when trying to call willSpeakWord.
  39 @interface SingleUseSpeechSynthesizer : NSSpeechSynthesizer {
  40  @private
  41   base::scoped_nsobject<NSString> utterance_;
  42   bool didSpeak_;
  43 }
  44
  45 - (id)initWithUtterance:(NSString*)utterance;
  46 - (bool)startSpeakingRetainedUtterance;
  47 - (bool)startSpeakingString:(NSString*)utterance;
  48
  49 @end
  50
  51 class TtsPlatformImplMac : public TtsPlatformImpl {
  52  public:
  53   virtual bool PlatformImplAvailable() OVERRIDE {
  54     return true;
  55   }
  56
  57   virtual bool Speak(
  58       int utterance_id,
  59       const std::string& utterance,
  60       const std::string& lang,
  61       const VoiceData& voice,
  62       const UtteranceContinuousParameters& params) OVERRIDE;
  63
  64   virtual bool StopSpeaking() OVERRIDE;
  65
  66   virtual void Pause() OVERRIDE;
  67
  68   virtual void Resume() OVERRIDE;
  69
  70   virtual bool IsSpeaking() OVERRIDE;
  71
  72   virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
  73
  74   // Called by ChromeTtsDelegate when we get a callback from the
  75   // native speech engine.
  76   void OnSpeechEvent(NSSpeechSynthesizer* sender,
  77                      TtsEventType event_type,
  78                      int char_index,
  79                      const std::string& error_message);
  80
  81   // Get the single instance of this class.
  82   static TtsPlatformImplMac* GetInstance();
  83
  84  private:
  85   TtsPlatformImplMac();
  86   virtual ~TtsPlatformImplMac();
  87
  88   base::scoped_nsobject<SingleUseSpeechSynthesizer> speech_synthesizer_;
  89   base::scoped_nsobject<ChromeTtsDelegate> delegate_;
  90   int utterance_id_;
  91   std::string utterance_;
  92   bool sent_start_event_;
  93   int last_char_index_;
  94   bool paused_;
  95
  96   friend struct DefaultSingletonTraits<TtsPlatformImplMac>;
  97
  98   DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplMac);
  99 };
 100
 101 // static
 102 TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 103   return TtsPlatformImplMac::GetInstance();
 104 }
 105
 106 bool TtsPlatformImplMac::Speak(
 107     int utterance_id,
 108     const std::string& utterance,
 109     const std::string& lang,
 110     const VoiceData& voice,
 111     const UtteranceContinuousParameters& params) {
 112   // TODO: convert SSML to SAPI xml. http://crbug.com/88072
 113   utterance_ = utterance;
 114   paused_ = false;
 115
 116   NSString* utterance_nsstring =
 117       [NSString stringWithUTF8String:utterance_.c_str()];
 118
 119   // Deliberately construct a new speech synthesizer every time Speak is
 120   // called, otherwise there's no way to know whether calls to the delegate
 121   // apply to the current utterance or a previous utterance. In
 122   // experimentation, the overhead of constructing and destructing a
 123   // NSSpeechSynthesizer is minimal.
 124   speech_synthesizer_.reset(
 125       [[SingleUseSpeechSynthesizer alloc]
 126         initWithUtterance:utterance_nsstring]);
 127   [speech_synthesizer_ setDelegate:delegate_];
 128
 129   if (!voice.native_voice_identifier.empty()) {
 130     NSString* native_voice_identifier =
 131         [NSString stringWithUTF8String:voice.native_voice_identifier.c_str()];
 132     [speech_synthesizer_ setVoice:native_voice_identifier];
 133   }
 134
 135   utterance_id_ = utterance_id;
 136   sent_start_event_ = false;
 137
 138   // TODO: support languages other than the default: crbug.com/88059
 139
 140   if (params.rate >= 0.0) {
 141     // The TTS api defines rate via words per minute. Let 200 be the default.
 142     [speech_synthesizer_
 143         setObject:[NSNumber numberWithInt:params.rate * 200]
 144         forProperty:NSSpeechRateProperty error:nil];
 145   }
 146
 147   if (params.pitch >= 0.0) {
 148     // The input is a float from 0.0 to 2.0, with 1.0 being the default.
 149     // Get the default pitch for this voice and modulate it by 50% - 150%.
 150     NSError* errorCode;
 151     NSNumber* defaultPitchObj =
 152         [speech_synthesizer_ objectForProperty:NSSpeechPitchBaseProperty
 153                                          error:&errorCode];
 154     int defaultPitch = defaultPitchObj ? [defaultPitchObj intValue] : 48;
 155     int newPitch = static_cast<int>(defaultPitch * (0.5 * params.pitch + 0.5));
 156     [speech_synthesizer_
 157         setObject:[NSNumber numberWithInt:newPitch]
 158         forProperty:NSSpeechPitchBaseProperty error:nil];
 159   }
 160
 161   if (params.volume >= 0.0) {
 162     [speech_synthesizer_
 163         setObject: [NSNumber numberWithFloat:params.volume]
 164         forProperty:NSSpeechVolumeProperty error:nil];
 165   }
 166
 167   return [speech_synthesizer_ startSpeakingRetainedUtterance];
 168 }
 169
 170 bool TtsPlatformImplMac::StopSpeaking() {
 171   if (speech_synthesizer_.get()) {
 172     [speech_synthesizer_ stopSpeaking];
 173     speech_synthesizer_.reset(nil);
 174   }
 175   paused_ = false;
 176   return true;
 177 }
 178
 179 void TtsPlatformImplMac::Pause() {
 180   if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
 181     [speech_synthesizer_ pauseSpeakingAtBoundary:NSSpeechImmediateBoundary];
 182     paused_ = true;
 183     TtsController::GetInstance()->OnTtsEvent(
 184         utterance_id_, TTS_EVENT_PAUSE, last_char_index_, "");
 185   }
 186 }
 187
 188 void TtsPlatformImplMac::Resume() {
 189   if (speech_synthesizer_.get() && utterance_id_ && paused_) {
 190     [speech_synthesizer_ continueSpeaking];
 191     paused_ = false;
 192     TtsController::GetInstance()->OnTtsEvent(
 193         utterance_id_, TTS_EVENT_RESUME, last_char_index_, "");
 194   }
 195 }
 196
 197 bool TtsPlatformImplMac::IsSpeaking() {
 198   return [NSSpeechSynthesizer isAnyApplicationSpeaking];
 199 }
 200
 201 void TtsPlatformImplMac::GetVoices(std::vector<VoiceData>* outVoices) {
 202   NSArray* voices = [NSSpeechSynthesizer availableVoices];
 203
 204   // Create a new temporary array of the available voices with
 205   // the default voice first.
 206   NSMutableArray* orderedVoices =
 207       [NSMutableArray arrayWithCapacity:[voices count]];
 208   NSString* defaultVoice = [NSSpeechSynthesizer defaultVoice];
 209   [orderedVoices addObject:defaultVoice];
 210   for (NSString* voiceIdentifier in voices) {
 211     if (![voiceIdentifier isEqualToString:defaultVoice])
 212       [orderedVoices addObject:voiceIdentifier];
 213   }
 214
 215   for (NSString* voiceIdentifier in orderedVoices) {
 216     outVoices->push_back(VoiceData());
 217     VoiceData& data = outVoices->back();
 218
 219     NSDictionary* attributes =
 220         [NSSpeechSynthesizer attributesForVoice:voiceIdentifier];
 221     NSString* name = [attributes objectForKey:NSVoiceName];
 222     NSString* gender = [attributes objectForKey:NSVoiceGender];
 223     NSString* localeIdentifier =
 224         [attributes objectForKey:NSVoiceLocaleIdentifier];
 225
 226     data.native = true;
 227     data.native_voice_identifier = base::SysNSStringToUTF8(voiceIdentifier);
 228     data.name = base::SysNSStringToUTF8(name);
 229
 230     NSDictionary* localeComponents =
 231         [NSLocale componentsFromLocaleIdentifier:localeIdentifier];
 232     NSString* language = [localeComponents objectForKey:NSLocaleLanguageCode];
 233     NSString* country = [localeComponents objectForKey:NSLocaleCountryCode];
 234     if (language && country) {
 235       data.lang =
 236           [[NSString stringWithFormat:@"%@-%@", language, country] UTF8String];
 237     } else {
 238       data.lang = base::SysNSStringToUTF8(language);
 239     }
 240     if ([gender isEqualToString:NSVoiceGenderMale])
 241       data.gender = TTS_GENDER_MALE;
 242     else if ([gender isEqualToString:NSVoiceGenderFemale])
 243       data.gender = TTS_GENDER_FEMALE;
 244     else
 245       data.gender = TTS_GENDER_NONE;
 246     data.events.insert(TTS_EVENT_START);
 247     data.events.insert(TTS_EVENT_END);
 248     data.events.insert(TTS_EVENT_WORD);
 249     data.events.insert(TTS_EVENT_ERROR);
 250     data.events.insert(TTS_EVENT_CANCELLED);
 251     data.events.insert(TTS_EVENT_INTERRUPTED);
 252     data.events.insert(TTS_EVENT_PAUSE);
 253     data.events.insert(TTS_EVENT_RESUME);
 254   }
 255 }
 256
 257 void TtsPlatformImplMac::OnSpeechEvent(
 258     NSSpeechSynthesizer* sender,
 259     TtsEventType event_type,
 260     int char_index,
 261     const std::string& error_message) {
 262   // Don't send events from an utterance that's already completed.
 263   // This depends on the fact that we construct a new NSSpeechSynthesizer
 264   // each time we call Speak.
 265   if (sender != speech_synthesizer_.get())
 266     return;
 267
 268   if (event_type == TTS_EVENT_END)
 269     char_index = utterance_.size();
 270   TtsController* controller = TtsController::GetInstance();
 271   if (event_type == TTS_EVENT_WORD && !sent_start_event_) {
 272     controller->OnTtsEvent(
 273         utterance_id_, TTS_EVENT_START, 0, "");
 274     sent_start_event_ = true;
 275   }
 276   controller->OnTtsEvent(
 277       utterance_id_, event_type, char_index, error_message);
 278   last_char_index_ = char_index;
 279 }
 280
 281 TtsPlatformImplMac::TtsPlatformImplMac() {
 282   utterance_id_ = -1;
 283   sent_start_event_ = true;
 284   paused_ = false;
 285
 286   delegate_.reset([[ChromeTtsDelegate alloc] initWithPlatformImplMac:this]);
 287 }
 288
 289 TtsPlatformImplMac::~TtsPlatformImplMac() {
 290 }
 291
 292 // static
 293 TtsPlatformImplMac* TtsPlatformImplMac::GetInstance() {
 294   return Singleton<TtsPlatformImplMac>::get();
 295 }
 296
 297 @implementation ChromeTtsDelegate
 298
 299 - (id)initWithPlatformImplMac:(TtsPlatformImplMac*)ttsImplMac {
 300   if ((self = [super init])) {
 301     ttsImplMac_ = ttsImplMac;
 302   }
 303   return self;
 304 }
 305
 306 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 307         didFinishSpeaking:(BOOL)finished_speaking {
 308   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_END, 0, "");
 309 }
 310
 311 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 312             willSpeakWord:(NSRange)character_range
 313                  ofString:(NSString*)string {
 314   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_WORD,
 315       character_range.location, "");
 316 }
 317
 318 - (void)speechSynthesizer:(NSSpeechSynthesizer*)sender
 319  didEncounterErrorAtIndex:(NSUInteger)character_index
 320                  ofString:(NSString*)string
 321                   message:(NSString*)message {
 322   std::string message_utf8 = base::SysNSStringToUTF8(message);
 323   ttsImplMac_->OnSpeechEvent(sender, TTS_EVENT_ERROR, character_index,
 324       message_utf8);
 325 }
 326
 327 @end
 328
 329 @implementation SingleUseSpeechSynthesizer
 330
 331 - (id)initWithUtterance:(NSString*)utterance {
 332   self = [super init];
 333   if (self) {
 334     utterance_.reset([utterance retain]);
 335     didSpeak_ = false;
 336   }
 337   return self;
 338 }
 339
 340 - (bool)startSpeakingRetainedUtterance {
 341   CHECK(!didSpeak_);
 342   CHECK(utterance_);
 343   didSpeak_ = true;
 344   return [super startSpeakingString:utterance_];
 345 }
 346
 347 - (bool)startSpeakingString:(NSString*)utterance {
 348   CHECK(false);
 349   return false;
 350 }
 351
 352 @end