dom/media/webspeech/synth/nsSpeechTask.cpp

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
   2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
   3 /* This Source Code Form is subject to the terms of the Mozilla Public
   4  * License, v. 2.0. If a copy of the MPL was not distributed with this
   5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
   6
   7 #include "AudioChannelService.h"
   8 #include "AudioSegment.h"
   9 #include "nsSpeechTask.h"
  10 #include "nsSynthVoiceRegistry.h"
  11 #include "nsXULAppAPI.h"
  12 #include "SharedBuffer.h"
  13 #include "SpeechSynthesis.h"
  14 #include "nsGlobalWindowInner.h"
  15
// Drop any LOG macro defined by an earlier include and redefine it so that
// LOG() in this file forwards to MOZ_LOG with the module returned by
// GetSpeechSynthLog().
#undef LOG
extern mozilla::LogModule* GetSpeechSynthLog();
#define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)

// NOTE(review): AUDIO_TRACK is not referenced in the visible part of this
// file -- confirm whether it is still needed.
#define AUDIO_TRACK 1
  21
  22 namespace mozilla::dom {
  23
// nsSpeechTask

// Cycle-collection boilerplate. The members named here (mSpeechSynthesis,
// mUtterance, mCallback) are the ones handed to the cycle-collection macro.
NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance,
                              mCallback)

// Interface map: the interfaces listed below are the ones this class exposes
// through the map; nsISupports is disambiguated via nsISpeechTask.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
  NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
NS_INTERFACE_MAP_END

// Cycle-collecting AddRef/Release implementations for nsSpeechTask.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
  38
  39 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance,
  40                            bool aShouldResistFingerprinting)
  41     : mUtterance(aUtterance),
  42       mInited(false),
  43       mPrePaused(false),
  44       mPreCanceled(false),
  45       mCallback(nullptr),
  46       mShouldResistFingerprinting(aShouldResistFingerprinting),
  47       mState(STATE_PENDING) {
  48   mText = aUtterance->mText;
  49   mVolume = aUtterance->Volume();
  50 }
  51
// Creates a pending task that has no utterance attached (mUtterance is null);
// the volume and text are supplied directly by the caller.
nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText,
                           bool aShouldResistFingerprinting)
    : mUtterance(nullptr),
      mVolume(aVolume),
      mText(aText),
      mInited(false),
      mPrePaused(false),
      mPreCanceled(false),
      mCallback(nullptr),
      mShouldResistFingerprinting(aShouldResistFingerprinting),
      mState(STATE_PENDING) {}
  63
  64 nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); }
  65
  66 void nsSpeechTask::Init() { mInited = true; }
  67
  68 void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) {
  69   mChosenVoiceURI = aUri;
  70 }
  71
  72 NS_IMETHODIMP
  73 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) {
  74   MOZ_ASSERT(XRE_IsParentProcess());
  75
  76   LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
  77
  78   mCallback = aCallback;
  79
  80   return NS_OK;
  81 }
  82
  83 NS_IMETHODIMP
  84 nsSpeechTask::DispatchStart() {
  85   nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
  86   return DispatchStartImpl();
  87 }
  88
  89 nsresult nsSpeechTask::DispatchStartImpl() {
  90   return DispatchStartImpl(mChosenVoiceURI);
  91 }
  92
  93 nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) {
  94   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
  95
  96   MOZ_ASSERT(mUtterance);
  97   if (NS_WARN_IF(mState != STATE_PENDING)) {
  98     return NS_ERROR_NOT_AVAILABLE;
  99   }
 100
 101   CreateAudioChannelAgent();
 102
 103   mState = STATE_SPEAKING;
 104   mUtterance->mChosenVoiceURI = aUri;
 105   mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns);
 106
 107   return NS_OK;
 108 }
 109
 110 NS_IMETHODIMP
 111 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) {
 112   // After we end, no callback functions should go through.
 113   mCallback = nullptr;
 114
 115   if (!mPreCanceled) {
 116     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
 117   }
 118
 119   return DispatchEndImpl(aElapsedTime, aCharIndex);
 120 }
 121
 122 nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime,
 123                                        uint32_t aCharIndex) {
 124   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
 125
 126   DestroyAudioChannelAgent();
 127
 128   MOZ_ASSERT(mUtterance);
 129   if (NS_WARN_IF(mState == STATE_ENDED)) {
 130     return NS_ERROR_NOT_AVAILABLE;
 131   }
 132
 133   RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
 134
 135   if (mSpeechSynthesis) {
 136     mSpeechSynthesis->OnEnd(this);
 137   }
 138
 139   mState = STATE_ENDED;
 140   utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr,
 141                                           aElapsedTime, u""_ns);
 142
 143   return NS_OK;
 144 }
 145
 146 NS_IMETHODIMP
 147 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) {
 148   return DispatchPauseImpl(aElapsedTime, aCharIndex);
 149 }
 150
 151 nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime,
 152                                          uint32_t aCharIndex) {
 153   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
 154   MOZ_ASSERT(mUtterance);
 155   if (NS_WARN_IF(mUtterance->mPaused)) {
 156     return NS_ERROR_NOT_AVAILABLE;
 157   }
 158   if (NS_WARN_IF(mState == STATE_ENDED)) {
 159     return NS_ERROR_NOT_AVAILABLE;
 160   }
 161
 162   mUtterance->mPaused = true;
 163   if (mState == STATE_SPEAKING) {
 164     mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr,
 165                                              aElapsedTime, u""_ns);
 166   }
 167
 168   return NS_OK;
 169 }
 170
 171 NS_IMETHODIMP
 172 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) {
 173   return DispatchResumeImpl(aElapsedTime, aCharIndex);
 174 }
 175
 176 nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime,
 177                                           uint32_t aCharIndex) {
 178   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
 179   MOZ_ASSERT(mUtterance);
 180   if (NS_WARN_IF(!(mUtterance->mPaused))) {
 181     return NS_ERROR_NOT_AVAILABLE;
 182   }
 183   if (NS_WARN_IF(mState == STATE_ENDED)) {
 184     return NS_ERROR_NOT_AVAILABLE;
 185   }
 186
 187   mUtterance->mPaused = false;
 188   if (mState == STATE_SPEAKING) {
 189     mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr,
 190                                              aElapsedTime, u""_ns);
 191   }
 192
 193   return NS_OK;
 194 }
 195
 196 void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) {
 197   DispatchError(aElapsedTime, aCharIndex);
 198 }
 199
 200 NS_IMETHODIMP
 201 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) {
 202   if (!mPreCanceled) {
 203     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
 204   }
 205
 206   return DispatchErrorImpl(aElapsedTime, aCharIndex);
 207 }
 208
 209 nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime,
 210                                          uint32_t aCharIndex) {
 211   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl"));
 212
 213   DestroyAudioChannelAgent();
 214
 215   MOZ_ASSERT(mUtterance);
 216   if (NS_WARN_IF(mState == STATE_ENDED)) {
 217     return NS_ERROR_NOT_AVAILABLE;
 218   }
 219
 220   if (mSpeechSynthesis) {
 221     mSpeechSynthesis->OnEnd(this);
 222   }
 223
 224   mState = STATE_ENDED;
 225   mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr,
 226                                            aElapsedTime, u""_ns);
 227   return NS_OK;
 228 }
 229
 230 NS_IMETHODIMP
 231 nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime,
 232                                uint32_t aCharIndex, uint32_t aCharLength,
 233                                uint8_t argc) {
 234   return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength,
 235                               argc);
 236 }
 237
 238 nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
 239                                             float aElapsedTime,
 240                                             uint32_t aCharIndex,
 241                                             uint32_t aCharLength,
 242                                             uint8_t argc) {
 243   MOZ_ASSERT(mUtterance);
 244   if (NS_WARN_IF(mState != STATE_SPEAKING)) {
 245     return NS_ERROR_NOT_AVAILABLE;
 246   }
 247   mUtterance->DispatchSpeechSynthesisEvent(
 248       u"boundary"_ns, aCharIndex,
 249       argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr,
 250       aElapsedTime, aName);
 251
 252   return NS_OK;
 253 }
 254
 255 NS_IMETHODIMP
 256 nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime,
 257                            uint32_t aCharIndex) {
 258   return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
 259 }
 260
 261 nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
 262                                         float aElapsedTime,
 263                                         uint32_t aCharIndex) {
 264   MOZ_ASSERT(mUtterance);
 265   if (NS_WARN_IF(mState != STATE_SPEAKING)) {
 266     return NS_ERROR_NOT_AVAILABLE;
 267   }
 268   mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr,
 269                                            aElapsedTime, aName);
 270   return NS_OK;
 271 }
 272
 273 void nsSpeechTask::Pause() {
 274   MOZ_ASSERT(XRE_IsParentProcess());
 275
 276   if (mCallback) {
 277     DebugOnly<nsresult> rv = mCallback->OnPause();
 278     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
 279   }
 280
 281   if (!mInited) {
 282     mPrePaused = true;
 283   }
 284 }
 285
 286 void nsSpeechTask::Resume() {
 287   MOZ_ASSERT(XRE_IsParentProcess());
 288
 289   if (mCallback) {
 290     DebugOnly<nsresult> rv = mCallback->OnResume();
 291     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
 292                          "Unable to call onResume() callback");
 293   }
 294
 295   if (mPrePaused) {
 296     mPrePaused = false;
 297     nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
 298   }
 299 }
 300
 301 void nsSpeechTask::Cancel() {
 302   MOZ_ASSERT(XRE_IsParentProcess());
 303
 304   LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
 305
 306   if (mCallback) {
 307     DebugOnly<nsresult> rv = mCallback->OnCancel();
 308     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
 309                          "Unable to call onCancel() callback");
 310   }
 311
 312   if (!mInited) {
 313     mPreCanceled = true;
 314   }
 315 }
 316
 317 void nsSpeechTask::ForceEnd() {
 318   if (!mInited) {
 319     mPreCanceled = true;
 320   }
 321
 322   DispatchEnd(0, 0);
 323 }
 324
 325 void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) {
 326   mSpeechSynthesis = aSpeechSynthesis;
 327 }
 328
 329 void nsSpeechTask::CreateAudioChannelAgent() {
 330   if (!mUtterance) {
 331     return;
 332   }
 333
 334   if (mAudioChannelAgent) {
 335     mAudioChannelAgent->NotifyStoppedPlaying();
 336   }
 337
 338   mAudioChannelAgent = new AudioChannelAgent();
 339   mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwnerWindow(), this);
 340
 341   nsresult rv = mAudioChannelAgent->NotifyStartedPlaying(
 342       AudioChannelService::AudibleState::eAudible);
 343   if (NS_WARN_IF(NS_FAILED(rv))) {
 344     return;
 345   }
 346
 347   mAudioChannelAgent->PullInitialUpdate();
 348 }
 349
 350 void nsSpeechTask::DestroyAudioChannelAgent() {
 351   if (mAudioChannelAgent) {
 352     mAudioChannelAgent->NotifyStoppedPlaying();
 353     mAudioChannelAgent = nullptr;
 354   }
 355 }
 356
 357 NS_IMETHODIMP
 358 nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) {
 359   SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume);
 360   return NS_OK;
 361 }
 362
 363 NS_IMETHODIMP
 364 nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) {
 365   if (!mUtterance) {
 366     return NS_OK;
 367   }
 368
 369   if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) {
 370     Resume();
 371   } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED &&
 372              !mUtterance->mPaused) {
 373     Pause();
 374   }
 375   return NS_OK;
 376 }
 377
 378 NS_IMETHODIMP
 379 nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) {
 380   // This is not supported yet.
 381   return NS_OK;
 382 }
 383
 384 void nsSpeechTask::SetAudioOutputVolume(float aVolume) {
 385   if (mCallback) {
 386     mCallback->OnVolumeChanged(aVolume);
 387   }
 388 }
 389
 390 }  // namespace mozilla::dom