dom/media/driftcontrol/DynamicResampler.h

   1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
   2 /* This Source Code Form is subject to the terms of the Mozilla Public
   3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
   4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
   5
   6 #ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
   7 #define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
   8
   9 #include "AudioRingBuffer.h"
  10 #include "AudioSegment.h"
  11 #include "TimeUnits.h"
  12 #include "WavDumper.h"
  13
  14 #include <speex/speex_resampler.h>
  15
  16 namespace mozilla {
  17
  18 const uint32_t STEREO = 2;
  19
  20 /**
  21  * DynamicResampler allows updating on the fly the output sample rate and the
  22  * number of channels. In addition to that, it maintains an internal buffer for
  23  * the input data and allows pre-buffering as well. The Resample() method
  24  * strives to provide the requested number of output frames by using the input
  25  * data including any pre-buffering. If there are fewer frames in the internal
  26  * buffer than is requested, the internal buffer is padded with enough silence
  27  * to allow the requested to be resampled and returned.
  28  *
  29  * Input data buffering makes use of the AudioRingBuffer. The capacity of the
  30  * buffer is initially 100ms of audio and it is pre-allocated during
  31  * SetSampleFormat(). Should the input data grow beyond that, the input buffer
  32  * is re-allocated on the fly. In addition to that, due to special feature of
  33  * AudioRingBuffer, no extra copies take place when the input data is fed to the
  34  * resampler.
  35  *
  36  * The sample format must be set before using any method.
  37  *
  38  * The DynamicResampler is not thread-safe, so all the methods appart from the
  39  * constructor must be called on the same thread.
  40  */
  41 class DynamicResampler final {
  42  public:
  43   /**
  44    * Provide the initial input and output rate and the amount of pre-buffering.
  45    * The channel count will be set to stereo. Memory allocation will take
  46    * place. The input buffer is non-interleaved.
  47    */
  48   DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
  49                    uint32_t aInputPreBufferFrameCount = 0);
  50   ~DynamicResampler();
  51
  52   /**
  53    * Set the sample format type to float or short.
  54    */
  55   void SetSampleFormat(AudioSampleFormat aFormat);
  56   uint32_t GetInRate() const { return mInRate; }
  57   uint32_t GetChannels() const { return mChannels; }
  58
  59   /**
  60    * Append `aInFrames` number of frames from `aInBuffer` to the internal input
  61    * buffer. Memory copy/move takes place.
  62    */
  63   void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);
  64   void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);
  65   /**
  66    * Append `aInFrames` number of frames of silence to the internal input
  67    * buffer. Memory copy/move takes place.
  68    */
  69   void AppendInputSilence(const uint32_t aInFrames);
  70   /**
  71    * Return the number of frames the internal input buffer can store.
  72    */
  73   uint32_t InFramesBufferSize() const;
  74   /**
  75    * Return the number of frames stored in the internal input buffer.
  76    */
  77   uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
  78
  79   /**
  80    * Prepends existing input data with a silent pre-buffer if not already done.
  81    * Data will be prepended so that after resampling aDuration of data,
  82    * the buffering level will be as close as possible to
  83    * mInputPreBufferFrameCount, which is the desired buffering level.
  84    */
  85   void EnsurePreBuffer(media::TimeUnit aDuration);
  86
  87   /**
  88    * Set the number of frames that should be used for input pre-buffering.
  89    */
  90   void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount);
  91
  92   /*
  93    * Resample as much frames as needed from the internal input buffer to the
  94    * `aOutBuffer` in order to provide all `aOutFrames`.
  95    *
  96    * On first call, prepends the input buffer with silence so that after
  97    * resampling aOutFrames frames of data, the input buffer holds data as close
  98    * as possible to the configured pre-buffer size.
  99    *
 100    * If there are not enough input frames to provide the requested output
 101    * frames, the input buffer is padded with enough silence to allow the
 102    * requested frames to be resampled, and the pre-buffer is reset so that the
 103    * next call will be treated as the first.
 104    *
 105    * Returns true if the internal input buffer underran and had to be padded
 106    * with silence, otherwise false.
 107    */
 108   bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);
 109   bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,
 110                 uint32_t aChannelIndex);
 111
 112   /**
 113    * Update the output rate or/and the channel count. If a value is not updated
 114    * compared to the current one nothing happens. Changing the `aInRate`
 115    * results in recalculation in the resampler. Changing `aChannels` results in
 116    * the reallocation of the internal input buffer with the exception of
 117    * changes between mono to stereo and vice versa where no reallocation takes
 118    * place. A stereo internal input buffer is always maintained even if the
 119    * sound is mono.
 120    */
 121   void UpdateResampler(uint32_t aInRate, uint32_t aChannels);
 122
 123  private:
 124   template <typename T>
 125   void AppendInputInternal(Span<const T* const>& aInBuffer,
 126                            uint32_t aInFrames) {
 127     MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
 128     for (uint32_t i = 0; i < mChannels; ++i) {
 129       PushInFrames(aInBuffer[i], aInFrames, i);
 130     }
 131   }
 132
 133   void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
 134                         float* aOutBuffer, uint32_t* aOutFrames,
 135                         uint32_t aChannelIndex);
 136   void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
 137                         int16_t* aOutBuffer, uint32_t* aOutFrames,
 138                         uint32_t aChannelIndex);
 139
 140   template <typename T>
 141   bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,
 142                         uint32_t aChannelIndex) {
 143     MOZ_ASSERT(mInRate);
 144     MOZ_ASSERT(mOutRate);
 145     MOZ_ASSERT(mChannels);
 146     MOZ_ASSERT(aChannelIndex < mChannels);
 147     MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
 148     MOZ_ASSERT(aOutFrames);
 149
 150     uint32_t outFramesNeeded = aOutFrames;
 151     T* nextOutFrame = aOutBuffer;
 152     if (mInRate == mOutRate) {
 153       if (!mResamplerIsBypassed) {
 154         uint32_t latency = speex_resampler_get_input_latency(mResampler);
 155         mInternalInBuffer[aChannelIndex].ReadNoCopy(
 156             [&](const Span<const T>& aInBuffer) -> uint32_t {
 157               // Although unlikely with the sample rates used with this class,
 158               // the resampler input latency may temporarily be higher than
 159               // indicated, after a change in resampling rate that reduces the
 160               // indicated latency. The resampler's "magic" samples cause
 161               // this. All frames in the resampler are extracted when
 162               // `latency` output frames have been extracted.
 163               uint32_t outFramesResampled = std::min(outFramesNeeded, latency);
 164               uint32_t inFrames = aInBuffer.Length();
 165               ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame,
 166                                &outFramesResampled, aChannelIndex);
 167               nextOutFrame += outFramesResampled;
 168               outFramesNeeded -= outFramesResampled;
 169               if (outFramesResampled == latency) {
 170                 mResamplerIsBypassed = true;
 171                 // The last `latency` frames of input to the resampler will not
 172                 // be extracted from the resampler. Leave them in
 173                 // mInternalInBuffer to be copied directly to nextOutFrame.
 174                 MOZ_ASSERT(inFrames >= latency);
 175                 return inFrames - latency;
 176               }
 177               return inFrames;
 178             });
 179       }
 180       bool underrun = false;
 181       if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();
 182           buffered < outFramesNeeded) {
 183         underrun = true;
 184         mIsPreBufferSet = false;
 185         mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded -
 186                                                       buffered);
 187       }
 188       DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read(
 189           Span(nextOutFrame, outFramesNeeded));
 190       MOZ_ASSERT(numFramesRead == outFramesNeeded);
 191       // Workaround to avoid discontinuity when the speex resampler operates
 192       // again. Feed it with the last 20 frames to warm up the internal memory
 193       // of the resampler and then skip memory equals to resampler's input
 194       // latency.
 195       mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);
 196       if (aChannelIndex == 0 && !mIsWarmingUp) {
 197         mInputStreamFile.Write(nextOutFrame, outFramesNeeded);
 198         mOutputStreamFile.Write(nextOutFrame, outFramesNeeded);
 199       }
 200       return underrun;
 201     }
 202
 203     auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t {
 204       uint32_t outFramesResampled = outFramesNeeded;
 205       uint32_t inFrames = aInLength;
 206       ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled,
 207                        aChannelIndex);
 208       nextOutFrame += outFramesResampled;
 209       outFramesNeeded -= outFramesResampled;
 210       mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames);
 211       return inFrames;
 212     };
 213
 214     MOZ_ASSERT(!mResamplerIsBypassed);
 215     mInternalInBuffer[aChannelIndex].ReadNoCopy(
 216         [&](const Span<const T>& aInBuffer) -> uint32_t {
 217           if (!outFramesNeeded) {
 218             return 0;
 219           }
 220           return resample(aInBuffer.Elements(), aInBuffer.Length());
 221         });
 222
 223     if (outFramesNeeded == 0) {
 224       return false;
 225     }
 226
 227     while (outFramesNeeded > 0) {
 228       MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);
 229       // Round up.
 230       uint32_t totalInFramesNeeded =
 231           ((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate)
 232               .value();
 233       resample(nullptr, totalInFramesNeeded);
 234     }
 235     mIsPreBufferSet = false;
 236     return true;
 237   }
 238
 239   template <typename T>
 240   void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
 241                     uint32_t aChannelIndex) {
 242     MOZ_ASSERT(aInBuffer);
 243     MOZ_ASSERT(aInFrames);
 244     MOZ_ASSERT(mChannels);
 245     MOZ_ASSERT(aChannelIndex < mChannels);
 246     MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
 247     EnsureInputBufferSizeInFrames(
 248         mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames);
 249     mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
 250   }
 251
 252   void WarmUpResampler(bool aSkipLatency);
 253
 254   bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) {
 255     uint32_t sampleSize = 0;
 256     if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {
 257       sampleSize = sizeof(float);
 258     } else if (mSampleFormat == AUDIO_FORMAT_S16) {
 259       sampleSize = sizeof(short);
 260     }
 261
 262     if (sampleSize == 0) {
 263       // No sample format set, we wouldn't know how many bytes to allocate.
 264       return true;
 265     }
 266
 267     uint32_t sizeInFrames = InFramesBufferSize();
 268     if (aSizeInFrames <= sizeInFrames) {
 269       // Buffer size is sufficient.
 270       return true;  // no reallocation necessary
 271     }
 272
 273     // 5 second cap.
 274     const uint32_t cap = 5 * mInRate;
 275     if (sizeInFrames >= cap) {
 276       // Already at the cap.
 277       return false;
 278     }
 279
 280     // As a backoff strategy, at least double the previous size.
 281     sizeInFrames *= 2;
 282
 283     if (aSizeInFrames > sizeInFrames) {
 284       // A larger buffer than the normal backoff strategy provides is needed, or
 285       // this is the first time setting the buffer size. Add another 50ms, as
 286       // some jitter is expected.
 287       sizeInFrames = aSizeInFrames + mInRate / 20;
 288     }
 289
 290     // mInputPreBufferFrameCount is an indication of the desired average
 291     // buffering.  Provide for at least twice this.
 292     sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2);
 293
 294     sizeInFrames = std::min(cap, sizeInFrames);
 295
 296     bool success = true;
 297     for (auto& b : mInternalInBuffer) {
 298       success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames);
 299     }
 300
 301     if (success) {
 302       // All buffers have the new size.
 303       return true;
 304     }
 305
 306     // Allocating an input buffer failed. We stick with the old buffer size.
 307     NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "
 308                                "frames). Expect glitches.",
 309                                sampleSize * sizeInFrames, sizeInFrames)
 310                    .get());
 311     return false;
 312   }
 313
 314  public:
 315   const uint32_t mOutRate;
 316
 317  private:
 318   bool mIsPreBufferSet = false;
 319   bool mIsWarmingUp = false;
 320   // The resampler can be bypassed when the input and output rates match and
 321   // any frames buffered in the resampler have been extracted.  This initial
 322   // value is reset on construction by UpdateResampler() if the rates differ.
 323   bool mResamplerIsBypassed = true;
 324   uint32_t mInputPreBufferFrameCount;
 325   uint32_t mChannels = 0;
 326   uint32_t mInRate;
 327
 328   AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
 329
 330   SpeexResamplerState* mResampler = nullptr;
 331   AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
 332
 333   class TailBuffer {
 334    public:
 335     template <typename T>
 336     T* Buffer() {
 337       return reinterpret_cast<T*>(mBuffer);
 338     }
 339     /* Store the MAXSIZE last elements of the buffer. */
 340     template <typename T>
 341     void StoreTail(const Span<const T>& aInBuffer) {
 342       StoreTail(aInBuffer.data(), aInBuffer.size());
 343     }
 344     template <typename T>
 345     void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
 346       const T* inBuffer = aInBuffer;
 347       mSize = std::min(aInFrames, MAXSIZE);
 348       if (inBuffer) {
 349         PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize);
 350       } else {
 351         std::fill_n(Buffer<T>(), mSize, static_cast<T>(0));
 352       }
 353     }
 354     uint32_t Length() { return mSize; }
 355     static constexpr uint32_t MAXSIZE = 20;
 356
 357    private:
 358     float mBuffer[MAXSIZE] = {};
 359     uint32_t mSize = 0;
 360   };
 361   AutoTArray<TailBuffer, STEREO> mInputTail;
 362
 363   WavDumper mInputStreamFile;
 364   WavDumper mOutputStreamFile;
 365 };
 366
 367 }  // namespace mozilla
 368
 369 #endif  // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_