Bug 1943650 - Command-line --help output misformatted after --dbus-service. r=emilio
[gecko.git] / dom / media / driftcontrol / DynamicResampler.h
blob6223a2c1e1321fb4aa47de4f395ef968a59a81e6
1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4 * You can obtain one at http://mozilla.org/MPL/2.0/. */
6 #ifndef DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
7 #define DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_
9 #include "AudioRingBuffer.h"
10 #include "AudioSegment.h"
11 #include "TimeUnits.h"
12 #include "WavDumper.h"
14 #include <speex/speex_resampler.h>
16 namespace mozilla {
18 const uint32_t STEREO = 2;
20 /**
21 * DynamicResampler allows updating on the fly the input sample rate and the
22 * number of channels. In addition to that, it maintains an internal buffer for
23 * the input data and allows pre-buffering as well. The Resample() method
24 * strives to provide the requested number of output frames by using the input
25 * data including any pre-buffering. If there are fewer frames in the internal
26 * buffer than are requested, the internal buffer is padded with enough silence
27 * to allow the requested frames to be resampled and returned.
29 * Input data buffering makes use of the AudioRingBuffer. The capacity of the
30 * buffer is initially 100ms of audio and it is pre-allocated during
31 * SetSampleFormat(). Should the input data grow beyond that, the input buffer
32 * is re-allocated on the fly. In addition to that, due to a special feature of
33 * AudioRingBuffer, no extra copies take place when the input data is fed to the
34 * resampler.
36 * The sample format must be set before using any method.
38 * The DynamicResampler is not thread-safe, so all the methods apart from the
39 * constructor must be called on the same thread.
41 class DynamicResampler final {
42 public:
43 /**
44 * Provide the initial input and output rate and the amount of pre-buffering.
45 * The channel count will be set to stereo. Memory allocation will take
46 * place. The input buffer is non-interleaved.
48 DynamicResampler(uint32_t aInRate, uint32_t aOutRate,
49 uint32_t aInputPreBufferFrameCount = 0);
50 ~DynamicResampler();
52 /**
53 * Set the sample format type to float or short.
55 void SetSampleFormat(AudioSampleFormat aFormat);
56 uint32_t GetInRate() const { return mInRate; }
57 uint32_t GetChannels() const { return mChannels; }
59 /**
60 * Append `aInFrames` number of frames from `aInBuffer` to the internal input
61 * buffer. Memory copy/move takes place.
63 void AppendInput(Span<const float* const> aInBuffer, uint32_t aInFrames);
64 void AppendInput(Span<const int16_t* const> aInBuffer, uint32_t aInFrames);
65 /**
66 * Append `aInFrames` number of frames of silence to the internal input
67 * buffer. Memory copy/move takes place.
69 void AppendInputSilence(const uint32_t aInFrames);
70 /**
71 * Return the number of frames the internal input buffer can store.
73 uint32_t InFramesBufferSize() const;
74 /**
75 * Return the number of frames stored in the internal input buffer.
77 uint32_t InFramesBuffered(uint32_t aChannelIndex) const;
79 /**
80 * Prepends existing input data with a silent pre-buffer if not already done.
81 * Data will be prepended so that after resampling aDuration of data,
82 * the buffering level will be as close as possible to
83 * mInputPreBufferFrameCount, which is the desired buffering level.
85 void EnsurePreBuffer(media::TimeUnit aDuration);
87 /**
88 * Set the number of frames that should be used for input pre-buffering.
90 void SetInputPreBufferFrameCount(uint32_t aInputPreBufferFrameCount);
93 * Resample as many frames as needed from the internal input buffer to the
94 * `aOutBuffer` in order to provide all `aOutFrames`.
96 * On first call, prepends the input buffer with silence so that after
97 * resampling aOutFrames frames of data, the input buffer holds data as close
98 * as possible to the configured pre-buffer size.
100 * If there are not enough input frames to provide the requested output
101 * frames, the input buffer is padded with enough silence to allow the
102 * requested frames to be resampled, and the pre-buffer is reset so that the
103 * next call will be treated as the first.
105 * Returns true if the internal input buffer underran and had to be padded
106 * with silence, otherwise false.
108 bool Resample(float* aOutBuffer, uint32_t aOutFrames, uint32_t aChannelIndex);
109 bool Resample(int16_t* aOutBuffer, uint32_t aOutFrames,
110 uint32_t aChannelIndex);
113 * Update the input rate and/or the channel count. If a value is not updated
114 * compared to the current one nothing happens. Changing the `aInRate`
115 * results in recalculation in the resampler. Changing `aChannels` results in
116 * the reallocation of the internal input buffer with the exception of
117 * changes between mono and stereo, where no reallocation takes
118 * place. A stereo internal input buffer is always maintained even if the
119 * sound is mono.
121 void UpdateResampler(uint32_t aInRate, uint32_t aChannels);
123 private:
124 template <typename T>
125 void AppendInputInternal(Span<const T* const>& aInBuffer,
126 uint32_t aInFrames) {
127 MOZ_ASSERT(aInBuffer.Length() == (uint32_t)mChannels);
128 for (uint32_t i = 0; i < mChannels; ++i) {
129 PushInFrames(aInBuffer[i], aInFrames, i);
133 void ResampleInternal(const float* aInBuffer, uint32_t* aInFrames,
134 float* aOutBuffer, uint32_t* aOutFrames,
135 uint32_t aChannelIndex);
136 void ResampleInternal(const int16_t* aInBuffer, uint32_t* aInFrames,
137 int16_t* aOutBuffer, uint32_t* aOutFrames,
138 uint32_t aChannelIndex);
/**
 * Produce `aOutFrames` frames for channel `aChannelIndex` into `aOutBuffer`,
 * consuming frames from that channel's internal input buffer.
 *
 * Two paths exist:
 *  - mInRate == mOutRate: any frames still held by the resampler are drained
 *    first, then the resampler is bypassed and input is copied straight to
 *    the output. Missing input is replaced by silence written to the buffer.
 *  - rates differ: the buffered input is resampled; if that does not yield
 *    enough output, the resampler is fed a null input until it does.
 *
 * Returns true when the buffered input was not sufficient (underrun), in
 * which case mIsPreBufferSet is cleared as well; returns false otherwise.
 */
140 template <typename T>
141 bool ResampleInternal(T* aOutBuffer, uint32_t aOutFrames,
142 uint32_t aChannelIndex) {
143 MOZ_ASSERT(mInRate);
144 MOZ_ASSERT(mOutRate);
145 MOZ_ASSERT(mChannels);
146 MOZ_ASSERT(aChannelIndex < mChannels);
147 MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
148 MOZ_ASSERT(aOutFrames);
// Running cursor into the output and the number of frames still owed. Both
// are captured by reference and advanced by the lambdas below.
150 uint32_t outFramesNeeded = aOutFrames;
151 T* nextOutFrame = aOutBuffer;
// Equal rates: no rate conversion is needed.
152 if (mInRate == mOutRate) {
// The resampler was in use until now; extract what it still holds before
// switching to plain copies.
153 if (!mResamplerIsBypassed) {
154 uint32_t latency = speex_resampler_get_input_latency(mResampler);
155 mInternalInBuffer[aChannelIndex].ReadNoCopy(
156 [&](const Span<const T>& aInBuffer) -> uint32_t {
157 // Although unlikely with the sample rates used with this class,
158 // the resampler input latency may temporarily be higher than
159 // indicated, after a change in resampling rate that reduces the
160 // indicated latency. The resampler's "magic" samples cause
161 // this. All frames in the resampler are extracted when
162 // `latency` output frames have been extracted.
163 uint32_t outFramesResampled = std::min(outFramesNeeded, latency);
164 uint32_t inFrames = aInBuffer.Length();
165 ResampleInternal(aInBuffer.Elements(), &inFrames, nextOutFrame,
166 &outFramesResampled, aChannelIndex);
167 nextOutFrame += outFramesResampled;
168 outFramesNeeded -= outFramesResampled;
169 if (outFramesResampled == latency) {
170 mResamplerIsBypassed = true;
171 // The last `latency` frames of input to the resampler will not
172 // be extracted from the resampler. Leave them in
173 // mInternalInBuffer to be copied directly to nextOutFrame.
174 MOZ_ASSERT(inFrames >= latency);
175 return inFrames - latency;
177 return inFrames;
// Bypass copy: if fewer frames are buffered than still needed, top the
// buffer up with silence so that the Read() below can always be satisfied.
180 bool underrun = false;
181 if (uint32_t buffered = mInternalInBuffer[aChannelIndex].AvailableRead();
182 buffered < outFramesNeeded) {
183 underrun = true;
184 mIsPreBufferSet = false;
185 mInternalInBuffer[aChannelIndex].WriteSilence(outFramesNeeded -
186 buffered);
188 DebugOnly<uint32_t> numFramesRead = mInternalInBuffer[aChannelIndex].Read(
189 Span(nextOutFrame, outFramesNeeded));
190 MOZ_ASSERT(numFramesRead == outFramesNeeded);
191 // Workaround to avoid a discontinuity when the speex resampler operates
192 // again. Feed it with the last 20 frames to warm up the internal memory
193 // of the resampler and then skip memory equal to the resampler's input
194 // latency.
195 mInputTail[aChannelIndex].StoreTail<T>(aOutBuffer, aOutFrames);
// Only the first channel is written to the dump files, and not while
// warming up. In this path input and output are the same data.
196 if (aChannelIndex == 0 && !mIsWarmingUp) {
197 mInputStreamFile.Write(nextOutFrame, outFramesNeeded);
198 mOutputStreamFile.Write(nextOutFrame, outFramesNeeded);
200 return underrun;
// Rates differ. Helper that resamples one chunk of input into the remaining
// output space, advances the output cursor and remembers the input tail.
// It returns inFrames as updated by the non-template ResampleInternal
// (NOTE(review): presumably the number of input frames consumed - confirm).
203 auto resample = [&](const T* aInBuffer, uint32_t aInLength) -> uint32_t {
204 uint32_t outFramesResampled = outFramesNeeded;
205 uint32_t inFrames = aInLength;
206 ResampleInternal(aInBuffer, &inFrames, nextOutFrame, &outFramesResampled,
207 aChannelIndex);
208 nextOutFrame += outFramesResampled;
209 outFramesNeeded -= outFramesResampled;
210 mInputTail[aChannelIndex].StoreTail<T>(aInBuffer, inFrames);
211 return inFrames;
214 MOZ_ASSERT(!mResamplerIsBypassed);
// Resample directly out of the ring buffer; the callback consumes nothing
// once the output is complete.
215 mInternalInBuffer[aChannelIndex].ReadNoCopy(
216 [&](const Span<const T>& aInBuffer) -> uint32_t {
217 if (!outFramesNeeded) {
218 return 0;
220 return resample(aInBuffer.Elements(), aInBuffer.Length());
// Everything requested came from buffered input: no underrun.
223 if (outFramesNeeded == 0) {
224 return false;
// Underrun: the ring buffer is empty but output is still owed. Keep feeding
// the resampler a null input (NOTE(review): presumably interpreted as
// silence by the non-template ResampleInternal - confirm) until done.
227 while (outFramesNeeded > 0) {
228 MOZ_ASSERT(mInternalInBuffer[aChannelIndex].AvailableRead() == 0);
229 // Round up.
230 uint32_t totalInFramesNeeded =
231 ((CheckedUint32(outFramesNeeded) * mInRate + mOutRate - 1) / mOutRate)
232 .value();
233 resample(nullptr, totalInFramesNeeded);
235 mIsPreBufferSet = false;
236 return true;
239 template <typename T>
240 void PushInFrames(const T* aInBuffer, const uint32_t aInFrames,
241 uint32_t aChannelIndex) {
242 MOZ_ASSERT(aInBuffer);
243 MOZ_ASSERT(aInFrames);
244 MOZ_ASSERT(mChannels);
245 MOZ_ASSERT(aChannelIndex < mChannels);
246 MOZ_ASSERT(aChannelIndex < mInternalInBuffer.Length());
247 EnsureInputBufferSizeInFrames(
248 mInternalInBuffer[aChannelIndex].AvailableRead() + aInFrames);
249 mInternalInBuffer[aChannelIndex].Write(Span(aInBuffer, aInFrames));
252 void WarmUpResampler(bool aSkipLatency);
254 bool EnsureInputBufferSizeInFrames(uint32_t aSizeInFrames) {
255 uint32_t sampleSize = 0;
256 if (mSampleFormat == AUDIO_FORMAT_FLOAT32) {
257 sampleSize = sizeof(float);
258 } else if (mSampleFormat == AUDIO_FORMAT_S16) {
259 sampleSize = sizeof(short);
262 if (sampleSize == 0) {
263 // No sample format set, we wouldn't know how many bytes to allocate.
264 return true;
267 uint32_t sizeInFrames = InFramesBufferSize();
268 if (aSizeInFrames <= sizeInFrames) {
269 // Buffer size is sufficient.
270 return true; // no reallocation necessary
273 // 5 second cap.
274 const uint32_t cap = 5 * mInRate;
275 if (sizeInFrames >= cap) {
276 // Already at the cap.
277 return false;
280 // As a backoff strategy, at least double the previous size.
281 sizeInFrames *= 2;
283 if (aSizeInFrames > sizeInFrames) {
284 // A larger buffer than the normal backoff strategy provides is needed, or
285 // this is the first time setting the buffer size. Add another 50ms, as
286 // some jitter is expected.
287 sizeInFrames = aSizeInFrames + mInRate / 20;
290 // mInputPreBufferFrameCount is an indication of the desired average
291 // buffering. Provide for at least twice this.
292 sizeInFrames = std::max(sizeInFrames, mInputPreBufferFrameCount * 2);
294 sizeInFrames = std::min(cap, sizeInFrames);
296 bool success = true;
297 for (auto& b : mInternalInBuffer) {
298 success = success && b.EnsureLengthBytes(sampleSize * sizeInFrames);
301 if (success) {
302 // All buffers have the new size.
303 return true;
306 // Allocating an input buffer failed. We stick with the old buffer size.
307 NS_WARNING(nsPrintfCString("Failed to allocate a buffer of %u bytes (%u "
308 "frames). Expect glitches.",
309 sampleSize * sizeInFrames, sizeInFrames)
310 .get());
311 return false;
314 public:
315 const uint32_t mOutRate;
317 private:
318 bool mIsPreBufferSet = false;
319 bool mIsWarmingUp = false;
320 // The resampler can be bypassed when the input and output rates match and
321 // any frames buffered in the resampler have been extracted. This initial
322 // value is reset on construction by UpdateResampler() if the rates differ.
323 bool mResamplerIsBypassed = true;
324 uint32_t mInputPreBufferFrameCount;
325 uint32_t mChannels = 0;
326 uint32_t mInRate;
328 AutoTArray<AudioRingBuffer, STEREO> mInternalInBuffer;
330 SpeexResamplerState* mResampler = nullptr;
331 AudioSampleFormat mSampleFormat = AUDIO_FORMAT_SILENCE;
333 class TailBuffer {
334 public:
335 template <typename T>
336 T* Buffer() {
337 return reinterpret_cast<T*>(mBuffer);
339 /* Store the MAXSIZE last elements of the buffer. */
340 template <typename T>
341 void StoreTail(const Span<const T>& aInBuffer) {
342 StoreTail(aInBuffer.data(), aInBuffer.size());
344 template <typename T>
345 void StoreTail(const T* aInBuffer, uint32_t aInFrames) {
346 const T* inBuffer = aInBuffer;
347 mSize = std::min(aInFrames, MAXSIZE);
348 if (inBuffer) {
349 PodCopy(Buffer<T>(), inBuffer + aInFrames - mSize, mSize);
350 } else {
351 std::fill_n(Buffer<T>(), mSize, static_cast<T>(0));
354 uint32_t Length() { return mSize; }
355 static constexpr uint32_t MAXSIZE = 20;
357 private:
358 float mBuffer[MAXSIZE] = {};
359 uint32_t mSize = 0;
361 AutoTArray<TailBuffer, STEREO> mInputTail;
363 WavDumper mInputStreamFile;
364 WavDumper mOutputStreamFile;
367 } // namespace mozilla
369 #endif // DOM_MEDIA_DRIFTCONTROL_DYNAMICRESAMPLER_H_