Add "fast" variants for the bsinc resamplers
[openal-soft.git] / alc / mastering.cpp
blobd0a2f78a343c5e35a461dab6b4e4d48b8ac3cee4
2 #include "config.h"
4 #include "mastering.h"
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <functional>
10 #include <iterator>
11 #include <limits>
12 #include <new>
14 #include "AL/al.h"
16 #include "almalloc.h"
17 #include "alnumeric.h"
18 #include "alu.h"
19 #include "opthelpers.h"
22 /* These structures assume BUFFERSIZE is a power of 2. */
23 static_assert((BUFFERSIZE & (BUFFERSIZE-1)) == 0, "BUFFERSIZE is not a power of 2");
25 struct SlidingHold {
26 alignas(16) ALfloat mValues[BUFFERSIZE];
27 ALuint mExpiries[BUFFERSIZE];
28 ALuint mLowerIndex;
29 ALuint mUpperIndex;
30 ALuint mLength;
34 namespace {
36 using namespace std::placeholders;
38 /* This sliding hold follows the input level with an instant attack and a
39 * fixed duration hold before an instant release to the next highest level.
40 * It is a sliding window maximum (descending maxima) implementation based on
41 * Richard Harter's ascending minima algorithm available at:
43 * http://www.richardhartersworld.com/cri/2001/slidingmin.html
45 ALfloat UpdateSlidingHold(SlidingHold *Hold, const ALuint i, const ALfloat in)
47 static constexpr ALsizei mask{BUFFERSIZE - 1};
48 const ALuint length{Hold->mLength};
49 ALfloat (&values)[BUFFERSIZE] = Hold->mValues;
50 ALuint (&expiries)[BUFFERSIZE] = Hold->mExpiries;
51 ALuint lowerIndex{Hold->mLowerIndex};
52 ALuint upperIndex{Hold->mUpperIndex};
54 if(i >= expiries[upperIndex])
55 upperIndex = (upperIndex + 1) & mask;
57 if(in >= values[upperIndex])
59 values[upperIndex] = in;
60 expiries[upperIndex] = i + length;
61 lowerIndex = upperIndex;
63 else
65 do {
66 do {
67 if(!(in >= values[lowerIndex]))
68 goto found_place;
69 } while(lowerIndex--);
70 lowerIndex = mask;
71 } while(1);
72 found_place:
74 lowerIndex = (lowerIndex + 1) & mask;
75 values[lowerIndex] = in;
76 expiries[lowerIndex] = i + length;
79 Hold->mLowerIndex = lowerIndex;
80 Hold->mUpperIndex = upperIndex;
82 return values[upperIndex];
85 void ShiftSlidingHold(SlidingHold *Hold, const ALuint n)
87 auto exp_begin = std::begin(Hold->mExpiries) + Hold->mUpperIndex;
88 auto exp_last = std::begin(Hold->mExpiries) + Hold->mLowerIndex;
89 if(exp_last-exp_begin < 0)
91 std::transform(exp_begin, std::end(Hold->mExpiries), exp_begin,
92 std::bind(std::minus<ALuint>{}, _1, n));
93 exp_begin = std::begin(Hold->mExpiries);
95 std::transform(exp_begin, exp_last+1, exp_begin, std::bind(std::minus<ALuint>{}, _1, n));
99 /* Multichannel compression is linked via the absolute maximum of all
100 * channels.
102 void LinkChannels(Compressor *Comp, const ALuint SamplesToDo, const FloatBufferLine *OutBuffer)
104 const ALuint numChans{Comp->mNumChans};
106 ASSUME(SamplesToDo > 0);
107 ASSUME(numChans > 0);
109 auto side_begin = std::begin(Comp->mSideChain) + Comp->mLookAhead;
110 std::fill(side_begin, side_begin+SamplesToDo, 0.0f);
112 auto fill_max = [SamplesToDo,side_begin](const FloatBufferLine &input) -> void
114 const ALfloat *RESTRICT buffer{al::assume_aligned<16>(input.data())};
115 auto max_abs = std::bind(maxf, _1, std::bind(static_cast<float(&)(float)>(std::fabs), _2));
116 std::transform(side_begin, side_begin+SamplesToDo, buffer, side_begin, max_abs);
118 std::for_each(OutBuffer, OutBuffer+numChans, fill_max);
121 /* This calculates the squared crest factor of the control signal for the
122 * basic automation of the attack/release times. As suggested by the paper,
123 * it uses an instantaneous squared peak detector and a squared RMS detector
124 * both with 200ms release times.
126 static void CrestDetector(Compressor *Comp, const ALuint SamplesToDo)
128 const ALfloat a_crest{Comp->mCrestCoeff};
129 ALfloat y2_peak{Comp->mLastPeakSq};
130 ALfloat y2_rms{Comp->mLastRmsSq};
132 ASSUME(SamplesToDo > 0);
134 auto calc_crest = [&y2_rms,&y2_peak,a_crest](const ALfloat x_abs) noexcept -> ALfloat
136 ALfloat x2 = maxf(0.000001f, x_abs * x_abs);
138 y2_peak = maxf(x2, lerp(x2, y2_peak, a_crest));
139 y2_rms = lerp(x2, y2_rms, a_crest);
140 return y2_peak / y2_rms;
142 auto side_begin = std::begin(Comp->mSideChain) + Comp->mLookAhead;
143 std::transform(side_begin, side_begin+SamplesToDo, std::begin(Comp->mCrestFactor), calc_crest);
145 Comp->mLastPeakSq = y2_peak;
146 Comp->mLastRmsSq = y2_rms;
149 /* The side-chain starts with a simple peak detector (based on the absolute
150 * value of the incoming signal) and performs most of its operations in the
151 * log domain.
153 void PeakDetector(Compressor *Comp, const ALuint SamplesToDo)
155 ASSUME(SamplesToDo > 0);
157 /* Clamp the minimum amplitude to near-zero and convert to logarithm. */
158 auto side_begin = std::begin(Comp->mSideChain) + Comp->mLookAhead;
159 std::transform(side_begin, side_begin+SamplesToDo, side_begin,
160 std::bind(static_cast<float(&)(float)>(std::log), std::bind(maxf, 0.000001f, _1)));
163 /* An optional hold can be used to extend the peak detector so it can more
164 * solidly detect fast transients. This is best used when operating as a
165 * limiter.
167 void PeakHoldDetector(Compressor *Comp, const ALuint SamplesToDo)
169 ASSUME(SamplesToDo > 0);
171 SlidingHold *hold{Comp->mHold};
172 ALuint i{0};
173 auto detect_peak = [&i,hold](const ALfloat x_abs) -> ALfloat
175 const ALfloat x_G{std::log(maxf(0.000001f, x_abs))};
176 return UpdateSlidingHold(hold, i++, x_G);
178 auto side_begin = std::begin(Comp->mSideChain) + Comp->mLookAhead;
179 std::transform(side_begin, side_begin+SamplesToDo, side_begin, detect_peak);
181 ShiftSlidingHold(hold, SamplesToDo);
184 /* This is the heart of the feed-forward compressor. It operates in the log
185 * domain (to better match human hearing) and can apply some basic automation
186 * to knee width, attack/release times, make-up/post gain, and clipping
187 * reduction.
189 void GainCompressor(Compressor *Comp, const ALuint SamplesToDo)
191 const bool autoKnee{Comp->mAuto.Knee};
192 const bool autoAttack{Comp->mAuto.Attack};
193 const bool autoRelease{Comp->mAuto.Release};
194 const bool autoPostGain{Comp->mAuto.PostGain};
195 const bool autoDeclip{Comp->mAuto.Declip};
196 const ALuint lookAhead{Comp->mLookAhead};
197 const ALfloat threshold{Comp->mThreshold};
198 const ALfloat slope{Comp->mSlope};
199 const ALfloat attack{Comp->mAttack};
200 const ALfloat release{Comp->mRelease};
201 const ALfloat c_est{Comp->mGainEstimate};
202 const ALfloat a_adp{Comp->mAdaptCoeff};
203 const ALfloat *crestFactor{Comp->mCrestFactor};
204 ALfloat postGain{Comp->mPostGain};
205 ALfloat knee{Comp->mKnee};
206 ALfloat t_att{attack};
207 ALfloat t_rel{release - attack};
208 ALfloat a_att{std::exp(-1.0f / t_att)};
209 ALfloat a_rel{std::exp(-1.0f / t_rel)};
210 ALfloat y_1{Comp->mLastRelease};
211 ALfloat y_L{Comp->mLastAttack};
212 ALfloat c_dev{Comp->mLastGainDev};
214 ASSUME(SamplesToDo > 0);
216 for(ALfloat &sideChain : al::span<float>{Comp->mSideChain, SamplesToDo})
218 if(autoKnee)
219 knee = maxf(0.0f, 2.5f * (c_dev + c_est));
220 const ALfloat knee_h{0.5f * knee};
222 /* This is the gain computer. It applies a static compression curve
223 * to the control signal.
225 const ALfloat x_over{std::addressof(sideChain)[lookAhead] - threshold};
226 const ALfloat y_G{
227 (x_over <= -knee_h) ? 0.0f :
228 (std::fabs(x_over) < knee_h) ? (x_over + knee_h) * (x_over + knee_h) / (2.0f * knee) :
229 x_over
232 const ALfloat y2_crest{*(crestFactor++)};
233 if(autoAttack)
235 t_att = 2.0f*attack/y2_crest;
236 a_att = std::exp(-1.0f / t_att);
238 if(autoRelease)
240 t_rel = 2.0f*release/y2_crest - t_att;
241 a_rel = std::exp(-1.0f / t_rel);
244 /* Gain smoothing (ballistics) is done via a smooth decoupled peak
245 * detector. The attack time is subtracted from the release time
246 * above to compensate for the chained operating mode.
248 const ALfloat x_L{-slope * y_G};
249 y_1 = maxf(x_L, lerp(x_L, y_1, a_rel));
250 y_L = lerp(y_1, y_L, a_att);
252 /* Knee width and make-up gain automation make use of a smoothed
253 * measurement of deviation between the control signal and estimate.
254 * The estimate is also used to bias the measurement to hot-start its
255 * average.
257 c_dev = lerp(-(y_L+c_est), c_dev, a_adp);
259 if(autoPostGain)
261 /* Clipping reduction is only viable when make-up gain is being
262 * automated. It modifies the deviation to further attenuate the
263 * control signal when clipping is detected. The adaptation time
264 * is sufficiently long enough to suppress further clipping at the
265 * same output level.
267 if(autoDeclip)
268 c_dev = maxf(c_dev, sideChain - y_L - threshold - c_est);
270 postGain = -(c_dev + c_est);
273 sideChain = std::exp(postGain - y_L);
276 Comp->mLastRelease = y_1;
277 Comp->mLastAttack = y_L;
278 Comp->mLastGainDev = c_dev;
281 /* Combined with the hold time, a look-ahead delay can improve handling of
282 * fast transients by allowing the envelope time to converge prior to
283 * reaching the offending impulse. This is best used when operating as a
284 * limiter.
286 void SignalDelay(Compressor *Comp, const ALuint SamplesToDo, FloatBufferLine *OutBuffer)
288 const ALuint numChans{Comp->mNumChans};
289 const ALuint lookAhead{Comp->mLookAhead};
291 ASSUME(SamplesToDo > 0);
292 ASSUME(numChans > 0);
293 ASSUME(lookAhead > 0);
295 for(ALuint c{0};c < numChans;c++)
297 ALfloat *inout{al::assume_aligned<16>(OutBuffer[c].data())};
298 ALfloat *delaybuf{al::assume_aligned<16>(Comp->mDelay[c].data())};
300 auto inout_end = inout + SamplesToDo;
301 if LIKELY(SamplesToDo >= lookAhead)
303 auto delay_end = std::rotate(inout, inout_end - lookAhead, inout_end);
304 std::swap_ranges(inout, delay_end, delaybuf);
306 else
308 auto delay_start = std::swap_ranges(inout, inout_end, delaybuf);
309 std::rotate(delaybuf, delay_start, delaybuf + lookAhead);
314 } // namespace
316 /* The compressor is initialized with the following settings:
318 * NumChans - Number of channels to process.
319 * SampleRate - Sample rate to process.
320 * AutoKnee - Whether to automate the knee width parameter.
321 * AutoAttack - Whether to automate the attack time parameter.
322 * AutoRelease - Whether to automate the release time parameter.
323 * AutoPostGain - Whether to automate the make-up (post) gain parameter.
324 * AutoDeclip - Whether to automate clipping reduction. Ignored when
325 * not automating make-up gain.
326 * LookAheadTime - Look-ahead time (in seconds).
327 * HoldTime - Peak hold-time (in seconds).
328 * PreGainDb - Gain applied before detection (in dB).
329 * PostGainDb - Make-up gain applied after compression (in dB).
330 * ThresholdDb - Triggering threshold (in dB).
331 * Ratio - Compression ratio (x:1). Set to INFINITY for true
332 * limiting. Ignored when automating knee width.
333 * KneeDb - Knee width (in dB). Ignored when automating knee
334 * width.
335 * AttackTimeMin - Attack time (in seconds). Acts as a maximum when
336 * automating attack time.
337 * ReleaseTimeMin - Release time (in seconds). Acts as a maximum when
338 * automating release time.
340 std::unique_ptr<Compressor> CompressorInit(const ALuint NumChans, const ALfloat SampleRate,
341 const ALboolean AutoKnee, const ALboolean AutoAttack, const ALboolean AutoRelease,
342 const ALboolean AutoPostGain, const ALboolean AutoDeclip, const ALfloat LookAheadTime,
343 const ALfloat HoldTime, const ALfloat PreGainDb, const ALfloat PostGainDb,
344 const ALfloat ThresholdDb, const ALfloat Ratio, const ALfloat KneeDb, const ALfloat AttackTime,
345 const ALfloat ReleaseTime)
347 const auto lookAhead = static_cast<ALuint>(
348 clampf(std::round(LookAheadTime*SampleRate), 0.0f, BUFFERSIZE-1));
349 const auto hold = static_cast<ALuint>(
350 clampf(std::round(HoldTime*SampleRate), 0.0f, BUFFERSIZE-1));
352 size_t size{sizeof(Compressor)};
353 if(lookAhead > 0)
355 size += sizeof(*Compressor::mDelay) * NumChans;
356 /* The sliding hold implementation doesn't handle a length of 1. A 1-
357 * sample hold is useless anyway, it would only ever give back what was
358 * just given to it.
360 if(hold > 1)
361 size += sizeof(*Compressor::mHold);
364 auto Comp = std::unique_ptr<Compressor>{new (al_calloc(16, size)) Compressor{}};
365 Comp->mNumChans = NumChans;
366 Comp->mAuto.Knee = AutoKnee != AL_FALSE;
367 Comp->mAuto.Attack = AutoAttack != AL_FALSE;
368 Comp->mAuto.Release = AutoRelease != AL_FALSE;
369 Comp->mAuto.PostGain = AutoPostGain != AL_FALSE;
370 Comp->mAuto.Declip = AutoPostGain && AutoDeclip;
371 Comp->mLookAhead = lookAhead;
372 Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f);
373 Comp->mPostGain = PostGainDb * std::log(10.0f) / 20.0f;
374 Comp->mThreshold = ThresholdDb * std::log(10.0f) / 20.0f;
375 Comp->mSlope = 1.0f / maxf(1.0f, Ratio) - 1.0f;
376 Comp->mKnee = maxf(0.0f, KneeDb * std::log(10.0f) / 20.0f);
377 Comp->mAttack = maxf(1.0f, AttackTime * SampleRate);
378 Comp->mRelease = maxf(1.0f, ReleaseTime * SampleRate);
380 /* Knee width automation actually treats the compressor as a limiter. By
381 * varying the knee width, it can effectively be seen as applying
382 * compression over a wide range of ratios.
384 if(AutoKnee)
385 Comp->mSlope = -1.0f;
387 if(lookAhead > 0)
389 if(hold > 1)
391 Comp->mHold = ::new (static_cast<void*>(Comp.get() + 1)) SlidingHold{};
392 Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity();
393 Comp->mHold->mExpiries[0] = hold;
394 Comp->mHold->mLength = hold;
395 Comp->mDelay = ::new (static_cast<void*>(Comp->mHold + 1)) FloatBufferLine[NumChans];
397 else
399 Comp->mDelay = ::new (static_cast<void*>(Comp.get() + 1)) FloatBufferLine[NumChans];
403 Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms
404 Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope;
405 Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s
407 return Comp;
410 Compressor::~Compressor()
412 if(mHold)
413 al::destroy_at(mHold);
414 mHold = nullptr;
415 if(mDelay)
416 al::destroy_n(mDelay, mNumChans);
417 mDelay = nullptr;
421 void Compressor::process(const ALuint SamplesToDo, FloatBufferLine *OutBuffer)
423 const ALuint numChans{mNumChans};
425 ASSUME(SamplesToDo > 0);
426 ASSUME(numChans > 0);
428 const ALfloat preGain{mPreGain};
429 if(preGain != 1.0f)
431 auto apply_gain = [SamplesToDo,preGain](FloatBufferLine &input) noexcept -> void
433 ALfloat *buffer{al::assume_aligned<16>(input.data())};
434 std::transform(buffer, buffer+SamplesToDo, buffer,
435 std::bind(std::multiplies<float>{}, _1, preGain));
437 std::for_each(OutBuffer, OutBuffer+numChans, apply_gain);
440 LinkChannels(this, SamplesToDo, OutBuffer);
442 if(mAuto.Attack || mAuto.Release)
443 CrestDetector(this, SamplesToDo);
445 if(mHold)
446 PeakHoldDetector(this, SamplesToDo);
447 else
448 PeakDetector(this, SamplesToDo);
450 GainCompressor(this, SamplesToDo);
452 if(mDelay)
453 SignalDelay(this, SamplesToDo, OutBuffer);
455 const ALfloat (&sideChain)[BUFFERSIZE*2] = mSideChain;
456 auto apply_comp = [SamplesToDo,&sideChain](FloatBufferLine &input) noexcept -> void
458 ALfloat *buffer{al::assume_aligned<16>(input.data())};
459 const ALfloat *gains{al::assume_aligned<16>(&sideChain[0])};
460 std::transform(gains, gains+SamplesToDo, buffer, buffer,
461 std::bind(std::multiplies<float>{}, _1, _2));
463 std::for_each(OutBuffer, OutBuffer+numChans, apply_comp);
465 auto side_begin = std::begin(mSideChain) + SamplesToDo;
466 std::copy(side_begin, side_begin+mLookAhead, std::begin(mSideChain));