Don't over-allocate the active effect slot array
[openal-soft.git] / core / mastering.cpp
blob468a286def9e9d88190ef7c11b31a27ee57a8635
2 #include "config.h"
4 #include "mastering.h"
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <functional>
10 #include <iterator>
11 #include <limits>
12 #include <new>
14 #include "almalloc.h"
15 #include "alnumeric.h"
16 #include "alspan.h"
17 #include "opthelpers.h"
20 /* These structures assume BufferLineSize is a power of 2. */
21 static_assert((BufferLineSize & (BufferLineSize-1)) == 0, "BufferLineSize is not a power of 2");
23 struct SlidingHold {
24 alignas(16) FloatBufferLine mValues;
25 std::array<uint,BufferLineSize> mExpiries;
26 uint mLowerIndex;
27 uint mUpperIndex;
28 uint mLength;
32 namespace {
34 using namespace std::placeholders;
36 /* This sliding hold follows the input level with an instant attack and a
37 * fixed duration hold before an instant release to the next highest level.
38 * It is a sliding window maximum (descending maxima) implementation based on
39 * Richard Harter's ascending minima algorithm available at:
41 * http://www.richardhartersworld.com/cri/2001/slidingmin.html
43 float UpdateSlidingHold(SlidingHold *Hold, const uint i, const float in)
45 static constexpr uint mask{BufferLineSize - 1};
46 const uint length{Hold->mLength};
47 const al::span values{Hold->mValues};
48 const al::span expiries{Hold->mExpiries};
49 uint lowerIndex{Hold->mLowerIndex};
50 uint upperIndex{Hold->mUpperIndex};
52 if(i >= expiries[upperIndex])
53 upperIndex = (upperIndex + 1) & mask;
55 if(in >= values[upperIndex])
57 values[upperIndex] = in;
58 expiries[upperIndex] = i + length;
59 lowerIndex = upperIndex;
61 else
63 auto findLowerIndex = [&lowerIndex,in,values]() noexcept -> bool
65 do {
66 if(!(in >= values[lowerIndex]))
67 return true;
68 } while(lowerIndex--);
69 return false;
71 while(!findLowerIndex())
72 lowerIndex = mask;
74 lowerIndex = (lowerIndex + 1) & mask;
75 values[lowerIndex] = in;
76 expiries[lowerIndex] = i + length;
79 Hold->mLowerIndex = lowerIndex;
80 Hold->mUpperIndex = upperIndex;
82 return values[upperIndex];
85 void ShiftSlidingHold(SlidingHold *Hold, const uint n)
87 auto exp_upper = Hold->mExpiries.begin() + Hold->mUpperIndex;
88 if(Hold->mLowerIndex < Hold->mUpperIndex)
90 std::transform(exp_upper, Hold->mExpiries.end(), exp_upper,
91 [n](const uint e) noexcept { return e - n; });
92 exp_upper = Hold->mExpiries.begin();
94 const auto exp_lower = Hold->mExpiries.begin() + Hold->mLowerIndex;
95 std::transform(exp_upper, exp_lower+1, exp_upper,
96 [n](const uint e) noexcept { return e - n; });
99 } // namespace
101 /* Multichannel compression is linked via the absolute maximum of all
102 * channels.
104 void Compressor::linkChannels(const uint SamplesToDo, const FloatBufferLine *OutBuffer)
106 ASSUME(SamplesToDo > 0);
108 const auto side_begin = mSideChain.begin() + mLookAhead;
109 std::fill(side_begin, side_begin+SamplesToDo, 0.0f);
111 auto fill_max = [SamplesToDo,side_begin](const FloatBufferLine &input) -> void
113 const float *RESTRICT buffer{al::assume_aligned<16>(input.data())};
114 auto max_abs = [](const float s0, const float s1) noexcept -> float
115 { return std::max(s0, std::fabs(s1)); };
116 std::transform(side_begin, side_begin+SamplesToDo, buffer, side_begin, max_abs);
118 std::for_each(OutBuffer, OutBuffer+mNumChans, fill_max);
121 /* This calculates the squared crest factor of the control signal for the
122 * basic automation of the attack/release times. As suggested by the paper,
123 * it uses an instantaneous squared peak detector and a squared RMS detector
124 * both with 200ms release times.
126 void Compressor::crestDetector(const uint SamplesToDo)
128 const float a_crest{mCrestCoeff};
129 float y2_peak{mLastPeakSq};
130 float y2_rms{mLastRmsSq};
132 ASSUME(SamplesToDo > 0);
134 auto calc_crest = [&y2_rms,&y2_peak,a_crest](const float x_abs) noexcept -> float
136 const float x2{std::clamp(x_abs*x_abs, 0.000001f, 1000000.0f)};
138 y2_peak = std::max(x2, lerpf(x2, y2_peak, a_crest));
139 y2_rms = lerpf(x2, y2_rms, a_crest);
140 return y2_peak / y2_rms;
142 const auto side_begin = mSideChain.begin() + mLookAhead;
143 std::transform(side_begin, side_begin+SamplesToDo, mCrestFactor.begin(), calc_crest);
145 mLastPeakSq = y2_peak;
146 mLastRmsSq = y2_rms;
149 /* The side-chain starts with a simple peak detector (based on the absolute
150 * value of the incoming signal) and performs most of its operations in the
151 * log domain.
153 void Compressor::peakDetector(const uint SamplesToDo)
155 ASSUME(SamplesToDo > 0);
157 /* Clamp the minimum amplitude to near-zero and convert to logarithmic. */
158 const auto side_begin = mSideChain.begin() + mLookAhead;
159 std::transform(side_begin, side_begin+SamplesToDo, side_begin,
160 [](float s) { return std::log(std::max(0.000001f, s)); });
163 /* An optional hold can be used to extend the peak detector so it can more
164 * solidly detect fast transients. This is best used when operating as a
165 * limiter.
167 void Compressor::peakHoldDetector(const uint SamplesToDo)
169 ASSUME(SamplesToDo > 0);
171 SlidingHold *hold{mHold.get()};
172 uint i{0};
173 auto detect_peak = [&i,hold](const float x_abs) -> float
175 const float x_G{std::log(std::max(0.000001f, x_abs))};
176 return UpdateSlidingHold(hold, i++, x_G);
178 auto side_begin = mSideChain.begin() + mLookAhead;
179 std::transform(side_begin, side_begin+SamplesToDo, side_begin, detect_peak);
181 ShiftSlidingHold(hold, SamplesToDo);
184 /* This is the heart of the feed-forward compressor. It operates in the log
185 * domain (to better match human hearing) and can apply some basic automation
186 * to knee width, attack/release times, make-up/post gain, and clipping
187 * reduction.
189 void Compressor::gainCompressor(const uint SamplesToDo)
191 const bool autoKnee{mAuto.Knee};
192 const bool autoAttack{mAuto.Attack};
193 const bool autoRelease{mAuto.Release};
194 const bool autoPostGain{mAuto.PostGain};
195 const bool autoDeclip{mAuto.Declip};
196 const float threshold{mThreshold};
197 const float slope{mSlope};
198 const float attack{mAttack};
199 const float release{mRelease};
200 const float c_est{mGainEstimate};
201 const float a_adp{mAdaptCoeff};
202 auto lookAhead = mSideChain.cbegin() + mLookAhead;
203 auto crestFactor = mCrestFactor.cbegin();
204 float postGain{mPostGain};
205 float knee{mKnee};
206 float t_att{attack};
207 float t_rel{release - attack};
208 float a_att{std::exp(-1.0f / t_att)};
209 float a_rel{std::exp(-1.0f / t_rel)};
210 float y_1{mLastRelease};
211 float y_L{mLastAttack};
212 float c_dev{mLastGainDev};
214 ASSUME(SamplesToDo > 0);
216 auto process = [&](const float input) -> float
218 if(autoKnee)
219 knee = std::max(0.0f, 2.5f * (c_dev + c_est));
220 const float knee_h{0.5f * knee};
222 /* This is the gain computer. It applies a static compression curve
223 * to the control signal.
225 const float x_over{*(lookAhead++) - threshold};
226 const float y_G{
227 (x_over <= -knee_h) ? 0.0f :
228 (std::fabs(x_over) < knee_h) ? (x_over+knee_h) * (x_over+knee_h) / (2.0f * knee) :
229 x_over};
231 const float y2_crest{*(crestFactor++)};
232 if(autoAttack)
234 t_att = 2.0f*attack/y2_crest;
235 a_att = std::exp(-1.0f / t_att);
237 if(autoRelease)
239 t_rel = 2.0f*release/y2_crest - t_att;
240 a_rel = std::exp(-1.0f / t_rel);
243 /* Gain smoothing (ballistics) is done via a smooth decoupled peak
244 * detector. The attack time is subtracted from the release time
245 * above to compensate for the chained operating mode.
247 const float x_L{-slope * y_G};
248 y_1 = std::max(x_L, lerpf(x_L, y_1, a_rel));
249 y_L = lerpf(y_1, y_L, a_att);
251 /* Knee width and make-up gain automation make use of a smoothed
252 * measurement of deviation between the control signal and estimate.
253 * The estimate is also used to bias the measurement to hot-start its
254 * average.
256 c_dev = lerpf(-(y_L+c_est), c_dev, a_adp);
258 if(autoPostGain)
260 /* Clipping reduction is only viable when make-up gain is being
261 * automated. It modifies the deviation to further attenuate the
262 * control signal when clipping is detected. The adaptation time
263 * is sufficiently long enough to suppress further clipping at the
264 * same output level.
266 if(autoDeclip)
267 c_dev = std::max(c_dev, input - y_L - threshold - c_est);
269 postGain = -(c_dev + c_est);
272 return std::exp(postGain - y_L);
274 auto sideChain = al::span{mSideChain}.first(SamplesToDo);
275 std::transform(sideChain.begin(), sideChain.end(), sideChain.begin(), process);
277 mLastRelease = y_1;
278 mLastAttack = y_L;
279 mLastGainDev = c_dev;
282 /* Combined with the hold time, a look-ahead delay can improve handling of
283 * fast transients by allowing the envelope time to converge prior to
284 * reaching the offending impulse. This is best used when operating as a
285 * limiter.
287 void Compressor::signalDelay(const uint SamplesToDo, FloatBufferLine *OutBuffer)
289 const size_t numChans{mNumChans};
290 const uint lookAhead{mLookAhead};
292 ASSUME(SamplesToDo > 0);
293 ASSUME(numChans > 0);
294 ASSUME(lookAhead > 0);
296 for(size_t c{0};c < numChans;c++)
298 float *inout{al::assume_aligned<16>(OutBuffer[c].data())};
299 float *delaybuf{al::assume_aligned<16>(mDelay[c].data())};
301 auto inout_end = inout + SamplesToDo;
302 if(SamplesToDo >= lookAhead) LIKELY
304 auto delay_end = std::rotate(inout, inout_end - lookAhead, inout_end);
305 std::swap_ranges(inout, delay_end, delaybuf);
307 else
309 auto delay_start = std::swap_ranges(inout, inout_end, delaybuf);
310 std::rotate(delaybuf, delay_start, delaybuf + lookAhead);
316 std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const float SampleRate,
317 const bool AutoKnee, const bool AutoAttack, const bool AutoRelease, const bool AutoPostGain,
318 const bool AutoDeclip, const float LookAheadTime, const float HoldTime, const float PreGainDb,
319 const float PostGainDb, const float ThresholdDb, const float Ratio, const float KneeDb,
320 const float AttackTime, const float ReleaseTime)
322 const auto lookAhead = static_cast<uint>(std::clamp(std::round(LookAheadTime*SampleRate), 0.0f,
323 BufferLineSize-1.0f));
324 const auto hold = static_cast<uint>(std::clamp(std::round(HoldTime*SampleRate), 0.0f,
325 BufferLineSize-1.0f));
327 auto Comp = CompressorPtr{new Compressor{}};
328 Comp->mNumChans = NumChans;
329 Comp->mAuto.Knee = AutoKnee;
330 Comp->mAuto.Attack = AutoAttack;
331 Comp->mAuto.Release = AutoRelease;
332 Comp->mAuto.PostGain = AutoPostGain;
333 Comp->mAuto.Declip = AutoPostGain && AutoDeclip;
334 Comp->mLookAhead = lookAhead;
335 Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f);
336 Comp->mPostGain = std::log(10.0f)/20.0f * PostGainDb;
337 Comp->mThreshold = std::log(10.0f)/20.0f * ThresholdDb;
338 Comp->mSlope = 1.0f / std::max(1.0f, Ratio) - 1.0f;
339 Comp->mKnee = std::max(0.0f, std::log(10.0f)/20.0f * KneeDb);
340 Comp->mAttack = std::max(1.0f, AttackTime * SampleRate);
341 Comp->mRelease = std::max(1.0f, ReleaseTime * SampleRate);
343 /* Knee width automation actually treats the compressor as a limiter. By
344 * varying the knee width, it can effectively be seen as applying
345 * compression over a wide range of ratios.
347 if(AutoKnee)
348 Comp->mSlope = -1.0f;
350 if(lookAhead > 0)
352 /* The sliding hold implementation doesn't handle a length of 1. A 1-
353 * sample hold is useless anyway, it would only ever give back what was
354 * just given to it.
356 if(hold > 1)
358 Comp->mHold = std::make_unique<SlidingHold>();
359 Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity();
360 Comp->mHold->mExpiries[0] = hold;
361 Comp->mHold->mLength = hold;
363 Comp->mDelay.resize(NumChans, FloatBufferLine{});
366 Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms
367 Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope;
368 Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s
370 return Comp;
373 Compressor::~Compressor() = default;
376 void Compressor::process(const uint SamplesToDo, FloatBufferLine *OutBuffer)
378 const size_t numChans{mNumChans};
380 ASSUME(SamplesToDo > 0);
381 ASSUME(numChans > 0);
383 const float preGain{mPreGain};
384 if(preGain != 1.0f)
386 auto apply_gain = [SamplesToDo,preGain](FloatBufferLine &input) noexcept -> void
388 float *buffer{al::assume_aligned<16>(input.data())};
389 std::transform(buffer, buffer+SamplesToDo, buffer,
390 [preGain](const float s) noexcept { return s * preGain; });
392 std::for_each(OutBuffer, OutBuffer+numChans, apply_gain);
395 linkChannels(SamplesToDo, OutBuffer);
397 if(mAuto.Attack || mAuto.Release)
398 crestDetector(SamplesToDo);
400 if(mHold)
401 peakHoldDetector(SamplesToDo);
402 else
403 peakDetector(SamplesToDo);
405 gainCompressor(SamplesToDo);
407 if(!mDelay.empty())
408 signalDelay(SamplesToDo, OutBuffer);
410 const auto sideChain = al::span{mSideChain};
411 auto apply_comp = [SamplesToDo,sideChain](FloatBufferLine &input) noexcept -> void
413 float *buffer{al::assume_aligned<16>(input.data())};
414 const float *gains{al::assume_aligned<16>(sideChain.data())};
415 std::transform(gains, gains+SamplesToDo, buffer, buffer,
416 [](const float g, const float s) noexcept { return g * s; });
418 std::for_each(OutBuffer, OutBuffer+numChans, apply_comp);
420 auto side_begin = mSideChain.begin() + SamplesToDo;
421 std::copy(side_begin, side_begin+mLookAhead, mSideChain.begin());