Use separate 32-bit second and nanosecond atomics for the clock
[openal-soft.git] / core / mastering.cpp
blobe31c3e92075fc9ffde490d4ab22e35ca511b1e51
2 #include "config.h"
4 #include "mastering.h"
6 #include <algorithm>
7 #include <cmath>
8 #include <cstddef>
9 #include <functional>
10 #include <iterator>
11 #include <limits>
12 #include <new>
14 #include "alnumeric.h"
15 #include "alspan.h"
16 #include "opthelpers.h"
19 /* These structures assume BufferLineSize is a power of 2. */
20 static_assert((BufferLineSize & (BufferLineSize-1)) == 0, "BufferLineSize is not a power of 2");
22 struct SIMDALIGN SlidingHold {
23 alignas(16) FloatBufferLine mValues;
24 std::array<uint,BufferLineSize> mExpiries;
25 uint mLowerIndex;
26 uint mUpperIndex;
27 uint mLength;
31 namespace {
33 template<std::size_t A, typename T, std::size_t N>
34 constexpr auto assume_aligned_span(const al::span<T,N> s) noexcept -> al::span<T,N>
35 { return al::span<T,N>{al::assume_aligned<A>(s.data()), s.size()}; }
37 /* This sliding hold follows the input level with an instant attack and a
38 * fixed duration hold before an instant release to the next highest level.
39 * It is a sliding window maximum (descending maxima) implementation based on
40 * Richard Harter's ascending minima algorithm available at:
42 * http://www.richardhartersworld.com/cri/2001/slidingmin.html
44 float UpdateSlidingHold(SlidingHold *Hold, const uint i, const float in)
46 static constexpr uint mask{BufferLineSize - 1};
47 const uint length{Hold->mLength};
48 const al::span values{Hold->mValues};
49 const al::span expiries{Hold->mExpiries};
50 uint lowerIndex{Hold->mLowerIndex};
51 uint upperIndex{Hold->mUpperIndex};
53 if(i >= expiries[upperIndex])
54 upperIndex = (upperIndex + 1) & mask;
56 if(in >= values[upperIndex])
58 values[upperIndex] = in;
59 expiries[upperIndex] = i + length;
60 lowerIndex = upperIndex;
62 else
64 auto findLowerIndex = [&lowerIndex,in,values]() noexcept -> bool
66 do {
67 if(!(in >= values[lowerIndex]))
68 return true;
69 } while(lowerIndex--);
70 return false;
72 while(!findLowerIndex())
73 lowerIndex = mask;
75 lowerIndex = (lowerIndex + 1) & mask;
76 values[lowerIndex] = in;
77 expiries[lowerIndex] = i + length;
80 Hold->mLowerIndex = lowerIndex;
81 Hold->mUpperIndex = upperIndex;
83 return values[upperIndex];
86 void ShiftSlidingHold(SlidingHold *Hold, const uint n)
88 auto exp_upper = Hold->mExpiries.begin() + Hold->mUpperIndex;
89 if(Hold->mLowerIndex < Hold->mUpperIndex)
91 std::transform(exp_upper, Hold->mExpiries.end(), exp_upper,
92 [n](const uint e) noexcept { return e - n; });
93 exp_upper = Hold->mExpiries.begin();
95 const auto exp_lower = Hold->mExpiries.begin() + Hold->mLowerIndex;
96 std::transform(exp_upper, exp_lower+1, exp_upper,
97 [n](const uint e) noexcept { return e - n; });
100 } // namespace
102 /* Multichannel compression is linked via the absolute maximum of all
103 * channels.
105 void Compressor::linkChannels(const uint SamplesToDo,
106 const al::span<const FloatBufferLine> OutBuffer)
108 ASSUME(SamplesToDo > 0);
109 ASSUME(SamplesToDo <= BufferLineSize);
111 const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
112 std::fill_n(sideChain.begin(), sideChain.size(), 0.0f);
114 auto fill_max = [sideChain](const FloatBufferLine &input) -> void
116 const auto buffer = assume_aligned_span<16>(al::span{input});
117 auto max_abs = [](const float s0, const float s1) noexcept -> float
118 { return std::max(s0, std::fabs(s1)); };
119 std::transform(sideChain.begin(), sideChain.end(), buffer.begin(), sideChain.begin(),
120 max_abs);
122 for(const FloatBufferLine &input : OutBuffer)
123 fill_max(input);
126 /* This calculates the squared crest factor of the control signal for the
127 * basic automation of the attack/release times. As suggested by the paper,
128 * it uses an instantaneous squared peak detector and a squared RMS detector
129 * both with 200ms release times.
131 void Compressor::crestDetector(const uint SamplesToDo)
133 const float a_crest{mCrestCoeff};
134 float y2_peak{mLastPeakSq};
135 float y2_rms{mLastRmsSq};
137 ASSUME(SamplesToDo > 0);
138 ASSUME(SamplesToDo <= BufferLineSize);
140 auto calc_crest = [&y2_rms,&y2_peak,a_crest](const float x_abs) noexcept -> float
142 const float x2{std::clamp(x_abs*x_abs, 0.000001f, 1000000.0f)};
144 y2_peak = std::max(x2, lerpf(x2, y2_peak, a_crest));
145 y2_rms = lerpf(x2, y2_rms, a_crest);
146 return y2_peak / y2_rms;
148 const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
149 std::transform(sideChain.cbegin(), sideChain.cend(), mCrestFactor.begin(), calc_crest);
151 mLastPeakSq = y2_peak;
152 mLastRmsSq = y2_rms;
155 /* The side-chain starts with a simple peak detector (based on the absolute
156 * value of the incoming signal) and performs most of its operations in the
157 * log domain.
159 void Compressor::peakDetector(const uint SamplesToDo)
161 ASSUME(SamplesToDo > 0);
162 ASSUME(SamplesToDo <= BufferLineSize);
164 /* Clamp the minimum amplitude to near-zero and convert to logarithmic. */
165 const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
166 std::transform(sideChain.cbegin(), sideChain.cend(), sideChain.begin(),
167 [](float s) { return std::log(std::max(0.000001f, s)); });
170 /* An optional hold can be used to extend the peak detector so it can more
171 * solidly detect fast transients. This is best used when operating as a
172 * limiter.
174 void Compressor::peakHoldDetector(const uint SamplesToDo)
176 ASSUME(SamplesToDo > 0);
177 ASSUME(SamplesToDo <= BufferLineSize);
179 SlidingHold *hold{mHold.get()};
180 uint i{0};
181 auto detect_peak = [&i,hold](const float x_abs) -> float
183 const float x_G{std::log(std::max(0.000001f, x_abs))};
184 return UpdateSlidingHold(hold, i++, x_G);
186 auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
187 std::transform(sideChain.cbegin(), sideChain.cend(), sideChain.begin(), detect_peak);
189 ShiftSlidingHold(hold, SamplesToDo);
192 /* This is the heart of the feed-forward compressor. It operates in the log
193 * domain (to better match human hearing) and can apply some basic automation
194 * to knee width, attack/release times, make-up/post gain, and clipping
195 * reduction.
197 void Compressor::gainCompressor(const uint SamplesToDo)
199 const bool autoKnee{mAuto.Knee};
200 const bool autoAttack{mAuto.Attack};
201 const bool autoRelease{mAuto.Release};
202 const bool autoPostGain{mAuto.PostGain};
203 const bool autoDeclip{mAuto.Declip};
204 const float threshold{mThreshold};
205 const float slope{mSlope};
206 const float attack{mAttack};
207 const float release{mRelease};
208 const float c_est{mGainEstimate};
209 const float a_adp{mAdaptCoeff};
210 auto lookAhead = mSideChain.cbegin() + mLookAhead;
211 auto crestFactor = mCrestFactor.cbegin();
212 float postGain{mPostGain};
213 float knee{mKnee};
214 float t_att{attack};
215 float t_rel{release - attack};
216 float a_att{std::exp(-1.0f / t_att)};
217 float a_rel{std::exp(-1.0f / t_rel)};
218 float y_1{mLastRelease};
219 float y_L{mLastAttack};
220 float c_dev{mLastGainDev};
222 ASSUME(SamplesToDo > 0);
224 auto process = [&](const float input) -> float
226 if(autoKnee)
227 knee = std::max(0.0f, 2.5f * (c_dev + c_est));
228 const float knee_h{0.5f * knee};
230 /* This is the gain computer. It applies a static compression curve
231 * to the control signal.
233 const float x_over{*(lookAhead++) - threshold};
234 const float y_G{
235 (x_over <= -knee_h) ? 0.0f :
236 (std::fabs(x_over) < knee_h) ? (x_over+knee_h) * (x_over+knee_h) / (2.0f * knee) :
237 x_over};
239 const float y2_crest{*(crestFactor++)};
240 if(autoAttack)
242 t_att = 2.0f*attack/y2_crest;
243 a_att = std::exp(-1.0f / t_att);
245 if(autoRelease)
247 t_rel = 2.0f*release/y2_crest - t_att;
248 a_rel = std::exp(-1.0f / t_rel);
251 /* Gain smoothing (ballistics) is done via a smooth decoupled peak
252 * detector. The attack time is subtracted from the release time
253 * above to compensate for the chained operating mode.
255 const float x_L{-slope * y_G};
256 y_1 = std::max(x_L, lerpf(x_L, y_1, a_rel));
257 y_L = lerpf(y_1, y_L, a_att);
259 /* Knee width and make-up gain automation make use of a smoothed
260 * measurement of deviation between the control signal and estimate.
261 * The estimate is also used to bias the measurement to hot-start its
262 * average.
264 c_dev = lerpf(-(y_L+c_est), c_dev, a_adp);
266 if(autoPostGain)
268 /* Clipping reduction is only viable when make-up gain is being
269 * automated. It modifies the deviation to further attenuate the
270 * control signal when clipping is detected. The adaptation time
271 * is sufficiently long enough to suppress further clipping at the
272 * same output level.
274 if(autoDeclip)
275 c_dev = std::max(c_dev, input - y_L - threshold - c_est);
277 postGain = -(c_dev + c_est);
280 return std::exp(postGain - y_L);
282 auto sideChain = al::span{mSideChain}.first(SamplesToDo);
283 std::transform(sideChain.begin(), sideChain.end(), sideChain.begin(), process);
285 mLastRelease = y_1;
286 mLastAttack = y_L;
287 mLastGainDev = c_dev;
290 /* Combined with the hold time, a look-ahead delay can improve handling of
291 * fast transients by allowing the envelope time to converge prior to
292 * reaching the offending impulse. This is best used when operating as a
293 * limiter.
295 void Compressor::signalDelay(const uint SamplesToDo, const al::span<FloatBufferLine> OutBuffer)
297 const auto lookAhead = mLookAhead;
299 ASSUME(SamplesToDo > 0);
300 ASSUME(SamplesToDo <= BufferLineSize);
301 ASSUME(lookAhead > 0);
302 ASSUME(lookAhead < BufferLineSize);
304 auto delays = mDelay.begin();
305 for(auto &buffer : OutBuffer)
307 const auto inout = al::span{buffer}.first(SamplesToDo);
308 const auto delaybuf = al::span{*(delays++)}.first(lookAhead);
310 if(SamplesToDo >= delaybuf.size()) LIKELY
312 const auto inout_start = inout.end() - ptrdiff_t(delaybuf.size());
313 const auto delay_end = std::rotate(inout.begin(), inout_start, inout.end());
314 std::swap_ranges(inout.begin(), delay_end, delaybuf.begin());
316 else
318 auto delay_start = std::swap_ranges(inout.begin(), inout.end(), delaybuf.begin());
319 std::rotate(delaybuf.begin(), delay_start, delaybuf.end());
325 std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const float SampleRate,
326 const bool AutoKnee, const bool AutoAttack, const bool AutoRelease, const bool AutoPostGain,
327 const bool AutoDeclip, const float LookAheadTime, const float HoldTime, const float PreGainDb,
328 const float PostGainDb, const float ThresholdDb, const float Ratio, const float KneeDb,
329 const float AttackTime, const float ReleaseTime)
331 const auto lookAhead = static_cast<uint>(std::clamp(std::round(LookAheadTime*SampleRate), 0.0f,
332 BufferLineSize-1.0f));
333 const auto hold = static_cast<uint>(std::clamp(std::round(HoldTime*SampleRate), 0.0f,
334 BufferLineSize-1.0f));
336 auto Comp = CompressorPtr{new Compressor{}};
337 Comp->mAuto.Knee = AutoKnee;
338 Comp->mAuto.Attack = AutoAttack;
339 Comp->mAuto.Release = AutoRelease;
340 Comp->mAuto.PostGain = AutoPostGain;
341 Comp->mAuto.Declip = AutoPostGain && AutoDeclip;
342 Comp->mLookAhead = lookAhead;
343 Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f);
344 Comp->mPostGain = std::log(10.0f)/20.0f * PostGainDb;
345 Comp->mThreshold = std::log(10.0f)/20.0f * ThresholdDb;
346 Comp->mSlope = 1.0f / std::max(1.0f, Ratio) - 1.0f;
347 Comp->mKnee = std::max(0.0f, std::log(10.0f)/20.0f * KneeDb);
348 Comp->mAttack = std::max(1.0f, AttackTime * SampleRate);
349 Comp->mRelease = std::max(1.0f, ReleaseTime * SampleRate);
351 /* Knee width automation actually treats the compressor as a limiter. By
352 * varying the knee width, it can effectively be seen as applying
353 * compression over a wide range of ratios.
355 if(AutoKnee)
356 Comp->mSlope = -1.0f;
358 if(lookAhead > 0)
360 /* The sliding hold implementation doesn't handle a length of 1. A 1-
361 * sample hold is useless anyway, it would only ever give back what was
362 * just given to it.
364 if(hold > 1)
366 Comp->mHold = std::make_unique<SlidingHold>();
367 Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity();
368 Comp->mHold->mExpiries[0] = hold;
369 Comp->mHold->mLength = hold;
371 Comp->mDelay.resize(NumChans, FloatBufferLine{});
374 Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms
375 Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope;
376 Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s
378 return Comp;
381 Compressor::~Compressor() = default;
384 void Compressor::process(const uint SamplesToDo, const al::span<FloatBufferLine> InOut)
386 ASSUME(SamplesToDo > 0);
387 ASSUME(SamplesToDo <= BufferLineSize);
389 const float preGain{mPreGain};
390 if(preGain != 1.0f)
392 auto apply_gain = [SamplesToDo,preGain](FloatBufferLine &input) noexcept -> void
394 const auto buffer = assume_aligned_span<16>(al::span{input}.first(SamplesToDo));
395 std::transform(buffer.cbegin(), buffer.cend(), buffer.begin(),
396 [preGain](const float s) noexcept { return s * preGain; });
398 std::for_each(InOut.begin(), InOut.end(), apply_gain);
401 linkChannels(SamplesToDo, InOut);
403 if(mAuto.Attack || mAuto.Release)
404 crestDetector(SamplesToDo);
406 if(mHold)
407 peakHoldDetector(SamplesToDo);
408 else
409 peakDetector(SamplesToDo);
411 gainCompressor(SamplesToDo);
413 if(!mDelay.empty())
414 signalDelay(SamplesToDo, InOut);
416 const auto gains = assume_aligned_span<16>(al::span{mSideChain}.first(SamplesToDo));
417 auto apply_comp = [gains](const FloatBufferSpan inout) noexcept -> void
419 const auto buffer = assume_aligned_span<16>(inout);
420 std::transform(gains.cbegin(), gains.cend(), buffer.cbegin(), buffer.begin(),
421 std::multiplies{});
423 for(const FloatBufferSpan inout : InOut)
424 apply_comp(inout);
426 const auto delayedGains = al::span{mSideChain}.subspan(SamplesToDo, mLookAhead);
427 std::copy(delayedGains.begin(), delayedGains.end(), mSideChain.begin());