core/mastering.cpp

   1
   2 #include "config.h"
   3
   4 #include "mastering.h"
   5
   6 #include <algorithm>
   7 #include <cmath>
   8 #include <cstddef>
   9 #include <functional>
  10 #include <iterator>
  11 #include <limits>
  12 #include <new>
  13
  14 #include "alnumeric.h"
  15 #include "alspan.h"
  16 #include "opthelpers.h"
  17
  18
  19 /* These structures assume BufferLineSize is a power of 2. */
  20 static_assert((BufferLineSize & (BufferLineSize-1)) == 0, "BufferLineSize is not a power of 2");
  21
  22 struct SIMDALIGN SlidingHold {
  23     alignas(16) FloatBufferLine mValues;
  24     std::array<uint,BufferLineSize> mExpiries;
  25     uint mLowerIndex;
  26     uint mUpperIndex;
  27     uint mLength;
  28 };
  29
  30
  31 namespace {
  32
  33 template<std::size_t A, typename T, std::size_t N>
  34 constexpr auto assume_aligned_span(const al::span<T,N> s) noexcept -> al::span<T,N>
  35 { return al::span<T,N>{al::assume_aligned<A>(s.data()), s.size()}; }
  36
  37 /* This sliding hold follows the input level with an instant attack and a
  38  * fixed duration hold before an instant release to the next highest level.
  39  * It is a sliding window maximum (descending maxima) implementation based on
  40  * Richard Harter's ascending minima algorithm available at:
  41  *
  42  *   http://www.richardhartersworld.com/cri/2001/slidingmin.html
  43  */
  44 float UpdateSlidingHold(SlidingHold *Hold, const uint i, const float in)
  45 {
  46     static constexpr uint mask{BufferLineSize - 1};
  47     const uint length{Hold->mLength};
  48     const al::span values{Hold->mValues};
  49     const al::span expiries{Hold->mExpiries};
  50     uint lowerIndex{Hold->mLowerIndex};
  51     uint upperIndex{Hold->mUpperIndex};
  52
  53     if(i >= expiries[upperIndex])
  54         upperIndex = (upperIndex + 1) & mask;
  55
  56     if(in >= values[upperIndex])
  57     {
  58         values[upperIndex] = in;
  59         expiries[upperIndex] = i + length;
  60         lowerIndex = upperIndex;
  61     }
  62     else
  63     {
  64         auto findLowerIndex = [&lowerIndex,in,values]() noexcept -> bool
  65         {
  66             do {
  67                 if(!(in >= values[lowerIndex]))
  68                     return true;
  69             } while(lowerIndex--);
  70             return false;
  71         };
  72         while(!findLowerIndex())
  73             lowerIndex = mask;
  74
  75         lowerIndex = (lowerIndex + 1) & mask;
  76         values[lowerIndex] = in;
  77         expiries[lowerIndex] = i + length;
  78     }
  79
  80     Hold->mLowerIndex = lowerIndex;
  81     Hold->mUpperIndex = upperIndex;
  82
  83     return values[upperIndex];
  84 }
  85
  86 void ShiftSlidingHold(SlidingHold *Hold, const uint n)
  87 {
  88     auto exp_upper = Hold->mExpiries.begin() + Hold->mUpperIndex;
  89     if(Hold->mLowerIndex < Hold->mUpperIndex)
  90     {
  91         std::transform(exp_upper, Hold->mExpiries.end(), exp_upper,
  92             [n](const uint e) noexcept { return e - n; });
  93         exp_upper = Hold->mExpiries.begin();
  94     }
  95     const auto exp_lower = Hold->mExpiries.begin() + Hold->mLowerIndex;
  96     std::transform(exp_upper, exp_lower+1, exp_upper,
  97         [n](const uint e) noexcept { return e - n; });
  98 }
  99
 100 } // namespace
 101
 102 /* Multichannel compression is linked via the absolute maximum of all
 103  * channels.
 104  */
 105 void Compressor::linkChannels(const uint SamplesToDo,
 106     const al::span<const FloatBufferLine> OutBuffer)
 107 {
 108     ASSUME(SamplesToDo > 0);
 109     ASSUME(SamplesToDo <= BufferLineSize);
 110
 111     const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
 112     std::fill_n(sideChain.begin(), sideChain.size(), 0.0f);
 113
 114     auto fill_max = [sideChain](const FloatBufferLine &input) -> void
 115     {
 116         const auto buffer = assume_aligned_span<16>(al::span{input});
 117         auto max_abs = [](const float s0, const float s1) noexcept -> float
 118         { return std::max(s0, std::fabs(s1)); };
 119         std::transform(sideChain.begin(), sideChain.end(), buffer.begin(), sideChain.begin(),
 120             max_abs);
 121     };
 122     for(const FloatBufferLine &input : OutBuffer)
 123         fill_max(input);
 124 }
 125
 126 /* This calculates the squared crest factor of the control signal for the
 127  * basic automation of the attack/release times.  As suggested by the paper,
 128  * it uses an instantaneous squared peak detector and a squared RMS detector
 129  * both with 200ms release times.
 130  */
 131 void Compressor::crestDetector(const uint SamplesToDo)
 132 {
 133     const float a_crest{mCrestCoeff};
 134     float y2_peak{mLastPeakSq};
 135     float y2_rms{mLastRmsSq};
 136
 137     ASSUME(SamplesToDo > 0);
 138     ASSUME(SamplesToDo <= BufferLineSize);
 139
 140     auto calc_crest = [&y2_rms,&y2_peak,a_crest](const float x_abs) noexcept -> float
 141     {
 142         const float x2{std::clamp(x_abs*x_abs, 0.000001f, 1000000.0f)};
 143
 144         y2_peak = std::max(x2, lerpf(x2, y2_peak, a_crest));
 145         y2_rms = lerpf(x2, y2_rms, a_crest);
 146         return y2_peak / y2_rms;
 147     };
 148     const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
 149     std::transform(sideChain.cbegin(), sideChain.cend(), mCrestFactor.begin(), calc_crest);
 150
 151     mLastPeakSq = y2_peak;
 152     mLastRmsSq = y2_rms;
 153 }
 154
 155 /* The side-chain starts with a simple peak detector (based on the absolute
 156  * value of the incoming signal) and performs most of its operations in the
 157  * log domain.
 158  */
 159 void Compressor::peakDetector(const uint SamplesToDo)
 160 {
 161     ASSUME(SamplesToDo > 0);
 162     ASSUME(SamplesToDo <= BufferLineSize);
 163
 164     /* Clamp the minimum amplitude to near-zero and convert to logarithmic. */
 165     const auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
 166     std::transform(sideChain.cbegin(), sideChain.cend(), sideChain.begin(),
 167         [](float s) { return std::log(std::max(0.000001f, s)); });
 168 }
 169
 170 /* An optional hold can be used to extend the peak detector so it can more
 171  * solidly detect fast transients.  This is best used when operating as a
 172  * limiter.
 173  */
 174 void Compressor::peakHoldDetector(const uint SamplesToDo)
 175 {
 176     ASSUME(SamplesToDo > 0);
 177     ASSUME(SamplesToDo <= BufferLineSize);
 178
 179     SlidingHold *hold{mHold.get()};
 180     uint i{0};
 181     auto detect_peak = [&i,hold](const float x_abs) -> float
 182     {
 183         const float x_G{std::log(std::max(0.000001f, x_abs))};
 184         return UpdateSlidingHold(hold, i++, x_G);
 185     };
 186     auto sideChain = al::span{mSideChain}.subspan(mLookAhead, SamplesToDo);
 187     std::transform(sideChain.cbegin(), sideChain.cend(), sideChain.begin(), detect_peak);
 188
 189     ShiftSlidingHold(hold, SamplesToDo);
 190 }
 191
 192 /* This is the heart of the feed-forward compressor.  It operates in the log
 193  * domain (to better match human hearing) and can apply some basic automation
 194  * to knee width, attack/release times, make-up/post gain, and clipping
 195  * reduction.
 196  */
 197 void Compressor::gainCompressor(const uint SamplesToDo)
 198 {
 199     const bool autoKnee{mAuto.Knee};
 200     const bool autoAttack{mAuto.Attack};
 201     const bool autoRelease{mAuto.Release};
 202     const bool autoPostGain{mAuto.PostGain};
 203     const bool autoDeclip{mAuto.Declip};
 204     const float threshold{mThreshold};
 205     const float slope{mSlope};
 206     const float attack{mAttack};
 207     const float release{mRelease};
 208     const float c_est{mGainEstimate};
 209     const float a_adp{mAdaptCoeff};
 210     auto lookAhead = mSideChain.cbegin() + mLookAhead;
 211     auto crestFactor = mCrestFactor.cbegin();
 212     float postGain{mPostGain};
 213     float knee{mKnee};
 214     float t_att{attack};
 215     float t_rel{release - attack};
 216     float a_att{std::exp(-1.0f / t_att)};
 217     float a_rel{std::exp(-1.0f / t_rel)};
 218     float y_1{mLastRelease};
 219     float y_L{mLastAttack};
 220     float c_dev{mLastGainDev};
 221
 222     ASSUME(SamplesToDo > 0);
 223
 224     auto process = [&](const float input) -> float
 225     {
 226         if(autoKnee)
 227             knee = std::max(0.0f, 2.5f * (c_dev + c_est));
 228         const float knee_h{0.5f * knee};
 229
 230         /* This is the gain computer.  It applies a static compression curve
 231          * to the control signal.
 232          */
 233         const float x_over{*(lookAhead++) - threshold};
 234         const float y_G{
 235             (x_over <= -knee_h) ? 0.0f :
 236             (std::fabs(x_over) < knee_h) ? (x_over+knee_h) * (x_over+knee_h) / (2.0f * knee) :
 237             x_over};
 238
 239         const float y2_crest{*(crestFactor++)};
 240         if(autoAttack)
 241         {
 242             t_att = 2.0f*attack/y2_crest;
 243             a_att = std::exp(-1.0f / t_att);
 244         }
 245         if(autoRelease)
 246         {
 247             t_rel = 2.0f*release/y2_crest - t_att;
 248             a_rel = std::exp(-1.0f / t_rel);
 249         }
 250
 251         /* Gain smoothing (ballistics) is done via a smooth decoupled peak
 252          * detector.  The attack time is subtracted from the release time
 253          * above to compensate for the chained operating mode.
 254          */
 255         const float x_L{-slope * y_G};
 256         y_1 = std::max(x_L, lerpf(x_L, y_1, a_rel));
 257         y_L = lerpf(y_1, y_L, a_att);
 258
 259         /* Knee width and make-up gain automation make use of a smoothed
 260          * measurement of deviation between the control signal and estimate.
 261          * The estimate is also used to bias the measurement to hot-start its
 262          * average.
 263          */
 264         c_dev = lerpf(-(y_L+c_est), c_dev, a_adp);
 265
 266         if(autoPostGain)
 267         {
 268             /* Clipping reduction is only viable when make-up gain is being
 269              * automated. It modifies the deviation to further attenuate the
 270              * control signal when clipping is detected. The adaptation time
 271              * is sufficiently long enough to suppress further clipping at the
 272              * same output level.
 273              */
 274             if(autoDeclip)
 275                 c_dev = std::max(c_dev, input - y_L - threshold - c_est);
 276
 277             postGain = -(c_dev + c_est);
 278         }
 279
 280         return std::exp(postGain - y_L);
 281     };
 282     auto sideChain = al::span{mSideChain}.first(SamplesToDo);
 283     std::transform(sideChain.begin(), sideChain.end(), sideChain.begin(), process);
 284
 285     mLastRelease = y_1;
 286     mLastAttack = y_L;
 287     mLastGainDev = c_dev;
 288 }
 289
 290 /* Combined with the hold time, a look-ahead delay can improve handling of
 291  * fast transients by allowing the envelope time to converge prior to
 292  * reaching the offending impulse.  This is best used when operating as a
 293  * limiter.
 294  */
 295 void Compressor::signalDelay(const uint SamplesToDo, const al::span<FloatBufferLine> OutBuffer)
 296 {
 297     const auto lookAhead = mLookAhead;
 298
 299     ASSUME(SamplesToDo > 0);
 300     ASSUME(SamplesToDo <= BufferLineSize);
 301     ASSUME(lookAhead > 0);
 302     ASSUME(lookAhead < BufferLineSize);
 303
 304     auto delays = mDelay.begin();
 305     for(auto &buffer : OutBuffer)
 306     {
 307         const auto inout = al::span{buffer}.first(SamplesToDo);
 308         const auto delaybuf = al::span{*(delays++)}.first(lookAhead);
 309
 310         if(SamplesToDo >= delaybuf.size()) LIKELY
 311         {
 312             const auto inout_start = inout.end() - ptrdiff_t(delaybuf.size());
 313             const auto delay_end = std::rotate(inout.begin(), inout_start, inout.end());
 314             std::swap_ranges(inout.begin(), delay_end, delaybuf.begin());
 315         }
 316         else
 317         {
 318             auto delay_start = std::swap_ranges(inout.begin(), inout.end(), delaybuf.begin());
 319             std::rotate(delaybuf.begin(), delay_start, delaybuf.end());
 320         }
 321     }
 322 }
 323
 324
 325 std::unique_ptr<Compressor> Compressor::Create(const size_t NumChans, const float SampleRate,
 326     const bool AutoKnee, const bool AutoAttack, const bool AutoRelease, const bool AutoPostGain,
 327     const bool AutoDeclip, const float LookAheadTime, const float HoldTime, const float PreGainDb,
 328     const float PostGainDb, const float ThresholdDb, const float Ratio, const float KneeDb,
 329     const float AttackTime, const float ReleaseTime)
 330 {
 331     const auto lookAhead = static_cast<uint>(std::clamp(std::round(LookAheadTime*SampleRate), 0.0f,
 332         BufferLineSize-1.0f));
 333     const auto hold = static_cast<uint>(std::clamp(std::round(HoldTime*SampleRate), 0.0f,
 334         BufferLineSize-1.0f));
 335
 336     auto Comp = CompressorPtr{new Compressor{}};
 337     Comp->mAuto.Knee = AutoKnee;
 338     Comp->mAuto.Attack = AutoAttack;
 339     Comp->mAuto.Release = AutoRelease;
 340     Comp->mAuto.PostGain = AutoPostGain;
 341     Comp->mAuto.Declip = AutoPostGain && AutoDeclip;
 342     Comp->mLookAhead = lookAhead;
 343     Comp->mPreGain = std::pow(10.0f, PreGainDb / 20.0f);
 344     Comp->mPostGain = std::log(10.0f)/20.0f * PostGainDb;
 345     Comp->mThreshold = std::log(10.0f)/20.0f * ThresholdDb;
 346     Comp->mSlope = 1.0f / std::max(1.0f, Ratio) - 1.0f;
 347     Comp->mKnee = std::max(0.0f, std::log(10.0f)/20.0f * KneeDb);
 348     Comp->mAttack = std::max(1.0f, AttackTime * SampleRate);
 349     Comp->mRelease = std::max(1.0f, ReleaseTime * SampleRate);
 350
 351     /* Knee width automation actually treats the compressor as a limiter. By
 352      * varying the knee width, it can effectively be seen as applying
 353      * compression over a wide range of ratios.
 354      */
 355     if(AutoKnee)
 356         Comp->mSlope = -1.0f;
 357
 358     if(lookAhead > 0)
 359     {
 360         /* The sliding hold implementation doesn't handle a length of 1. A 1-
 361          * sample hold is useless anyway, it would only ever give back what was
 362          * just given to it.
 363          */
 364         if(hold > 1)
 365         {
 366             Comp->mHold = std::make_unique<SlidingHold>();
 367             Comp->mHold->mValues[0] = -std::numeric_limits<float>::infinity();
 368             Comp->mHold->mExpiries[0] = hold;
 369             Comp->mHold->mLength = hold;
 370         }
 371         Comp->mDelay.resize(NumChans, FloatBufferLine{});
 372     }
 373
 374     Comp->mCrestCoeff = std::exp(-1.0f / (0.200f * SampleRate)); // 200ms
 375     Comp->mGainEstimate = Comp->mThreshold * -0.5f * Comp->mSlope;
 376     Comp->mAdaptCoeff = std::exp(-1.0f / (2.0f * SampleRate)); // 2s
 377
 378     return Comp;
 379 }
 380
 381 Compressor::~Compressor() = default;
 382
 383
 384 void Compressor::process(const uint SamplesToDo, const al::span<FloatBufferLine> InOut)
 385 {
 386     ASSUME(SamplesToDo > 0);
 387     ASSUME(SamplesToDo <= BufferLineSize);
 388
 389     const float preGain{mPreGain};
 390     if(preGain != 1.0f)
 391     {
 392         auto apply_gain = [SamplesToDo,preGain](FloatBufferLine &input) noexcept -> void
 393         {
 394             const auto buffer = assume_aligned_span<16>(al::span{input}.first(SamplesToDo));
 395             std::transform(buffer.cbegin(), buffer.cend(), buffer.begin(),
 396                 [preGain](const float s) noexcept { return s * preGain; });
 397         };
 398         std::for_each(InOut.begin(), InOut.end(), apply_gain);
 399     }
 400
 401     linkChannels(SamplesToDo, InOut);
 402
 403     if(mAuto.Attack || mAuto.Release)
 404         crestDetector(SamplesToDo);
 405
 406     if(mHold)
 407         peakHoldDetector(SamplesToDo);
 408     else
 409         peakDetector(SamplesToDo);
 410
 411     gainCompressor(SamplesToDo);
 412
 413     if(!mDelay.empty())
 414         signalDelay(SamplesToDo, InOut);
 415
 416     const auto gains = assume_aligned_span<16>(al::span{mSideChain}.first(SamplesToDo));
 417     auto apply_comp = [gains](const FloatBufferSpan inout) noexcept -> void
 418     {
 419         const auto buffer = assume_aligned_span<16>(inout);
 420         std::transform(gains.cbegin(), gains.cend(), buffer.cbegin(), buffer.begin(),
 421             std::multiplies{});
 422     };
 423     for(const FloatBufferSpan inout : InOut)
 424         apply_comp(inout);
 425
 426     const auto delayedGains = al::span{mSideChain}.subspan(SamplesToDo, mLookAhead);
 427     std::copy(delayedGains.begin(), delayedGains.end(), mSideChain.begin());
 428 }