alc/effects/reverb.cpp

   1 /**
   2  * Ambisonic reverb engine for the OpenAL cross platform audio library
   3  * Copyright (C) 2008-2017 by Chris Robinson and Christopher Fitzgerald.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include <algorithm>
  24 #include <array>
  25 #include <cstdio>
  26 #include <functional>
  27 #include <iterator>
  28 #include <numeric>
  29 #include <stdint.h>
  30
  31 #include "alc/effects/base.h"
  32 #include "almalloc.h"
  33 #include "alnumbers.h"
  34 #include "alnumeric.h"
  35 #include "alspan.h"
  36 #include "core/ambidefs.h"
  37 #include "core/bufferline.h"
  38 #include "core/context.h"
  39 #include "core/devformat.h"
  40 #include "core/device.h"
  41 #include "core/effectslot.h"
  42 #include "core/filters/biquad.h"
  43 #include "core/filters/splitter.h"
  44 #include "core/mixer.h"
  45 #include "core/mixer/defs.h"
  46 #include "intrusive_ptr.h"
  47 #include "opthelpers.h"
  48 #include "vecmat.h"
  49 #include "vector.h"
  50
  51 /* This is a user config option for modifying the overall output of the reverb
  52  * effect.
  53  */
  54 float ReverbBoost = 1.0f;
  55
  56 namespace {
  57
/* Shorthand for the unsigned type used for sample counts and LFO state. */
using uint = unsigned int;

/* Largest modulation time the delay lines are sized for; allocLines()
 * combines it with MODULATION_DEPTH_COEFF to get the maximum modulation
 * delay.
 */
constexpr float MaxModulationTime{4.0f};
/* Default modulation time. NOTE(review): same value as the initial
 * ReverbState::mParams.ModulationTime, which is written as a literal.
 */
constexpr float DefaultModulationTime{0.25f};

/* Fixed-point layout of the modulation LFO index: MOD_FRACBITS fractional
 * bits, MOD_FRACONE as the value of one whole unit of the normalized range,
 * and MOD_FRACMASK covering the low MOD_FRACBITS bits.
 */
#define MOD_FRACBITS 24
#define MOD_FRACONE  (1<<MOD_FRACBITS)
#define MOD_FRACMASK (MOD_FRACONE-1)


/* Makes the std::bind placeholders (_1, _2, ...) available unqualified within
 * this namespace.
 */
using namespace std::placeholders;

/* Max samples per process iteration. Used to limit the size needed for
 * temporary buffers. Must be a multiple of 4 for SIMD alignment.
 */
constexpr size_t MAX_UPDATE_SAMPLES{256};

/* The number of spatialized lines or channels to process. Four channels allows
 * for a 3D A-Format response. NOTE: This can't be changed without taking care
 * of the conversion matrices, and a few places where the length arrays are
 * assumed to have 4 elements.
 */
constexpr size_t NUM_LINES{4u};


/* This coefficient is used to define the maximum frequency range controlled by
 * the modulation depth. The current value of 0.05 will allow it to swing from
 * 0.95x to 1.05x. This value must be below 1. At 1 it will cause the sampler
 * to stall on the downswing, and above 1 it will cause it to sample backwards.
 * The value 0.05 seems to be nearest to Creative hardware behavior.
 */
constexpr float MODULATION_DEPTH_COEFF{0.05f};
  90
  91
/* The B-Format to A-Format conversion matrix. The arrangement of rows is
 * deliberately chosen to align the resulting lines to their spatial opposites
 * (0:above front left <-> 3:above back right, 1:below front right <-> 2:below
 * back left). It's not quite opposite, since the A-Format results in a
 * tetrahedron, but it's close enough. Should the model be extended to 8-lines
 * in the future, true opposites can be used.
 *
 * The rows are orthonormal, so the transpose of this matrix is its inverse.
 */
alignas(16) constexpr float B2A[NUM_LINES][NUM_LINES]{
    { 0.5f,  0.5f,  0.5f,  0.5f },
    { 0.5f, -0.5f, -0.5f,  0.5f },
    { 0.5f,  0.5f, -0.5f, -0.5f },
    { 0.5f, -0.5f,  0.5f, -0.5f }
};

/* Converts A-Format to B-Format for early reflections. This is the transpose
 * of B2A. Each row holds the gains that ReverbState::DoMixRow applies to the
 * four A-Format lines to produce one B-Format channel.
 */
alignas(16) constexpr float EarlyA2B[NUM_LINES][NUM_LINES]{
    { 0.5f,  0.5f,  0.5f,  0.5f },
    { 0.5f, -0.5f,  0.5f, -0.5f },
    { 0.5f, -0.5f, -0.5f,  0.5f },
    { 0.5f,  0.5f, -0.5f, -0.5f }
};

/* 1/sqrt(2), computed from al::numbers::sqrt2 and stored as a float. Besides
 * the matrix below, it scales the early all-pass coefficient in
 * EarlyReflections::updateLines().
 */
constexpr auto InvSqrt2 = static_cast<float>(1.0/al::numbers::sqrt2);
/* Converts A-Format to B-Format for late reverb. Rows are used the same way
 * as EarlyA2B's (one row of per-line gains per output channel) and are also
 * orthonormal.
 */
alignas(16) constexpr float LateA2B[NUM_LINES][NUM_LINES]{
    { 0.5f,  0.5f,  0.5f,  0.5f },
    { InvSqrt2, -InvSqrt2,  0.0f,  0.0f },
    { 0.0f,  0.0f,  InvSqrt2, -InvSqrt2 },
    { 0.5f,  0.5f, -0.5f, -0.5f }
};
 122
 123 /* The all-pass and delay lines have a variable length dependent on the
 124  * effect's density parameter, which helps alter the perceived environment
 125  * size. The size-to-density conversion is a cubed scale:
 126  *
 127  * density = min(1.0, pow(size, 3.0) / DENSITY_SCALE);
 128  *
 129  * The line lengths scale linearly with room size, so the inverse density
 130  * conversion is needed, taking the cube root of the re-scaled density to
 131  * calculate the line length multiplier:
 132  *
 133  *     length_mult = max(5.0, cbrt(density*DENSITY_SCALE));
 134  *
 135  * The density scale below will result in a max line multiplier of 50, for an
 136  * effective size range of 5m to 50m.
 137  */
/* Used by CalcDelayLengthMult(); cbrt(125000) = 50 is the multiplier it
 * returns for a density of 1.
 */
constexpr float DENSITY_SCALE{125000.0f};
 139
 140 /* All delay line lengths are specified in seconds.
 141  *
 142  * To approximate early reflections, we break them up into primary (those
 143  * arriving from the same direction as the source) and secondary (those
 144  * arriving from the opposite direction).
 145  *
 146  * The early taps decorrelate the 4-channel signal to approximate an average
 147  * room response for the primary reflections after the initial early delay.
 148  *
 149  * Given an average room dimension (d_a) and the speed of sound (c) we can
 150  * calculate the average reflection delay (r_a) regardless of listener and
 151  * source positions as:
 152  *
 153  *     r_a = d_a / c
 154  *     c   = 343.3
 155  *
 * This can be extended to finding the average difference (r_d) between the
 * maximum (r_1) and minimum (r_0) reflection delays:
 158  *
 159  *     r_0 = 2 / 3 r_a
 160  *         = r_a - r_d / 2
 161  *         = r_d
 162  *     r_1 = 4 / 3 r_a
 163  *         = r_a + r_d / 2
 164  *         = 2 r_d
 165  *     r_d = 2 / 3 r_a
 166  *         = r_1 - r_0
 167  *
 168  * As can be determined by integrating the 1D model with a source (s) and
 169  * listener (l) positioned across the dimension of length (d_a):
 170  *
 171  *     r_d = int_(l=0)^d_a (int_(s=0)^d_a |2 d_a - 2 (l + s)| ds) dl / c
 172  *
 173  * The initial taps (T_(i=0)^N) are then specified by taking a power series
 174  * that ranges between r_0 and half of r_1 less r_0:
 175  *
 176  *     R_i = 2^(i / (2 N - 1)) r_d
 177  *         = r_0 + (2^(i / (2 N - 1)) - 1) r_d
 178  *         = r_0 + T_i
 179  *     T_i = R_i - r_0
 180  *         = (2^(i / (2 N - 1)) - 1) r_d
 181  *
 182  * Assuming an average of 1m, we get the following taps:
 183  */
/* T_i for each of the four lines, in seconds. The last entry is the largest;
 * allocLines() and deviceUpdate() read it through back().
 */
constexpr std::array<float,NUM_LINES> EARLY_TAP_LENGTHS{{
    0.0000000e+0f, 2.0213520e-4f, 4.2531060e-4f, 6.7171600e-4f
}};
 187
 188 /* The early all-pass filter lengths are based on the early tap lengths:
 189  *
 190  *     A_i = R_i / a
 191  *
 192  * Where a is the approximate maximum all-pass cycle limit (20).
 193  */
/* A_i for each line, in seconds. EarlyReflections::updateLines() scales these
 * by the density multiplier and converts them to sample offsets.
 */
constexpr std::array<float,NUM_LINES> EARLY_ALLPASS_LENGTHS{{
    9.7096800e-5f, 1.0720356e-4f, 1.1836234e-4f, 1.3068260e-4f
}};
 197
 198 /* The early delay lines are used to transform the primary reflections into
 199  * the secondary reflections.  The A-format is arranged in such a way that
 200  * the channels/lines are spatially opposite:
 201  *
 202  *     C_i is opposite C_(N-i-1)
 203  *
 204  * The delays of the two opposing reflections (R_i and O_i) from a source
 205  * anywhere along a particular dimension always sum to twice its full delay:
 206  *
 207  *     2 r_a = R_i + O_i
 208  *
 209  * With that in mind we can determine the delay between the two reflections
 210  * and thus specify our early line lengths (L_(i=0)^N) using:
 211  *
 212  *     O_i = 2 r_a - R_(N-i-1)
 213  *     L_i = O_i - R_(N-i-1)
 214  *         = 2 (r_a - R_(N-i-1))
 215  *         = 2 (r_a - T_(N-i-1) - r_0)
 216  *         = 2 r_a (1 - (2 / 3) 2^((N - i - 1) / (2 N - 1)))
 217  *
 218  * Using an average dimension of 1m, we get:
 219  */
/* L_i for each line, in seconds. EarlyReflections::updateLines() scales these
 * by the density multiplier to get both the sample offset and the decay
 * coefficient of each early line.
 */
constexpr std::array<float,NUM_LINES> EARLY_LINE_LENGTHS{{
    5.9850400e-4f, 1.0913150e-3f, 1.5376658e-3f, 1.9419362e-3f
}};
 223
/* The late all-pass filter lengths are based on the late line lengths:
 *
 *     A_i = (5 / 3) L_i / r_1
 *
 * NOTE(review): the values below are numerically LATE_LINE_LENGTHS[i] / 12,
 * i.e. (5 / 3) L_i / 20. That matches a division by the all-pass cycle limit
 * (a = 20) used for the early all-pass lengths rather than by r_1. Confirm
 * which divisor the formula above is meant to show.
 */
constexpr std::array<float,NUM_LINES> LATE_ALLPASS_LENGTHS{{
    1.6182800e-4f, 2.0389060e-4f, 2.8159360e-4f, 3.2365600e-4f
}};
 231
 232 /* The late lines are used to approximate the decaying cycle of recursive
 233  * late reflections.
 234  *
 235  * Splitting the lines in half, we start with the shortest reflection paths
 236  * (L_(i=0)^(N/2)):
 237  *
 238  *     L_i = 2^(i / (N - 1)) r_d
 239  *
 240  * Then for the opposite (longest) reflection paths (L_(i=N/2)^N):
 241  *
 242  *     L_i = 2 r_a - L_(i-N/2)
 243  *         = 2 r_a - 2^((i - N / 2) / (N - 1)) r_d
 244  *
 245  * For our 1m average room, we get:
 246  */
/* Stored in ascending order, in seconds, so front() is the shortest line and
 * back() the longest (allocLines() relies on both).
 * NOTE(review): evaluating the second formula above for i=2 and i=3 yields
 * the last two values in the opposite order; they appear here sorted.
 */
constexpr std::array<float,NUM_LINES> LATE_LINE_LENGTHS{{
    1.9419362e-3f, 2.4466860e-3f, 3.3791220e-3f, 3.8838720e-3f
}};
 250
 251
/* Holds the samples of one line for a single processing iteration (at most
 * MAX_UPDATE_SAMPLES of them).
 */
using ReverbUpdateLine = std::array<float,MAX_UPDATE_SAMPLES>;
 253
 254 struct DelayLineI {
 255     /* The delay lines use interleaved samples, with the lengths being powers
 256      * of 2 to allow the use of bit-masking instead of a modulus for wrapping.
 257      */
 258     size_t Mask{0u};
 259     union {
 260         uintptr_t LineOffset{0u};
 261         std::array<float,NUM_LINES> *Line;
 262     };
 263
 264     /* Given the allocated sample buffer, this function updates each delay line
 265      * offset.
 266      */
 267     void realizeLineOffset(std::array<float,NUM_LINES> *sampleBuffer) noexcept
 268     { Line = sampleBuffer + LineOffset; }
 269
 270     /* Calculate the length of a delay line and store its mask and offset. */
 271     uint calcLineLength(const float length, const uintptr_t offset, const float frequency,
 272         const uint extra)
 273     {
 274         /* All line lengths are powers of 2, calculated from their lengths in
 275          * seconds, rounded up.
 276          */
 277         uint samples{float2uint(std::ceil(length*frequency))};
 278         samples = NextPowerOf2(samples + extra);
 279
 280         /* All lines share a single sample buffer. */
 281         Mask = samples - 1;
 282         LineOffset = offset;
 283
 284         /* Return the sample count for accumulation. */
 285         return samples;
 286     }
 287
 288     void write(size_t offset, const size_t c, const float *RESTRICT in, const size_t count) const noexcept
 289     {
 290         ASSUME(count > 0);
 291         for(size_t i{0u};i < count;)
 292         {
 293             offset &= Mask;
 294             size_t td{minz(Mask+1 - offset, count - i)};
 295             do {
 296                 Line[offset++][c] = in[i++];
 297             } while(--td);
 298         }
 299     }
 300 };
 301
/* State for a vector all-pass section that works on all NUM_LINES channels
 * at once.
 */
struct VecAllpass {
    /* The delay line backing the all-pass. */
    DelayLineI Delay;
    /* The all-pass feed-back/forward coefficient. */
    float Coeff{0.0f};
    /* Per-line delay offsets, in samples. EarlyReflections::updateLines()
     * writes index [1]; index [0] is presumably the previous value kept for
     * cross-fading (TODO confirm against the process functions).
     */
    size_t Offset[NUM_LINES][2]{};

    /* Faded and unfaded processing variants. The faded one additionally takes
     * a fade position (fadeCount) and a per-sample step (fadeStep).
     */
    void processFaded(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
        const float xCoeff, const float yCoeff, float fadeCount, const float fadeStep,
        const size_t todo);
    void processUnfaded(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
        const float xCoeff, const float yCoeff, const size_t todo);
};
 313
 314 struct T60Filter {
 315     /* Two filters are used to adjust the signal. One to control the low
 316      * frequencies, and one to control the high frequencies.
 317      */
 318     float MidGain[2]{0.0f, 0.0f};
 319     BiquadFilter HFFilter, LFFilter;
 320
 321     void calcCoeffs(const float length, const float lfDecayTime, const float mfDecayTime,
 322         const float hfDecayTime, const float lf0norm, const float hf0norm);
 323
 324     /* Applies the two T60 damping filter sections. */
 325     void process(const al::span<float> samples)
 326     { DualBiquad{HFFilter, LFFilter}.process(samples, samples.data()); }
 327 };
 328
/* State for the early reflections stage of the reverb. */
struct EarlyReflections {
    /* A Gerzon vector all-pass filter is used to simulate initial diffusion.
     * The spread from this filter also helps smooth out the reverb tail.
     */
    VecAllpass VecAp;

    /* An echo line is used to complete the second half of the early
     * reflections.
     */
    DelayLineI Delay;
    /* Per-line echo offsets (in samples) and gain coefficients. updateLines()
     * writes index [1] of each pair; index [0] is presumably the previous
     * value kept for cross-fading (TODO confirm).
     */
    size_t Offset[NUM_LINES][2]{};
    float Coeff[NUM_LINES][2]{};

    /* The gain for each output channel based on 3D panning. Both arrays are
     * handed to MixSamples together, one row per B-Format channel.
     */
    float CurrentGain[NUM_LINES][MAX_OUTPUT_CHANNELS]{};
    float PanGain[NUM_LINES][MAX_OUTPUT_CHANNELS]{};

    /* Recalculates the all-pass coefficient and the [1] entries of the
     * offsets and coefficients for the given parameters and sample rate.
     */
    void updateLines(const float density_mult, const float diffusion, const float decayTime,
        const float frequency);
};
 349
 350
 351 struct Modulation {
 352     /* The vibrato time is tracked with an index over a (MOD_FRACONE)
 353      * normalized range.
 354      */
 355     uint Index, Step;
 356
 357     /* The depth of frequency change, in samples. */
 358     float Depth[2];
 359
 360     float ModDelays[MAX_UPDATE_SAMPLES];
 361
 362     void updateModulator(float modTime, float modDepth, float frequency);
 363
 364     void calcDelays(size_t todo);
 365     void calcFadedDelays(size_t todo, float fadeCount, float fadeStep);
 366 };
 367
/* State for the late reverb (tail) stage. */
struct LateReverb {
    /* A recursive delay line is used to fill in the reverb tail. */
    DelayLineI Delay;
    /* Per-line delay offset pairs for the late lines. */
    size_t     Offset[NUM_LINES][2]{};

    /* Attenuation to compensate for the modal density and decay rate of the
     * late lines.
     */
    float DensityGain[2]{0.0f, 0.0f};

    /* T60 decay filters are used to simulate absorption. */
    T60Filter T60[NUM_LINES];

    /* Modulation (vibrato) state for the late lines. */
    Modulation Mod;

    /* A Gerzon vector all-pass filter is used to simulate diffusion. */
    VecAllpass VecAp;

    /* The gain for each output channel based on 3D panning. Both arrays are
     * handed to MixSamples together, one row per B-Format channel.
     */
    float CurrentGain[NUM_LINES][MAX_OUTPUT_CHANNELS]{};
    float PanGain[NUM_LINES][MAX_OUTPUT_CHANNELS]{};

    /* Recalculates the late line parameters for the given density multiplier,
     * diffusion, per-band decay times, normalized reference frequencies, and
     * sample rate.
     */
    void updateLines(const float density_mult, const float diffusion, const float lfDecayTime,
        const float mfDecayTime, const float hfDecayTime, const float lf0norm,
        const float hf0norm, const float frequency);
};
 394
 395 struct ReverbState final : public EffectState {
 396     /* All delay lines are allocated as a single buffer to reduce memory
 397      * fragmentation and management code.
 398      */
 399     al::vector<std::array<float,NUM_LINES>,16> mSampleBuffer;
 400
 401     struct {
 402         /* Calculated parameters which indicate if cross-fading is needed after
 403          * an update.
 404          */
 405         float Density{1.0f};
 406         float Diffusion{1.0f};
 407         float DecayTime{1.49f};
 408         float HFDecayTime{0.83f * 1.49f};
 409         float LFDecayTime{1.0f * 1.49f};
 410         float ModulationTime{0.25f};
 411         float ModulationDepth{0.0f};
 412         float HFReference{5000.0f};
 413         float LFReference{250.0f};
 414     } mParams;
 415
 416     /* Master effect filters */
 417     struct {
 418         BiquadFilter Lp;
 419         BiquadFilter Hp;
 420     } mFilter[NUM_LINES];
 421
 422     /* Core delay line (early reflections and late reverb tap from this). */
 423     DelayLineI mDelay;
 424
 425     /* Tap points for early reflection delay. */
 426     size_t mEarlyDelayTap[NUM_LINES][2]{};
 427     float mEarlyDelayCoeff[NUM_LINES][2]{};
 428
 429     /* Tap points for late reverb feed and delay. */
 430     size_t mLateFeedTap{};
 431     size_t mLateDelayTap[NUM_LINES][2]{};
 432
 433     /* Coefficients for the all-pass and line scattering matrices. */
 434     float mMixX{0.0f};
 435     float mMixY{0.0f};
 436
 437     EarlyReflections mEarly;
 438
 439     LateReverb mLate;
 440
 441     bool mDoFading{};
 442
 443     /* Maximum number of samples to process at once. */
 444     size_t mMaxUpdate[2]{MAX_UPDATE_SAMPLES, MAX_UPDATE_SAMPLES};
 445
 446     /* The current write offset for all delay lines. */
 447     size_t mOffset{};
 448
 449     /* Temporary storage used when processing. */
 450     union {
 451         alignas(16) FloatBufferLine mTempLine{};
 452         alignas(16) std::array<ReverbUpdateLine,NUM_LINES> mTempSamples;
 453     };
 454     alignas(16) std::array<ReverbUpdateLine,NUM_LINES> mEarlySamples{};
 455     alignas(16) std::array<ReverbUpdateLine,NUM_LINES> mLateSamples{};
 456
 457
 458     bool mUpmixOutput{false};
 459     std::array<float,MaxAmbiOrder+1> mOrderScales{};
 460     std::array<std::array<BandSplitter,NUM_LINES>,2> mAmbiSplitter;
 461
 462
 463     static void DoMixRow(const al::span<float> OutBuffer, const al::span<const float> Gains,
 464         const float *InSamples, const size_t InStride)
 465     {
 466         std::fill(OutBuffer.begin(), OutBuffer.end(), 0.0f);
 467         for(const float gain : Gains)
 468         {
 469             const float *RESTRICT input{al::assume_aligned<16>(InSamples)};
 470             InSamples += InStride;
 471
 472             if(!(std::fabs(gain) > GainSilenceThreshold))
 473                 continue;
 474
 475             for(float &sample : OutBuffer)
 476             {
 477                 sample += *input * gain;
 478                 ++input;
 479             }
 480         }
 481     }
 482
 483
 484     void MixOutPlain(const al::span<FloatBufferLine> samplesOut, const size_t counter,
 485         const size_t offset, const size_t todo)
 486     {
 487         ASSUME(todo > 0);
 488
 489         /* Convert back to B-Format, and mix the results to output. */
 490         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), todo};
 491         for(size_t c{0u};c < NUM_LINES;c++)
 492         {
 493             DoMixRow(tmpspan, EarlyA2B[c], mEarlySamples[0].data(), mEarlySamples[0].size());
 494             MixSamples(tmpspan, samplesOut, mEarly.CurrentGain[c], mEarly.PanGain[c], counter,
 495                 offset);
 496         }
 497         for(size_t c{0u};c < NUM_LINES;c++)
 498         {
 499             DoMixRow(tmpspan, LateA2B[c], mLateSamples[0].data(), mLateSamples[0].size());
 500             MixSamples(tmpspan, samplesOut, mLate.CurrentGain[c], mLate.PanGain[c], counter,
 501                 offset);
 502         }
 503     }
 504
 505     void MixOutAmbiUp(const al::span<FloatBufferLine> samplesOut, const size_t counter,
 506         const size_t offset, const size_t todo)
 507     {
 508         ASSUME(todo > 0);
 509
 510         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), todo};
 511         for(size_t c{0u};c < NUM_LINES;c++)
 512         {
 513             DoMixRow(tmpspan, EarlyA2B[c], mEarlySamples[0].data(), mEarlySamples[0].size());
 514
 515             /* Apply scaling to the B-Format's HF response to "upsample" it to
 516              * higher-order output.
 517              */
 518             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 519             mAmbiSplitter[0][c].processHfScale(tmpspan, hfscale);
 520
 521             MixSamples(tmpspan, samplesOut, mEarly.CurrentGain[c], mEarly.PanGain[c], counter,
 522                 offset);
 523         }
 524         for(size_t c{0u};c < NUM_LINES;c++)
 525         {
 526             DoMixRow(tmpspan, LateA2B[c], mLateSamples[0].data(), mLateSamples[0].size());
 527
 528             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 529             mAmbiSplitter[1][c].processHfScale(tmpspan, hfscale);
 530
 531             MixSamples(tmpspan, samplesOut, mLate.CurrentGain[c], mLate.PanGain[c], counter,
 532                 offset);
 533         }
 534     }
 535
 536     void mixOut(const al::span<FloatBufferLine> samplesOut, const size_t counter,
 537         const size_t offset, const size_t todo)
 538     {
 539         if(mUpmixOutput)
 540             MixOutAmbiUp(samplesOut, counter, offset, todo);
 541         else
 542             MixOutPlain(samplesOut, counter, offset, todo);
 543     }
 544
 545     void allocLines(const float frequency);
 546
 547     void updateDelayLine(const float earlyDelay, const float lateDelay, const float density_mult,
 548         const float decayTime, const float frequency);
 549     void update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
 550         const float earlyGain, const float lateGain, const EffectTarget &target);
 551
 552     void earlyUnfaded(const size_t offset, const size_t todo);
 553     void earlyFaded(const size_t offset, const size_t todo, const float fade,
 554         const float fadeStep);
 555
 556     void lateUnfaded(const size_t offset, const size_t todo);
 557     void lateFaded(const size_t offset, const size_t todo, const float fade,
 558         const float fadeStep);
 559
 560     void deviceUpdate(const DeviceBase *device, const Buffer &buffer) override;
 561     void update(const ContextBase *context, const EffectSlot *slot, const EffectProps *props,
 562         const EffectTarget target) override;
 563     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
 564         const al::span<FloatBufferLine> samplesOut) override;
 565
 566     DEF_NEWDEL(ReverbState)
 567 };
 568
 569 /**************************************
 570  *  Device Update                     *
 571  **************************************/
 572
 573 inline float CalcDelayLengthMult(float density)
 574 { return maxf(5.0f, std::cbrt(density*DENSITY_SCALE)); }
 575
 576 /* Calculates the delay line metrics and allocates the shared sample buffer
 577  * for all lines given the sample rate (frequency).
 578  */
 579 void ReverbState::allocLines(const float frequency)
 580 {
 581     /* All delay line lengths are calculated to accomodate the full range of
 582      * lengths given their respective paramters.
 583      */
 584     size_t totalSamples{0u};
 585
 586     /* Multiplier for the maximum density value, i.e. density=1, which is
 587      * actually the least density...
 588      */
 589     const float multiplier{CalcDelayLengthMult(1.0f)};
 590
 591     /* The main delay length includes the maximum early reflection delay, the
 592      * largest early tap width, the maximum late reverb delay, and the
 593      * largest late tap width.  Finally, it must also be extended by the
 594      * update size (BufferLineSize) for block processing.
 595      */
 596     constexpr float LateLineDiffAvg{(LATE_LINE_LENGTHS.back()-LATE_LINE_LENGTHS.front()) /
 597         float{NUM_LINES}};
 598     float length{ReverbMaxReflectionsDelay + EARLY_TAP_LENGTHS.back()*multiplier +
 599         ReverbMaxLateReverbDelay + LateLineDiffAvg*multiplier};
 600     totalSamples += mDelay.calcLineLength(length, totalSamples, frequency, BufferLineSize);
 601
 602     /* The early vector all-pass line. */
 603     length = EARLY_ALLPASS_LENGTHS.back() * multiplier;
 604     totalSamples += mEarly.VecAp.Delay.calcLineLength(length, totalSamples, frequency, 0);
 605
 606     /* The early reflection line. */
 607     length = EARLY_LINE_LENGTHS.back() * multiplier;
 608     totalSamples += mEarly.Delay.calcLineLength(length, totalSamples, frequency, 0);
 609
 610     /* The late vector all-pass line. */
 611     length = LATE_ALLPASS_LENGTHS.back() * multiplier;
 612     totalSamples += mLate.VecAp.Delay.calcLineLength(length, totalSamples, frequency, 0);
 613
 614     /* The modulator's line length is calculated from the maximum modulation
 615      * time and depth coefficient, and halfed for the low-to-high frequency
 616      * swing.
 617      */
 618     constexpr float max_mod_delay{MaxModulationTime*MODULATION_DEPTH_COEFF / 2.0f};
 619
 620     /* The late delay lines are calculated from the largest maximum density
 621      * line length, and the maximum modulation delay. An additional sample is
 622      * added to keep it stable when there is no modulation.
 623      */
 624     length = LATE_LINE_LENGTHS.back()*multiplier + max_mod_delay;
 625     totalSamples += mLate.Delay.calcLineLength(length, totalSamples, frequency, 1);
 626
 627     if(totalSamples != mSampleBuffer.size())
 628         decltype(mSampleBuffer)(totalSamples).swap(mSampleBuffer);
 629
 630     /* Clear the sample buffer. */
 631     std::fill(mSampleBuffer.begin(), mSampleBuffer.end(), decltype(mSampleBuffer)::value_type{});
 632
 633     /* Update all delays to reflect the new sample buffer. */
 634     mDelay.realizeLineOffset(mSampleBuffer.data());
 635     mEarly.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 636     mEarly.Delay.realizeLineOffset(mSampleBuffer.data());
 637     mLate.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 638     mLate.Delay.realizeLineOffset(mSampleBuffer.data());
 639 }
 640
 641 void ReverbState::deviceUpdate(const DeviceBase *device, const Buffer&)
 642 {
 643     const auto frequency = static_cast<float>(device->Frequency);
 644
 645     /* Allocate the delay lines. */
 646     allocLines(frequency);
 647
 648     const float multiplier{CalcDelayLengthMult(1.0f)};
 649
 650     /* The late feed taps are set a fixed position past the latest delay tap. */
 651     mLateFeedTap = float2uint((ReverbMaxReflectionsDelay + EARLY_TAP_LENGTHS.back()*multiplier) *
 652         frequency);
 653
 654     /* Clear filters and gain coefficients since the delay lines were all just
 655      * cleared (if not reallocated).
 656      */
 657     for(auto &filter : mFilter)
 658     {
 659         filter.Lp.clear();
 660         filter.Hp.clear();
 661     }
 662
 663     for(auto &coeff : mEarlyDelayCoeff)
 664         std::fill(std::begin(coeff), std::end(coeff), 0.0f);
 665     for(auto &coeff : mEarly.Coeff)
 666         std::fill(std::begin(coeff), std::end(coeff), 0.0f);
 667
 668     mLate.DensityGain[0] = 0.0f;
 669     mLate.DensityGain[1] = 0.0f;
 670     for(auto &t60 : mLate.T60)
 671     {
 672         t60.MidGain[0] = 0.0f;
 673         t60.MidGain[1] = 0.0f;
 674         t60.HFFilter.clear();
 675         t60.LFFilter.clear();
 676     }
 677
 678     mLate.Mod.Index = 0;
 679     mLate.Mod.Step = 1;
 680     std::fill(std::begin(mLate.Mod.Depth), std::end(mLate.Mod.Depth), 0.0f);
 681
 682     for(auto &gains : mEarly.CurrentGain)
 683         std::fill(std::begin(gains), std::end(gains), 0.0f);
 684     for(auto &gains : mEarly.PanGain)
 685         std::fill(std::begin(gains), std::end(gains), 0.0f);
 686     for(auto &gains : mLate.CurrentGain)
 687         std::fill(std::begin(gains), std::end(gains), 0.0f);
 688     for(auto &gains : mLate.PanGain)
 689         std::fill(std::begin(gains), std::end(gains), 0.0f);
 690
 691     /* Reset fading and offset base. */
 692     mDoFading = true;
 693     std::fill(std::begin(mMaxUpdate), std::end(mMaxUpdate), MAX_UPDATE_SAMPLES);
 694     mOffset = 0;
 695
 696     if(device->mAmbiOrder > 1)
 697     {
 698         mUpmixOutput = true;
 699         mOrderScales = AmbiScale::GetHFOrderScales(1, device->mAmbiOrder);
 700     }
 701     else
 702     {
 703         mUpmixOutput = false;
 704         mOrderScales.fill(1.0f);
 705     }
 706     mAmbiSplitter[0][0].init(device->mXOverFreq / frequency);
 707     std::fill(mAmbiSplitter[0].begin()+1, mAmbiSplitter[0].end(), mAmbiSplitter[0][0]);
 708     std::fill(mAmbiSplitter[1].begin(), mAmbiSplitter[1].end(), mAmbiSplitter[0][0]);
 709 }
 710
 711 /**************************************
 712  *  Effect Update                     *
 713  **************************************/
 714
 715 /* Calculate a decay coefficient given the length of each cycle and the time
 716  * until the decay reaches -60 dB.
 717  */
 718 inline float CalcDecayCoeff(const float length, const float decayTime)
 719 { return std::pow(ReverbDecayGain, length/decayTime); }
 720
 721 /* Calculate a decay length from a coefficient and the time until the decay
 722  * reaches -60 dB.
 723  */
 724 inline float CalcDecayLength(const float coeff, const float decayTime)
 725 {
 726     constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
 727     return std::log10(coeff) * decayTime / log10_decaygain;
 728 }
 729
 730 /* Calculate an attenuation to be applied to the input of any echo models to
 731  * compensate for modal density and decay time.
 732  */
 733 inline float CalcDensityGain(const float a)
 734 {
 735     /* The energy of a signal can be obtained by finding the area under the
 736      * squared signal.  This takes the form of Sum(x_n^2), where x is the
 737      * amplitude for the sample n.
 738      *
 739      * Decaying feedback matches exponential decay of the form Sum(a^n),
 740      * where a is the attenuation coefficient, and n is the sample.  The area
 741      * under this decay curve can be calculated as:  1 / (1 - a).
 742      *
 743      * Modifying the above equation to find the area under the squared curve
 744      * (for energy) yields:  1 / (1 - a^2).  Input attenuation can then be
 745      * calculated by inverting the square root of this approximation,
 746      * yielding:  1 / sqrt(1 / (1 - a^2)), simplified to: sqrt(1 - a^2).
 747      */
 748     return std::sqrt(1.0f - a*a);
 749 }
 750
 751 /* Calculate the scattering matrix coefficients given a diffusion factor. */
 752 inline void CalcMatrixCoeffs(const float diffusion, float *x, float *y)
 753 {
 754     /* The matrix is of order 4, so n is sqrt(4 - 1). */
 755     constexpr float n{al::numbers::sqrt3_v<float>};
 756     const float t{diffusion * std::atan(n)};
 757
 758     /* Calculate the first mixing matrix coefficient. */
 759     *x = std::cos(t);
 760     /* Calculate the second mixing matrix coefficient. */
 761     *y = std::sin(t) / n;
 762 }
 763
 764 /* Calculate the limited HF ratio for use with the late reverb low-pass
 765  * filters.
 766  */
 767 float CalcLimitedHfRatio(const float hfRatio, const float airAbsorptionGainHF,
 768     const float decayTime)
 769 {
 770     /* Find the attenuation due to air absorption in dB (converting delay
 771      * time to meters using the speed of sound).  Then reversing the decay
 772      * equation, solve for HF ratio.  The delay length is cancelled out of
 773      * the equation, so it can be calculated once for all lines.
 774      */
 775     float limitRatio{1.0f / SpeedOfSoundMetersPerSec /
 776         CalcDecayLength(airAbsorptionGainHF, decayTime)};
 777
 778     /* Using the limit calculated above, apply the upper bound to the HF ratio. */
 779     return minf(limitRatio, hfRatio);
 780 }
 781
 782
 783 /* Calculates the 3-band T60 damping coefficients for a particular delay line
 784  * of specified length, using a combination of two shelf filter sections given
 785  * decay times for each band split at two reference frequencies.
 786  */
 787 void T60Filter::calcCoeffs(const float length, const float lfDecayTime,
 788     const float mfDecayTime, const float hfDecayTime, const float lf0norm,
 789     const float hf0norm)
 790 {
 791     const float mfGain{CalcDecayCoeff(length, mfDecayTime)};
 792     const float lfGain{CalcDecayCoeff(length, lfDecayTime) / mfGain};
 793     const float hfGain{CalcDecayCoeff(length, hfDecayTime) / mfGain};
 794
 795     MidGain[1] = mfGain;
 796     LFFilter.setParamsFromSlope(BiquadType::LowShelf, lf0norm, lfGain, 1.0f);
 797     HFFilter.setParamsFromSlope(BiquadType::HighShelf, hf0norm, hfGain, 1.0f);
 798 }
 799
 800 /* Update the early reflection line lengths and gain coefficients. */
 801 void EarlyReflections::updateLines(const float density_mult, const float diffusion,
 802     const float decayTime, const float frequency)
 803 {
 804     /* Calculate the all-pass feed-back/forward coefficient. */
 805     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 806
 807     for(size_t i{0u};i < NUM_LINES;i++)
 808     {
 809         /* Calculate the delay length of each all-pass line. */
 810         float length{EARLY_ALLPASS_LENGTHS[i] * density_mult};
 811         VecAp.Offset[i][1] = float2uint(length * frequency);
 812
 813         /* Calculate the delay length of each delay line. */
 814         length = EARLY_LINE_LENGTHS[i] * density_mult;
 815         Offset[i][1] = float2uint(length * frequency);
 816
 817         /* Calculate the gain (coefficient) for each line. */
 818         Coeff[i][1] = CalcDecayCoeff(length, decayTime);
 819     }
 820 }
 821
 822 /* Update the EAX modulation step and depth. Keep in mind that this kind of
 823  * vibrato is additive and not multiplicative as one may expect. The downswing
 824  * will sound stronger than the upswing.
 825  */
 826 void Modulation::updateModulator(float modTime, float modDepth, float frequency)
 827 {
 828     /* Modulation is calculated in two parts.
 829      *
 830      * The modulation time effects the sinus rate, altering the speed of
 831      * frequency changes. An index is incremented for each sample with an
 832      * appropriate step size to generate an LFO, which will vary the feedback
 833      * delay over time.
 834      */
 835     Step = maxu(fastf2u(MOD_FRACONE / (frequency * modTime)), 1);
 836
 837     /* The modulation depth effects the amount of frequency change over the
 838      * range of the sinus. It needs to be scaled by the modulation time so that
 839      * a given depth produces a consistent change in frequency over all ranges
 840      * of time. Since the depth is applied to a sinus value, it needs to be
 841      * halved once for the sinus range and again for the sinus swing in time
 842      * (half of it is spent decreasing the frequency, half is spent increasing
 843      * it).
 844      */
 845     if(modTime >= DefaultModulationTime)
 846     {
 847         /* To cancel the effects of a long period modulation on the late
 848          * reverberation, the amount of pitch should be varied (decreased)
 849          * according to the modulation time. The natural form is varying
 850          * inversely, in fact resulting in an invariant.
 851          */
 852         Depth[1] = MODULATION_DEPTH_COEFF / 4.0f * DefaultModulationTime * modDepth * frequency;
 853     }
 854     else
 855         Depth[1] = MODULATION_DEPTH_COEFF / 4.0f * modTime * modDepth * frequency;
 856 }
 857
 858 /* Update the late reverb line lengths and T60 coefficients. */
 859 void LateReverb::updateLines(const float density_mult, const float diffusion,
 860     const float lfDecayTime, const float mfDecayTime, const float hfDecayTime,
 861     const float lf0norm, const float hf0norm, const float frequency)
 862 {
 863     /* Scaling factor to convert the normalized reference frequencies from
 864      * representing 0...freq to 0...max_reference.
 865      */
 866     constexpr float MaxHFReference{20000.0f};
 867     const float norm_weight_factor{frequency / MaxHFReference};
 868
 869     const float late_allpass_avg{
 870         std::accumulate(LATE_ALLPASS_LENGTHS.begin(), LATE_ALLPASS_LENGTHS.end(), 0.0f) /
 871         float{NUM_LINES}};
 872
 873     /* To compensate for changes in modal density and decay time of the late
 874      * reverb signal, the input is attenuated based on the maximal energy of
 875      * the outgoing signal.  This approximation is used to keep the apparent
 876      * energy of the signal equal for all ranges of density and decay time.
 877      *
 878      * The average length of the delay lines is used to calculate the
 879      * attenuation coefficient.
 880      */
 881     float length{std::accumulate(LATE_LINE_LENGTHS.begin(), LATE_LINE_LENGTHS.end(), 0.0f) /
 882         float{NUM_LINES} + late_allpass_avg};
 883     length *= density_mult;
 884     /* The density gain calculation uses an average decay time weighted by
 885      * approximate bandwidth. This attempts to compensate for losses of energy
 886      * that reduce decay time due to scattering into highly attenuated bands.
 887      */
 888     const float decayTimeWeighted{
 889         lf0norm*norm_weight_factor*lfDecayTime +
 890         (hf0norm - lf0norm)*norm_weight_factor*mfDecayTime +
 891         (1.0f - hf0norm*norm_weight_factor)*hfDecayTime};
 892     DensityGain[1] = CalcDensityGain(CalcDecayCoeff(length, decayTimeWeighted));
 893
 894     /* Calculate the all-pass feed-back/forward coefficient. */
 895     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 896
 897     for(size_t i{0u};i < NUM_LINES;i++)
 898     {
 899         /* Calculate the delay length of each all-pass line. */
 900         length = LATE_ALLPASS_LENGTHS[i] * density_mult;
 901         VecAp.Offset[i][1] = float2uint(length * frequency);
 902
 903         /* Calculate the delay length of each feedback delay line. */
 904         length = LATE_LINE_LENGTHS[i] * density_mult;
 905         Offset[i][1] = float2uint(length*frequency + 0.5f);
 906
 907         /* Approximate the absorption that the vector all-pass would exhibit
 908          * given the current diffusion so we don't have to process a full T60
 909          * filter for each of its four lines. Also include the average
 910          * modulation delay (depth is half the max delay in samples).
 911          */
 912         length += lerpf(LATE_ALLPASS_LENGTHS[i], late_allpass_avg, diffusion)*density_mult +
 913             Mod.Depth[1]/frequency;
 914
 915         /* Calculate the T60 damping coefficients for each line. */
 916         T60[i].calcCoeffs(length, lfDecayTime, mfDecayTime, hfDecayTime, lf0norm, hf0norm);
 917     }
 918 }
 919
 920
 921 /* Update the offsets for the main effect delay line. */
 922 void ReverbState::updateDelayLine(const float earlyDelay, const float lateDelay,
 923     const float density_mult, const float decayTime, const float frequency)
 924 {
 925     /* Early reflection taps are decorrelated by means of an average room
 926      * reflection approximation described above the definition of the taps.
 927      * This approximation is linear and so the above density multiplier can
 928      * be applied to adjust the width of the taps.  A single-band decay
 929      * coefficient is applied to simulate initial attenuation and absorption.
 930      *
 931      * Late reverb taps are based on the late line lengths to allow a zero-
 932      * delay path and offsets that would continue the propagation naturally
 933      * into the late lines.
 934      */
 935     for(size_t i{0u};i < NUM_LINES;i++)
 936     {
 937         float length{EARLY_TAP_LENGTHS[i]*density_mult};
 938         mEarlyDelayTap[i][1] = float2uint((earlyDelay+length) * frequency);
 939         mEarlyDelayCoeff[i][1] = CalcDecayCoeff(length, decayTime);
 940
 941         length = (LATE_LINE_LENGTHS[i] - LATE_LINE_LENGTHS.front())/float{NUM_LINES}*density_mult +
 942             lateDelay;
 943         mLateDelayTap[i][1] = mLateFeedTap + float2uint(length * frequency);
 944     }
 945 }
 946
 947 /* Creates a transform matrix given a reverb vector. The vector pans the reverb
 948  * reflections toward the given direction, using its magnitude (up to 1) as a
 949  * focal strength. This function results in a B-Format transformation matrix
 950  * that spatially focuses the signal in the desired direction.
 951  */
 952 alu::Matrix GetTransformFromVector(const float *vec)
 953 {
 954     /* Normalize the panning vector according to the N3D scale, which has an
 955      * extra sqrt(3) term on the directional components. Converting from OpenAL
 956      * to B-Format also requires negating X (ACN 1) and Z (ACN 3). Note however
 957      * that the reverb panning vectors use left-handed coordinates, unlike the
 958      * rest of OpenAL which use right-handed. This is fixed by negating Z,
 959      * which cancels out with the B-Format Z negation.
 960      */
 961     float norm[3];
 962     float mag{std::sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2])};
 963     if(mag > 1.0f)
 964     {
 965         norm[0] = vec[0] / mag * -al::numbers::sqrt3_v<float>;
 966         norm[1] = vec[1] / mag * al::numbers::sqrt3_v<float>;
 967         norm[2] = vec[2] / mag * al::numbers::sqrt3_v<float>;
 968         mag = 1.0f;
 969     }
 970     else
 971     {
 972         /* If the magnitude is less than or equal to 1, just apply the sqrt(3)
 973          * term. There's no need to renormalize the magnitude since it would
 974          * just be reapplied in the matrix.
 975          */
 976         norm[0] = vec[0] * -al::numbers::sqrt3_v<float>;
 977         norm[1] = vec[1] * al::numbers::sqrt3_v<float>;
 978         norm[2] = vec[2] * al::numbers::sqrt3_v<float>;
 979     }
 980
 981     return alu::Matrix{
 982         1.0f,   0.0f,    0.0f,   0.0f,
 983         norm[0], 1.0f-mag, 0.0f, 0.0f,
 984         norm[1], 0.0f, 1.0f-mag, 0.0f,
 985         norm[2], 0.0f, 0.0f, 1.0f-mag
 986     };
 987 }
 988
 989 /* Update the early and late 3D panning gains. */
 990 void ReverbState::update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
 991     const float earlyGain, const float lateGain, const EffectTarget &target)
 992 {
 993     /* Create matrices that transform a B-Format signal according to the
 994      * panning vectors.
 995      */
 996     const alu::Matrix earlymat{GetTransformFromVector(ReflectionsPan)};
 997     const alu::Matrix latemat{GetTransformFromVector(LateReverbPan)};
 998
 999     mOutTarget = target.Main->Buffer;
1000     for(size_t i{0u};i < NUM_LINES;i++)
1001     {
1002         const float coeffs[MaxAmbiChannels]{earlymat[0][i], earlymat[1][i], earlymat[2][i],
1003             earlymat[3][i]};
1004         ComputePanGains(target.Main, coeffs, earlyGain, mEarly.PanGain[i]);
1005     }
1006     for(size_t i{0u};i < NUM_LINES;i++)
1007     {
1008         const float coeffs[MaxAmbiChannels]{latemat[0][i], latemat[1][i], latemat[2][i],
1009             latemat[3][i]};
1010         ComputePanGains(target.Main, coeffs, lateGain, mLate.PanGain[i]);
1011     }
1012 }
1013
1014 void ReverbState::update(const ContextBase *Context, const EffectSlot *Slot,
1015     const EffectProps *props, const EffectTarget target)
1016 {
1017     const DeviceBase *Device{Context->mDevice};
1018     const auto frequency = static_cast<float>(Device->Frequency);
1019
1020     /* Calculate the master filters */
1021     float hf0norm{minf(props->Reverb.HFReference/frequency, 0.49f)};
1022     mFilter[0].Lp.setParamsFromSlope(BiquadType::HighShelf, hf0norm, props->Reverb.GainHF, 1.0f);
1023     float lf0norm{minf(props->Reverb.LFReference/frequency, 0.49f)};
1024     mFilter[0].Hp.setParamsFromSlope(BiquadType::LowShelf, lf0norm, props->Reverb.GainLF, 1.0f);
1025     for(size_t i{1u};i < NUM_LINES;i++)
1026     {
1027         mFilter[i].Lp.copyParamsFrom(mFilter[0].Lp);
1028         mFilter[i].Hp.copyParamsFrom(mFilter[0].Hp);
1029     }
1030
1031     /* The density-based room size (delay length) multiplier. */
1032     const float density_mult{CalcDelayLengthMult(props->Reverb.Density)};
1033
1034     /* Update the main effect delay and associated taps. */
1035     updateDelayLine(props->Reverb.ReflectionsDelay, props->Reverb.LateReverbDelay,
1036         density_mult, props->Reverb.DecayTime, frequency);
1037
1038     /* Update the early lines. */
1039     mEarly.updateLines(density_mult, props->Reverb.Diffusion, props->Reverb.DecayTime, frequency);
1040
1041     /* Get the mixing matrix coefficients. */
1042     CalcMatrixCoeffs(props->Reverb.Diffusion, &mMixX, &mMixY);
1043
1044     /* If the HF limit parameter is flagged, calculate an appropriate limit
1045      * based on the air absorption parameter.
1046      */
1047     float hfRatio{props->Reverb.DecayHFRatio};
1048     if(props->Reverb.DecayHFLimit && props->Reverb.AirAbsorptionGainHF < 1.0f)
1049         hfRatio = CalcLimitedHfRatio(hfRatio, props->Reverb.AirAbsorptionGainHF,
1050             props->Reverb.DecayTime);
1051
1052     /* Calculate the LF/HF decay times. */
1053     constexpr float MinDecayTime{0.1f}, MaxDecayTime{20.0f};
1054     const float lfDecayTime{clampf(props->Reverb.DecayTime*props->Reverb.DecayLFRatio,
1055         MinDecayTime, MaxDecayTime)};
1056     const float hfDecayTime{clampf(props->Reverb.DecayTime*hfRatio, MinDecayTime, MaxDecayTime)};
1057
1058     /* Update the modulator rate and depth. */
1059     mLate.Mod.updateModulator(props->Reverb.ModulationTime, props->Reverb.ModulationDepth,
1060         frequency);
1061
1062     /* Update the late lines. */
1063     mLate.updateLines(density_mult, props->Reverb.Diffusion, lfDecayTime,
1064         props->Reverb.DecayTime, hfDecayTime, lf0norm, hf0norm, frequency);
1065
1066     /* Update early and late 3D panning. */
1067     const float gain{props->Reverb.Gain * Slot->Gain * ReverbBoost};
1068     update3DPanning(props->Reverb.ReflectionsPan, props->Reverb.LateReverbPan,
1069         props->Reverb.ReflectionsGain*gain, props->Reverb.LateReverbGain*gain, target);
1070
1071     /* Calculate the max update size from the smallest relevant delay. */
1072     mMaxUpdate[1] = minz(MAX_UPDATE_SAMPLES, minz(mEarly.Offset[0][1], mLate.Offset[0][1]));
1073
1074     /* Determine if delay-line cross-fading is required. Density is essentially
1075      * a master control for the feedback delays, so changes the offsets of many
1076      * delay lines.
1077      */
1078     mDoFading |= (mParams.Density != props->Reverb.Density ||
1079         /* Diffusion and decay times influences the decay rate (gain) of the
1080          * late reverb T60 filter.
1081          */
1082         mParams.Diffusion != props->Reverb.Diffusion ||
1083         mParams.DecayTime != props->Reverb.DecayTime ||
1084         mParams.HFDecayTime != hfDecayTime ||
1085         mParams.LFDecayTime != lfDecayTime ||
1086         /* Modulation time and depth both require fading the modulation delay. */
1087         mParams.ModulationTime != props->Reverb.ModulationTime ||
1088         mParams.ModulationDepth != props->Reverb.ModulationDepth ||
1089         /* HF/LF References control the weighting used to calculate the density
1090          * gain.
1091          */
1092         mParams.HFReference != props->Reverb.HFReference ||
1093         mParams.LFReference != props->Reverb.LFReference);
1094     if(mDoFading)
1095     {
1096         mParams.Density = props->Reverb.Density;
1097         mParams.Diffusion = props->Reverb.Diffusion;
1098         mParams.DecayTime = props->Reverb.DecayTime;
1099         mParams.HFDecayTime = hfDecayTime;
1100         mParams.LFDecayTime = lfDecayTime;
1101         mParams.ModulationTime = props->Reverb.ModulationTime;
1102         mParams.ModulationDepth = props->Reverb.ModulationDepth;
1103         mParams.HFReference = props->Reverb.HFReference;
1104         mParams.LFReference = props->Reverb.LFReference;
1105     }
1106 }
1107
1108
1109 /**************************************
1110  *  Effect Processing                 *
1111  **************************************/
1112
1113 /* Applies a scattering matrix to the 4-line (vector) input.  This is used
1114  * for both the below vector all-pass model and to perform modal feed-back
1115  * delay network (FDN) mixing.
1116  *
1117  * The matrix is derived from a skew-symmetric matrix to form a 4D rotation
1118  * matrix with a single unitary rotational parameter:
1119  *
1120  *     [  d,  a,  b,  c ]          1 = a^2 + b^2 + c^2 + d^2
1121  *     [ -a,  d,  c, -b ]
1122  *     [ -b, -c,  d,  a ]
1123  *     [ -c,  b, -a,  d ]
1124  *
1125  * The rotation is constructed from the effect's diffusion parameter,
1126  * yielding:
1127  *
1128  *     1 = x^2 + 3 y^2
1129  *
1130  * Where a, b, and c are the coefficient y with differing signs, and d is the
1131  * coefficient x.  The final matrix is thus:
1132  *
1133  *     [  x,  y, -y,  y ]          n = sqrt(matrix_order - 1)
1134  *     [ -y,  x,  y,  y ]          t = diffusion_parameter * atan(n)
1135  *     [  y, -y,  x,  y ]          x = cos(t)
1136  *     [ -y, -y, -y,  x ]          y = sin(t) / n
1137  *
1138  * Any square orthogonal matrix with an order that is a power of two will
1139  * work (where ^T is transpose, ^-1 is inverse):
1140  *
1141  *     M^T = M^-1
1142  *
1143  * Using that knowledge, finding an appropriate matrix can be accomplished
1144  * naively by searching all combinations of:
1145  *
1146  *     M = D + S - S^T
1147  *
1148  * Where D is a diagonal matrix (of x), and S is a triangular matrix (of y)
1149  * whose combination of signs are being iterated.
1150  */
1151 inline auto VectorPartialScatter(const std::array<float,NUM_LINES> &RESTRICT in,
1152     const float xCoeff, const float yCoeff) -> std::array<float,NUM_LINES>
1153 {
1154     return std::array<float,NUM_LINES>{{
1155         xCoeff*in[0] + yCoeff*(          in[1] + -in[2] + in[3]),
1156         xCoeff*in[1] + yCoeff*(-in[0]          +  in[2] + in[3]),
1157         xCoeff*in[2] + yCoeff*( in[0] + -in[1]          + in[3]),
1158         xCoeff*in[3] + yCoeff*(-in[0] + -in[1] + -in[2]        )
1159     }};
1160 }
1161
1162 /* Utilizes the above, but reverses the input channels. */
1163 void VectorScatterRevDelayIn(const DelayLineI delay, size_t offset, const float xCoeff,
1164     const float yCoeff, const al::span<const ReverbUpdateLine,NUM_LINES> in, const size_t count)
1165 {
1166     ASSUME(count > 0);
1167
1168     for(size_t i{0u};i < count;)
1169     {
1170         offset &= delay.Mask;
1171         size_t td{minz(delay.Mask+1 - offset, count-i)};
1172         do {
1173             std::array<float,NUM_LINES> f;
1174             for(size_t j{0u};j < NUM_LINES;j++)
1175                 f[NUM_LINES-1-j] = in[j][i];
1176             ++i;
1177
1178             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1179         } while(--td);
1180     }
1181 }
1182
1183 /* This applies a Gerzon multiple-in/multiple-out (MIMO) vector all-pass
1184  * filter to the 4-line input.
1185  *
1186  * It works by vectorizing a regular all-pass filter and replacing the delay
1187  * element with a scattering matrix (like the one above) and a diagonal
1188  * matrix of delay elements.
1189  *
1190  * Two static specializations are used for transitional (cross-faded) delay
1191  * line processing and non-transitional processing.
1192  */
1193 void VecAllpass::processUnfaded(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
1194     const float xCoeff, const float yCoeff, const size_t todo)
1195 {
1196     const DelayLineI delay{Delay};
1197     const float feedCoeff{Coeff};
1198
1199     ASSUME(todo > 0);
1200
1201     size_t vap_offset[NUM_LINES];
1202     for(size_t j{0u};j < NUM_LINES;j++)
1203         vap_offset[j] = offset - Offset[j][0];
1204     for(size_t i{0u};i < todo;)
1205     {
1206         for(size_t j{0u};j < NUM_LINES;j++)
1207             vap_offset[j] &= delay.Mask;
1208         offset &= delay.Mask;
1209
1210         size_t maxoff{offset};
1211         for(size_t j{0u};j < NUM_LINES;j++)
1212             maxoff = maxz(maxoff, vap_offset[j]);
1213         size_t td{minz(delay.Mask+1 - maxoff, todo - i)};
1214
1215         do {
1216             std::array<float,NUM_LINES> f;
1217             for(size_t j{0u};j < NUM_LINES;j++)
1218             {
1219                 const float input{samples[j][i]};
1220                 const float out{delay.Line[vap_offset[j]++][j] - feedCoeff*input};
1221                 f[j] = input + feedCoeff*out;
1222
1223                 samples[j][i] = out;
1224             }
1225             ++i;
1226
1227             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1228         } while(--td);
1229     }
1230 }
1231 void VecAllpass::processFaded(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
1232     const float xCoeff, const float yCoeff, float fadeCount, const float fadeStep,
1233     const size_t todo)
1234 {
1235     const DelayLineI delay{Delay};
1236     const float feedCoeff{Coeff};
1237
1238     ASSUME(todo > 0);
1239
1240     size_t vap_offset[NUM_LINES][2];
1241     for(size_t j{0u};j < NUM_LINES;j++)
1242     {
1243         vap_offset[j][0] = offset - Offset[j][0];
1244         vap_offset[j][1] = offset - Offset[j][1];
1245     }
1246     for(size_t i{0u};i < todo;)
1247     {
1248         for(size_t j{0u};j < NUM_LINES;j++)
1249         {
1250             vap_offset[j][0] &= delay.Mask;
1251             vap_offset[j][1] &= delay.Mask;
1252         }
1253         offset &= delay.Mask;
1254
1255         size_t maxoff{offset};
1256         for(size_t j{0u};j < NUM_LINES;j++)
1257             maxoff = maxz(maxoff, maxz(vap_offset[j][0], vap_offset[j][1]));
1258         size_t td{minz(delay.Mask+1 - maxoff, todo - i)};
1259
1260         do {
1261             fadeCount += 1.0f;
1262             const float fade{fadeCount * fadeStep};
1263
1264             std::array<float,NUM_LINES> f;
1265             for(size_t j{0u};j < NUM_LINES;j++)
1266                 f[j] = delay.Line[vap_offset[j][0]++][j]*(1.0f-fade) +
1267                     delay.Line[vap_offset[j][1]++][j]*fade;
1268
1269             for(size_t j{0u};j < NUM_LINES;j++)
1270             {
1271                 const float input{samples[j][i]};
1272                 const float out{f[j] - feedCoeff*input};
1273                 f[j] = input + feedCoeff*out;
1274
1275                 samples[j][i] = out;
1276             }
1277             ++i;
1278
1279             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1280         } while(--td);
1281     }
1282 }
1283
1284 /* This generates early reflections.
1285  *
1286  * This is done by obtaining the primary reflections (those arriving from the
1287  * same direction as the source) from the main delay line.  These are
1288  * attenuated and all-pass filtered (based on the diffusion parameter).
1289  *
1290  * The early lines are then fed in reverse (according to the approximately
1291  * opposite spatial location of the A-Format lines) to create the secondary
1292  * reflections (those arriving from the opposite direction as the source).
1293  *
1294  * The early response is then completed by combining the primary reflections
1295  * with the delayed and attenuated output from the early lines.
1296  *
1297  * Finally, the early response is reversed, scattered (based on diffusion),
1298  * and fed into the late reverb section of the main delay line.
1299  *
1300  * Two static specializations are used for transitional (cross-faded) delay
1301  * line processing and non-transitional processing.
1302  */
1303 void ReverbState::earlyUnfaded(const size_t offset, const size_t todo)
1304 {
1305     const DelayLineI early_delay{mEarly.Delay};
1306     const DelayLineI main_delay{mDelay};
1307     const float mixX{mMixX};
1308     const float mixY{mMixY};
1309
1310     ASSUME(todo > 0);
1311
1312     /* First, load decorrelated samples from the main delay line as the primary
1313      * reflections.
1314      */
1315     for(size_t j{0u};j < NUM_LINES;j++)
1316     {
1317         size_t early_delay_tap{offset - mEarlyDelayTap[j][0]};
1318         const float coeff{mEarlyDelayCoeff[j][0]};
1319         for(size_t i{0u};i < todo;)
1320         {
1321             early_delay_tap &= main_delay.Mask;
1322             size_t td{minz(main_delay.Mask+1 - early_delay_tap, todo - i)};
1323             do {
1324                 mTempSamples[j][i++] = main_delay.Line[early_delay_tap++][j] * coeff;
1325             } while(--td);
1326         }
1327     }
1328
1329     /* Apply a vector all-pass, to help color the initial reflections based on
1330      * the diffusion strength.
1331      */
1332     mEarly.VecAp.processUnfaded(mTempSamples, offset, mixX, mixY, todo);
1333
1334     /* Apply a delay and bounce to generate secondary reflections, combine with
1335      * the primary reflections and write out the result for mixing.
1336      */
1337     for(size_t j{0u};j < NUM_LINES;j++)
1338     {
1339         size_t feedb_tap{offset - mEarly.Offset[j][0]};
1340         const float feedb_coeff{mEarly.Coeff[j][0]};
1341         float *out{mEarlySamples[j].data()};
1342
1343         for(size_t i{0u};i < todo;)
1344         {
1345             feedb_tap &= early_delay.Mask;
1346             size_t td{minz(early_delay.Mask+1 - feedb_tap, todo - i)};
1347             do {
1348                 out[i] = mTempSamples[j][i] + early_delay.Line[feedb_tap++][j]*feedb_coeff;
1349                 ++i;
1350             } while(--td);
1351         }
1352     }
1353     for(size_t j{0u};j < NUM_LINES;j++)
1354         early_delay.write(offset, NUM_LINES-1-j, mTempSamples[j].data(), todo);
1355
1356     /* Also write the result back to the main delay line for the late reverb
1357      * stage to pick up at the appropriate time, appplying a scatter and
1358      * bounce to improve the initial diffusion in the late reverb.
1359      */
1360     const size_t late_feed_tap{offset - mLateFeedTap};
1361     VectorScatterRevDelayIn(main_delay, late_feed_tap, mixX, mixY, mEarlySamples, todo);
1362 }
1363 void ReverbState::earlyFaded(const size_t offset, const size_t todo, const float fade,
1364     const float fadeStep)
1365 {
1366     const DelayLineI early_delay{mEarly.Delay};
1367     const DelayLineI main_delay{mDelay};
1368     const float mixX{mMixX};
1369     const float mixY{mMixY};
1370
1371     ASSUME(todo > 0);
1372
1373     for(size_t j{0u};j < NUM_LINES;j++)
1374     {
1375         size_t early_delay_tap0{offset - mEarlyDelayTap[j][0]};
1376         size_t early_delay_tap1{offset - mEarlyDelayTap[j][1]};
1377         const float oldCoeff{mEarlyDelayCoeff[j][0]};
1378         const float oldCoeffStep{-oldCoeff * fadeStep};
1379         const float newCoeffStep{mEarlyDelayCoeff[j][1] * fadeStep};
1380         float fadeCount{fade};
1381
1382         for(size_t i{0u};i < todo;)
1383         {
1384             early_delay_tap0 &= main_delay.Mask;
1385             early_delay_tap1 &= main_delay.Mask;
1386             size_t td{minz(main_delay.Mask+1 - maxz(early_delay_tap0, early_delay_tap1), todo-i)};
1387             do {
1388                 fadeCount += 1.0f;
1389                 const float fade0{oldCoeff + oldCoeffStep*fadeCount};
1390                 const float fade1{newCoeffStep*fadeCount};
1391                 mTempSamples[j][i++] =
1392                     main_delay.Line[early_delay_tap0++][j]*fade0 +
1393                     main_delay.Line[early_delay_tap1++][j]*fade1;
1394             } while(--td);
1395         }
1396     }
1397
1398     mEarly.VecAp.processFaded(mTempSamples, offset, mixX, mixY, fade, fadeStep, todo);
1399
1400     for(size_t j{0u};j < NUM_LINES;j++)
1401     {
1402         size_t feedb_tap0{offset - mEarly.Offset[j][0]};
1403         size_t feedb_tap1{offset - mEarly.Offset[j][1]};
1404         const float feedb_oldCoeff{mEarly.Coeff[j][0]};
1405         const float feedb_oldCoeffStep{-feedb_oldCoeff * fadeStep};
1406         const float feedb_newCoeffStep{mEarly.Coeff[j][1] * fadeStep};
1407         float *out{mEarlySamples[j].data()};
1408         float fadeCount{fade};
1409
1410         for(size_t i{0u};i < todo;)
1411         {
1412             feedb_tap0 &= early_delay.Mask;
1413             feedb_tap1 &= early_delay.Mask;
1414             size_t td{minz(early_delay.Mask+1 - maxz(feedb_tap0, feedb_tap1), todo - i)};
1415
1416             do {
1417                 fadeCount += 1.0f;
1418                 const float fade0{feedb_oldCoeff + feedb_oldCoeffStep*fadeCount};
1419                 const float fade1{feedb_newCoeffStep*fadeCount};
1420                 out[i] = mTempSamples[j][i] +
1421                     early_delay.Line[feedb_tap0++][j]*fade0 +
1422                     early_delay.Line[feedb_tap1++][j]*fade1;
1423                 ++i;
1424             } while(--td);
1425         }
1426     }
1427     for(size_t j{0u};j < NUM_LINES;j++)
1428         early_delay.write(offset, NUM_LINES-1-j, mTempSamples[j].data(), todo);
1429
1430     const size_t late_feed_tap{offset - mLateFeedTap};
1431     VectorScatterRevDelayIn(main_delay, late_feed_tap, mixX, mixY, mEarlySamples, todo);
1432 }
1433
1434
1435 void Modulation::calcDelays(size_t todo)
1436 {
1437     constexpr float mod_scale{al::numbers::pi_v<float> * 2.0f / MOD_FRACONE};
1438     uint idx{Index};
1439     const uint step{Step};
1440     const float depth{Depth[0]};
1441     for(size_t i{0};i < todo;++i)
1442     {
1443         idx += step;
1444         const float lfo{std::sin(static_cast<float>(idx&MOD_FRACMASK) * mod_scale)};
1445         ModDelays[i] = (lfo+1.0f) * depth;
1446     }
1447     Index = idx;
1448 }
1449
1450 void Modulation::calcFadedDelays(size_t todo, float fadeCount, float fadeStep)
1451 {
1452     constexpr float mod_scale{al::numbers::pi_v<float> * 2.0f / MOD_FRACONE};
1453     uint idx{Index};
1454     const uint step{Step};
1455     const float depth{Depth[0]};
1456     const float depthStep{(Depth[1]-depth) * fadeStep};
1457     for(size_t i{0};i < todo;++i)
1458     {
1459         fadeCount += 1.0f;
1460         idx += step;
1461         const float lfo{std::sin(static_cast<float>(idx&MOD_FRACMASK) * mod_scale)};
1462         ModDelays[i] = (lfo+1.0f) * (depth + depthStep*fadeCount);
1463     }
1464     Index = idx;
1465 }
1466
1467
1468 /* This generates the reverb tail using a modified feed-back delay network
1469  * (FDN).
1470  *
1471  * Results from the early reflections are mixed with the output from the
1472  * modulated late delay lines.
1473  *
1474  * The late response is then completed by T60 and all-pass filtering the mix.
1475  *
1476  * Finally, the lines are reversed (so they feed their opposite directions)
1477  * and scattered with the FDN matrix before re-feeding the delay lines.
1478  *
 * Two variations are made, one for transitional (cross-faded) delay line
 * processing and one for non-transitional processing.
1481  */
1482 void ReverbState::lateUnfaded(const size_t offset, const size_t todo)
1483 {
1484     const DelayLineI late_delay{mLate.Delay};
1485     const DelayLineI main_delay{mDelay};
1486     const float mixX{mMixX};
1487     const float mixY{mMixY};
1488
1489     ASSUME(todo > 0);
1490
1491     /* First, calculate the modulated delays for the late feedback. */
1492     mLate.Mod.calcDelays(todo);
1493
1494     /* Next, load decorrelated samples from the main and feedback delay lines.
1495      * Filter the signal to apply its frequency-dependent decay.
1496      */
1497     for(size_t j{0u};j < NUM_LINES;j++)
1498     {
1499         size_t late_delay_tap{offset - mLateDelayTap[j][0]};
1500         size_t late_feedb_tap{offset - mLate.Offset[j][0]};
1501         const float midGain{mLate.T60[j].MidGain[0]};
1502         const float densityGain{mLate.DensityGain[0] * midGain};
1503
1504         for(size_t i{0u};i < todo;)
1505         {
1506             late_delay_tap &= main_delay.Mask;
1507             size_t td{minz(todo - i, main_delay.Mask+1 - late_delay_tap)};
1508             do {
1509                 /* Calculate the read offset and fraction between it and the
1510                  * next sample.
1511                  */
1512                 const float fdelay{mLate.Mod.ModDelays[i]};
1513                 const size_t delay{float2uint(fdelay)};
1514                 const float frac{fdelay - static_cast<float>(delay)};
1515
1516                 /* Feed the delay line with the late feedback sample, and get
1517                  * the two samples crossed by the delayed offset.
1518                  */
1519                 const float out0{late_delay.Line[(late_feedb_tap-delay) & late_delay.Mask][j]};
1520                 const float out1{late_delay.Line[(late_feedb_tap-delay-1) & late_delay.Mask][j]};
1521                 ++late_feedb_tap;
1522
1523                 /* The output is obtained by linearly interpolating the two
1524                  * samples that were acquired above, and combined with the main
1525                  * delay tap.
1526                  */
1527                 mTempSamples[j][i] = lerpf(out0, out1, frac)*midGain +
1528                     main_delay.Line[late_delay_tap++][j]*densityGain;
1529                 ++i;
1530             } while(--td);
1531         }
1532         mLate.T60[j].process({mTempSamples[j].data(), todo});
1533     }
1534
1535     /* Apply a vector all-pass to improve micro-surface diffusion, and write
1536      * out the results for mixing.
1537      */
1538     mLate.VecAp.processUnfaded(mTempSamples, offset, mixX, mixY, todo);
1539     for(size_t j{0u};j < NUM_LINES;j++)
1540         std::copy_n(mTempSamples[j].begin(), todo, mLateSamples[j].begin());
1541
1542     /* Finally, scatter and bounce the results to refeed the feedback buffer. */
1543     VectorScatterRevDelayIn(late_delay, offset, mixX, mixY, mTempSamples, todo);
1544 }
1545 void ReverbState::lateFaded(const size_t offset, const size_t todo, const float fade,
1546     const float fadeStep)
1547 {
1548     const DelayLineI late_delay{mLate.Delay};
1549     const DelayLineI main_delay{mDelay};
1550     const float mixX{mMixX};
1551     const float mixY{mMixY};
1552
1553     ASSUME(todo > 0);
1554
1555     mLate.Mod.calcFadedDelays(todo, fade, fadeStep);
1556
1557     for(size_t j{0u};j < NUM_LINES;j++)
1558     {
1559         const float oldMidGain{mLate.T60[j].MidGain[0]};
1560         const float midGain{mLate.T60[j].MidGain[1]};
1561         const float oldMidStep{-oldMidGain * fadeStep};
1562         const float midStep{midGain * fadeStep};
1563         const float oldDensityGain{mLate.DensityGain[0] * oldMidGain};
1564         const float densityGain{mLate.DensityGain[1] * midGain};
1565         const float oldDensityStep{-oldDensityGain * fadeStep};
1566         const float densityStep{densityGain * fadeStep};
1567         size_t late_delay_tap0{offset - mLateDelayTap[j][0]};
1568         size_t late_delay_tap1{offset - mLateDelayTap[j][1]};
1569         size_t late_feedb_tap0{offset - mLate.Offset[j][0]};
1570         size_t late_feedb_tap1{offset - mLate.Offset[j][1]};
1571         float fadeCount{fade};
1572
1573         for(size_t i{0u};i < todo;)
1574         {
1575             late_delay_tap0 &= main_delay.Mask;
1576             late_delay_tap1 &= main_delay.Mask;
1577             size_t td{minz(todo - i, main_delay.Mask+1 - maxz(late_delay_tap0, late_delay_tap1))};
1578             do {
1579                 fadeCount += 1.0f;
1580
1581                 const float fdelay{mLate.Mod.ModDelays[i]};
1582                 const size_t delay{float2uint(fdelay)};
1583                 const float frac{fdelay - static_cast<float>(delay)};
1584
1585                 const float out00{late_delay.Line[(late_feedb_tap0-delay) & late_delay.Mask][j]};
1586                 const float out01{late_delay.Line[(late_feedb_tap0-delay-1) & late_delay.Mask][j]};
1587                 ++late_feedb_tap0;
1588                 const float out10{late_delay.Line[(late_feedb_tap1-delay) & late_delay.Mask][j]};
1589                 const float out11{late_delay.Line[(late_feedb_tap1-delay-1) & late_delay.Mask][j]};
1590                 ++late_feedb_tap1;
1591
1592                 const float fade0{oldDensityGain + oldDensityStep*fadeCount};
1593                 const float fade1{densityStep*fadeCount};
1594                 const float gfade0{oldMidGain + oldMidStep*fadeCount};
1595                 const float gfade1{midStep*fadeCount};
1596                 mTempSamples[j][i] = lerpf(out00, out01, frac)*gfade0 +
1597                     lerpf(out10, out11, frac)*gfade1 +
1598                     main_delay.Line[late_delay_tap0++][j]*fade0 +
1599                     main_delay.Line[late_delay_tap1++][j]*fade1;
1600                 ++i;
1601             } while(--td);
1602         }
1603         mLate.T60[j].process({mTempSamples[j].data(), todo});
1604     }
1605
1606     mLate.VecAp.processFaded(mTempSamples, offset, mixX, mixY, fade, fadeStep, todo);
1607     for(size_t j{0u};j < NUM_LINES;j++)
1608         std::copy_n(mTempSamples[j].begin(), todo, mLateSamples[j].begin());
1609
1610     VectorScatterRevDelayIn(late_delay, offset, mixX, mixY, mTempSamples, todo);
1611 }
1612
1613 void ReverbState::process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
1614 {
1615     size_t offset{mOffset};
1616
1617     ASSUME(samplesToDo > 0);
1618
1619     /* Convert B-Format to A-Format for processing. */
1620     const size_t numInput{minz(samplesIn.size(), NUM_LINES)};
1621     const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), samplesToDo};
1622     for(size_t c{0u};c < NUM_LINES;c++)
1623     {
1624         std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1625         for(size_t i{0};i < numInput;++i)
1626         {
1627             const float gain{B2A[c][i]};
1628             const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1629
1630             for(float &sample : tmpspan)
1631             {
1632                 sample += *input * gain;
1633                 ++input;
1634             }
1635         }
1636
1637         /* Band-pass the incoming samples and feed the initial delay line. */
1638         DualBiquad{mFilter[c].Lp, mFilter[c].Hp}.process(tmpspan, tmpspan.data());
1639         mDelay.write(offset, c, tmpspan.cbegin(), samplesToDo);
1640     }
1641
1642     /* Process reverb for these samples. */
1643     if LIKELY(!mDoFading)
1644     {
1645         for(size_t base{0};base < samplesToDo;)
1646         {
1647             /* Calculate the number of samples we can do this iteration. */
1648             size_t todo{minz(samplesToDo - base, mMaxUpdate[0])};
1649             /* Some mixers require maintaining a 4-sample alignment, so ensure
1650              * that if it's not the last iteration.
1651              */
1652             if(base+todo < samplesToDo) todo &= ~size_t{3};
1653             ASSUME(todo > 0);
1654
1655             /* Generate non-faded early reflections and late reverb. */
1656             earlyUnfaded(offset, todo);
1657             lateUnfaded(offset, todo);
1658
1659             /* Finally, mix early reflections and late reverb. */
1660             mixOut(samplesOut, samplesToDo-base, base, todo);
1661
1662             offset += todo;
1663             base += todo;
1664         }
1665     }
1666     else
1667     {
1668         const float fadeStep{1.0f / static_cast<float>(samplesToDo)};
1669         for(size_t base{0};base < samplesToDo;)
1670         {
1671             size_t todo{minz(samplesToDo - base, minz(mMaxUpdate[0], mMaxUpdate[1]))};
1672             if(base+todo < samplesToDo) todo &= ~size_t{3};
1673             ASSUME(todo > 0);
1674
1675             /* Generate cross-faded early reflections and late reverb. */
1676             auto fadeCount = static_cast<float>(base);
1677             earlyFaded(offset, todo, fadeCount, fadeStep);
1678             lateFaded(offset, todo, fadeCount, fadeStep);
1679
1680             mixOut(samplesOut, samplesToDo-base, base, todo);
1681
1682             offset += todo;
1683             base += todo;
1684         }
1685
1686         /* Update the cross-fading delay line taps. */
1687         for(size_t c{0u};c < NUM_LINES;c++)
1688         {
1689             mEarlyDelayTap[c][0] = mEarlyDelayTap[c][1];
1690             mEarlyDelayCoeff[c][0] = mEarlyDelayCoeff[c][1];
1691             mLateDelayTap[c][0] = mLateDelayTap[c][1];
1692             mEarly.VecAp.Offset[c][0] = mEarly.VecAp.Offset[c][1];
1693             mEarly.Offset[c][0] = mEarly.Offset[c][1];
1694             mEarly.Coeff[c][0] = mEarly.Coeff[c][1];
1695             mLate.Offset[c][0] = mLate.Offset[c][1];
1696             mLate.T60[c].MidGain[0] = mLate.T60[c].MidGain[1];
1697             mLate.VecAp.Offset[c][0] = mLate.VecAp.Offset[c][1];
1698         }
1699         mLate.DensityGain[0] = mLate.DensityGain[1];
1700         mLate.Mod.Depth[0] = mLate.Mod.Depth[1];
1701         mMaxUpdate[0] = mMaxUpdate[1];
1702         mDoFading = false;
1703     }
1704     mOffset = offset;
1705 }
1706
1707
1708 struct ReverbStateFactory final : public EffectStateFactory {
1709     al::intrusive_ptr<EffectState> create() override
1710     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1711 };
1712
1713 struct StdReverbStateFactory final : public EffectStateFactory {
1714     al::intrusive_ptr<EffectState> create() override
1715     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1716 };
1717
1718 } // namespace
1719
1720 EffectStateFactory *ReverbStateFactory_getFactory()
1721 {
1722     static ReverbStateFactory ReverbFactory{};
1723     return &ReverbFactory;
1724 }
1725
1726 EffectStateFactory *StdReverbStateFactory_getFactory()
1727 {
1728     static StdReverbStateFactory ReverbFactory{};
1729     return &ReverbFactory;
1730 }