alc/effects/reverb.cpp

   1 /**
   2  * Ambisonic reverb engine for the OpenAL cross platform audio library
   3  * Copyright (C) 2008-2017 by Chris Robinson and Christopher Fitzgerald.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include <algorithm>
  24 #include <array>
  25 #include <cstdio>
  26 #include <functional>
  27 #include <iterator>
  28 #include <numeric>
  29 #include <stdint.h>
  30
  31 #include "alc/effects/base.h"
  32 #include "almalloc.h"
  33 #include "alnumbers.h"
  34 #include "alnumeric.h"
  35 #include "alspan.h"
  36 #include "core/ambidefs.h"
  37 #include "core/bufferline.h"
  38 #include "core/context.h"
  39 #include "core/devformat.h"
  40 #include "core/device.h"
  41 #include "core/effectslot.h"
  42 #include "core/filters/biquad.h"
  43 #include "core/filters/splitter.h"
  44 #include "core/mixer.h"
  45 #include "core/mixer/defs.h"
  46 #include "intrusive_ptr.h"
  47 #include "opthelpers.h"
  48 #include "vecmat.h"
  49 #include "vector.h"
  50
  51 /* This is a user config option for modifying the overall output of the reverb
  52  * effect.
  53  */
  54 float ReverbBoost = 1.0f;
  55
  56 namespace {
  57
/* Shorthand for the unsigned integer type used throughout this file. */
using uint = unsigned int;

/* Upper bound on the modulation time. It is scaled by MODULATION_DEPTH_COEFF
 * and added to a length in seconds when sizing the late delay line.
 */
constexpr float MaxModulationTime{4.0f};
/* NOTE(review): presumably the modulation time used when none is specified;
 * it matches the initial ReverbState::mParams.ModulationTime value -- confirm.
 */
constexpr float DefaultModulationTime{0.25f};

/* Fixed-point layout of the modulation index: MOD_FRACBITS is the number of
 * fractional bits, MOD_FRACONE is the value of one whole unit, and
 * MOD_FRACMASK selects the fractional bits.
 */
#define MOD_FRACBITS 24
#define MOD_FRACONE  (1<<MOD_FRACBITS)
#define MOD_FRACMASK (MOD_FRACONE-1)


/* Makes the _1, _2, ... placeholder names available unqualified. */
using namespace std::placeholders;
  69
  70 /* Max samples per process iteration. Used to limit the size needed for
  71  * temporary buffers. Must be a multiple of 4 for SIMD alignment.
  72  */
  73 constexpr size_t MAX_UPDATE_SAMPLES{256};
  74
  75 /* The number of spatialized lines or channels to process. Four channels allows
  76  * for a 3D A-Format response. NOTE: This can't be changed without taking care
  77  * of the conversion matrices, and a few places where the length arrays are
  78  * assumed to have 4 elements.
  79  */
  80 constexpr size_t NUM_LINES{4u};
  81
  82
  83 /* This coefficient is used to define the maximum frequency range controlled by
  84  * the modulation depth. The current value of 0.05 will allow it to swing from
  85  * 0.95x to 1.05x. This value must be below 1. At 1 it will cause the sampler
  86  * to stall on the downswing, and above 1 it will cause it to sample backwards.
  87  * The value 0.05 seems be nearest to Creative hardware behavior.
  88  */
  89 constexpr float MODULATION_DEPTH_COEFF{0.05f};
  90
  91
/* The B-Format to A-Format conversion matrix. The arrangement of rows is
 * deliberately chosen to align the resulting lines to their spatial opposites
 * (0:above front left <-> 3:above back right, 1:below front right <-> 2:below
 * back left). It's not quite opposite, since the A-Format results in a
 * tetrahedron, but it's close enough. Should the model be extended to 8-lines
 * in the future, true opposites can be used.
 */
alignas(16) constexpr float B2A[NUM_LINES][NUM_LINES]{
    { 0.5f,  0.5f,  0.5f,  0.5f },
    { 0.5f, -0.5f, -0.5f,  0.5f },
    { 0.5f,  0.5f, -0.5f, -0.5f },
    { 0.5f, -0.5f,  0.5f, -0.5f }
};

/* Converts A-Format to B-Format for early reflections. Row c holds the gains
 * applied to the four input lines to produce output channel c.
 */
alignas(16) constexpr std::array<std::array<float,NUM_LINES>,NUM_LINES> EarlyA2B{{
    {{ 0.5f,  0.5f,  0.5f,  0.5f }},
    {{ 0.5f, -0.5f,  0.5f, -0.5f }},
    {{ 0.5f, -0.5f, -0.5f,  0.5f }},
    {{ 0.5f,  0.5f, -0.5f, -0.5f }}
}};

/* Converts A-Format to B-Format for late reverb. Row c holds the gains applied
 * to the four input lines to produce output channel c.
 */
constexpr auto InvSqrt2 = static_cast<float>(1.0/al::numbers::sqrt2);
alignas(16) constexpr std::array<std::array<float,NUM_LINES>,NUM_LINES> LateA2B{{
    {{ 0.5f,  0.5f,  0.5f,  0.5f }},
    {{ InvSqrt2, -InvSqrt2,  0.0f,  0.0f }},
    {{ 0.0f,  0.0f,  InvSqrt2, -InvSqrt2 }},
    {{ 0.5f,  0.5f, -0.5f, -0.5f }}
}};
 122
/* The all-pass and delay lines have a variable length dependent on the
 * effect's density parameter, which helps alter the perceived environment
 * size. The size-to-density conversion is a cubed scale:
 *
 *     density = min(1.0, pow(size, 3.0) / DENSITY_SCALE);
 *
 * The line lengths scale linearly with room size, so the inverse density
 * conversion is needed, taking the cube root of the re-scaled density to
 * calculate the line length multiplier:
 *
 *     length_mult = max(5.0, cbrt(density*DENSITY_SCALE));
 *
 * The density scale below will result in a max line multiplier of 50 (the
 * cube root of 125000), for an effective size range of 5m to 50m.
 */
constexpr float DENSITY_SCALE{125000.0f};
 139
 140 /* All delay line lengths are specified in seconds.
 141  *
 142  * To approximate early reflections, we break them up into primary (those
 143  * arriving from the same direction as the source) and secondary (those
 144  * arriving from the opposite direction).
 145  *
 146  * The early taps decorrelate the 4-channel signal to approximate an average
 147  * room response for the primary reflections after the initial early delay.
 148  *
 149  * Given an average room dimension (d_a) and the speed of sound (c) we can
 150  * calculate the average reflection delay (r_a) regardless of listener and
 151  * source positions as:
 152  *
 153  *     r_a = d_a / c
 154  *     c   = 343.3
 155  *
 * This can be extended to find the average difference (r_d) between the
 * maximum (r_1) and minimum (r_0) reflection delays:
 158  *
 159  *     r_0 = 2 / 3 r_a
 160  *         = r_a - r_d / 2
 161  *         = r_d
 162  *     r_1 = 4 / 3 r_a
 163  *         = r_a + r_d / 2
 164  *         = 2 r_d
 165  *     r_d = 2 / 3 r_a
 166  *         = r_1 - r_0
 167  *
 168  * As can be determined by integrating the 1D model with a source (s) and
 169  * listener (l) positioned across the dimension of length (d_a):
 170  *
 171  *     r_d = int_(l=0)^d_a (int_(s=0)^d_a |2 d_a - 2 (l + s)| ds) dl / c
 172  *
 173  * The initial taps (T_(i=0)^N) are then specified by taking a power series
 174  * that ranges between r_0 and half of r_1 less r_0:
 175  *
 176  *     R_i = 2^(i / (2 N - 1)) r_d
 177  *         = r_0 + (2^(i / (2 N - 1)) - 1) r_d
 178  *         = r_0 + T_i
 179  *     T_i = R_i - r_0
 180  *         = (2^(i / (2 N - 1)) - 1) r_d
 181  *
 182  * Assuming an average of 1m, we get the following taps:
 183  */
/* Early tap lengths, in seconds, one per line. */
constexpr std::array<float,NUM_LINES> EARLY_TAP_LENGTHS{{
    0.0000000e+0f, 2.0213520e-4f, 4.2531060e-4f, 6.7171600e-4f
}};
 187
 188 /* The early all-pass filter lengths are based on the early tap lengths:
 189  *
 190  *     A_i = R_i / a
 191  *
 192  * Where a is the approximate maximum all-pass cycle limit (20).
 193  */
/* Early all-pass lengths, in seconds, one per line. */
constexpr std::array<float,NUM_LINES> EARLY_ALLPASS_LENGTHS{{
    9.7096800e-5f, 1.0720356e-4f, 1.1836234e-4f, 1.3068260e-4f
}};
 197
 198 /* The early delay lines are used to transform the primary reflections into
 199  * the secondary reflections.  The A-format is arranged in such a way that
 200  * the channels/lines are spatially opposite:
 201  *
 202  *     C_i is opposite C_(N-i-1)
 203  *
 204  * The delays of the two opposing reflections (R_i and O_i) from a source
 205  * anywhere along a particular dimension always sum to twice its full delay:
 206  *
 207  *     2 r_a = R_i + O_i
 208  *
 209  * With that in mind we can determine the delay between the two reflections
 210  * and thus specify our early line lengths (L_(i=0)^N) using:
 211  *
 212  *     O_i = 2 r_a - R_(N-i-1)
 213  *     L_i = O_i - R_(N-i-1)
 214  *         = 2 (r_a - R_(N-i-1))
 215  *         = 2 (r_a - T_(N-i-1) - r_0)
 216  *         = 2 r_a (1 - (2 / 3) 2^((N - i - 1) / (2 N - 1)))
 217  *
 218  * Using an average dimension of 1m, we get:
 219  */
/* Early delay line lengths, in seconds, one per line. */
constexpr std::array<float,NUM_LINES> EARLY_LINE_LENGTHS{{
    5.9850400e-4f, 1.0913150e-3f, 1.5376658e-3f, 1.9419362e-3f
}};
 223
 224 /* The late all-pass filter lengths are based on the late line lengths:
 225  *
 226  *     A_i = (5 / 3) L_i / r_1
 227  */
/* Late all-pass lengths, in seconds, one per line. */
constexpr std::array<float,NUM_LINES> LATE_ALLPASS_LENGTHS{{
    1.6182800e-4f, 2.0389060e-4f, 2.8159360e-4f, 3.2365600e-4f
}};
 231
 232 /* The late lines are used to approximate the decaying cycle of recursive
 233  * late reflections.
 234  *
 235  * Splitting the lines in half, we start with the shortest reflection paths
 236  * (L_(i=0)^(N/2)):
 237  *
 238  *     L_i = 2^(i / (N - 1)) r_d
 239  *
 240  * Then for the opposite (longest) reflection paths (L_(i=N/2)^N):
 241  *
 242  *     L_i = 2 r_a - L_(i-N/2)
 243  *         = 2 r_a - 2^((i - N / 2) / (N - 1)) r_d
 244  *
 245  * For our 1m average room, we get:
 246  */
/* Late delay line lengths, in seconds, one per line. */
constexpr std::array<float,NUM_LINES> LATE_LINE_LENGTHS{{
    1.9419362e-3f, 2.4466860e-3f, 3.3791220e-3f, 3.8838720e-3f
}};
 250
 251
/* Temporary sample storage for one line, sized for a single process iteration. */
using ReverbUpdateLine = std::array<float,MAX_UPDATE_SAMPLES>;
 253
 254 struct DelayLineI {
 255     /* The delay lines use interleaved samples, with the lengths being powers
 256      * of 2 to allow the use of bit-masking instead of a modulus for wrapping.
 257      */
 258     size_t Mask{0u};
 259     union {
 260         uintptr_t LineOffset{0u};
 261         std::array<float,NUM_LINES> *Line;
 262     };
 263
 264     /* Given the allocated sample buffer, this function updates each delay line
 265      * offset.
 266      */
 267     void realizeLineOffset(std::array<float,NUM_LINES> *sampleBuffer) noexcept
 268     { Line = sampleBuffer + LineOffset; }
 269
 270     /* Calculate the length of a delay line and store its mask and offset. */
 271     uint calcLineLength(const float length, const uintptr_t offset, const float frequency,
 272         const uint extra)
 273     {
 274         /* All line lengths are powers of 2, calculated from their lengths in
 275          * seconds, rounded up.
 276          */
 277         uint samples{float2uint(std::ceil(length*frequency))};
 278         samples = NextPowerOf2(samples + extra);
 279
 280         /* All lines share a single sample buffer. */
 281         Mask = samples - 1;
 282         LineOffset = offset;
 283
 284         /* Return the sample count for accumulation. */
 285         return samples;
 286     }
 287
 288     void write(size_t offset, const size_t c, const float *RESTRICT in, const size_t count) const noexcept
 289     {
 290         ASSUME(count > 0);
 291         for(size_t i{0u};i < count;)
 292         {
 293             offset &= Mask;
 294             size_t td{minz(Mask+1 - offset, count - i)};
 295             do {
 296                 Line[offset++][c] = in[i++];
 297             } while(--td);
 298         }
 299     }
 300 };
 301
/* State for a vector all-pass filter section that works on all lines at once. */
struct VecAllpass {
    /* Delay line backing the filter. */
    DelayLineI Delay;
    /* Filter coefficient; starts out at 0. */
    float Coeff{0.0f};
    /* One offset per line. NOTE(review): presumably the tap position within
     * Delay, in samples -- confirm against process().
     */
    size_t Offset[NUM_LINES]{};

    /* Filters todo samples of each line in-place (defined out of line). */
    void process(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
        const float xCoeff, const float yCoeff, const size_t todo);
};
 310
 311 struct T60Filter {
 312     /* Two filters are used to adjust the signal. One to control the low
 313      * frequencies, and one to control the high frequencies.
 314      */
 315     float MidGain{0.0f};
 316     BiquadFilter HFFilter, LFFilter;
 317
 318     void calcCoeffs(const float length, const float lfDecayTime, const float mfDecayTime,
 319         const float hfDecayTime, const float lf0norm, const float hf0norm);
 320
 321     /* Applies the two T60 damping filter sections. */
 322     void process(const al::span<float> samples)
 323     { DualBiquad{HFFilter, LFFilter}.process(samples, samples.data()); }
 324
 325     void clear() noexcept { HFFilter.clear(); LFFilter.clear(); }
 326 };
 327
/* State for the early reflections stage of a reverb pipeline. */
struct EarlyReflections {
    /* A Gerzon vector all-pass filter is used to simulate initial diffusion.
     * The spread from this filter also helps smooth out the reverb tail.
     */
    VecAllpass VecAp;

    /* An echo line is used to complete the second half of the early
     * reflections.
     */
    DelayLineI Delay;
    /* One offset and one coefficient per line. NOTE(review): presumably the
     * echo line's tap position and gain, set by updateLines() -- confirm.
     */
    size_t Offset[NUM_LINES]{};
    float Coeff[NUM_LINES]{};

    /* The gain for each output channel based on 3D panning. Row c of each
     * array is passed to MixSamples when line/channel c is mixed to the
     * output.
     */
    float CurrentGain[NUM_LINES][MaxAmbiChannels]{};
    float PanGain[NUM_LINES][MaxAmbiChannels]{};

    void updateLines(const float density_mult, const float diffusion, const float decayTime,
        const float frequency);
};
 348
 349
 350 struct Modulation {
 351     /* The vibrato time is tracked with an index over a (MOD_FRACONE)
 352      * normalized range.
 353      */
 354     uint Index, Step;
 355
 356     /* The depth of frequency change, in samples. */
 357     float Depth;
 358
 359     float ModDelays[MAX_UPDATE_SAMPLES];
 360
 361     void updateModulator(float modTime, float modDepth, float frequency);
 362
 363     void calcDelays(size_t todo);
 364 };
 365
 366 struct LateReverb {
 367     /* A recursive delay line is used fill in the reverb tail. */
 368     DelayLineI Delay;
 369     size_t     Offset[NUM_LINES]{};
 370
 371     /* Attenuation to compensate for the modal density and decay rate of the
 372      * late lines.
 373      */
 374     float DensityGain{0.0f};
 375
 376     /* T60 decay filters are used to simulate absorption. */
 377     T60Filter T60[NUM_LINES];
 378
 379     Modulation Mod;
 380
 381     /* A Gerzon vector all-pass filter is used to simulate diffusion. */
 382     VecAllpass VecAp;
 383
 384     /* The gain for each output channel based on 3D panning. */
 385     float CurrentGain[NUM_LINES][MaxAmbiChannels]{};
 386     float PanGain[NUM_LINES][MaxAmbiChannels]{};
 387
 388     void updateLines(const float density_mult, const float diffusion, const float lfDecayTime,
 389         const float mfDecayTime, const float hfDecayTime, const float lf0norm,
 390         const float hf0norm, const float frequency);
 391
 392     void clear() noexcept
 393     {
 394         for(auto &filter : T60)
 395             filter.clear();
 396     }
 397 };
 398
 399 struct ReverbPipeline {
 400     /* Master effect filters */
 401     struct {
 402         BiquadFilter Lp;
 403         BiquadFilter Hp;
 404     } mFilter[NUM_LINES];
 405
 406     /* Core delay line (early reflections and late reverb tap from this). */
 407     DelayLineI mEarlyDelayIn;
 408     DelayLineI mLateDelayIn;
 409
 410     /* Tap points for early reflection delay. */
 411     size_t mEarlyDelayTap[NUM_LINES][2]{};
 412     float mEarlyDelayCoeff[NUM_LINES]{};
 413
 414     /* Tap points for late reverb feed and delay. */
 415     size_t mLateDelayTap[NUM_LINES][2]{};
 416
 417     /* Coefficients for the all-pass and line scattering matrices. */
 418     float mMixX{0.0f};
 419     float mMixY{0.0f};
 420
 421     EarlyReflections mEarly;
 422
 423     LateReverb mLate;
 424
 425     std::array<std::array<BandSplitter,NUM_LINES>,2> mAmbiSplitter;
 426
 427     size_t mFadeSampleCount{1};
 428
 429     void updateDelayLine(const float earlyDelay, const float lateDelay, const float density_mult,
 430         const float decayTime, const float frequency);
 431     void update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
 432         const float earlyGain, const float lateGain, const bool doUpmix, const MixParams *mainMix);
 433
 434     void processEarly(size_t offset, const size_t samplesToDo,
 435         const al::span<ReverbUpdateLine,NUM_LINES> tempSamples,
 436         const al::span<FloatBufferLine,NUM_LINES> outSamples);
 437     void processLate(size_t offset, const size_t samplesToDo,
 438         const al::span<ReverbUpdateLine,NUM_LINES> tempSamples,
 439         const al::span<FloatBufferLine,NUM_LINES> outSamples);
 440
 441     void clear() noexcept
 442     {
 443         for(auto &filter : mFilter)
 444         {
 445             filter.Lp.clear();
 446             filter.Hp.clear();
 447         }
 448         mLate.clear();
 449         for(auto &filters : mAmbiSplitter)
 450         {
 451             for(auto &filter : filters)
 452                 filter.clear();
 453         }
 454     }
 455 };
 456
 457 struct ReverbState final : public EffectState {
 458     /* All delay lines are allocated as a single buffer to reduce memory
 459      * fragmentation and management code.
 460      */
 461     al::vector<std::array<float,NUM_LINES>,16> mSampleBuffer;
 462
 463     struct {
 464         /* Calculated parameters which indicate if cross-fading is needed after
 465          * an update.
 466          */
 467         float Density{1.0f};
 468         float Diffusion{1.0f};
 469         float DecayTime{1.49f};
 470         float HFDecayTime{0.83f * 1.49f};
 471         float LFDecayTime{1.0f * 1.49f};
 472         float ModulationTime{0.25f};
 473         float ModulationDepth{0.0f};
 474         float HFReference{5000.0f};
 475         float LFReference{250.0f};
 476     } mParams;
 477
 478     enum PipelineState : uint8_t {
 479         DeviceClear,
 480         StartFade,
 481         Fading,
 482         Cleanup,
 483         Normal,
 484     };
 485     PipelineState mPipelineState{DeviceClear};
 486     uint8_t mCurrentPipeline{0};
 487
 488     ReverbPipeline mPipelines[2];
 489
 490     /* The current write offset for all delay lines. */
 491     size_t mOffset{};
 492
 493     /* Temporary storage used when processing. */
 494     union {
 495         alignas(16) FloatBufferLine mTempLine{};
 496         alignas(16) std::array<ReverbUpdateLine,NUM_LINES> mTempSamples;
 497     };
 498     alignas(16) std::array<FloatBufferLine,NUM_LINES> mEarlySamples{};
 499     alignas(16) std::array<FloatBufferLine,NUM_LINES> mLateSamples{};
 500
 501     std::array<float,MaxAmbiOrder+1> mOrderScales{};
 502
 503     bool mUpmixOutput{false};
 504
 505
 506     void MixOutPlain(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut,
 507         const size_t todo)
 508     {
 509         ASSUME(todo > 0);
 510
 511         /* When not upsampling, the panning gains convert to B-Format and pan
 512          * at the same time.
 513          */
 514         for(size_t c{0u};c < NUM_LINES;c++)
 515         {
 516             const al::span<float> tmpspan{mEarlySamples[c].data(), todo};
 517             MixSamples(tmpspan, samplesOut, pipeline.mEarly.CurrentGain[c],
 518                 pipeline.mEarly.PanGain[c], todo, 0);
 519         }
 520         for(size_t c{0u};c < NUM_LINES;c++)
 521         {
 522             const al::span<float> tmpspan{mLateSamples[c].data(), todo};
 523             MixSamples(tmpspan, samplesOut, pipeline.mLate.CurrentGain[c],
 524                 pipeline.mLate.PanGain[c], todo, 0);
 525         }
 526     }
 527
 528     void MixOutAmbiUp(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut,
 529         const size_t todo)
 530     {
 531         ASSUME(todo > 0);
 532
 533         auto DoMixRow = [](const al::span<float> OutBuffer, const al::span<const float,4> Gains,
 534             const float *InSamples, const size_t InStride)
 535         {
 536             std::fill(OutBuffer.begin(), OutBuffer.end(), 0.0f);
 537             for(const float gain : Gains)
 538             {
 539                 const float *RESTRICT input{al::assume_aligned<16>(InSamples)};
 540                 InSamples += InStride;
 541
 542                 if(!(std::fabs(gain) > GainSilenceThreshold))
 543                     continue;
 544
 545                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
 546                 { return sample + in*gain; };
 547                 std::transform(OutBuffer.begin(), OutBuffer.end(), input, OutBuffer.begin(),
 548                     mix_sample);
 549             }
 550         };
 551
 552         /* When upsampling, the B-Format conversion needs to be done separately
 553          * so the proper HF scaling can be applied to each B-Format channel.
 554          * The panning gains then pan and upsample the B-Format channels.
 555          */
 556         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), todo};
 557         for(size_t c{0u};c < NUM_LINES;c++)
 558         {
 559             DoMixRow(tmpspan, EarlyA2B[c], mEarlySamples[0].data(), mEarlySamples[0].size());
 560
 561             /* Apply scaling to the B-Format's HF response to "upsample" it to
 562              * higher-order output.
 563              */
 564             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 565             pipeline.mAmbiSplitter[0][c].processHfScale(tmpspan, hfscale);
 566
 567             MixSamples(tmpspan, samplesOut, pipeline.mEarly.CurrentGain[c],
 568                 pipeline.mEarly.PanGain[c], todo, 0);
 569         }
 570         for(size_t c{0u};c < NUM_LINES;c++)
 571         {
 572             DoMixRow(tmpspan, LateA2B[c], mLateSamples[0].data(), mLateSamples[0].size());
 573
 574             const float hfscale{(c==0) ? mOrderScales[0] : mOrderScales[1]};
 575             pipeline.mAmbiSplitter[1][c].processHfScale(tmpspan, hfscale);
 576
 577             MixSamples(tmpspan, samplesOut, pipeline.mLate.CurrentGain[c],
 578                 pipeline.mLate.PanGain[c], todo, 0);
 579         }
 580     }
 581
 582     void mixOut(ReverbPipeline &pipeline, const al::span<FloatBufferLine> samplesOut, const size_t todo)
 583     {
 584         if(mUpmixOutput)
 585             MixOutAmbiUp(pipeline, samplesOut, todo);
 586         else
 587             MixOutPlain(pipeline, samplesOut, todo);
 588     }
 589
 590     void allocLines(const float frequency);
 591
 592     void deviceUpdate(const DeviceBase *device, const Buffer &buffer) override;
 593     void update(const ContextBase *context, const EffectSlot *slot, const EffectProps *props,
 594         const EffectTarget target) override;
 595     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
 596         const al::span<FloatBufferLine> samplesOut) override;
 597
 598     DEF_NEWDEL(ReverbState)
 599 };
 600
 601 /**************************************
 602  *  Device Update                     *
 603  **************************************/
 604
 605 inline float CalcDelayLengthMult(float density)
 606 { return maxf(5.0f, std::cbrt(density*DENSITY_SCALE)); }
 607
 608 /* Calculates the delay line metrics and allocates the shared sample buffer
 609  * for all lines given the sample rate (frequency).
 610  */
 611 void ReverbState::allocLines(const float frequency)
 612 {
 613     /* All delay line lengths are calculated to accomodate the full range of
 614      * lengths given their respective paramters.
 615      */
 616     size_t totalSamples{0u};
 617
 618     /* Multiplier for the maximum density value, i.e. density=1, which is
 619      * actually the least density...
 620      */
 621     const float multiplier{CalcDelayLengthMult(1.0f)};
 622
 623     /* The modulator's line length is calculated from the maximum modulation
 624      * time and depth coefficient, and halfed for the low-to-high frequency
 625      * swing.
 626      */
 627     constexpr float max_mod_delay{MaxModulationTime*MODULATION_DEPTH_COEFF / 2.0f};
 628
 629     for(auto &pipeline : mPipelines)
 630     {
 631         /* The main delay length includes the maximum early reflection delay,
 632          * the largest early tap width, the maximum late reverb delay, and the
 633          * largest late tap width.  Finally, it must also be extended by the
 634          * update size (BufferLineSize) for block processing.
 635          */
 636         float length{ReverbMaxReflectionsDelay + EARLY_TAP_LENGTHS.back()*multiplier};
 637         totalSamples += pipeline.mEarlyDelayIn.calcLineLength(length, totalSamples, frequency,
 638             BufferLineSize);
 639
 640         constexpr float LateLineDiffAvg{(LATE_LINE_LENGTHS.back()-LATE_LINE_LENGTHS.front()) /
 641             float{NUM_LINES}};
 642         length = ReverbMaxLateReverbDelay + LateLineDiffAvg*multiplier;
 643         totalSamples += pipeline.mLateDelayIn.calcLineLength(length, totalSamples, frequency,
 644             BufferLineSize);
 645
 646         /* The early vector all-pass line. */
 647         length = EARLY_ALLPASS_LENGTHS.back() * multiplier;
 648         totalSamples += pipeline.mEarly.VecAp.Delay.calcLineLength(length, totalSamples, frequency,
 649             0);
 650
 651         /* The early reflection line. */
 652         length = EARLY_LINE_LENGTHS.back() * multiplier;
 653         totalSamples += pipeline.mEarly.Delay.calcLineLength(length, totalSamples, frequency,
 654             MAX_UPDATE_SAMPLES);
 655
 656         /* The late vector all-pass line. */
 657         length = LATE_ALLPASS_LENGTHS.back() * multiplier;
 658         totalSamples += pipeline.mLate.VecAp.Delay.calcLineLength(length, totalSamples, frequency,
 659             0);
 660
 661         /* The late delay lines are calculated from the largest maximum density
 662          * line length, and the maximum modulation delay. An additional sample
 663          * is added to keep it stable when there is no modulation.
 664          */
 665         length = LATE_LINE_LENGTHS.back()*multiplier + max_mod_delay;
 666         totalSamples += pipeline.mLate.Delay.calcLineLength(length, totalSamples, frequency, 1);
 667     }
 668
 669     if(totalSamples != mSampleBuffer.size())
 670         decltype(mSampleBuffer)(totalSamples).swap(mSampleBuffer);
 671
 672     /* Clear the sample buffer. */
 673     std::fill(mSampleBuffer.begin(), mSampleBuffer.end(), decltype(mSampleBuffer)::value_type{});
 674
 675     /* Update all delays to reflect the new sample buffer. */
 676     for(auto &pipeline : mPipelines)
 677     {
 678         pipeline.mEarlyDelayIn.realizeLineOffset(mSampleBuffer.data());
 679         pipeline.mLateDelayIn.realizeLineOffset(mSampleBuffer.data());
 680         pipeline.mEarly.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 681         pipeline.mEarly.Delay.realizeLineOffset(mSampleBuffer.data());
 682         pipeline.mLate.VecAp.Delay.realizeLineOffset(mSampleBuffer.data());
 683         pipeline.mLate.Delay.realizeLineOffset(mSampleBuffer.data());
 684     }
 685 }
 686
 687 void ReverbState::deviceUpdate(const DeviceBase *device, const Buffer&)
 688 {
 689     const auto frequency = static_cast<float>(device->Frequency);
 690
 691     /* Allocate the delay lines. */
 692     allocLines(frequency);
 693
 694     for(auto &pipeline : mPipelines)
 695     {
 696         /* Clear filters and gain coefficients since the delay lines were all just
 697         * cleared (if not reallocated).
 698         */
 699         for(auto &filter : pipeline.mFilter)
 700         {
 701             filter.Lp.clear();
 702             filter.Hp.clear();
 703         }
 704
 705         std::fill(std::begin(pipeline.mEarlyDelayCoeff),std::end(pipeline.mEarlyDelayCoeff), 0.0f);
 706         std::fill(std::begin(pipeline.mEarlyDelayCoeff),std::end(pipeline.mEarlyDelayCoeff), 0.0f);
 707
 708         pipeline.mLate.DensityGain = 0.0f;
 709         for(auto &t60 : pipeline.mLate.T60)
 710         {
 711             t60.MidGain = 0.0f;
 712             t60.HFFilter.clear();
 713             t60.LFFilter.clear();
 714         }
 715
 716         pipeline.mLate.Mod.Index = 0;
 717         pipeline.mLate.Mod.Step = 1;
 718         pipeline.mLate.Mod.Depth = 0.0f;
 719
 720         for(auto &gains : pipeline.mEarly.CurrentGain)
 721             std::fill(std::begin(gains), std::end(gains), 0.0f);
 722         for(auto &gains : pipeline.mEarly.PanGain)
 723             std::fill(std::begin(gains), std::end(gains), 0.0f);
 724         for(auto &gains : pipeline.mLate.CurrentGain)
 725             std::fill(std::begin(gains), std::end(gains), 0.0f);
 726         for(auto &gains : pipeline.mLate.PanGain)
 727             std::fill(std::begin(gains), std::end(gains), 0.0f);
 728     }
 729     mPipelineState = DeviceClear;
 730
 731     /* Reset offset base. */
 732     mOffset = 0;
 733
 734     if(device->mAmbiOrder > 1)
 735     {
 736         mUpmixOutput = true;
 737         mOrderScales = AmbiScale::GetHFOrderScales(1, device->mAmbiOrder, device->m2DMixing);
 738     }
 739     else
 740     {
 741         mUpmixOutput = false;
 742         mOrderScales.fill(1.0f);
 743     }
 744     mPipelines[0].mAmbiSplitter[0][0].init(device->mXOverFreq / frequency);
 745     for(auto &pipeline : mPipelines)
 746     {
 747         std::fill(pipeline.mAmbiSplitter[0].begin(), pipeline.mAmbiSplitter[0].end(),
 748             pipeline.mAmbiSplitter[0][0]);
 749         std::fill(pipeline.mAmbiSplitter[1].begin(), pipeline.mAmbiSplitter[1].end(),
 750             pipeline.mAmbiSplitter[0][0]);
 751     }
 752 }
 753
 754 /**************************************
 755  *  Effect Update                     *
 756  **************************************/
 757
 758 /* Calculate a decay coefficient given the length of each cycle and the time
 759  * until the decay reaches -60 dB.
 760  */
 761 inline float CalcDecayCoeff(const float length, const float decayTime)
 762 { return std::pow(ReverbDecayGain, length/decayTime); }
 763
 764 /* Calculate a decay length from a coefficient and the time until the decay
 765  * reaches -60 dB.
 766  */
 767 inline float CalcDecayLength(const float coeff, const float decayTime)
 768 {
 769     constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
 770     return std::log10(coeff) * decayTime / log10_decaygain;
 771 }
 772
 773 /* Calculate an attenuation to be applied to the input of any echo models to
 774  * compensate for modal density and decay time.
 775  */
 776 inline float CalcDensityGain(const float a)
 777 {
 778     /* The energy of a signal can be obtained by finding the area under the
 779      * squared signal.  This takes the form of Sum(x_n^2), where x is the
 780      * amplitude for the sample n.
 781      *
 782      * Decaying feedback matches exponential decay of the form Sum(a^n),
 783      * where a is the attenuation coefficient, and n is the sample.  The area
 784      * under this decay curve can be calculated as:  1 / (1 - a).
 785      *
 786      * Modifying the above equation to find the area under the squared curve
 787      * (for energy) yields:  1 / (1 - a^2).  Input attenuation can then be
 788      * calculated by inverting the square root of this approximation,
 789      * yielding:  1 / sqrt(1 / (1 - a^2)), simplified to: sqrt(1 - a^2).
 790      */
 791     return std::sqrt(1.0f - a*a);
 792 }
 793
 794 /* Calculate the scattering matrix coefficients given a diffusion factor. */
 795 inline void CalcMatrixCoeffs(const float diffusion, float *x, float *y)
 796 {
 797     /* The matrix is of order 4, so n is sqrt(4 - 1). */
 798     constexpr float n{al::numbers::sqrt3_v<float>};
 799     const float t{diffusion * std::atan(n)};
 800
 801     /* Calculate the first mixing matrix coefficient. */
 802     *x = std::cos(t);
 803     /* Calculate the second mixing matrix coefficient. */
 804     *y = std::sin(t) / n;
 805 }
 806
 807 /* Calculate the limited HF ratio for use with the late reverb low-pass
 808  * filters.
 809  */
 810 float CalcLimitedHfRatio(const float hfRatio, const float airAbsorptionGainHF,
 811     const float decayTime)
 812 {
 813     /* Find the attenuation due to air absorption in dB (converting delay
 814      * time to meters using the speed of sound).  Then reversing the decay
 815      * equation, solve for HF ratio.  The delay length is cancelled out of
 816      * the equation, so it can be calculated once for all lines.
 817      */
 818     float limitRatio{1.0f / SpeedOfSoundMetersPerSec /
 819         CalcDecayLength(airAbsorptionGainHF, decayTime)};
 820
 821     /* Using the limit calculated above, apply the upper bound to the HF ratio. */
 822     return minf(limitRatio, hfRatio);
 823 }
 824
 825
 826 /* Calculates the 3-band T60 damping coefficients for a particular delay line
 827  * of specified length, using a combination of two shelf filter sections given
 828  * decay times for each band split at two reference frequencies.
 829  */
 830 void T60Filter::calcCoeffs(const float length, const float lfDecayTime,
 831     const float mfDecayTime, const float hfDecayTime, const float lf0norm,
 832     const float hf0norm)
 833 {
 834     const float mfGain{CalcDecayCoeff(length, mfDecayTime)};
 835     const float lfGain{CalcDecayCoeff(length, lfDecayTime) / mfGain};
 836     const float hfGain{CalcDecayCoeff(length, hfDecayTime) / mfGain};
 837
 838     MidGain = mfGain;
 839     LFFilter.setParamsFromSlope(BiquadType::LowShelf, lf0norm, lfGain, 1.0f);
 840     HFFilter.setParamsFromSlope(BiquadType::HighShelf, hf0norm, hfGain, 1.0f);
 841 }
 842
 843 /* Update the early reflection line lengths and gain coefficients. */
 844 void EarlyReflections::updateLines(const float density_mult, const float diffusion,
 845     const float decayTime, const float frequency)
 846 {
 847     /* Calculate the all-pass feed-back/forward coefficient. */
 848     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 849
 850     for(size_t i{0u};i < NUM_LINES;i++)
 851     {
 852         /* Calculate the delay length of each all-pass line. */
 853         float length{EARLY_ALLPASS_LENGTHS[i] * density_mult};
 854         VecAp.Offset[i] = float2uint(length * frequency);
 855
 856         /* Calculate the delay length of each delay line. */
 857         length = EARLY_LINE_LENGTHS[i] * density_mult;
 858         Offset[i] = float2uint(length * frequency);
 859
 860         /* Calculate the gain (coefficient) for each line. */
 861         Coeff[i] = CalcDecayCoeff(length, decayTime);
 862     }
 863 }
 864
 865 /* Update the EAX modulation step and depth. Keep in mind that this kind of
 866  * vibrato is additive and not multiplicative as one may expect. The downswing
 867  * will sound stronger than the upswing.
 868  */
 869 void Modulation::updateModulator(float modTime, float modDepth, float frequency)
 870 {
 871     /* Modulation is calculated in two parts.
 872      *
 873      * The modulation time effects the sinus rate, altering the speed of
 874      * frequency changes. An index is incremented for each sample with an
 875      * appropriate step size to generate an LFO, which will vary the feedback
 876      * delay over time.
 877      */
 878     Step = maxu(fastf2u(MOD_FRACONE / (frequency * modTime)), 1);
 879
 880     /* The modulation depth effects the amount of frequency change over the
 881      * range of the sinus. It needs to be scaled by the modulation time so that
 882      * a given depth produces a consistent change in frequency over all ranges
 883      * of time. Since the depth is applied to a sinus value, it needs to be
 884      * halved once for the sinus range and again for the sinus swing in time
 885      * (half of it is spent decreasing the frequency, half is spent increasing
 886      * it).
 887      */
 888     if(modTime >= DefaultModulationTime)
 889     {
 890         /* To cancel the effects of a long period modulation on the late
 891          * reverberation, the amount of pitch should be varied (decreased)
 892          * according to the modulation time. The natural form is varying
 893          * inversely, in fact resulting in an invariant.
 894          */
 895         Depth = MODULATION_DEPTH_COEFF / 4.0f * DefaultModulationTime * modDepth * frequency;
 896     }
 897     else
 898         Depth = MODULATION_DEPTH_COEFF / 4.0f * modTime * modDepth * frequency;
 899 }
 900
 901 /* Update the late reverb line lengths and T60 coefficients. */
 902 void LateReverb::updateLines(const float density_mult, const float diffusion,
 903     const float lfDecayTime, const float mfDecayTime, const float hfDecayTime,
 904     const float lf0norm, const float hf0norm, const float frequency)
 905 {
 906     /* Scaling factor to convert the normalized reference frequencies from
 907      * representing 0...freq to 0...max_reference.
 908      */
 909     constexpr float MaxHFReference{20000.0f};
 910     const float norm_weight_factor{frequency / MaxHFReference};
 911
 912     const float late_allpass_avg{
 913         std::accumulate(LATE_ALLPASS_LENGTHS.begin(), LATE_ALLPASS_LENGTHS.end(), 0.0f) /
 914         float{NUM_LINES}};
 915
 916     /* To compensate for changes in modal density and decay time of the late
 917      * reverb signal, the input is attenuated based on the maximal energy of
 918      * the outgoing signal.  This approximation is used to keep the apparent
 919      * energy of the signal equal for all ranges of density and decay time.
 920      *
 921      * The average length of the delay lines is used to calculate the
 922      * attenuation coefficient.
 923      */
 924     float length{std::accumulate(LATE_LINE_LENGTHS.begin(), LATE_LINE_LENGTHS.end(), 0.0f) /
 925         float{NUM_LINES} + late_allpass_avg};
 926     length *= density_mult;
 927     /* The density gain calculation uses an average decay time weighted by
 928      * approximate bandwidth. This attempts to compensate for losses of energy
 929      * that reduce decay time due to scattering into highly attenuated bands.
 930      */
 931     const float decayTimeWeighted{
 932         lf0norm*norm_weight_factor*lfDecayTime +
 933         (hf0norm - lf0norm)*norm_weight_factor*mfDecayTime +
 934         (1.0f - hf0norm*norm_weight_factor)*hfDecayTime};
 935     DensityGain = CalcDensityGain(CalcDecayCoeff(length, decayTimeWeighted));
 936
 937     /* Calculate the all-pass feed-back/forward coefficient. */
 938     VecAp.Coeff = diffusion*diffusion * InvSqrt2;
 939
 940     for(size_t i{0u};i < NUM_LINES;i++)
 941     {
 942         /* Calculate the delay length of each all-pass line. */
 943         length = LATE_ALLPASS_LENGTHS[i] * density_mult;
 944         VecAp.Offset[i] = float2uint(length * frequency);
 945
 946         /* Calculate the delay length of each feedback delay line. */
 947         length = LATE_LINE_LENGTHS[i] * density_mult;
 948         Offset[i] = float2uint(length*frequency + 0.5f);
 949
 950         /* Approximate the absorption that the vector all-pass would exhibit
 951          * given the current diffusion so we don't have to process a full T60
 952          * filter for each of its four lines. Also include the average
 953          * modulation delay (depth is half the max delay in samples).
 954          */
 955         length += lerpf(LATE_ALLPASS_LENGTHS[i], late_allpass_avg, diffusion)*density_mult +
 956             Mod.Depth/frequency;
 957
 958         /* Calculate the T60 damping coefficients for each line. */
 959         T60[i].calcCoeffs(length, lfDecayTime, mfDecayTime, hfDecayTime, lf0norm, hf0norm);
 960     }
 961 }
 962
 963
 964 /* Update the offsets for the main effect delay line. */
 965 void ReverbPipeline::updateDelayLine(const float earlyDelay, const float lateDelay,
 966     const float density_mult, const float decayTime, const float frequency)
 967 {
 968     /* Early reflection taps are decorrelated by means of an average room
 969      * reflection approximation described above the definition of the taps.
 970      * This approximation is linear and so the above density multiplier can
 971      * be applied to adjust the width of the taps.  A single-band decay
 972      * coefficient is applied to simulate initial attenuation and absorption.
 973      *
 974      * Late reverb taps are based on the late line lengths to allow a zero-
 975      * delay path and offsets that would continue the propagation naturally
 976      * into the late lines.
 977      */
 978     for(size_t i{0u};i < NUM_LINES;i++)
 979     {
 980         float length{EARLY_TAP_LENGTHS[i]*density_mult};
 981         mEarlyDelayTap[i][1] = float2uint((earlyDelay+length) * frequency);
 982         mEarlyDelayCoeff[i] = CalcDecayCoeff(length, decayTime);
 983
 984         length = (LATE_LINE_LENGTHS[i] - LATE_LINE_LENGTHS.front())/float{NUM_LINES}*density_mult +
 985             lateDelay;
 986         mLateDelayTap[i][1] = float2uint(length * frequency);
 987     }
 988 }
 989
 990 /* Creates a transform matrix given a reverb vector. The vector pans the reverb
 991  * reflections toward the given direction, using its magnitude (up to 1) as a
 992  * focal strength. This function results in a B-Format transformation matrix
 993  * that spatially focuses the signal in the desired direction.
 994  */
 995 std::array<std::array<float,4>,4> GetTransformFromVector(const float *vec)
 996 {
 997     /* Normalize the panning vector according to the N3D scale, which has an
 998      * extra sqrt(3) term on the directional components. Converting from OpenAL
 999      * to B-Format also requires negating X (ACN 1) and Z (ACN 3). Note however
1000      * that the reverb panning vectors use left-handed coordinates, unlike the
1001      * rest of OpenAL which use right-handed. This is fixed by negating Z,
1002      * which cancels out with the B-Format Z negation.
1003      */
1004     float norm[3];
1005     float mag{std::sqrt(vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2])};
1006     if(mag > 1.0f)
1007     {
1008         norm[0] = vec[0] / mag * -al::numbers::sqrt3_v<float>;
1009         norm[1] = vec[1] / mag * al::numbers::sqrt3_v<float>;
1010         norm[2] = vec[2] / mag * al::numbers::sqrt3_v<float>;
1011         mag = 1.0f;
1012     }
1013     else
1014     {
1015         /* If the magnitude is less than or equal to 1, just apply the sqrt(3)
1016          * term. There's no need to renormalize the magnitude since it would
1017          * just be reapplied in the matrix.
1018          */
1019         norm[0] = vec[0] * -al::numbers::sqrt3_v<float>;
1020         norm[1] = vec[1] * al::numbers::sqrt3_v<float>;
1021         norm[2] = vec[2] * al::numbers::sqrt3_v<float>;
1022     }
1023
1024     return std::array<std::array<float,4>,4>{{
1025         {{1.0f,   0.0f,    0.0f,   0.0f}},
1026         {{norm[0], 1.0f-mag, 0.0f, 0.0f}},
1027         {{norm[1], 0.0f, 1.0f-mag, 0.0f}},
1028         {{norm[2], 0.0f, 0.0f, 1.0f-mag}}
1029     }};
1030 }
1031
1032 /* Update the early and late 3D panning gains. */
1033 void ReverbPipeline::update3DPanning(const float *ReflectionsPan, const float *LateReverbPan,
1034     const float earlyGain, const float lateGain, const bool doUpmix, const MixParams *mainMix)
1035 {
1036     /* Create matrices that transform a B-Format signal according to the
1037      * panning vectors.
1038      */
1039     const std::array<std::array<float,4>,4> earlymat{GetTransformFromVector(ReflectionsPan)};
1040     const std::array<std::array<float,4>,4> latemat{GetTransformFromVector(LateReverbPan)};
1041
1042     if(doUpmix)
1043     {
1044         /* When upsampling, combine the early and late transforms with the
1045          * first-order upsample matrix. This results in panning gains that
1046          * apply the panning transform to first-order B-Format, which is then
1047          * upsampled.
1048          */
1049         auto mult_matrix = [](const al::span<const std::array<float,4>,4> mtx1)
1050         {
1051             auto&& mtx2 = AmbiScale::FirstOrderUp;
1052             std::array<std::array<float,MaxAmbiChannels>,NUM_LINES> res{};
1053
1054             for(size_t i{0};i < mtx1[0].size();++i)
1055             {
1056                 for(size_t j{0};j < mtx2[0].size();++j)
1057                 {
1058                     double sum{0.0};
1059                     for(size_t k{0};k < mtx1.size();++k)
1060                         sum += double{mtx1[k][i]} * mtx2[k][j];
1061                     res[i][j] = static_cast<float>(sum);
1062                 }
1063             }
1064
1065             return res;
1066         };
1067         auto earlycoeffs = mult_matrix(earlymat);
1068         auto latecoeffs = mult_matrix(latemat);
1069
1070         for(size_t i{0u};i < NUM_LINES;i++)
1071             ComputePanGains(mainMix, earlycoeffs[i].data(), earlyGain, mEarly.PanGain[i]);
1072         for(size_t i{0u};i < NUM_LINES;i++)
1073             ComputePanGains(mainMix, latecoeffs[i].data(), lateGain, mLate.PanGain[i]);
1074     }
1075     else
1076     {
1077         /* When not upsampling, combine the early and late A-to-B-Format
1078          * conversions with their respective transform. This results panning
1079          * gains that convert A-Format to B-Format, which is then panned.
1080          */
1081         auto mult_matrix = [](const al::span<const std::array<float,NUM_LINES>,4> mtx1,
1082             const al::span<const std::array<float,4>,4> mtx2)
1083         {
1084             std::array<std::array<float,MaxAmbiChannels>,NUM_LINES> res{};
1085
1086             for(size_t i{0};i < mtx1[0].size();++i)
1087             {
1088                 for(size_t j{0};j < mtx2.size();++j)
1089                 {
1090                     double sum{0.0};
1091                     for(size_t k{0};k < mtx1.size();++k)
1092                         sum += double{mtx1[k][i]} * mtx2[j][k];
1093                     res[i][j] = static_cast<float>(sum);
1094                 }
1095             }
1096
1097             return res;
1098         };
1099         auto earlycoeffs = mult_matrix(EarlyA2B, earlymat);
1100         auto latecoeffs = mult_matrix(LateA2B, latemat);
1101
1102         for(size_t i{0u};i < NUM_LINES;i++)
1103             ComputePanGains(mainMix, earlycoeffs[i].data(), earlyGain, mEarly.PanGain[i]);
1104         for(size_t i{0u};i < NUM_LINES;i++)
1105             ComputePanGains(mainMix, latecoeffs[i].data(), lateGain, mLate.PanGain[i]);
1106     }
1107 }
1108
1109 void ReverbState::update(const ContextBase *Context, const EffectSlot *Slot,
1110     const EffectProps *props, const EffectTarget target)
1111 {
1112     const DeviceBase *Device{Context->mDevice};
1113     const auto frequency = static_cast<float>(Device->Frequency);
1114
1115     /* If the HF limit parameter is flagged, calculate an appropriate limit
1116      * based on the air absorption parameter.
1117      */
1118     float hfRatio{props->Reverb.DecayHFRatio};
1119     if(props->Reverb.DecayHFLimit && props->Reverb.AirAbsorptionGainHF < 1.0f)
1120         hfRatio = CalcLimitedHfRatio(hfRatio, props->Reverb.AirAbsorptionGainHF,
1121             props->Reverb.DecayTime);
1122
1123     /* Calculate the LF/HF decay times. */
1124     constexpr float MinDecayTime{0.1f}, MaxDecayTime{20.0f};
1125     const float lfDecayTime{clampf(props->Reverb.DecayTime*props->Reverb.DecayLFRatio,
1126         MinDecayTime, MaxDecayTime)};
1127     const float hfDecayTime{clampf(props->Reverb.DecayTime*hfRatio, MinDecayTime, MaxDecayTime)};
1128
1129     /* Determine if a full update is required. */
1130     const bool fullUpdate{mPipelineState == DeviceClear ||
1131         /* Density is essentially a master control for the feedback delays, so
1132          * changes the offsets of many delay lines.
1133          */
1134         mParams.Density != props->Reverb.Density ||
1135         /* Diffusion and decay times influences the decay rate (gain) of the
1136          * late reverb T60 filter.
1137          */
1138         mParams.Diffusion != props->Reverb.Diffusion ||
1139         mParams.DecayTime != props->Reverb.DecayTime ||
1140         mParams.HFDecayTime != hfDecayTime ||
1141         mParams.LFDecayTime != lfDecayTime ||
1142         /* Modulation time and depth both require fading the modulation delay. */
1143         mParams.ModulationTime != props->Reverb.ModulationTime ||
1144         mParams.ModulationDepth != props->Reverb.ModulationDepth ||
1145         /* HF/LF References control the weighting used to calculate the density
1146          * gain.
1147          */
1148         mParams.HFReference != props->Reverb.HFReference ||
1149         mParams.LFReference != props->Reverb.LFReference};
1150     if(fullUpdate)
1151     {
1152         mParams.Density = props->Reverb.Density;
1153         mParams.Diffusion = props->Reverb.Diffusion;
1154         mParams.DecayTime = props->Reverb.DecayTime;
1155         mParams.HFDecayTime = hfDecayTime;
1156         mParams.LFDecayTime = lfDecayTime;
1157         mParams.ModulationTime = props->Reverb.ModulationTime;
1158         mParams.ModulationDepth = props->Reverb.ModulationDepth;
1159         mParams.HFReference = props->Reverb.HFReference;
1160         mParams.LFReference = props->Reverb.LFReference;
1161
1162         mPipelineState = (mPipelineState != DeviceClear) ? StartFade : Normal;
1163         mCurrentPipeline ^= 1;
1164     }
1165     auto &pipeline = mPipelines[mCurrentPipeline];
1166
1167     /* Update early and late 3D panning. */
1168     mOutTarget = target.Main->Buffer;
1169     const float gain{props->Reverb.Gain * Slot->Gain * ReverbBoost};
1170     pipeline.update3DPanning(props->Reverb.ReflectionsPan, props->Reverb.LateReverbPan,
1171         props->Reverb.ReflectionsGain*gain, props->Reverb.LateReverbGain*gain, mUpmixOutput,
1172         target.Main);
1173
1174     if(!fullUpdate)
1175     {
1176         /* Calculate the master filters */
1177         float hf0norm{minf(mParams.HFReference/frequency, 0.49f)};
1178         pipeline.mFilter[0].Lp.setParamsFromSlope(BiquadType::HighShelf, hf0norm, props->Reverb.GainHF, 1.0f);
1179         float lf0norm{minf(mParams.LFReference/frequency, 0.49f)};
1180         pipeline.mFilter[0].Hp.setParamsFromSlope(BiquadType::LowShelf, lf0norm, props->Reverb.GainLF, 1.0f);
1181         for(size_t i{1u};i < NUM_LINES;i++)
1182         {
1183             pipeline.mFilter[i].Lp.copyParamsFrom(pipeline.mFilter[0].Lp);
1184             pipeline.mFilter[i].Hp.copyParamsFrom(pipeline.mFilter[0].Hp);
1185         }
1186
1187         /* The density-based room size (delay length) multiplier. */
1188         const float density_mult{CalcDelayLengthMult(mParams.Density)};
1189
1190         /* Update the main effect delay and associated taps. */
1191         pipeline.updateDelayLine(props->Reverb.ReflectionsDelay, props->Reverb.LateReverbDelay,
1192             density_mult, mParams.DecayTime, frequency);
1193     }
1194     else
1195     {
1196         float hf0norm{minf(props->Reverb.HFReference/frequency, 0.49f)};
1197         pipeline.mFilter[0].Lp.setParamsFromSlope(BiquadType::HighShelf, hf0norm, props->Reverb.GainHF, 1.0f);
1198         float lf0norm{minf(props->Reverb.LFReference/frequency, 0.49f)};
1199         pipeline.mFilter[0].Hp.setParamsFromSlope(BiquadType::LowShelf, lf0norm, props->Reverb.GainLF, 1.0f);
1200         for(size_t i{1u};i < NUM_LINES;i++)
1201         {
1202             pipeline.mFilter[i].Lp.copyParamsFrom(pipeline.mFilter[0].Lp);
1203             pipeline.mFilter[i].Hp.copyParamsFrom(pipeline.mFilter[0].Hp);
1204         }
1205
1206         const float density_mult{CalcDelayLengthMult(props->Reverb.Density)};
1207
1208         pipeline.updateDelayLine(props->Reverb.ReflectionsDelay, props->Reverb.LateReverbDelay,
1209             density_mult, props->Reverb.DecayTime, frequency);
1210
1211         /* Update the early lines. */
1212         pipeline.mEarly.updateLines(density_mult, props->Reverb.Diffusion, props->Reverb.DecayTime,
1213             frequency);
1214
1215         /* Get the mixing matrix coefficients. */
1216         CalcMatrixCoeffs(props->Reverb.Diffusion, &pipeline.mMixX, &pipeline.mMixY);
1217
1218         /* Update the modulator rate and depth. */
1219         pipeline.mLate.Mod.updateModulator(props->Reverb.ModulationTime,
1220             props->Reverb.ModulationDepth, frequency);
1221
1222         /* Update the late lines. */
1223         pipeline.mLate.updateLines(density_mult, props->Reverb.Diffusion, lfDecayTime,
1224             props->Reverb.DecayTime, hfDecayTime, lf0norm, hf0norm, frequency);
1225
1226         const float decayCount{minf(props->Reverb.DecayTime*frequency, 1'000'000.0f)};
1227         pipeline.mFadeSampleCount = static_cast<size_t>(decayCount);
1228     }
1229 }
1230
1231
1232 /**************************************
1233  *  Effect Processing                 *
1234  **************************************/
1235
1236 /* Applies a scattering matrix to the 4-line (vector) input.  This is used
1237  * for both the below vector all-pass model and to perform modal feed-back
1238  * delay network (FDN) mixing.
1239  *
1240  * The matrix is derived from a skew-symmetric matrix to form a 4D rotation
1241  * matrix with a single unitary rotational parameter:
1242  *
1243  *     [  d,  a,  b,  c ]          1 = a^2 + b^2 + c^2 + d^2
1244  *     [ -a,  d,  c, -b ]
1245  *     [ -b, -c,  d,  a ]
1246  *     [ -c,  b, -a,  d ]
1247  *
1248  * The rotation is constructed from the effect's diffusion parameter,
1249  * yielding:
1250  *
1251  *     1 = x^2 + 3 y^2
1252  *
1253  * Where a, b, and c are the coefficient y with differing signs, and d is the
1254  * coefficient x.  The final matrix is thus:
1255  *
1256  *     [  x,  y, -y,  y ]          n = sqrt(matrix_order - 1)
1257  *     [ -y,  x,  y,  y ]          t = diffusion_parameter * atan(n)
1258  *     [  y, -y,  x,  y ]          x = cos(t)
1259  *     [ -y, -y, -y,  x ]          y = sin(t) / n
1260  *
1261  * Any square orthogonal matrix with an order that is a power of two will
1262  * work (where ^T is transpose, ^-1 is inverse):
1263  *
1264  *     M^T = M^-1
1265  *
1266  * Using that knowledge, finding an appropriate matrix can be accomplished
1267  * naively by searching all combinations of:
1268  *
1269  *     M = D + S - S^T
1270  *
1271  * Where D is a diagonal matrix (of x), and S is a triangular matrix (of y)
1272  * whose combination of signs are being iterated.
1273  */
1274 inline auto VectorPartialScatter(const std::array<float,NUM_LINES> &RESTRICT in,
1275     const float xCoeff, const float yCoeff) -> std::array<float,NUM_LINES>
1276 {
1277     return std::array<float,NUM_LINES>{{
1278         xCoeff*in[0] + yCoeff*(          in[1] + -in[2] + in[3]),
1279         xCoeff*in[1] + yCoeff*(-in[0]          +  in[2] + in[3]),
1280         xCoeff*in[2] + yCoeff*( in[0] + -in[1]          + in[3]),
1281         xCoeff*in[3] + yCoeff*(-in[0] + -in[1] + -in[2]        )
1282     }};
1283 }
1284
1285 /* Utilizes the above, but reverses the input channels. */
1286 void VectorScatterRevDelayIn(const DelayLineI delay, size_t offset, const float xCoeff,
1287     const float yCoeff, const al::span<const ReverbUpdateLine,NUM_LINES> in, const size_t count)
1288 {
1289     ASSUME(count > 0);
1290
1291     for(size_t i{0u};i < count;)
1292     {
1293         offset &= delay.Mask;
1294         size_t td{minz(delay.Mask+1 - offset, count-i)};
1295         do {
1296             std::array<float,NUM_LINES> f;
1297             for(size_t j{0u};j < NUM_LINES;j++)
1298                 f[NUM_LINES-1-j] = in[j][i];
1299             ++i;
1300
1301             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1302         } while(--td);
1303     }
1304 }
1305
1306 /* This applies a Gerzon multiple-in/multiple-out (MIMO) vector all-pass
1307  * filter to the 4-line input.
1308  *
1309  * It works by vectorizing a regular all-pass filter and replacing the delay
1310  * element with a scattering matrix (like the one above) and a diagonal
1311  * matrix of delay elements.
1312  *
1313  * Two static specializations are used for transitional (cross-faded) delay
1314  * line processing and non-transitional processing.
1315  */
1316 void VecAllpass::process(const al::span<ReverbUpdateLine,NUM_LINES> samples, size_t offset,
1317     const float xCoeff, const float yCoeff, const size_t todo)
1318 {
1319     const DelayLineI delay{Delay};
1320     const float feedCoeff{Coeff};
1321
1322     ASSUME(todo > 0);
1323
1324     size_t vap_offset[NUM_LINES];
1325     for(size_t j{0u};j < NUM_LINES;j++)
1326         vap_offset[j] = offset - Offset[j];
1327     for(size_t i{0u};i < todo;)
1328     {
1329         for(size_t j{0u};j < NUM_LINES;j++)
1330             vap_offset[j] &= delay.Mask;
1331         offset &= delay.Mask;
1332
1333         size_t maxoff{offset};
1334         for(size_t j{0u};j < NUM_LINES;j++)
1335             maxoff = maxz(maxoff, vap_offset[j]);
1336         size_t td{minz(delay.Mask+1 - maxoff, todo - i)};
1337
1338         do {
1339             std::array<float,NUM_LINES> f;
1340             for(size_t j{0u};j < NUM_LINES;j++)
1341             {
1342                 const float input{samples[j][i]};
1343                 const float out{delay.Line[vap_offset[j]++][j] - feedCoeff*input};
1344                 f[j] = input + feedCoeff*out;
1345
1346                 samples[j][i] = out;
1347             }
1348             ++i;
1349
1350             delay.Line[offset++] = VectorPartialScatter(f, xCoeff, yCoeff);
1351         } while(--td);
1352     }
1353 }
1354
1355 /* This generates early reflections.
1356  *
1357  * This is done by obtaining the primary reflections (those arriving from the
1358  * same direction as the source) from the main delay line.  These are
1359  * attenuated and all-pass filtered (based on the diffusion parameter).
1360  *
1361  * The early lines are then fed in reverse (according to the approximately
1362  * opposite spatial location of the A-Format lines) to create the secondary
1363  * reflections (those arriving from the opposite direction as the source).
1364  *
1365  * The early response is then completed by combining the primary reflections
1366  * with the delayed and attenuated output from the early lines.
1367  *
1368  * Finally, the early response is reversed, scattered (based on diffusion),
1369  * and fed into the late reverb section of the main delay line.
1370  */
1371 void ReverbPipeline::processEarly(size_t offset, const size_t samplesToDo,
1372     const al::span<ReverbUpdateLine, NUM_LINES> tempSamples,
1373     const al::span<FloatBufferLine, NUM_LINES> outSamples)
1374 {
1375     const DelayLineI early_delay{mEarly.Delay};
1376     const DelayLineI in_delay{mEarlyDelayIn};
1377     const float mixX{mMixX};
1378     const float mixY{mMixY};
1379
1380     ASSUME(samplesToDo > 0);
1381
1382     for(size_t base{0};base < samplesToDo;)
1383     {
1384         const size_t todo{minz(samplesToDo-base, MAX_UPDATE_SAMPLES)};
1385
1386         /* First, load decorrelated samples from the main delay line as the
1387          * primary reflections.
1388          */
1389         const float fadeStep{1.0f / static_cast<float>(todo)};
1390         for(size_t j{0u};j < NUM_LINES;j++)
1391         {
1392             size_t early_delay_tap0{offset - mEarlyDelayTap[j][0]};
1393             size_t early_delay_tap1{offset - mEarlyDelayTap[j][1]};
1394             const float coeff{mEarlyDelayCoeff[j]};
1395             const float coeffStep{early_delay_tap0 != early_delay_tap1 ? coeff*fadeStep : 0.0f};
1396             float fadeCount{0.0f};
1397
1398             for(size_t i{0u};i < todo;)
1399             {
1400                 early_delay_tap0 &= in_delay.Mask;
1401                 early_delay_tap1 &= in_delay.Mask;
1402                 const size_t max_tap{maxz(early_delay_tap0, early_delay_tap1)};
1403                 size_t td{minz(in_delay.Mask+1 - max_tap, todo-i)};
1404                 do {
1405                     const float fade0{coeff - coeffStep*fadeCount};
1406                     const float fade1{coeffStep*fadeCount};
1407                     fadeCount += 1.0f;
1408                     tempSamples[j][i++] = in_delay.Line[early_delay_tap0++][j]*fade0 +
1409                         in_delay.Line[early_delay_tap1++][j]*fade1;
1410                 } while(--td);
1411             }
1412
1413             mEarlyDelayTap[j][0] = mEarlyDelayTap[j][1];
1414         }
1415
1416         /* Apply a vector all-pass, to help color the initial reflections based
1417          * on the diffusion strength.
1418          */
1419         mEarly.VecAp.process(tempSamples, offset, mixX, mixY, todo);
1420
1421         /* Apply a delay and bounce to generate secondary reflections, combine
1422          * with the primary reflections and write out the result for mixing.
1423          */
1424         for(size_t j{0u};j < NUM_LINES;j++)
1425             early_delay.write(offset, NUM_LINES-1-j, tempSamples[j].data(), todo);
1426         for(size_t j{0u};j < NUM_LINES;j++)
1427         {
1428             size_t feedb_tap{offset - mEarly.Offset[j]};
1429             const float feedb_coeff{mEarly.Coeff[j]};
1430             float *RESTRICT out{al::assume_aligned<16>(outSamples[j].data() + base)};
1431
1432             for(size_t i{0u};i < todo;)
1433             {
1434                 feedb_tap &= early_delay.Mask;
1435                 size_t td{minz(early_delay.Mask+1 - feedb_tap, todo - i)};
1436                 do {
1437                     tempSamples[j][i] += early_delay.Line[feedb_tap++][j]*feedb_coeff;
1438                     out[i] = tempSamples[j][i];
1439                     ++i;
1440                 } while(--td);
1441             }
1442         }
1443
1444         /* Finally, write the result to the late delay line input for the late
1445          * reverb stage to pick up at the appropriate time, applying a scatter
1446          * and bounce to improve the initial diffusion in the late reverb.
1447          */
1448         VectorScatterRevDelayIn(mLateDelayIn, offset, mixX, mixY, tempSamples, todo);
1449
1450         base += todo;
1451         offset += todo;
1452     }
1453 }
1454
1455 void Modulation::calcDelays(size_t todo)
1456 {
1457     constexpr float mod_scale{al::numbers::pi_v<float> * 2.0f / MOD_FRACONE};
1458     uint idx{Index};
1459     const uint step{Step};
1460     const float depth{Depth};
1461     for(size_t i{0};i < todo;++i)
1462     {
1463         idx += step;
1464         const float lfo{std::sin(static_cast<float>(idx&MOD_FRACMASK) * mod_scale)};
1465         ModDelays[i] = (lfo+1.0f) * depth;
1466     }
1467     Index = idx;
1468 }
1469
1470
1471 /* This generates the reverb tail using a modified feed-back delay network
1472  * (FDN).
1473  *
1474  * Results from the early reflections are mixed with the output from the
1475  * modulated late delay lines.
1476  *
1477  * The late response is then completed by T60 and all-pass filtering the mix.
1478  *
1479  * Finally, the lines are reversed (so they feed their opposite directions)
1480  * and scattered with the FDN matrix before re-feeding the delay lines.
1481  */
1482 void ReverbPipeline::processLate(size_t offset, const size_t samplesToDo,
1483     const al::span<ReverbUpdateLine, NUM_LINES> tempSamples,
1484     const al::span<FloatBufferLine, NUM_LINES> outSamples)
1485 {
1486     const DelayLineI late_delay{mLate.Delay};
1487     const DelayLineI in_delay{mLateDelayIn};
1488     const float mixX{mMixX};
1489     const float mixY{mMixY};
1490
1491     ASSUME(samplesToDo > 0);
1492
1493     for(size_t base{0};base < samplesToDo;)
1494     {
1495         const size_t todo{minz(samplesToDo-base, minz(mLate.Offset[0], MAX_UPDATE_SAMPLES))};
1496         ASSUME(todo > 0);
1497
1498         /* First, calculate the modulated delays for the late feedback. */
1499         mLate.Mod.calcDelays(todo);
1500
1501         /* Next, load decorrelated samples from the main and feedback delay
1502          * lines. Filter the signal to apply its frequency-dependent decay.
1503          */
1504         const float fadeStep{1.0f / static_cast<float>(todo)};
1505         for(size_t j{0u};j < NUM_LINES;j++)
1506         {
1507             size_t late_delay_tap0{offset - mLateDelayTap[j][0]};
1508             size_t late_delay_tap1{offset - mLateDelayTap[j][1]};
1509             size_t late_feedb_tap{offset - mLate.Offset[j]};
1510             const float midGain{mLate.T60[j].MidGain};
1511             const float densityGain{mLate.DensityGain * midGain};
1512             const float densityStep{late_delay_tap0 != late_delay_tap1 ?
1513                 densityGain*fadeStep : 0.0f};
1514             float fadeCount{0.0f};
1515
1516             for(size_t i{0u};i < todo;)
1517             {
1518                 late_delay_tap0 &= in_delay.Mask;
1519                 late_delay_tap1 &= in_delay.Mask;
1520                 size_t td{minz(todo-i, in_delay.Mask+1 - maxz(late_delay_tap0, late_delay_tap1))};
1521                 do {
1522                     /* Calculate the read offset and fraction between it and
1523                      * the next sample.
1524                      */
1525                     const float fdelay{mLate.Mod.ModDelays[i]};
1526                     const size_t delay{float2uint(fdelay)};
1527                     const float frac{fdelay - static_cast<float>(delay)};
1528
1529                     /* Get the two samples crossed by the delayed offset. */
1530                     const float out0{late_delay.Line[(late_feedb_tap-delay) & late_delay.Mask][j]};
1531                     const float out1{late_delay.Line[(late_feedb_tap-delay-1) & late_delay.Mask][j]};
1532                     ++late_feedb_tap;
1533
1534                     /* The output is obtained by linearly interpolating the two
1535                      * samples that were acquired above, and combined with the
1536                      * main delay tap.
1537                      */
1538                     const float fade0{densityGain - densityStep*fadeCount};
1539                     const float fade1{densityStep*fadeCount};
1540                     fadeCount += 1.0f;
1541                     tempSamples[j][i] = lerpf(out0, out1, frac)*midGain +
1542                         in_delay.Line[late_delay_tap0++][j]*fade0 +
1543                         in_delay.Line[late_delay_tap1++][j]*fade1;
1544                     ++i;
1545                 } while(--td);
1546             }
1547             mLateDelayTap[j][0] = mLateDelayTap[j][1];
1548
1549             mLate.T60[j].process({tempSamples[j].data(), todo});
1550         }
1551
1552         /* Apply a vector all-pass to improve micro-surface diffusion, and
1553          * write out the results for mixing.
1554          */
1555         mLate.VecAp.process(tempSamples, offset, mixX, mixY, todo);
1556         for(size_t j{0u};j < NUM_LINES;j++)
1557             std::copy_n(tempSamples[j].begin(), todo, outSamples[j].begin()+base);
1558
1559         /* Finally, scatter and bounce the results to refeed the feedback buffer. */
1560         VectorScatterRevDelayIn(late_delay, offset, mixX, mixY, tempSamples, todo);
1561
1562         base += todo;
1563         offset += todo;
1564     }
1565 }
1566
1567 void ReverbState::process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
1568 {
1569     const size_t offset{mOffset};
1570
1571     ASSUME(samplesToDo > 0);
1572
1573     auto &oldpipeline = mPipelines[mCurrentPipeline^1];
1574     auto &pipeline = mPipelines[mCurrentPipeline];
1575
1576     if(mPipelineState >= Fading)
1577     {
1578         /* Convert B-Format to A-Format for processing. */
1579         const size_t numInput{minz(samplesIn.size(), NUM_LINES)};
1580         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), samplesToDo};
1581         for(size_t c{0u};c < NUM_LINES;c++)
1582         {
1583             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1584             for(size_t i{0};i < numInput;++i)
1585             {
1586                 const float gain{B2A[c][i]};
1587                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1588
1589                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1590                 { return sample + in*gain; };
1591                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1592                     mix_sample);
1593             }
1594
1595             /* Band-pass the incoming samples and feed the initial delay line. */
1596             auto&& filter = DualBiquad{pipeline.mFilter[c].Lp, pipeline.mFilter[c].Hp};
1597             filter.process(tmpspan, tmpspan.data());
1598             pipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1599         }
1600         if(mPipelineState == Fading)
1601         {
1602             /* Give the old pipeline silence if it's still fading out. */
1603             for(size_t c{0u};c < NUM_LINES;c++)
1604             {
1605                 std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1606
1607                 auto&& filter = DualBiquad{oldpipeline.mFilter[c].Lp, oldpipeline.mFilter[c].Hp};
1608                 filter.process(tmpspan, tmpspan.data());
1609                 oldpipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1610             }
1611         }
1612     }
1613     else
1614     {
1615         /* At the start of a fade, fade in input for the current pipeline, and
1616          * fade out input for the old pipeline.
1617          */
1618         const size_t numInput{minz(samplesIn.size(), NUM_LINES)};
1619         const al::span<float> tmpspan{al::assume_aligned<16>(mTempLine.data()), samplesToDo};
1620         const float fadeStep{1.0f / static_cast<float>(samplesToDo)};
1621
1622         for(size_t c{0u};c < NUM_LINES;c++)
1623         {
1624             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1625             for(size_t i{0};i < numInput;++i)
1626             {
1627                 const float gain{B2A[c][i]};
1628                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1629
1630                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1631                 { return sample + in*gain; };
1632                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1633                     mix_sample);
1634             }
1635             float stepCount{0.0f};
1636             for(float &sample : tmpspan)
1637             {
1638                 stepCount += 1.0f;
1639                 sample *= stepCount*fadeStep;
1640             }
1641
1642             auto&& filter = DualBiquad{pipeline.mFilter[c].Lp, pipeline.mFilter[c].Hp};
1643             filter.process(tmpspan, tmpspan.data());
1644             pipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1645         }
1646         for(size_t c{0u};c < NUM_LINES;c++)
1647         {
1648             std::fill(tmpspan.begin(), tmpspan.end(), 0.0f);
1649             for(size_t i{0};i < numInput;++i)
1650             {
1651                 const float gain{B2A[c][i]};
1652                 const float *RESTRICT input{al::assume_aligned<16>(samplesIn[i].data())};
1653
1654                 auto mix_sample = [gain](const float sample, const float in) noexcept -> float
1655                 { return sample + in*gain; };
1656                 std::transform(tmpspan.begin(), tmpspan.end(), input, tmpspan.begin(),
1657                     mix_sample);
1658             }
1659             float stepCount{0.0f};
1660             for(float &sample : tmpspan)
1661             {
1662                 stepCount += 1.0f;
1663                 sample *= 1.0f - stepCount*fadeStep;
1664             }
1665
1666             auto&& filter = DualBiquad{oldpipeline.mFilter[c].Lp, oldpipeline.mFilter[c].Hp};
1667             filter.process(tmpspan, tmpspan.data());
1668             oldpipeline.mEarlyDelayIn.write(offset, c, tmpspan.cbegin(), samplesToDo);
1669         }
1670         mPipelineState = Fading;
1671     }
1672
1673     /* Process reverb for these samples. and mix them to the output. */
1674     pipeline.processEarly(offset, samplesToDo, mTempSamples, mEarlySamples);
1675     pipeline.processLate(offset, samplesToDo, mTempSamples, mLateSamples);
1676     mixOut(pipeline, samplesOut, samplesToDo);
1677
1678     if(mPipelineState != Normal)
1679     {
1680         if(mPipelineState == Cleanup)
1681         {
1682             size_t numSamples{mSampleBuffer.size()/2};
1683             size_t pipelineOffset{numSamples * (mCurrentPipeline^1)};
1684             std::fill_n(mSampleBuffer.data()+pipelineOffset, numSamples,
1685                 decltype(mSampleBuffer)::value_type{});
1686
1687             oldpipeline.clear();
1688             mPipelineState = Normal;
1689         }
1690         else
1691         {
1692             /* If this is the final mix for this old pipeline, set the target
1693              * gains to 0 to ensure a complete fade out, and set the state to
1694              * Cleanup so the next invocation cleans up the delay buffers and
1695              * filters.
1696              */
1697             if(samplesToDo >= oldpipeline.mFadeSampleCount)
1698             {
1699                 for(auto &gains : oldpipeline.mEarly.PanGain)
1700                     std::fill(std::begin(gains), std::end(gains), 0.0f);
1701                 for(auto &gains : oldpipeline.mLate.PanGain)
1702                     std::fill(std::begin(gains), std::end(gains), 0.0f);
1703                 oldpipeline.mFadeSampleCount = 0;
1704                 mPipelineState = Cleanup;
1705             }
1706             else
1707                 oldpipeline.mFadeSampleCount -= samplesToDo;
1708
1709             /* Process the old reverb for these samples. */
1710             oldpipeline.processEarly(offset, samplesToDo, mTempSamples, mEarlySamples);
1711             oldpipeline.processLate(offset, samplesToDo, mTempSamples, mLateSamples);
1712             mixOut(oldpipeline, samplesOut, samplesToDo);
1713         }
1714     }
1715
1716     mOffset = offset + samplesToDo;
1717 }
1718
1719
1720 struct ReverbStateFactory final : public EffectStateFactory {
1721     al::intrusive_ptr<EffectState> create() override
1722     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1723 };
1724
1725 struct StdReverbStateFactory final : public EffectStateFactory {
1726     al::intrusive_ptr<EffectState> create() override
1727     { return al::intrusive_ptr<EffectState>{new ReverbState{}}; }
1728 };
1729
1730 } // namespace
1731
1732 EffectStateFactory *ReverbStateFactory_getFactory()
1733 {
1734     static ReverbStateFactory ReverbFactory{};
1735     return &ReverbFactory;
1736 }
1737
1738 EffectStateFactory *StdReverbStateFactory_getFactory()
1739 {
1740     static StdReverbStateFactory ReverbFactory{};
1741     return &ReverbFactory;
1742 }