alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdio>
  33 #include <cstdlib>
  34 #include <functional>
  35 #include <iterator>
  36 #include <limits>
  37 #include <memory>
  38 #include <new>
  39 #include <stdint.h>
  40 #include <utility>
  41
  42 #include "almalloc.h"
  43 #include "alnumbers.h"
  44 #include "alnumeric.h"
  45 #include "alspan.h"
  46 #include "alstring.h"
  47 #include "atomic.h"
  48 #include "core/ambidefs.h"
  49 #include "core/async_event.h"
  50 #include "core/bformatdec.h"
  51 #include "core/bs2b.h"
  52 #include "core/bsinc_defs.h"
  53 #include "core/bsinc_tables.h"
  54 #include "core/bufferline.h"
  55 #include "core/buffer_storage.h"
  56 #include "core/context.h"
  57 #include "core/cpu_caps.h"
  58 #include "core/devformat.h"
  59 #include "core/device.h"
  60 #include "core/effects/base.h"
  61 #include "core/effectslot.h"
  62 #include "core/filters/biquad.h"
  63 #include "core/filters/nfc.h"
  64 #include "core/fpu_ctrl.h"
  65 #include "core/hrtf.h"
  66 #include "core/mastering.h"
  67 #include "core/mixer.h"
  68 #include "core/mixer/defs.h"
  69 #include "core/mixer/hrtfdefs.h"
  70 #include "core/resampler_limits.h"
  71 #include "core/uhjfilter.h"
  72 #include "core/voice.h"
  73 #include "core/voice_change.h"
  74 #include "intrusive_ptr.h"
  75 #include "opthelpers.h"
  76 #include "ringbuffer.h"
  77 #include "strutils.h"
  78 #include "threads.h"
  79 #include "vecmat.h"
  80 #include "vector.h"
  81
  82 struct CTag;
  83 #ifdef HAVE_SSE
  84 struct SSETag;
  85 #endif
  86 #ifdef HAVE_SSE2
  87 struct SSE2Tag;
  88 #endif
  89 #ifdef HAVE_SSE4_1
  90 struct SSE4Tag;
  91 #endif
  92 #ifdef HAVE_NEON
  93 struct NEONTag;
  94 #endif
  95 struct PointTag;
  96 struct LerpTag;
  97 struct CubicTag;
  98 struct BSincTag;
  99 struct FastBSincTag;
 100
 101
 102 static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 103
 104
 105 namespace {
 106
 107 using uint = unsigned int;
 108
 109 constexpr uint MaxPitch{10};
 110
 111 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
 112 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
 113     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
 114
 115 using namespace std::placeholders;
 116
 117 float InitConeScale()
 118 {
 119     float ret{1.0f};
 120     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 121     {
 122         if(al::strcasecmp(optval->c_str(), "true") == 0
 123             || strtol(optval->c_str(), nullptr, 0) == 1)
 124             ret *= 0.5f;
 125     }
 126     return ret;
 127 }
 128 /* Cone scalar */
 129 const float ConeScale{InitConeScale()};
 130
 131 /* Localized scalars for mono sources (initialized in aluInit, after
 132  * configuration is loaded).
 133  */
 134 float XScale{1.0f};
 135 float YScale{1.0f};
 136 float ZScale{1.0f};
 137
 138
 139 struct ChanMap {
 140     Channel channel;
 141     float angle;
 142     float elevation;
 143 };
 144
 145 using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
 146     const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf,
 147     HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize);
 148
 149 HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 150
 151 inline HrtfDirectMixerFunc SelectHrtfMixer(void)
 152 {
 153 #ifdef HAVE_NEON
 154     if((CPUCapFlags&CPU_CAP_NEON))
 155         return MixDirectHrtf_<NEONTag>;
 156 #endif
 157 #ifdef HAVE_SSE
 158     if((CPUCapFlags&CPU_CAP_SSE))
 159         return MixDirectHrtf_<SSETag>;
 160 #endif
 161
 162     return MixDirectHrtf_<CTag>;
 163 }
 164
 165
 166 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 167 {
 168     size_t si{BSincScaleCount - 1};
 169     float sf{0.0f};
 170
 171     if(increment > MixerFracOne)
 172     {
 173         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 174         sf = maxf(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 175         si = float2uint(sf);
 176         /* The interpolation factor is fit to this diagonally-symmetric curve
 177          * to reduce the transition ripple caused by interpolating different
 178          * scales of the sinc function.
 179          */
 180         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 181     }
 182
 183     state->sf = sf;
 184     state->m = table->m[si];
 185     state->l = (state->m/2) - 1;
 186     state->filter = table->Tab + table->filterOffset[si];
 187 }
 188
 189 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 190 {
 191     switch(resampler)
 192     {
 193     case Resampler::Point:
 194         return Resample_<PointTag,CTag>;
 195     case Resampler::Linear:
 196 #ifdef HAVE_NEON
 197         if((CPUCapFlags&CPU_CAP_NEON))
 198             return Resample_<LerpTag,NEONTag>;
 199 #endif
 200 #ifdef HAVE_SSE4_1
 201         if((CPUCapFlags&CPU_CAP_SSE4_1))
 202             return Resample_<LerpTag,SSE4Tag>;
 203 #endif
 204 #ifdef HAVE_SSE2
 205         if((CPUCapFlags&CPU_CAP_SSE2))
 206             return Resample_<LerpTag,SSE2Tag>;
 207 #endif
 208         return Resample_<LerpTag,CTag>;
 209     case Resampler::Cubic:
 210         return Resample_<CubicTag,CTag>;
 211     case Resampler::BSinc12:
 212     case Resampler::BSinc24:
 213         if(increment > MixerFracOne)
 214         {
 215 #ifdef HAVE_NEON
 216             if((CPUCapFlags&CPU_CAP_NEON))
 217                 return Resample_<BSincTag,NEONTag>;
 218 #endif
 219 #ifdef HAVE_SSE
 220             if((CPUCapFlags&CPU_CAP_SSE))
 221                 return Resample_<BSincTag,SSETag>;
 222 #endif
 223             return Resample_<BSincTag,CTag>;
 224         }
 225         /* fall-through */
 226     case Resampler::FastBSinc12:
 227     case Resampler::FastBSinc24:
 228 #ifdef HAVE_NEON
 229         if((CPUCapFlags&CPU_CAP_NEON))
 230             return Resample_<FastBSincTag,NEONTag>;
 231 #endif
 232 #ifdef HAVE_SSE
 233         if((CPUCapFlags&CPU_CAP_SSE))
 234             return Resample_<FastBSincTag,SSETag>;
 235 #endif
 236         return Resample_<FastBSincTag,CTag>;
 237     }
 238
 239     return Resample_<PointTag,CTag>;
 240 }
 241
 242 } // namespace
 243
 244 void aluInit(CompatFlagBitset flags)
 245 {
 246     MixDirectHrtf = SelectHrtfMixer();
 247     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 248     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 249     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 250 }
 251
 252
 253 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 254 {
 255     switch(resampler)
 256     {
 257     case Resampler::Point:
 258     case Resampler::Linear:
 259     case Resampler::Cubic:
 260         break;
 261     case Resampler::FastBSinc12:
 262     case Resampler::BSinc12:
 263         BsincPrepare(increment, &state->bsinc, &bsinc12);
 264         break;
 265     case Resampler::FastBSinc24:
 266     case Resampler::BSinc24:
 267         BsincPrepare(increment, &state->bsinc, &bsinc24);
 268         break;
 269     }
 270     return SelectResampler(resampler, increment);
 271 }
 272
 273
 274 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 275 {
 276     /* HRTF is stereo output only. */
 277     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 278     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 279
 280     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 281         mHrtfState->mTemp.data(), mHrtfState->mChannels.data(), mHrtfState->mIrSize, SamplesToDo);
 282 }
 283
 284 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 285 {
 286     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 287 }
 288
 289 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 290 {
 291     /* Decode with front image stablization. */
 292     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 293     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 294     const uint cidx{RealOut.ChannelIndex[FrontCenter]};
 295
 296     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer.data(), lidx, ridx, cidx,
 297         SamplesToDo);
 298 }
 299
 300 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 301 {
 302     /* UHJ is stereo output only. */
 303     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 304     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 305
 306     /* Encode to stereo-compatible 2-channel UHJ output. */
 307     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 308         Dry.Buffer.data(), SamplesToDo);
 309 }
 310
 311 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 312 {
 313     /* First, decode the ambisonic mix to the "real" output. */
 314     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 315
 316     /* BS2B is stereo output only. */
 317     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 318     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 319
 320     /* Now apply the BS2B binaural/crossfeed filter. */
 321     bs2b_cross_feed(Bs2b.get(), RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 322         SamplesToDo);
 323 }
 324
 325
 326 namespace {
 327
 328 using AmbiRotateMatrix = std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels>;
 329
 330 /* This RNG method was created based on the math found in opusdec. It's quick,
 331  * and starting with a seed value of 22222, is suitable for generating
 332  * whitenoise.
 333  */
 334 inline uint dither_rng(uint *seed) noexcept
 335 {
 336     *seed = (*seed * 96314165) + 907633515;
 337     return *seed;
 338 }
 339
 340
 341 inline auto& GetAmbiScales(AmbiScaling scaletype) noexcept
 342 {
 343     switch(scaletype)
 344     {
 345     case AmbiScaling::FuMa: return AmbiScale::FromFuMa();
 346     case AmbiScaling::SN3D: return AmbiScale::FromSN3D();
 347     case AmbiScaling::UHJ: return AmbiScale::FromUHJ();
 348     case AmbiScaling::N3D: break;
 349     }
 350     return AmbiScale::FromN3D();
 351 }
 352
 353 inline auto& GetAmbiLayout(AmbiLayout layouttype) noexcept
 354 {
 355     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa();
 356     return AmbiIndex::FromACN();
 357 }
 358
 359 inline auto& GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 360 {
 361     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D();
 362     return AmbiIndex::FromACN2D();
 363 }
 364
 365
 366 bool CalcContextParams(ContextBase *ctx)
 367 {
 368     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 369     if(!props) return false;
 370
 371     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 372     ctx->mParams.Position = pos;
 373
 374     /* AT then UP */
 375     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 376     N.normalize();
 377     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 378     V.normalize();
 379     /* Build and normalize right-vector */
 380     alu::Vector U{N.cross_product(V)};
 381     U.normalize();
 382
 383     const alu::Matrix rot{
 384         U[0], V[0], -N[0], 0.0,
 385         U[1], V[1], -N[1], 0.0,
 386         U[2], V[2], -N[2], 0.0,
 387          0.0,  0.0,   0.0, 1.0};
 388     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 389
 390     ctx->mParams.Matrix = rot;
 391     ctx->mParams.Velocity = rot * vel;
 392
 393     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 394     ctx->mParams.MetersPerUnit = props->MetersPerUnit;
 395     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 396
 397     ctx->mParams.DopplerFactor = props->DopplerFactor;
 398     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity;
 399
 400     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 401     ctx->mParams.mDistanceModel = props->mDistanceModel;
 402
 403     AtomicReplaceHead(ctx->mFreeContextProps, props);
 404     return true;
 405 }
 406
 407 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 408 {
 409     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 410     if(!props) return false;
 411
 412     /* If the effect slot target changed, clear the first sorted entry to force
 413      * a re-sort.
 414      */
 415     if(slot->Target != props->Target)
 416         *sorted_slots = nullptr;
 417     slot->Gain = props->Gain;
 418     slot->AuxSendAuto = props->AuxSendAuto;
 419     slot->Target = props->Target;
 420     slot->EffectType = props->Type;
 421     slot->mEffectProps = props->Props;
 422     if(props->Type == EffectSlotType::Reverb || props->Type == EffectSlotType::EAXReverb)
 423     {
 424         slot->RoomRolloff = props->Props.Reverb.RoomRolloffFactor;
 425         slot->DecayTime = props->Props.Reverb.DecayTime;
 426         slot->DecayLFRatio = props->Props.Reverb.DecayLFRatio;
 427         slot->DecayHFRatio = props->Props.Reverb.DecayHFRatio;
 428         slot->DecayHFLimit = props->Props.Reverb.DecayHFLimit;
 429         slot->AirAbsorptionGainHF = props->Props.Reverb.AirAbsorptionGainHF;
 430     }
 431     else
 432     {
 433         slot->RoomRolloff = 0.0f;
 434         slot->DecayTime = 0.0f;
 435         slot->DecayLFRatio = 0.0f;
 436         slot->DecayHFRatio = 0.0f;
 437         slot->DecayHFLimit = false;
 438         slot->AirAbsorptionGainHF = 1.0f;
 439     }
 440
 441     EffectState *state{props->State.release()};
 442     EffectState *oldstate{slot->mEffectState};
 443     slot->mEffectState = state;
 444
 445     /* Only release the old state if it won't get deleted, since we can't be
 446      * deleting/freeing anything in the mixer.
 447      */
 448     if(!oldstate->releaseIfNoDelete())
 449     {
 450         /* Otherwise, if it would be deleted send it off with a release event. */
 451         RingBuffer *ring{context->mAsyncEvents.get()};
 452         auto evt_vec = ring->getWriteVector();
 453         if LIKELY(evt_vec.first.len > 0)
 454         {
 455             AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 456                 AsyncEvent::ReleaseEffectState)};
 457             evt->u.mEffectState = oldstate;
 458             ring->writeAdvance(1);
 459         }
 460         else
 461         {
 462             /* If writing the event failed, the queue was probably full. Store
 463              * the old state in the property object where it can eventually be
 464              * cleaned up sometime later (not ideal, but better than blocking
 465              * or leaking).
 466              */
 467             props->State.reset(oldstate);
 468         }
 469     }
 470
 471     AtomicReplaceHead(context->mFreeEffectslotProps, props);
 472
 473     EffectTarget output;
 474     if(EffectSlot *target{slot->Target})
 475         output = EffectTarget{&target->Wet, nullptr};
 476     else
 477     {
 478         DeviceBase *device{context->mDevice};
 479         output = EffectTarget{&device->Dry, &device->RealOut};
 480     }
 481     state->update(context, slot, &slot->mEffectProps, output);
 482     return true;
 483 }
 484
 485
 486 /* Scales the given azimuth toward the side (+/- pi/2 radians) for positions in
 487  * front.
 488  */
 489 inline float ScaleAzimuthFront(float azimuth, float scale)
 490 {
 491     const float abs_azi{std::fabs(azimuth)};
 492     if(!(abs_azi >= al::numbers::pi_v<float>*0.5f))
 493         return std::copysign(minf(abs_azi*scale, al::numbers::pi_v<float>*0.5f), azimuth);
 494     return azimuth;
 495 }
 496
 497 /* Wraps the given value in radians to stay between [-pi,+pi] */
 498 inline float WrapRadians(float r)
 499 {
 500     static constexpr float Pi{al::numbers::pi_v<float>};
 501     static constexpr float Pi2{Pi*2.0f};
 502     if(r >  Pi) return std::fmod(Pi+r, Pi2) - Pi;
 503     if(r < -Pi) return Pi - std::fmod(Pi-r, Pi2);
 504     return r;
 505 }
 506
 507 /* Begin ambisonic rotation helpers.
 508  *
 509  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 510  * matrix. Higher orders, however, are more complicated. The method implemented
 511  * here is a recursive algorithm (the rotation for first-order is used to help
 512  * generate the second-order rotation, which helps generate the third-order
 513  * rotation, etc).
 514  *
 515  * Adapted from
 516  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 517  * provided under the BSD 3-Clause license.
 518  *
 519  * Copyright (c) 2015, Archontis Politis
 520  * Copyright (c) 2019, Christopher Robinson
 521  *
 522  * The u, v, and w coefficients used for generating higher-order rotations are
 523  * precomputed since they're constant. The second-order coefficients are
 524  * followed by the third-order coefficients, etc.
 525  */
 526 struct RotatorCoeffs {
 527     float u, v, w;
 528
 529     template<size_t N0, size_t N1>
 530     static std::array<RotatorCoeffs,N0+N1> ConcatArrays(const std::array<RotatorCoeffs,N0> &lhs,
 531         const std::array<RotatorCoeffs,N1> &rhs)
 532     {
 533         std::array<RotatorCoeffs,N0+N1> ret;
 534         auto iter = std::copy(lhs.cbegin(), lhs.cend(), ret.begin());
 535         std::copy(rhs.cbegin(), rhs.cend(), iter);
 536         return ret;
 537     }
 538
 539     template<int l, int num_elems=l*2+1>
 540     static std::array<RotatorCoeffs,num_elems*num_elems> GenCoeffs()
 541     {
 542         std::array<RotatorCoeffs,num_elems*num_elems> ret{};
 543         auto coeffs = ret.begin();
 544
 545         for(int m{-l};m <= l;++m)
 546         {
 547             for(int n{-l};n <= l;++n)
 548             {
 549                 // compute u,v,w terms of Eq.8.1 (Table I)
 550                 const bool d{m == 0}; // the delta function d_m0
 551                 const float denom{static_cast<float>((std::abs(n) == l) ?
 552                     (2*l) * (2*l - 1) : (l*l - n*n))};
 553
 554                 const int abs_m{std::abs(m)};
 555                 coeffs->u = std::sqrt(static_cast<float>(l*l - m*m)/denom);
 556                 coeffs->v = std::sqrt(static_cast<float>(l+abs_m-1) * static_cast<float>(l+abs_m) /
 557                     denom) * (1.0f+d) * (1.0f - 2.0f*d) * 0.5f;
 558                 coeffs->w = std::sqrt(static_cast<float>(l-abs_m-1) * static_cast<float>(l-abs_m) /
 559                     denom) * (1.0f-d) * -0.5f;
 560                 ++coeffs;
 561             }
 562         }
 563
 564         return ret;
 565     }
 566 };
 567 const auto RotatorCoeffArray = RotatorCoeffs::ConcatArrays(RotatorCoeffs::GenCoeffs<2>(),
 568     RotatorCoeffs::GenCoeffs<3>());
 569
 570 /**
 571  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 572  * coefficients, this fills in the coefficients for the higher orders up to and
 573  * including the given order. The matrix is in ACN layout.
 574  */
 575 void AmbiRotator(AmbiRotateMatrix &matrix, const int order)
 576 {
 577     /* Don't do anything for < 2nd order. */
 578     if(order < 2) return;
 579
 580     auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
 581         const AmbiRotateMatrix &R)
 582     {
 583         const float ri1{ R[static_cast<uint>(i+2)][ 1+2]};
 584         const float rim1{R[static_cast<uint>(i+2)][-1+2]};
 585         const float ri0{ R[static_cast<uint>(i+2)][ 0+2]};
 586
 587         auto vec = R[static_cast<uint>(a+l-1) + last_band].cbegin() + last_band;
 588         if(n == -l)
 589             return ri1*vec[0] + rim1*vec[static_cast<uint>(l-1)*size_t{2}];
 590         if(n == l)
 591             return ri1*vec[static_cast<uint>(l-1)*size_t{2}] - rim1*vec[0];
 592         return ri0*vec[static_cast<uint>(n+l-1)];
 593     };
 594
 595     auto U = [P](const int l, const int m, const int n, const size_t last_band,
 596         const AmbiRotateMatrix &R)
 597     {
 598         return P(0, l, m, n, last_band, R);
 599     };
 600     auto V = [P](const int l, const int m, const int n, const size_t last_band,
 601         const AmbiRotateMatrix &R)
 602     {
 603         using namespace al::numbers;
 604         if(m > 0)
 605         {
 606             const bool d{m == 1};
 607             const float p0{P( 1, l,  m-1, n, last_band, R)};
 608             const float p1{P(-1, l, -m+1, n, last_band, R)};
 609             return d ? p0*sqrt2_v<float> : (p0 - p1);
 610         }
 611         const bool d{m == -1};
 612         const float p0{P( 1, l,  m+1, n, last_band, R)};
 613         const float p1{P(-1, l, -m-1, n, last_band, R)};
 614         return d ? p1*sqrt2_v<float> : (p0 + p1);
 615     };
 616     auto W = [P](const int l, const int m, const int n, const size_t last_band,
 617         const AmbiRotateMatrix &R)
 618     {
 619         assert(m != 0);
 620         if(m > 0)
 621         {
 622             const float p0{P( 1, l,  m+1, n, last_band, R)};
 623             const float p1{P(-1, l, -m-1, n, last_band, R)};
 624             return p0 + p1;
 625         }
 626         const float p0{P( 1, l,  m-1, n, last_band, R)};
 627         const float p1{P(-1, l, -m+1, n, last_band, R)};
 628         return p0 - p1;
 629     };
 630
 631     // compute rotation matrix of each subsequent band recursively
 632     auto coeffs = RotatorCoeffArray.cbegin();
 633     size_t band_idx{4}, last_band{1};
 634     for(int l{2};l <= order;++l)
 635     {
 636         size_t y{band_idx};
 637         for(int m{-l};m <= l;++m,++y)
 638         {
 639             size_t x{band_idx};
 640             for(int n{-l};n <= l;++n,++x)
 641             {
 642                 float r{0.0f};
 643
 644                 // computes Eq.8.1
 645                 const float u{coeffs->u};
 646                 if(u != 0.0f) r += u * U(l, m, n, last_band, matrix);
 647                 const float v{coeffs->v};
 648                 if(v != 0.0f) r += v * V(l, m, n, last_band, matrix);
 649                 const float w{coeffs->w};
 650                 if(w != 0.0f) r += w * W(l, m, n, last_band, matrix);
 651
 652                 matrix[y][x] = r;
 653                 ++coeffs;
 654             }
 655         }
 656         last_band = band_idx;
 657         band_idx += static_cast<uint>(l)*size_t{2} + 1;
 658     }
 659 }
 660 /* End ambisonic rotation helpers. */
 661
 662
 663 constexpr float Deg2Rad(float x) noexcept
 664 { return static_cast<float>(al::numbers::pi / 180.0 * x); }
 665
 666 struct GainTriplet { float Base, HF, LF; };
 667
 668 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 669     const float Distance, const float Spread, const GainTriplet &DryGain,
 670     const al::span<const GainTriplet,MAX_SENDS> WetGain, EffectSlot *(&SendSlots)[MAX_SENDS],
 671     const VoiceProps *props, const ContextParams &Context, const DeviceBase *Device)
 672 {
 673     static constexpr ChanMap MonoMap[1]{
 674         { FrontCenter, 0.0f, 0.0f }
 675     }, RearMap[2]{
 676         { BackLeft,  Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 677         { BackRight, Deg2Rad( 150.0f), Deg2Rad(0.0f) }
 678     }, QuadMap[4]{
 679         { FrontLeft,  Deg2Rad( -45.0f), Deg2Rad(0.0f) },
 680         { FrontRight, Deg2Rad(  45.0f), Deg2Rad(0.0f) },
 681         { BackLeft,   Deg2Rad(-135.0f), Deg2Rad(0.0f) },
 682         { BackRight,  Deg2Rad( 135.0f), Deg2Rad(0.0f) }
 683     }, X51Map[6]{
 684         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 685         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 686         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 687         { LFE, 0.0f, 0.0f },
 688         { SideLeft,    Deg2Rad(-110.0f), Deg2Rad(0.0f) },
 689         { SideRight,   Deg2Rad( 110.0f), Deg2Rad(0.0f) }
 690     }, X61Map[7]{
 691         { FrontLeft,   Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 692         { FrontRight,  Deg2Rad( 30.0f), Deg2Rad(0.0f) },
 693         { FrontCenter, Deg2Rad(  0.0f), Deg2Rad(0.0f) },
 694         { LFE, 0.0f, 0.0f },
 695         { BackCenter,  Deg2Rad(180.0f), Deg2Rad(0.0f) },
 696         { SideLeft,    Deg2Rad(-90.0f), Deg2Rad(0.0f) },
 697         { SideRight,   Deg2Rad( 90.0f), Deg2Rad(0.0f) }
 698     }, X71Map[8]{
 699         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 700         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 701         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 702         { LFE, 0.0f, 0.0f },
 703         { BackLeft,    Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 704         { BackRight,   Deg2Rad( 150.0f), Deg2Rad(0.0f) },
 705         { SideLeft,    Deg2Rad( -90.0f), Deg2Rad(0.0f) },
 706         { SideRight,   Deg2Rad(  90.0f), Deg2Rad(0.0f) }
 707     };
 708
 709     ChanMap StereoMap[2]{
 710         { FrontLeft,  Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 711         { FrontRight, Deg2Rad( 30.0f), Deg2Rad(0.0f) }
 712     };
 713
 714     const auto Frequency = static_cast<float>(Device->Frequency);
 715     const uint NumSends{Device->NumAuxSends};
 716
 717     const size_t num_channels{voice->mChans.size()};
 718     ASSUME(num_channels > 0);
 719
 720     for(auto &chandata : voice->mChans)
 721     {
 722         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 723         chandata.mDryParams.Gains.Target.fill(0.0f);
 724         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 725             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 726     }
 727
 728     DirectMode DirectChannels{props->DirectChannels};
 729     const ChanMap *chans{nullptr};
 730     switch(voice->mFmtChannels)
 731     {
 732     case FmtMono:
 733         chans = MonoMap;
 734         /* Mono buffers are never played direct. */
 735         DirectChannels = DirectMode::Off;
 736         break;
 737
 738     case FmtStereo:
 739         if(DirectChannels == DirectMode::Off)
 740         {
 741             /* Convert counter-clockwise to clock-wise, and wrap between
 742              * [-pi,+pi].
 743              */
 744             StereoMap[0].angle = WrapRadians(-props->StereoPan[0]);
 745             StereoMap[1].angle = WrapRadians(-props->StereoPan[1]);
 746         }
 747         chans = StereoMap;
 748         break;
 749
 750     case FmtRear: chans = RearMap; break;
 751     case FmtQuad: chans = QuadMap; break;
 752     case FmtX51: chans = X51Map; break;
 753     case FmtX61: chans = X61Map; break;
 754     case FmtX71: chans = X71Map; break;
 755
 756     case FmtBFormat2D:
 757     case FmtBFormat3D:
 758     case FmtUHJ2:
 759     case FmtUHJ3:
 760     case FmtUHJ4:
 761     case FmtSuperStereo:
 762         DirectChannels = DirectMode::Off;
 763         break;
 764     }
 765
 766     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 767     if(auto *decoder{voice->mDecoder.get()})
 768         decoder->mWidthControl = minf(props->EnhWidth, 0.7f);
 769
 770     if(IsAmbisonic(voice->mFmtChannels))
 771     {
 772         /* Special handling for B-Format and UHJ sources. */
 773
 774         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 775             && voice->mFmtChannels != FmtSuperStereo)
 776         {
 777             if(!(Distance > std::numeric_limits<float>::epsilon()))
 778             {
 779                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 780                  * is what we want for FOA input. The first channel may have
 781                  * been previously re-adjusted if panned, so reset it.
 782                  */
 783                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 784             }
 785             else
 786             {
 787                 /* Clamp the distance for really close sources, to prevent
 788                  * excessive bass.
 789                  */
 790                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
 791                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 792
 793                 /* Only need to adjust the first channel of a B-Format source. */
 794                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 795             }
 796
 797             voice->mFlags.set(VoiceHasNfc);
 798         }
 799
 800         /* Panning a B-Format sound toward some direction is easy. Just pan the
 801          * first (W) channel as a normal mono sound. The angular spread is used
 802          * as a directional scalar to blend between full coverage and full
 803          * panning.
 804          */
 805         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 806             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 807
 808         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 809         {
 810             if(mode != RenderMode::Pairwise)
 811                 return CalcDirectionCoeffs({xpos, ypos, zpos}, 0.0f);
 812
 813             /* Clamp Y, in case rounding errors caused it to end up outside
 814              * of -1...+1.
 815              */
 816             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 817             /* Negate Z for right-handed coords with -Z in front. */
 818             const float az{std::atan2(xpos, -zpos)};
 819
 820             /* A scalar of 1.5 for plain stereo results in +/-60 degrees
 821              * being moved to +/-90 degrees for direct right and left
 822              * speaker responses.
 823              */
 824             return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, 0.0f);
 825         };
 826         auto coeffs = calc_coeffs(Device->mRenderMode);
 827         std::transform(coeffs.begin()+1, coeffs.end(), coeffs.begin()+1,
 828             std::bind(std::multiplies<float>{}, _1, 1.0f-coverage));
 829
 830         /* NOTE: W needs to be scaled according to channel scaling. */
 831         auto&& scales = GetAmbiScales(voice->mAmbiScaling);
 832         ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base*scales[0],
 833             voice->mChans[0].mDryParams.Gains.Target);
 834         for(uint i{0};i < NumSends;i++)
 835         {
 836             if(const EffectSlot *Slot{SendSlots[i]})
 837                 ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base*scales[0],
 838                     voice->mChans[0].mWetParams[i].Gains.Target);
 839         }
 840
 841         if(coverage > 0.0f)
 842         {
 843             /* Local B-Format sources have their XYZ channels rotated according
 844              * to the orientation.
 845              */
 846             /* AT then UP */
 847             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 848             N.normalize();
 849             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 850             V.normalize();
 851             if(!props->HeadRelative)
 852             {
 853                 N = Context.Matrix * N;
 854                 V = Context.Matrix * V;
 855             }
 856             /* Build and normalize right-vector */
 857             alu::Vector U{N.cross_product(V)};
 858             U.normalize();
 859
 860             /* Build a rotation matrix. Manually fill the zeroth- and first-
 861              * order elements, then construct the rotation for the higher
 862              * orders.
 863              */
 864             AmbiRotateMatrix shrot{};
 865             shrot[0][0] = 1.0f;
 866             shrot[1][1] =  U[0]; shrot[1][2] = -V[0]; shrot[1][3] = -N[0];
 867             shrot[2][1] = -U[1]; shrot[2][2] =  V[1]; shrot[2][3] =  N[1];
 868             shrot[3][1] =  U[2]; shrot[3][2] = -V[2]; shrot[3][3] = -N[2];
 869             AmbiRotator(shrot, static_cast<int>(minu(voice->mAmbiOrder, Device->mAmbiOrder)));
 870
 871             /* Convert the rotation matrix for input ordering and scaling, and
 872              * whether input is 2D or 3D.
 873              */
 874             const uint8_t *index_map{Is2DAmbisonic(voice->mFmtChannels) ?
 875                 GetAmbi2DLayout(voice->mAmbiLayout).data() :
 876                 GetAmbiLayout(voice->mAmbiLayout).data()};
 877
 878             static const uint8_t ChansPerOrder[MaxAmbiOrder+1]{1, 3, 5, 7,};
 879             static const uint8_t OrderOffset[MaxAmbiOrder+1]{0, 1, 4, 9,};
 880             for(size_t c{1};c < num_channels;c++)
 881             {
 882                 const size_t acn{index_map[c]};
 883                 const size_t order{AmbiIndex::OrderFromChannel()[acn]};
 884                 const size_t tocopy{ChansPerOrder[order]};
 885                 const size_t offset{OrderOffset[order]};
 886                 const float scale{scales[acn] * coverage};
 887                 auto in = shrot.cbegin() + offset;
 888
 889                 coeffs = std::array<float,MaxAmbiChannels>{};
 890                 for(size_t x{0};x < tocopy;++x)
 891                     coeffs[offset+x] = in[x][acn] * scale;
 892
 893                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
 894                     voice->mChans[c].mDryParams.Gains.Target);
 895
 896                 for(uint i{0};i < NumSends;i++)
 897                 {
 898                     if(const EffectSlot *Slot{SendSlots[i]})
 899                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 900                             voice->mChans[c].mWetParams[i].Gains.Target);
 901                 }
 902             }
 903         }
 904     }
 905     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
 906     {
 907         /* Direct source channels always play local. Skip the virtual channels
 908          * and write inputs to the matching real outputs.
 909          */
 910         voice->mDirect.Buffer = Device->RealOut.Buffer;
 911
 912         for(size_t c{0};c < num_channels;c++)
 913         {
 914             uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
 915             if(idx != INVALID_CHANNEL_INDEX)
 916                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
 917             else if(DirectChannels == DirectMode::RemixMismatch)
 918             {
 919                 auto match_channel = [chans,c](const InputRemixMap &map) noexcept -> bool
 920                 { return chans[c].channel == map.channel; };
 921                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
 922                     Device->RealOut.RemixMap.cend(), match_channel);
 923                 if(remap != Device->RealOut.RemixMap.cend())
 924                 {
 925                     for(const auto &target : remap->targets)
 926                     {
 927                         idx = GetChannelIdxByName(Device->RealOut, target.channel);
 928                         if(idx != INVALID_CHANNEL_INDEX)
 929                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base *
 930                                 target.mix;
 931                     }
 932                 }
 933             }
 934         }
 935
 936         /* Auxiliary sends still use normal channel panning since they mix to
 937          * B-Format, which can't channel-match.
 938          */
 939         for(size_t c{0};c < num_channels;c++)
 940         {
 941             const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, 0.0f);
 942
 943             for(uint i{0};i < NumSends;i++)
 944             {
 945                 if(const EffectSlot *Slot{SendSlots[i]})
 946                     ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 947                         voice->mChans[c].mWetParams[i].Gains.Target);
 948             }
 949         }
 950     }
 951     else if(Device->mRenderMode == RenderMode::Hrtf)
 952     {
 953         /* Full HRTF rendering. Skip the virtual channels and render to the
 954          * real outputs.
 955          */
 956         voice->mDirect.Buffer = Device->RealOut.Buffer;
 957
 958         if(Distance > std::numeric_limits<float>::epsilon())
 959         {
 960             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 961             const float az{std::atan2(xpos, -zpos)};
 962
 963             /* Get the HRIR coefficients and delays just once, for the given
 964              * source direction.
 965              */
 966             GetHrtfCoeffs(Device->mHrtf.get(), ev, az, Distance, Spread,
 967                 voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
 968                 voice->mChans[0].mDryParams.Hrtf.Target.Delay);
 969             voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
 970
 971             /* Remaining channels use the same results as the first. */
 972             for(size_t c{1};c < num_channels;c++)
 973             {
 974                 /* Skip LFE */
 975                 if(chans[c].channel == LFE) continue;
 976                 voice->mChans[c].mDryParams.Hrtf.Target = voice->mChans[0].mDryParams.Hrtf.Target;
 977             }
 978
 979             /* Calculate the directional coefficients once, which apply to all
 980              * input channels of the source sends.
 981              */
 982             const auto coeffs = CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
 983
 984             for(size_t c{0};c < num_channels;c++)
 985             {
 986                 /* Skip LFE */
 987                 if(chans[c].channel == LFE)
 988                     continue;
 989                 for(uint i{0};i < NumSends;i++)
 990                 {
 991                     if(const EffectSlot *Slot{SendSlots[i]})
 992                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 993                             voice->mChans[c].mWetParams[i].Gains.Target);
 994                 }
 995             }
 996         }
 997         else
 998         {
 999             /* Local sources on HRTF play with each channel panned to its
1000              * relative location around the listener, providing "virtual
1001              * speaker" responses.
1002              */
1003             for(size_t c{0};c < num_channels;c++)
1004             {
1005                 /* Skip LFE */
1006                 if(chans[c].channel == LFE)
1007                     continue;
1008
1009                 /* Get the HRIR coefficients and delays for this channel
1010                  * position.
1011                  */
1012                 GetHrtfCoeffs(Device->mHrtf.get(), chans[c].elevation, chans[c].angle,
1013                     std::numeric_limits<float>::infinity(), Spread,
1014                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1015                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1016                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1017
1018                 /* Normal panning for auxiliary sends. */
1019                 const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, Spread);
1020
1021                 for(uint i{0};i < NumSends;i++)
1022                 {
1023                     if(const EffectSlot *Slot{SendSlots[i]})
1024                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1025                             voice->mChans[c].mWetParams[i].Gains.Target);
1026                 }
1027             }
1028         }
1029
1030         voice->mFlags.set(VoiceHasHrtf);
1031     }
1032     else
1033     {
1034         /* Non-HRTF rendering. Use normal panning to the output. */
1035
1036         if(Distance > std::numeric_limits<float>::epsilon())
1037         {
1038             /* Calculate NFC filter coefficient if needed. */
1039             if(Device->AvgSpeakerDist > 0.0f)
1040             {
1041                 /* Clamp the distance for really close sources, to prevent
1042                  * excessive bass.
1043                  */
1044                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
1045                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1046
1047                 /* Adjust NFC filters. */
1048                 for(size_t c{0};c < num_channels;c++)
1049                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1050
1051                 voice->mFlags.set(VoiceHasNfc);
1052             }
1053
1054             /* Calculate the directional coefficients once, which apply to all
1055              * input channels.
1056              */
1057             auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1058             {
1059                 if(mode != RenderMode::Pairwise)
1060                     return CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
1061                 const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
1062                 const float az{std::atan2(xpos, -zpos)};
1063                 return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, Spread);
1064             };
1065             const auto coeffs = calc_coeffs(Device->mRenderMode);
1066
1067             for(size_t c{0};c < num_channels;c++)
1068             {
1069                 /* Special-case LFE */
1070                 if(chans[c].channel == LFE)
1071                 {
1072                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1073                     {
1074                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1075                         if(idx != INVALID_CHANNEL_INDEX)
1076                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1077                     }
1078                     continue;
1079                 }
1080
1081                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1082                     voice->mChans[c].mDryParams.Gains.Target);
1083                 for(uint i{0};i < NumSends;i++)
1084                 {
1085                     if(const EffectSlot *Slot{SendSlots[i]})
1086                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1087                             voice->mChans[c].mWetParams[i].Gains.Target);
1088                 }
1089             }
1090         }
1091         else
1092         {
1093             if(Device->AvgSpeakerDist > 0.0f)
1094             {
1095                 /* If the source distance is 0, simulate a plane-wave by using
1096                  * infinite distance, which results in a w0 of 0.
1097                  */
1098                 static constexpr float w0{0.0f};
1099                 for(size_t c{0};c < num_channels;c++)
1100                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1101
1102                 voice->mFlags.set(VoiceHasNfc);
1103             }
1104
1105             for(size_t c{0};c < num_channels;c++)
1106             {
1107                 /* Special-case LFE */
1108                 if(chans[c].channel == LFE)
1109                 {
1110                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1111                     {
1112                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1113                         if(idx != INVALID_CHANNEL_INDEX)
1114                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1115                     }
1116                     continue;
1117                 }
1118
1119                 const auto coeffs = CalcAngleCoeffs((Device->mRenderMode == RenderMode::Pairwise)
1120                     ? ScaleAzimuthFront(chans[c].angle, 3.0f) : chans[c].angle,
1121                     chans[c].elevation, Spread);
1122
1123                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1124                     voice->mChans[c].mDryParams.Gains.Target);
1125                 for(uint i{0};i < NumSends;i++)
1126                 {
1127                     if(const EffectSlot *Slot{SendSlots[i]})
1128                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1129                             voice->mChans[c].mWetParams[i].Gains.Target);
1130                 }
1131             }
1132         }
1133     }
1134
1135     {
1136         const float hfNorm{props->Direct.HFReference / Frequency};
1137         const float lfNorm{props->Direct.LFReference / Frequency};
1138
1139         voice->mDirect.FilterType = AF_None;
1140         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1141         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1142
1143         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1144         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1145         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1146         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1147         for(size_t c{1};c < num_channels;c++)
1148         {
1149             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1150             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1151         }
1152     }
1153     for(uint i{0};i < NumSends;i++)
1154     {
1155         const float hfNorm{props->Send[i].HFReference / Frequency};
1156         const float lfNorm{props->Send[i].LFReference / Frequency};
1157
1158         voice->mSend[i].FilterType = AF_None;
1159         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1160         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1161
1162         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1163         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1164         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1165         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1166         for(size_t c{1};c < num_channels;c++)
1167         {
1168             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1169             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1170         }
1171     }
1172 }
1173
1174 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1175 {
1176     const DeviceBase *Device{context->mDevice};
1177     EffectSlot *SendSlots[MAX_SENDS];
1178
1179     voice->mDirect.Buffer = Device->Dry.Buffer;
1180     for(uint i{0};i < Device->NumAuxSends;i++)
1181     {
1182         SendSlots[i] = props->Send[i].Slot;
1183         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1184         {
1185             SendSlots[i] = nullptr;
1186             voice->mSend[i].Buffer = {};
1187         }
1188         else
1189             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1190     }
1191
1192     /* Calculate the stepping value */
1193     const auto Pitch = static_cast<float>(voice->mFrequency) /
1194         static_cast<float>(Device->Frequency) * props->Pitch;
1195     if(Pitch > float{MaxPitch})
1196         voice->mStep = MaxPitch<<MixerFracBits;
1197     else
1198         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1199     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1200
1201     /* Calculate gains */
1202     GainTriplet DryGain;
1203     DryGain.Base  = minf(clampf(props->Gain, props->MinGain, props->MaxGain) * props->Direct.Gain *
1204         context->mParams.Gain, GainMixMax);
1205     DryGain.HF = props->Direct.GainHF;
1206     DryGain.LF = props->Direct.GainLF;
1207     GainTriplet WetGain[MAX_SENDS];
1208     for(uint i{0};i < Device->NumAuxSends;i++)
1209     {
1210         WetGain[i].Base = minf(clampf(props->Gain, props->MinGain, props->MaxGain) *
1211             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1212         WetGain[i].HF = props->Send[i].GainHF;
1213         WetGain[i].LF = props->Send[i].GainLF;
1214     }
1215
1216     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1217         context->mParams, Device);
1218 }
1219
1220 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1221 {
1222     const DeviceBase *Device{context->mDevice};
1223     const uint NumSends{Device->NumAuxSends};
1224
1225     /* Set mixing buffers and get send parameters. */
1226     voice->mDirect.Buffer = Device->Dry.Buffer;
1227     EffectSlot *SendSlots[MAX_SENDS];
1228     uint UseDryAttnForRoom{0};
1229     for(uint i{0};i < NumSends;i++)
1230     {
1231         SendSlots[i] = props->Send[i].Slot;
1232         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1233             SendSlots[i] = nullptr;
1234         else if(!SendSlots[i]->AuxSendAuto)
1235         {
1236             /* If the slot's auxiliary send auto is off, the data sent to the
1237              * effect slot is the same as the dry path, sans filter effects.
1238              */
1239             UseDryAttnForRoom |= 1u<<i;
1240         }
1241
1242         if(!SendSlots[i])
1243             voice->mSend[i].Buffer = {};
1244         else
1245             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1246     }
1247
1248     /* Transform source to listener space (convert to head relative) */
1249     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1250     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1251     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1252     if(!props->HeadRelative)
1253     {
1254         /* Transform source vectors */
1255         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1256         Velocity = context->mParams.Matrix * Velocity;
1257         Direction = context->mParams.Matrix * Direction;
1258     }
1259     else
1260     {
1261         /* Offset the source velocity to be relative of the listener velocity */
1262         Velocity += context->mParams.Velocity;
1263     }
1264
1265     const bool directional{Direction.normalize() > 0.0f};
1266     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1267     const float Distance{ToSource.normalize()};
1268
1269     /* Calculate distance attenuation */
1270     float ClampedDist{Distance};
1271     float DryGainBase{props->Gain};
1272     float WetGainBase{props->Gain};
1273
1274     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1275         : context->mParams.mDistanceModel)
1276     {
1277         case DistanceModel::InverseClamped:
1278             if(props->MaxDistance < props->RefDistance) break;
1279             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1280             /*fall-through*/
1281         case DistanceModel::Inverse:
1282             if(props->RefDistance > 0.0f)
1283             {
1284                 float dist{lerpf(props->RefDistance, ClampedDist, props->RolloffFactor)};
1285                 if(dist > 0.0f) DryGainBase *= props->RefDistance / dist;
1286
1287                 dist = lerpf(props->RefDistance, ClampedDist, props->RoomRolloffFactor);
1288                 if(dist > 0.0f) WetGainBase *= props->RefDistance / dist;
1289             }
1290             break;
1291
1292         case DistanceModel::LinearClamped:
1293             if(props->MaxDistance < props->RefDistance) break;
1294             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1295             /*fall-through*/
1296         case DistanceModel::Linear:
1297             if(props->MaxDistance != props->RefDistance)
1298             {
1299                 float attn{(ClampedDist-props->RefDistance) /
1300                     (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1301                 DryGainBase *= maxf(1.0f - attn, 0.0f);
1302
1303                 attn = (ClampedDist-props->RefDistance) /
1304                     (props->MaxDistance-props->RefDistance) * props->RoomRolloffFactor;
1305                 WetGainBase *= maxf(1.0f - attn, 0.0f);
1306             }
1307             break;
1308
1309         case DistanceModel::ExponentClamped:
1310             if(props->MaxDistance < props->RefDistance) break;
1311             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1312             /*fall-through*/
1313         case DistanceModel::Exponent:
1314             if(ClampedDist > 0.0f && props->RefDistance > 0.0f)
1315             {
1316                 const float dist_ratio{ClampedDist/props->RefDistance};
1317                 DryGainBase *= std::pow(dist_ratio, -props->RolloffFactor);
1318                 WetGainBase *= std::pow(dist_ratio, -props->RoomRolloffFactor);
1319             }
1320             break;
1321
1322         case DistanceModel::Disable:
1323             break;
1324     }
1325
1326     /* Calculate directional soundcones */
1327     float ConeHF{1.0f}, WetConeHF{1.0f};
1328     if(directional && props->InnerAngle < 360.0f)
1329     {
1330         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1331         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1332
1333         float ConeGain{1.0f};
1334         if(Angle >= props->OuterAngle)
1335         {
1336             ConeGain = props->OuterGain;
1337             ConeHF = lerpf(1.0f, props->OuterGainHF, props->DryGainHFAuto);
1338         }
1339         else if(Angle >= props->InnerAngle)
1340         {
1341             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1342             ConeGain = lerpf(1.0f, props->OuterGain, scale);
1343             ConeHF = lerpf(1.0f, props->OuterGainHF, scale * props->DryGainHFAuto);
1344         }
1345
1346         DryGainBase *= ConeGain;
1347         WetGainBase *= lerpf(1.0f, ConeGain, props->WetGainAuto);
1348
1349         WetConeHF = lerpf(1.0f, ConeHF, props->WetGainHFAuto);
1350     }
1351
1352     /* Apply gain and frequency filters */
1353     DryGainBase = clampf(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1354     WetGainBase = clampf(WetGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1355
1356     GainTriplet DryGain{};
1357     DryGain.Base = minf(DryGainBase * props->Direct.Gain, GainMixMax);
1358     DryGain.HF = ConeHF * props->Direct.GainHF;
1359     DryGain.LF = props->Direct.GainLF;
1360     GainTriplet WetGain[MAX_SENDS]{};
1361     for(uint i{0};i < NumSends;i++)
1362     {
1363         /* If this effect slot's Auxiliary Send Auto is off, then use the dry
1364          * path distance and cone attenuation, otherwise use the wet (room)
1365          * path distance and cone attenuation. The send filter is used instead
1366          * of the direct filter, regardless.
1367          */
1368         const bool use_room{!(UseDryAttnForRoom&(1u<<i))};
1369         const float gain{use_room ? WetGainBase : DryGainBase};
1370         WetGain[i].Base = minf(gain * props->Send[i].Gain, GainMixMax);
1371         WetGain[i].HF = (use_room ? WetConeHF : ConeHF) * props->Send[i].GainHF;
1372         WetGain[i].LF = props->Send[i].GainLF;
1373     }
1374
1375     /* Distance-based air absorption and initial send decay. */
1376     if(likely(Distance > props->RefDistance))
1377     {
1378         const float distance_base{(Distance-props->RefDistance) * props->RolloffFactor};
1379         const float absorption{distance_base * context->mParams.MetersPerUnit *
1380             props->AirAbsorptionFactor};
1381         if(absorption > std::numeric_limits<float>::epsilon())
1382         {
1383             const float hfattn{std::pow(context->mParams.AirAbsorptionGainHF, absorption)};
1384             DryGain.HF *= hfattn;
1385             for(uint i{0u};i < NumSends;++i)
1386                 WetGain[i].HF *= hfattn;
1387         }
1388
1389         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1390          * adjustment is applied to the send gains.
1391          */
1392         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1393         {
1394             if(!SendSlots[i])
1395                 continue;
1396
1397             auto calc_attenuation = [](float distance, float refdist, float rolloff) noexcept
1398             {
1399                 const float dist{lerpf(refdist, distance, rolloff)};
1400                 if(dist > refdist) return refdist / dist;
1401                 return 1.0f;
1402             };
1403
1404             /* The reverb effect's room rolloff factor always applies to an
1405              * inverse distance rolloff model.
1406              */
1407             WetGain[i].Base *= calc_attenuation(Distance, props->RefDistance,
1408                 SendSlots[i]->RoomRolloff);
1409
1410             /* If this effect slot's Auxiliary Send Auto is off, don't apply
1411              * the automatic initial reverb decay (should the reverb's room
1412              * rolloff still apply?).
1413              */
1414             if(!SendSlots[i]->AuxSendAuto)
1415                 continue;
1416
1417             GainTriplet DecayDistance;
1418             /* Calculate the distances to where this effect's decay reaches
1419              * -60dB.
1420              */
1421             DecayDistance.Base = SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec;
1422             DecayDistance.LF = DecayDistance.Base * SendSlots[i]->DecayLFRatio;
1423             DecayDistance.HF = DecayDistance.Base * SendSlots[i]->DecayHFRatio;
1424             if(SendSlots[i]->DecayHFLimit)
1425             {
1426                 const float airAbsorption{SendSlots[i]->AirAbsorptionGainHF};
1427                 if(airAbsorption < 1.0f)
1428                 {
1429                     /* Calculate the distance to where this effect's air
1430                      * absorption reaches -60dB, and limit the effect's HF
1431                      * decay distance (so it doesn't take any longer to decay
1432                      * than the air would allow).
1433                      */
1434                     static constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
1435                     const float absorb_dist{log10_decaygain / std::log10(airAbsorption)};
1436                     DecayDistance.HF = minf(absorb_dist, DecayDistance.HF);
1437                 }
1438             }
1439
1440             const float baseAttn = calc_attenuation(Distance, props->RefDistance,
1441                 props->RolloffFactor);
1442
1443             /* Apply a decay-time transformation to the wet path, based on the
1444              * source distance. The initial decay of the reverb effect is
1445              * calculated and applied to the wet path.
1446              */
1447             const float fact{distance_base / DecayDistance.Base};
1448             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1449             WetGain[i].Base *= gain;
1450
1451             if(gain > 0.0f)
1452             {
1453                 const float hffact{distance_base / DecayDistance.HF};
1454                 const float gainhf{std::pow(ReverbDecayGain, hffact)*(1.0f-baseAttn) + baseAttn};
1455                 WetGain[i].HF *= minf(gainhf/gain, 1.0f);
1456                 const float lffact{distance_base / DecayDistance.LF};
1457                 const float gainlf{std::pow(ReverbDecayGain, lffact)*(1.0f-baseAttn) + baseAttn};
1458                 WetGain[i].LF *= minf(gainlf/gain, 1.0f);
1459             }
1460         }
1461     }
1462
1463
1464     /* Initial source pitch */
1465     float Pitch{props->Pitch};
1466
1467     /* Calculate velocity-based doppler effect */
1468     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1469     if(DopplerFactor > 0.0f)
1470     {
1471         const alu::Vector &lvelocity = context->mParams.Velocity;
1472         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1473         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1474
1475         const float SpeedOfSound{context->mParams.SpeedOfSound};
1476         if(!(vls < SpeedOfSound))
1477         {
1478             /* Listener moving away from the source at the speed of sound.
1479              * Sound waves can't catch it.
1480              */
1481             Pitch = 0.0f;
1482         }
1483         else if(!(vss < SpeedOfSound))
1484         {
1485             /* Source moving toward the listener at the speed of sound. Sound
1486              * waves bunch up to extreme frequencies.
1487              */
1488             Pitch = std::numeric_limits<float>::infinity();
1489         }
1490         else
1491         {
1492             /* Source and listener movement is nominal. Calculate the proper
1493              * doppler shift.
1494              */
1495             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1496         }
1497     }
1498
1499     /* Adjust pitch based on the buffer and output frequencies, and calculate
1500      * fixed-point stepping value.
1501      */
1502     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1503     if(Pitch > float{MaxPitch})
1504         voice->mStep = MaxPitch<<MixerFracBits;
1505     else
1506         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1507     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1508
1509     float spread{0.0f};
1510     if(props->Radius > Distance)
1511         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1512     else if(Distance > 0.0f)
1513         spread = std::asin(props->Radius/Distance) * 2.0f;
1514
1515     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1516         Distance*context->mParams.MetersPerUnit, spread, DryGain, WetGain, SendSlots, props,
1517         context->mParams, Device);
1518 }
1519
1520 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1521 {
1522     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1523     if(!props && !force) return;
1524
1525     if(props)
1526     {
1527         voice->mProps = *props;
1528
1529         AtomicReplaceHead(context->mFreeVoiceProps, props);
1530     }
1531
1532     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1533             && !IsAmbisonic(voice->mFmtChannels))
1534         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1535         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1536         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1537     else
1538         CalcAttnSourceParams(voice, &voice->mProps, context);
1539 }
1540
1541
1542 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1543 {
1544     RingBuffer *ring{context->mAsyncEvents.get()};
1545     auto evt_vec = ring->getWriteVector();
1546     if(evt_vec.first.len < 1) return;
1547
1548     AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
1549         AsyncEvent::SourceStateChange)};
1550     evt->u.srcstate.id = id;
1551     switch(state)
1552     {
1553     case VChangeState::Reset:
1554         evt->u.srcstate.state = AsyncEvent::SrcState::Reset;
1555         break;
1556     case VChangeState::Stop:
1557         evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
1558         break;
1559     case VChangeState::Play:
1560         evt->u.srcstate.state = AsyncEvent::SrcState::Play;
1561         break;
1562     case VChangeState::Pause:
1563         evt->u.srcstate.state = AsyncEvent::SrcState::Pause;
1564         break;
1565     /* Shouldn't happen. */
1566     case VChangeState::Restart:
1567         ASSUME(0);
1568     }
1569
1570     ring->writeAdvance(1);
1571 }
1572
1573 void ProcessVoiceChanges(ContextBase *ctx)
1574 {
1575     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1576     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1577     if(!next) return;
1578
1579     const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
1580     do {
1581         cur = next;
1582
1583         bool sendevt{false};
1584         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1585         {
1586             if(Voice *voice{cur->mVoice})
1587             {
1588                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1589                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1590                 /* A source ID indicates the voice was playing or paused, which
1591                  * gets a reset/stop event.
1592                  */
1593                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1594                 Voice::State oldvstate{Voice::Playing};
1595                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1596                     std::memory_order_relaxed, std::memory_order_acquire);
1597                 voice->mPendingChange.store(false, std::memory_order_release);
1598             }
1599             /* Reset state change events are always sent, even if the voice is
1600              * already stopped or even if there is no voice.
1601              */
1602             sendevt |= (cur->mState == VChangeState::Reset);
1603         }
1604         else if(cur->mState == VChangeState::Pause)
1605         {
1606             Voice *voice{cur->mVoice};
1607             Voice::State oldvstate{Voice::Playing};
1608             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1609                 std::memory_order_release, std::memory_order_acquire);
1610         }
1611         else if(cur->mState == VChangeState::Play)
1612         {
1613             /* NOTE: When playing a voice, sending a source state change event
1614              * depends if there's an old voice to stop and if that stop is
1615              * successful. If there is no old voice, a playing event is always
1616              * sent. If there is an old voice, an event is sent only if the
1617              * voice is already stopped.
1618              */
1619             if(Voice *oldvoice{cur->mOldVoice})
1620             {
1621                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1622                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1623                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1624                 Voice::State oldvstate{Voice::Playing};
1625                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1626                     std::memory_order_relaxed, std::memory_order_acquire);
1627                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1628             }
1629             else
1630                 sendevt = true;
1631
1632             Voice *voice{cur->mVoice};
1633             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1634         }
1635         else if(cur->mState == VChangeState::Restart)
1636         {
1637             /* Restarting a voice never sends a source change event. */
1638             Voice *oldvoice{cur->mOldVoice};
1639             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1640             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1641             /* If there's no sourceID, the old voice finished so don't start
1642              * the new one at its new offset.
1643              */
1644             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1645             {
1646                 /* Otherwise, set the voice to stopping if it's not already (it
1647                  * might already be, if paused), and play the new voice as
1648                  * appropriate.
1649                  */
1650                 Voice::State oldvstate{Voice::Playing};
1651                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1652                     std::memory_order_relaxed, std::memory_order_acquire);
1653
1654                 Voice *voice{cur->mVoice};
1655                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1656                     : Voice::Stopped, std::memory_order_release);
1657             }
1658             oldvoice->mPendingChange.store(false, std::memory_order_release);
1659         }
1660         if(sendevt && (enabledevt&AsyncEvent::SourceStateChange))
1661             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1662
1663         next = cur->mNext.load(std::memory_order_acquire);
1664     } while(next);
1665     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1666 }
1667
1668 void ProcessParamUpdates(ContextBase *ctx, const EffectSlotArray &slots,
1669     const al::span<Voice*> voices)
1670 {
1671     ProcessVoiceChanges(ctx);
1672
1673     IncrementRef(ctx->mUpdateCount);
1674     if LIKELY(!ctx->mHoldUpdates.load(std::memory_order_acquire))
1675     {
1676         bool force{CalcContextParams(ctx)};
1677         auto sorted_slots = const_cast<EffectSlot**>(slots.data() + slots.size());
1678         for(EffectSlot *slot : slots)
1679             force |= CalcEffectSlotParams(slot, sorted_slots, ctx);
1680
1681         for(Voice *voice : voices)
1682         {
1683             /* Only update voices that have a source. */
1684             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1685                 CalcSourceParams(voice, ctx, force);
1686         }
1687     }
1688     IncrementRef(ctx->mUpdateCount);
1689 }
1690
1691 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1692 {
1693     ASSUME(SamplesToDo > 0);
1694
1695     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
1696     {
1697         const EffectSlotArray &auxslots = *ctx->mActiveAuxSlots.load(std::memory_order_acquire);
1698         const al::span<Voice*> voices{ctx->getVoicesSpanAcquired()};
1699
1700         /* Process pending propery updates for objects on the context. */
1701         ProcessParamUpdates(ctx, auxslots, voices);
1702
1703         /* Clear auxiliary effect slot mixing buffers. */
1704         for(EffectSlot *slot : auxslots)
1705         {
1706             for(auto &buffer : slot->Wet.Buffer)
1707                 buffer.fill(0.0f);
1708         }
1709
1710         /* Process voices that have a playing source. */
1711         for(Voice *voice : voices)
1712         {
1713             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1714             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1715                 voice->mix(vstate, ctx, SamplesToDo);
1716         }
1717
1718         /* Process effects. */
1719         if(const size_t num_slots{auxslots.size()})
1720         {
1721             auto slots = auxslots.data();
1722             auto slots_end = slots + num_slots;
1723
1724             /* Sort the slots into extra storage, so that effect slots come
1725              * before their effect slot target (or their targets' target).
1726              */
1727             const al::span<EffectSlot*> sorted_slots{const_cast<EffectSlot**>(slots_end),
1728                 num_slots};
1729             /* Skip sorting if it has already been done. */
1730             if(!sorted_slots[0])
1731             {
1732                 /* First, copy the slots to the sorted list, then partition the
1733                  * sorted list so that all slots without a target slot go to
1734                  * the end.
1735                  */
1736                 std::copy(slots, slots_end, sorted_slots.begin());
1737                 auto split_point = std::partition(sorted_slots.begin(), sorted_slots.end(),
1738                     [](const EffectSlot *slot) noexcept -> bool
1739                     { return slot->Target != nullptr; });
1740                 /* There must be at least one slot without a slot target. */
1741                 assert(split_point != sorted_slots.end());
1742
1743                 /* Simple case: no more than 1 slot has a target slot. Either
1744                  * all slots go right to the output, or the remaining one must
1745                  * target an already-partitioned slot.
1746                  */
1747                 if(split_point - sorted_slots.begin() > 1)
1748                 {
1749                     /* At least two slots target other slots. Starting from the
1750                      * back of the sorted list, continue partitioning the front
1751                      * of the list given each target until all targets are
1752                      * accounted for. This ensures all slots without a target
1753                      * go last, all slots directly targeting those last slots
1754                      * go second-to-last, all slots directly targeting those
1755                      * second-last slots go third-to-last, etc.
1756                      */
1757                     auto next_target = sorted_slots.end();
1758                     do {
1759                         /* This shouldn't happen, but if there's unsorted slots
1760                          * left that don't target any sorted slots, they can't
1761                          * contribute to the output, so leave them.
1762                          */
1763                         if UNLIKELY(next_target == split_point)
1764                             break;
1765
1766                         --next_target;
1767                         split_point = std::partition(sorted_slots.begin(), split_point,
1768                             [next_target](const EffectSlot *slot) noexcept -> bool
1769                             { return slot->Target != *next_target; });
1770                     } while(split_point - sorted_slots.begin() > 1);
1771                 }
1772             }
1773
1774             for(const EffectSlot *slot : sorted_slots)
1775             {
1776                 EffectState *state{slot->mEffectState};
1777                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
1778             }
1779         }
1780
1781         /* Signal the event handler if there are any events to read. */
1782         RingBuffer *ring{ctx->mAsyncEvents.get()};
1783         if(ring->readSpace() > 0)
1784             ctx->mEventSem.post();
1785     }
1786 }
1787
1788
1789 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
1790     const DistanceComp::ChanData *distcomp)
1791 {
1792     ASSUME(SamplesToDo > 0);
1793
1794     for(auto &chanbuffer : Samples)
1795     {
1796         const float gain{distcomp->Gain};
1797         const size_t base{distcomp->Length};
1798         float *distbuf{al::assume_aligned<16>(distcomp->Buffer)};
1799         ++distcomp;
1800
1801         if(base < 1)
1802             continue;
1803
1804         float *inout{al::assume_aligned<16>(chanbuffer.data())};
1805         auto inout_end = inout + SamplesToDo;
1806         if LIKELY(SamplesToDo >= base)
1807         {
1808             auto delay_end = std::rotate(inout, inout_end - base, inout_end);
1809             std::swap_ranges(inout, delay_end, distbuf);
1810         }
1811         else
1812         {
1813             auto delay_start = std::swap_ranges(inout, inout_end, distbuf);
1814             std::rotate(distbuf, delay_start, distbuf + base);
1815         }
1816         std::transform(inout, inout_end, inout, std::bind(std::multiplies<float>{}, _1, gain));
1817     }
1818 }
1819
1820 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
1821     const float quant_scale, const size_t SamplesToDo)
1822 {
1823     ASSUME(SamplesToDo > 0);
1824
1825     /* Dithering. Generate whitenoise (uniform distribution of random values
1826      * between -1 and +1) and add it to the sample values, after scaling up to
1827      * the desired quantization depth amd before rounding.
1828      */
1829     const float invscale{1.0f / quant_scale};
1830     uint seed{*dither_seed};
1831     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
1832     {
1833         float val{sample * quant_scale};
1834         uint rng0{dither_rng(&seed)};
1835         uint rng1{dither_rng(&seed)};
1836         val += static_cast<float>(rng0*(1.0/UINT_MAX) - rng1*(1.0/UINT_MAX));
1837         return fast_roundf(val) * invscale;
1838     };
1839     for(FloatBufferLine &inout : Samples)
1840         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
1841     *dither_seed = seed;
1842 }
1843
1844
1845 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
1846  * chokes on that given the inline specializations.
1847  */
1848 template<typename T>
1849 inline T SampleConv(float) noexcept;
1850
1851 template<> inline float SampleConv(float val) noexcept
1852 { return val; }
1853 template<> inline int32_t SampleConv(float val) noexcept
1854 {
1855     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
1856      * This means a normalized float has at most 25 bits of signed precision.
1857      * When scaling and clamping for a signed 32-bit integer, these following
1858      * values are the best a float can give.
1859      */
1860     return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
1861 }
1862 template<> inline int16_t SampleConv(float val) noexcept
1863 { return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }
1864 template<> inline int8_t SampleConv(float val) noexcept
1865 { return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }
1866
1867 /* Define unsigned output variations. */
1868 template<> inline uint32_t SampleConv(float val) noexcept
1869 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
1870 template<> inline uint16_t SampleConv(float val) noexcept
1871 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
1872 template<> inline uint8_t SampleConv(float val) noexcept
1873 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
1874
1875 template<DevFmtType T>
1876 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
1877     const size_t SamplesToDo, const size_t FrameStep)
1878 {
1879     ASSUME(FrameStep > 0);
1880     ASSUME(SamplesToDo > 0);
1881
1882     DevFmtType_t<T> *outbase{static_cast<DevFmtType_t<T>*>(OutBuffer) + Offset*FrameStep};
1883     size_t c{0};
1884     for(const FloatBufferLine &inbuf : InBuffer)
1885     {
1886         DevFmtType_t<T> *out{outbase++};
1887         auto conv_sample = [FrameStep,&out](const float s) noexcept -> void
1888         {
1889             *out = SampleConv<DevFmtType_t<T>>(s);
1890             out += FrameStep;
1891         };
1892         std::for_each(inbuf.begin(), inbuf.begin()+SamplesToDo, conv_sample);
1893         ++c;
1894     }
1895     if(const size_t extra{FrameStep - c})
1896     {
1897         const auto silence = SampleConv<DevFmtType_t<T>>(0.0f);
1898         for(size_t i{0};i < SamplesToDo;++i)
1899         {
1900             std::fill_n(outbase, extra, silence);
1901             outbase += FrameStep;
1902         }
1903     }
1904 }
1905
1906 } // namespace
1907
1908 uint DeviceBase::renderSamples(const uint numSamples)
1909 {
1910     const uint samplesToDo{minu(numSamples, BufferLineSize)};
1911
1912     /* Clear main mixing buffers. */
1913     for(FloatBufferLine &buffer : MixBuffer)
1914         buffer.fill(0.0f);
1915
1916     /* Increment the mix count at the start (lsb should now be 1). */
1917     IncrementRef(MixCount);
1918
1919     /* Process and mix each context's sources and effects. */
1920     ProcessContexts(this, samplesToDo);
1921
1922     /* Increment the clock time. Every second's worth of samples is converted
1923      * and added to clock base so that large sample counts don't overflow
1924      * during conversion. This also guarantees a stable conversion.
1925      */
1926     SamplesDone += samplesToDo;
1927     ClockBase += std::chrono::seconds{SamplesDone / Frequency};
1928     SamplesDone %= Frequency;
1929
1930     /* Increment the mix count at the end (lsb should now be 0). */
1931     IncrementRef(MixCount);
1932
1933     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
1934      * (Ambisonic decode, UHJ encode, etc).
1935      */
1936     postProcess(samplesToDo);
1937
1938     /* Apply compression, limiting sample amplitude if needed or desired. */
1939     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer.data());
1940
1941     /* Apply delays and attenuation for mismatched speaker distances. */
1942     if(ChannelDelays)
1943         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels.data());
1944
1945     /* Apply dithering. The compressor should have left enough headroom for the
1946      * dither noise to not saturate.
1947      */
1948     if(DitherDepth > 0.0f)
1949         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
1950
1951     return samplesToDo;
1952 }
1953
1954 void DeviceBase::renderSamples(const al::span<float*> outBuffers, const uint numSamples)
1955 {
1956     FPUCtl mixer_mode{};
1957     uint total{0};
1958     while(const uint todo{numSamples - total})
1959     {
1960         const uint samplesToDo{renderSamples(todo)};
1961
1962         auto *srcbuf = RealOut.Buffer.data();
1963         for(auto *dstbuf : outBuffers)
1964         {
1965             std::copy_n(srcbuf->data(), samplesToDo, dstbuf + total);
1966             ++srcbuf;
1967         }
1968
1969         total += samplesToDo;
1970     }
1971 }
1972
1973 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
1974 {
1975     FPUCtl mixer_mode{};
1976     uint total{0};
1977     while(const uint todo{numSamples - total})
1978     {
1979         const uint samplesToDo{renderSamples(todo)};
1980
1981         if LIKELY(outBuffer)
1982         {
1983             /* Finally, interleave and convert samples, writing to the device's
1984              * output buffer.
1985              */
1986             switch(FmtType)
1987             {
1988 #define HANDLE_WRITE(T) case T:                                               \
1989     Write<T>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
1990             HANDLE_WRITE(DevFmtByte)
1991             HANDLE_WRITE(DevFmtUByte)
1992             HANDLE_WRITE(DevFmtShort)
1993             HANDLE_WRITE(DevFmtUShort)
1994             HANDLE_WRITE(DevFmtInt)
1995             HANDLE_WRITE(DevFmtUInt)
1996             HANDLE_WRITE(DevFmtFloat)
1997 #undef HANDLE_WRITE
1998             }
1999         }
2000
2001         total += samplesToDo;
2002     }
2003 }
2004
2005 void DeviceBase::handleDisconnect(const char *msg, ...)
2006 {
2007     IncrementRef(MixCount);
2008     if(Connected.exchange(false, std::memory_order_acq_rel))
2009     {
2010         AsyncEvent evt{AsyncEvent::Disconnected};
2011
2012         va_list args;
2013         va_start(args, msg);
2014         int msglen{vsnprintf(evt.u.disconnect.msg, sizeof(evt.u.disconnect.msg), msg, args)};
2015         va_end(args);
2016
2017         if(msglen < 0 || static_cast<size_t>(msglen) >= sizeof(evt.u.disconnect.msg))
2018             evt.u.disconnect.msg[sizeof(evt.u.disconnect.msg)-1] = 0;
2019
2020         for(ContextBase *ctx : *mContexts.load())
2021         {
2022             const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
2023             if((enabledevt&AsyncEvent::Disconnected))
2024             {
2025                 RingBuffer *ring{ctx->mAsyncEvents.get()};
2026                 auto evt_data = ring->getWriteVector().first;
2027                 if(evt_data.len > 0)
2028                 {
2029                     al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2030                     ring->writeAdvance(1);
2031                     ctx->mEventSem.post();
2032                 }
2033             }
2034
2035             if(!ctx->mStopVoicesOnDisconnect)
2036             {
2037                 ProcessVoiceChanges(ctx);
2038                 continue;
2039             }
2040
2041             auto voicelist = ctx->getVoicesSpanAcquired();
2042             auto stop_voice = [](Voice *voice) -> void
2043             {
2044                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2045                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2046                 voice->mSourceID.store(0u, std::memory_order_relaxed);
2047                 voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2048             };
2049             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2050         }
2051     }
2052     IncrementRef(MixCount);
2053 }