alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdio>
  33 #include <cstdlib>
  34 #include <functional>
  35 #include <iterator>
  36 #include <limits>
  37 #include <memory>
  38 #include <new>
  39 #include <stdint.h>
  40 #include <utility>
  41
  42 #include "almalloc.h"
  43 #include "alnumbers.h"
  44 #include "alnumeric.h"
  45 #include "alspan.h"
  46 #include "alstring.h"
  47 #include "atomic.h"
  48 #include "core/ambidefs.h"
  49 #include "core/async_event.h"
  50 #include "core/bformatdec.h"
  51 #include "core/bs2b.h"
  52 #include "core/bsinc_defs.h"
  53 #include "core/bsinc_tables.h"
  54 #include "core/bufferline.h"
  55 #include "core/buffer_storage.h"
  56 #include "core/context.h"
  57 #include "core/cpu_caps.h"
  58 #include "core/devformat.h"
  59 #include "core/device.h"
  60 #include "core/effects/base.h"
  61 #include "core/effectslot.h"
  62 #include "core/filters/biquad.h"
  63 #include "core/filters/nfc.h"
  64 #include "core/fpu_ctrl.h"
  65 #include "core/hrtf.h"
  66 #include "core/mastering.h"
  67 #include "core/mixer.h"
  68 #include "core/mixer/defs.h"
  69 #include "core/mixer/hrtfdefs.h"
  70 #include "core/resampler_limits.h"
  71 #include "core/uhjfilter.h"
  72 #include "core/voice.h"
  73 #include "core/voice_change.h"
  74 #include "intrusive_ptr.h"
  75 #include "opthelpers.h"
  76 #include "ringbuffer.h"
  77 #include "strutils.h"
  78 #include "threads.h"
  79 #include "vecmat.h"
  80 #include "vector.h"
  81
  82 struct CTag;
  83 #ifdef HAVE_SSE
  84 struct SSETag;
  85 #endif
  86 #ifdef HAVE_SSE2
  87 struct SSE2Tag;
  88 #endif
  89 #ifdef HAVE_SSE4_1
  90 struct SSE4Tag;
  91 #endif
  92 #ifdef HAVE_NEON
  93 struct NEONTag;
  94 #endif
  95 struct PointTag;
  96 struct LerpTag;
  97 struct CubicTag;
  98 struct BSincTag;
  99 struct FastBSincTag;
 100
 101
 102 static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 103
 104
 105 namespace {
 106
 107 using uint = unsigned int;
 108
 109 constexpr uint MaxPitch{10};
 110
 111 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
 112 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
 113     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
 114
 115 using namespace std::placeholders;
 116
 117 float InitConeScale()
 118 {
 119     float ret{1.0f};
 120     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 121     {
 122         if(al::strcasecmp(optval->c_str(), "true") == 0
 123             || strtol(optval->c_str(), nullptr, 0) == 1)
 124             ret *= 0.5f;
 125     }
 126     return ret;
 127 }
 128 /* Cone scalar */
 129 const float ConeScale{InitConeScale()};
 130
 131 /* Localized scalars for mono sources (initialized in aluInit, after
 132  * configuration is loaded).
 133  */
 134 float XScale{1.0f};
 135 float YScale{1.0f};
 136 float ZScale{1.0f};
 137
 138 } // namespace
 139
 140 namespace {
 141
 142 struct ChanMap {
 143     Channel channel;
 144     float angle;
 145     float elevation;
 146 };
 147
 148 using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
 149     const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf,
 150     HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize);
 151
 152 HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 153
 154 inline HrtfDirectMixerFunc SelectHrtfMixer(void)
 155 {
 156 #ifdef HAVE_NEON
 157     if((CPUCapFlags&CPU_CAP_NEON))
 158         return MixDirectHrtf_<NEONTag>;
 159 #endif
 160 #ifdef HAVE_SSE
 161     if((CPUCapFlags&CPU_CAP_SSE))
 162         return MixDirectHrtf_<SSETag>;
 163 #endif
 164
 165     return MixDirectHrtf_<CTag>;
 166 }
 167
 168
 169 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 170 {
 171     size_t si{BSincScaleCount - 1};
 172     float sf{0.0f};
 173
 174     if(increment > MixerFracOne)
 175     {
 176         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 177         sf = maxf(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 178         si = float2uint(sf);
 179         /* The interpolation factor is fit to this diagonally-symmetric curve
 180          * to reduce the transition ripple caused by interpolating different
 181          * scales of the sinc function.
 182          */
 183         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 184     }
 185
 186     state->sf = sf;
 187     state->m = table->m[si];
 188     state->l = (state->m/2) - 1;
 189     state->filter = table->Tab + table->filterOffset[si];
 190 }
 191
 192 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 193 {
 194     switch(resampler)
 195     {
 196     case Resampler::Point:
 197         return Resample_<PointTag,CTag>;
 198     case Resampler::Linear:
 199 #ifdef HAVE_NEON
 200         if((CPUCapFlags&CPU_CAP_NEON))
 201             return Resample_<LerpTag,NEONTag>;
 202 #endif
 203 #ifdef HAVE_SSE4_1
 204         if((CPUCapFlags&CPU_CAP_SSE4_1))
 205             return Resample_<LerpTag,SSE4Tag>;
 206 #endif
 207 #ifdef HAVE_SSE2
 208         if((CPUCapFlags&CPU_CAP_SSE2))
 209             return Resample_<LerpTag,SSE2Tag>;
 210 #endif
 211         return Resample_<LerpTag,CTag>;
 212     case Resampler::Cubic:
 213         return Resample_<CubicTag,CTag>;
 214     case Resampler::BSinc12:
 215     case Resampler::BSinc24:
 216         if(increment <= MixerFracOne)
 217         {
 218             /* fall-through */
 219         case Resampler::FastBSinc12:
 220         case Resampler::FastBSinc24:
 221 #ifdef HAVE_NEON
 222             if((CPUCapFlags&CPU_CAP_NEON))
 223                 return Resample_<FastBSincTag,NEONTag>;
 224 #endif
 225 #ifdef HAVE_SSE
 226             if((CPUCapFlags&CPU_CAP_SSE))
 227                 return Resample_<FastBSincTag,SSETag>;
 228 #endif
 229             return Resample_<FastBSincTag,CTag>;
 230         }
 231 #ifdef HAVE_NEON
 232         if((CPUCapFlags&CPU_CAP_NEON))
 233             return Resample_<BSincTag,NEONTag>;
 234 #endif
 235 #ifdef HAVE_SSE
 236         if((CPUCapFlags&CPU_CAP_SSE))
 237             return Resample_<BSincTag,SSETag>;
 238 #endif
 239         return Resample_<BSincTag,CTag>;
 240     }
 241
 242     return Resample_<PointTag,CTag>;
 243 }
 244
 245 } // namespace
 246
 247 void aluInit(CompatFlagBitset flags)
 248 {
 249     MixDirectHrtf = SelectHrtfMixer();
 250     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 251     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 252     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 253 }
 254
 255
 256 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 257 {
 258     switch(resampler)
 259     {
 260     case Resampler::Point:
 261     case Resampler::Linear:
 262     case Resampler::Cubic:
 263         break;
 264     case Resampler::FastBSinc12:
 265     case Resampler::BSinc12:
 266         BsincPrepare(increment, &state->bsinc, &bsinc12);
 267         break;
 268     case Resampler::FastBSinc24:
 269     case Resampler::BSinc24:
 270         BsincPrepare(increment, &state->bsinc, &bsinc24);
 271         break;
 272     }
 273     return SelectResampler(resampler, increment);
 274 }
 275
 276
 277 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 278 {
 279     /* HRTF is stereo output only. */
 280     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 281     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 282
 283     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 284         mHrtfState->mTemp.data(), mHrtfState->mChannels.data(), mHrtfState->mIrSize, SamplesToDo);
 285 }
 286
 287 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 288 {
 289     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 290 }
 291
 292 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 293 {
 294     /* Decode with front image stablization. */
 295     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 296     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 297     const uint cidx{RealOut.ChannelIndex[FrontCenter]};
 298
 299     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer.data(), lidx, ridx, cidx,
 300         SamplesToDo);
 301 }
 302
 303 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 304 {
 305     /* UHJ is stereo output only. */
 306     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 307     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 308
 309     /* Encode to stereo-compatible 2-channel UHJ output. */
 310     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 311         Dry.Buffer.data(), SamplesToDo);
 312 }
 313
 314 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 315 {
 316     /* First, decode the ambisonic mix to the "real" output. */
 317     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 318
 319     /* BS2B is stereo output only. */
 320     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 321     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 322
 323     /* Now apply the BS2B binaural/crossfeed filter. */
 324     bs2b_cross_feed(Bs2b.get(), RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 325         SamplesToDo);
 326 }
 327
 328
 329 namespace {
 330
 331 /* This RNG method was created based on the math found in opusdec. It's quick,
 332  * and starting with a seed value of 22222, is suitable for generating
 333  * whitenoise.
 334  */
 335 inline uint dither_rng(uint *seed) noexcept
 336 {
 337     *seed = (*seed * 96314165) + 907633515;
 338     return *seed;
 339 }
 340
 341
 342 inline auto& GetAmbiScales(AmbiScaling scaletype) noexcept
 343 {
 344     switch(scaletype)
 345     {
 346     case AmbiScaling::FuMa: return AmbiScale::FromFuMa();
 347     case AmbiScaling::SN3D: return AmbiScale::FromSN3D();
 348     case AmbiScaling::UHJ: return AmbiScale::FromUHJ();
 349     case AmbiScaling::N3D: break;
 350     }
 351     return AmbiScale::FromN3D();
 352 }
 353
 354 inline auto& GetAmbiLayout(AmbiLayout layouttype) noexcept
 355 {
 356     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa();
 357     return AmbiIndex::FromACN();
 358 }
 359
 360 inline auto& GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 361 {
 362     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D();
 363     return AmbiIndex::FromACN2D();
 364 }
 365
 366
 367 bool CalcContextParams(ContextBase *ctx)
 368 {
 369     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 370     if(!props) return false;
 371
 372     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 373     ctx->mParams.Position = pos;
 374
 375     /* AT then UP */
 376     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 377     N.normalize();
 378     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 379     V.normalize();
 380     /* Build and normalize right-vector */
 381     alu::Vector U{N.cross_product(V)};
 382     U.normalize();
 383
 384     const alu::Matrix rot{
 385         U[0], V[0], -N[0], 0.0,
 386         U[1], V[1], -N[1], 0.0,
 387         U[2], V[2], -N[2], 0.0,
 388          0.0,  0.0,   0.0, 1.0};
 389     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 390
 391     ctx->mParams.Matrix = rot;
 392     ctx->mParams.Velocity = rot * vel;
 393
 394     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 395     ctx->mParams.MetersPerUnit = props->MetersPerUnit;
 396     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 397
 398     ctx->mParams.DopplerFactor = props->DopplerFactor;
 399     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity;
 400
 401     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 402     ctx->mParams.mDistanceModel = props->mDistanceModel;
 403
 404     AtomicReplaceHead(ctx->mFreeContextProps, props);
 405     return true;
 406 }
 407
 408 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 409 {
 410     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 411     if(!props) return false;
 412
 413     /* If the effect slot target changed, clear the first sorted entry to force
 414      * a re-sort.
 415      */
 416     if(slot->Target != props->Target)
 417         *sorted_slots = nullptr;
 418     slot->Gain = props->Gain;
 419     slot->AuxSendAuto = props->AuxSendAuto;
 420     slot->Target = props->Target;
 421     slot->EffectType = props->Type;
 422     slot->mEffectProps = props->Props;
 423     if(props->Type == EffectSlotType::Reverb || props->Type == EffectSlotType::EAXReverb)
 424     {
 425         slot->RoomRolloff = props->Props.Reverb.RoomRolloffFactor;
 426         slot->DecayTime = props->Props.Reverb.DecayTime;
 427         slot->DecayLFRatio = props->Props.Reverb.DecayLFRatio;
 428         slot->DecayHFRatio = props->Props.Reverb.DecayHFRatio;
 429         slot->DecayHFLimit = props->Props.Reverb.DecayHFLimit;
 430         slot->AirAbsorptionGainHF = props->Props.Reverb.AirAbsorptionGainHF;
 431     }
 432     else
 433     {
 434         slot->RoomRolloff = 0.0f;
 435         slot->DecayTime = 0.0f;
 436         slot->DecayLFRatio = 0.0f;
 437         slot->DecayHFRatio = 0.0f;
 438         slot->DecayHFLimit = false;
 439         slot->AirAbsorptionGainHF = 1.0f;
 440     }
 441
 442     EffectState *state{props->State.release()};
 443     EffectState *oldstate{slot->mEffectState};
 444     slot->mEffectState = state;
 445
 446     /* Only release the old state if it won't get deleted, since we can't be
 447      * deleting/freeing anything in the mixer.
 448      */
 449     if(!oldstate->releaseIfNoDelete())
 450     {
 451         /* Otherwise, if it would be deleted send it off with a release event. */
 452         RingBuffer *ring{context->mAsyncEvents.get()};
 453         auto evt_vec = ring->getWriteVector();
 454         if LIKELY(evt_vec.first.len > 0)
 455         {
 456             AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 457                 AsyncEvent::ReleaseEffectState)};
 458             evt->u.mEffectState = oldstate;
 459             ring->writeAdvance(1);
 460         }
 461         else
 462         {
 463             /* If writing the event failed, the queue was probably full. Store
 464              * the old state in the property object where it can eventually be
 465              * cleaned up sometime later (not ideal, but better than blocking
 466              * or leaking).
 467              */
 468             props->State.reset(oldstate);
 469         }
 470     }
 471
 472     AtomicReplaceHead(context->mFreeEffectslotProps, props);
 473
 474     EffectTarget output;
 475     if(EffectSlot *target{slot->Target})
 476         output = EffectTarget{&target->Wet, nullptr};
 477     else
 478     {
 479         DeviceBase *device{context->mDevice};
 480         output = EffectTarget{&device->Dry, &device->RealOut};
 481     }
 482     state->update(context, slot, &slot->mEffectProps, output);
 483     return true;
 484 }
 485
 486
 487 /* Scales the given azimuth toward the side (+/- pi/2 radians) for positions in
 488  * front.
 489  */
 490 inline float ScaleAzimuthFront(float azimuth, float scale)
 491 {
 492     const float abs_azi{std::fabs(azimuth)};
 493     if(!(abs_azi >= al::numbers::pi_v<float>*0.5f))
 494         return std::copysign(minf(abs_azi*scale, al::numbers::pi_v<float>*0.5f), azimuth);
 495     return azimuth;
 496 }
 497
 498 /* Wraps the given value in radians to stay between [-pi,+pi] */
 499 inline float WrapRadians(float r)
 500 {
 501     static constexpr float Pi{al::numbers::pi_v<float>};
 502     static constexpr float Pi2{Pi*2.0f};
 503     if(r >  Pi) return std::fmod(Pi+r, Pi2) - Pi;
 504     if(r < -Pi) return Pi - std::fmod(Pi-r, Pi2);
 505     return r;
 506 }
 507
 508 /* Begin ambisonic rotation helpers.
 509  *
 510  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 511  * matrix. Higher orders, however, are more complicated. The method implemented
 512  * here is a recursive algorithm (the rotation for first-order is used to help
 513  * generate the second-order rotation, which helps generate the third-order
 514  * rotation, etc).
 515  *
 516  * Adapted from
 517  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 518  * provided under the BSD 3-Clause license.
 519  *
 520  * Copyright (c) 2015, Archontis Politis
 521  * Copyright (c) 2019, Christopher Robinson
 522  *
 523  * The u, v, and w coefficients used for generating higher-order rotations are
 524  * precomputed since they're constant. The second-order coefficients are
 525  * followed by the third-order coefficients, etc.
 526  */
 527 struct RotatorCoeffs {
 528     float u, v, w;
 529
 530     template<size_t N0, size_t N1>
 531     static std::array<RotatorCoeffs,N0+N1> ConcatArrays(const std::array<RotatorCoeffs,N0> &lhs,
 532         const std::array<RotatorCoeffs,N1> &rhs)
 533     {
 534         std::array<RotatorCoeffs,N0+N1> ret;
 535         auto iter = std::copy(lhs.cbegin(), lhs.cend(), ret.begin());
 536         std::copy(rhs.cbegin(), rhs.cend(), iter);
 537         return ret;
 538     }
 539
 540     template<int l, int num_elems=l*2+1>
 541     static std::array<RotatorCoeffs,num_elems*num_elems> GenCoeffs()
 542     {
 543         std::array<RotatorCoeffs,num_elems*num_elems> ret{};
 544         auto coeffs = ret.begin();
 545
 546         for(int m{-l};m <= l;++m)
 547         {
 548             for(int n{-l};n <= l;++n)
 549             {
 550                 // compute u,v,w terms of Eq.8.1 (Table I)
 551                 const bool d{m == 0}; // the delta function d_m0
 552                 const float denom{static_cast<float>((std::abs(n) == l) ?
 553                     (2*l) * (2*l - 1) : (l*l - n*n))};
 554
 555                 const int abs_m{std::abs(m)};
 556                 coeffs->u = std::sqrt(static_cast<float>(l*l - m*m)/denom);
 557                 coeffs->v = std::sqrt(static_cast<float>(l+abs_m-1) * static_cast<float>(l+abs_m) /
 558                     denom) * (1.0f+d) * (1.0f - 2.0f*d) * 0.5f;
 559                 coeffs->w = std::sqrt(static_cast<float>(l-abs_m-1) * static_cast<float>(l-abs_m) /
 560                     denom) * (1.0f-d) * -0.5f;
 561                 ++coeffs;
 562             }
 563         }
 564
 565         return ret;
 566     }
 567 };
 568 const auto RotatorCoeffArray = RotatorCoeffs::ConcatArrays(RotatorCoeffs::GenCoeffs<2>(),
 569     RotatorCoeffs::GenCoeffs<3>());
 570
 571 /**
 572  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 573  * coefficients, this fills in the coefficients for the higher orders up to and
 574  * including the given order. The matrix is in ACN layout.
 575  */
 576 void AmbiRotator(std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &matrix,
 577     const int order)
 578 {
 579     /* Don't do anything for < 2nd order. */
 580     if(order < 2) return;
 581
 582     auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
 583         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 584     {
 585         const float ri1{ R[static_cast<uint>(i+2)][ 1+2]};
 586         const float rim1{R[static_cast<uint>(i+2)][-1+2]};
 587         const float ri0{ R[static_cast<uint>(i+2)][ 0+2]};
 588
 589         auto vec = R[static_cast<uint>(a+l-1) + last_band].cbegin() + last_band;
 590         if(n == -l)
 591             return ri1*vec[0] + rim1*vec[static_cast<uint>(l-1)*size_t{2}];
 592         if(n == l)
 593             return ri1*vec[static_cast<uint>(l-1)*size_t{2}] - rim1*vec[0];
 594         return ri0*vec[static_cast<uint>(n+l-1)];
 595     };
 596
 597     auto U = [P](const int l, const int m, const int n, const size_t last_band,
 598         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 599     {
 600         return P(0, l, m, n, last_band, R);
 601     };
 602     auto V = [P](const int l, const int m, const int n, const size_t last_band,
 603         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 604     {
 605         using namespace al::numbers;
 606         if(m > 0)
 607         {
 608             const bool d{m == 1};
 609             const float p0{P( 1, l,  m-1, n, last_band, R)};
 610             const float p1{P(-1, l, -m+1, n, last_band, R)};
 611             return d ? p0*sqrt2_v<float> : (p0 - p1);
 612         }
 613         const bool d{m == -1};
 614         const float p0{P( 1, l,  m+1, n, last_band, R)};
 615         const float p1{P(-1, l, -m-1, n, last_band, R)};
 616         return d ? p1*sqrt2_v<float> : (p0 + p1);
 617     };
 618     auto W = [P](const int l, const int m, const int n, const size_t last_band,
 619         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 620     {
 621         assert(m != 0);
 622         if(m > 0)
 623         {
 624             const float p0{P( 1, l,  m+1, n, last_band, R)};
 625             const float p1{P(-1, l, -m-1, n, last_band, R)};
 626             return p0 + p1;
 627         }
 628         const float p0{P( 1, l,  m-1, n, last_band, R)};
 629         const float p1{P(-1, l, -m+1, n, last_band, R)};
 630         return p0 - p1;
 631     };
 632
 633     // compute rotation matrix of each subsequent band recursively
 634     auto coeffs = RotatorCoeffArray.cbegin();
 635     size_t band_idx{4}, last_band{1};
 636     for(int l{2};l <= order;++l)
 637     {
 638         size_t y{band_idx};
 639         for(int m{-l};m <= l;++m,++y)
 640         {
 641             size_t x{band_idx};
 642             for(int n{-l};n <= l;++n,++x)
 643             {
 644                 float r{0.0f};
 645
 646                 // computes Eq.8.1
 647                 const float u{coeffs->u};
 648                 if(u != 0.0f) r += u * U(l, m, n, last_band, matrix);
 649                 const float v{coeffs->v};
 650                 if(v != 0.0f) r += v * V(l, m, n, last_band, matrix);
 651                 const float w{coeffs->w};
 652                 if(w != 0.0f) r += w * W(l, m, n, last_band, matrix);
 653
 654                 matrix[y][x] = r;
 655                 ++coeffs;
 656             }
 657         }
 658         last_band = band_idx;
 659         band_idx += static_cast<uint>(l)*size_t{2} + 1;
 660     }
 661 }
 662 /* End ambisonic rotation helpers. */
 663
 664
 665 constexpr float Deg2Rad(float x) noexcept
 666 { return static_cast<float>(al::numbers::pi / 180.0 * x); }
 667
 668 struct GainTriplet { float Base, HF, LF; };
 669
 670 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 671     const float Distance, const float Spread, const GainTriplet &DryGain,
 672     const al::span<const GainTriplet,MAX_SENDS> WetGain, EffectSlot *(&SendSlots)[MAX_SENDS],
 673     const VoiceProps *props, const ContextParams &Context, const DeviceBase *Device)
 674 {
 675     static constexpr ChanMap MonoMap[1]{
 676         { FrontCenter, 0.0f, 0.0f }
 677     }, RearMap[2]{
 678         { BackLeft,  Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 679         { BackRight, Deg2Rad( 150.0f), Deg2Rad(0.0f) }
 680     }, QuadMap[4]{
 681         { FrontLeft,  Deg2Rad( -45.0f), Deg2Rad(0.0f) },
 682         { FrontRight, Deg2Rad(  45.0f), Deg2Rad(0.0f) },
 683         { BackLeft,   Deg2Rad(-135.0f), Deg2Rad(0.0f) },
 684         { BackRight,  Deg2Rad( 135.0f), Deg2Rad(0.0f) }
 685     }, X51Map[6]{
 686         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 687         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 688         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 689         { LFE, 0.0f, 0.0f },
 690         { SideLeft,    Deg2Rad(-110.0f), Deg2Rad(0.0f) },
 691         { SideRight,   Deg2Rad( 110.0f), Deg2Rad(0.0f) }
 692     }, X61Map[7]{
 693         { FrontLeft,   Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 694         { FrontRight,  Deg2Rad( 30.0f), Deg2Rad(0.0f) },
 695         { FrontCenter, Deg2Rad(  0.0f), Deg2Rad(0.0f) },
 696         { LFE, 0.0f, 0.0f },
 697         { BackCenter,  Deg2Rad(180.0f), Deg2Rad(0.0f) },
 698         { SideLeft,    Deg2Rad(-90.0f), Deg2Rad(0.0f) },
 699         { SideRight,   Deg2Rad( 90.0f), Deg2Rad(0.0f) }
 700     }, X71Map[8]{
 701         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 702         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 703         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 704         { LFE, 0.0f, 0.0f },
 705         { BackLeft,    Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 706         { BackRight,   Deg2Rad( 150.0f), Deg2Rad(0.0f) },
 707         { SideLeft,    Deg2Rad( -90.0f), Deg2Rad(0.0f) },
 708         { SideRight,   Deg2Rad(  90.0f), Deg2Rad(0.0f) }
 709     };
 710
 711     ChanMap StereoMap[2]{
 712         { FrontLeft,  Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 713         { FrontRight, Deg2Rad( 30.0f), Deg2Rad(0.0f) }
 714     };
 715
 716     const auto Frequency = static_cast<float>(Device->Frequency);
 717     const uint NumSends{Device->NumAuxSends};
 718
 719     const size_t num_channels{voice->mChans.size()};
 720     ASSUME(num_channels > 0);
 721
 722     for(auto &chandata : voice->mChans)
 723     {
 724         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 725         chandata.mDryParams.Gains.Target.fill(0.0f);
 726         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 727             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 728     }
 729
 730     DirectMode DirectChannels{props->DirectChannels};
 731     const ChanMap *chans{nullptr};
 732     switch(voice->mFmtChannels)
 733     {
 734     case FmtMono:
 735         chans = MonoMap;
 736         /* Mono buffers are never played direct. */
 737         DirectChannels = DirectMode::Off;
 738         break;
 739
 740     case FmtStereo:
 741         if(DirectChannels == DirectMode::Off)
 742         {
 743             /* Convert counter-clockwise to clock-wise, and wrap between
 744              * [-pi,+pi].
 745              */
 746             StereoMap[0].angle = WrapRadians(-props->StereoPan[0]);
 747             StereoMap[1].angle = WrapRadians(-props->StereoPan[1]);
 748         }
 749         chans = StereoMap;
 750         break;
 751
 752     case FmtRear: chans = RearMap; break;
 753     case FmtQuad: chans = QuadMap; break;
 754     case FmtX51: chans = X51Map; break;
 755     case FmtX61: chans = X61Map; break;
 756     case FmtX71: chans = X71Map; break;
 757
 758     case FmtBFormat2D:
 759     case FmtBFormat3D:
 760     case FmtUHJ2:
 761     case FmtUHJ3:
 762     case FmtUHJ4:
 763     case FmtSuperStereo:
 764         DirectChannels = DirectMode::Off;
 765         break;
 766     }
 767
 768     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 769     if(auto *decoder{voice->mDecoder.get()})
 770         decoder->mWidthControl = props->EnhWidth;
 771
 772     if(IsAmbisonic(voice->mFmtChannels))
 773     {
 774         /* Special handling for B-Format and UHJ sources. */
 775
 776         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 777             && voice->mFmtChannels != FmtSuperStereo)
 778         {
 779             if(!(Distance > std::numeric_limits<float>::epsilon()))
 780             {
 781                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 782                  * is what we want for FOA input. The first channel may have
 783                  * been previously re-adjusted if panned, so reset it.
 784                  */
 785                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 786             }
 787             else
 788             {
 789                 /* Clamp the distance for really close sources, to prevent
 790                  * excessive bass.
 791                  */
 792                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
 793                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 794
 795                 /* Only need to adjust the first channel of a B-Format source. */
 796                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 797             }
 798
 799             voice->mFlags.set(VoiceHasNfc);
 800         }
 801
 802         /* Panning a B-Format sound toward some direction is easy. Just pan the
 803          * first (W) channel as a normal mono sound. The angular spread is used
 804          * as a directional scalar to blend between full coverage and full
 805          * panning.
 806          */
 807         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 808             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 809
 810         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 811         {
 812             if(mode != RenderMode::Pairwise)
 813                 return CalcDirectionCoeffs({xpos, ypos, zpos}, 0.0f);
 814
 815             /* Clamp Y, in case rounding errors caused it to end up outside
 816              * of -1...+1.
 817              */
 818             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 819             /* Negate Z for right-handed coords with -Z in front. */
 820             const float az{std::atan2(xpos, -zpos)};
 821
 822             /* A scalar of 1.5 for plain stereo results in +/-60 degrees
 823              * being moved to +/-90 degrees for direct right and left
 824              * speaker responses.
 825              */
 826             return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, 0.0f);
 827         };
 828         auto coeffs = calc_coeffs(Device->mRenderMode);
 829         std::transform(coeffs.begin()+1, coeffs.end(), coeffs.begin()+1,
 830             std::bind(std::multiplies<float>{}, _1, 1.0f-coverage));
 831
 832         /* NOTE: W needs to be scaled according to channel scaling. */
 833         auto&& scales = GetAmbiScales(voice->mAmbiScaling);
 834         ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base*scales[0],
 835             voice->mChans[0].mDryParams.Gains.Target);
 836         for(uint i{0};i < NumSends;i++)
 837         {
 838             if(const EffectSlot *Slot{SendSlots[i]})
 839                 ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base*scales[0],
 840                     voice->mChans[0].mWetParams[i].Gains.Target);
 841         }
 842
 843         if(coverage > 0.0f)
 844         {
 845             /* Local B-Format sources have their XYZ channels rotated according
 846              * to the orientation.
 847              */
 848             /* AT then UP */
 849             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 850             N.normalize();
 851             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 852             V.normalize();
 853             if(!props->HeadRelative)
 854             {
 855                 N = Context.Matrix * N;
 856                 V = Context.Matrix * V;
 857             }
 858             /* Build and normalize right-vector */
 859             alu::Vector U{N.cross_product(V)};
 860             U.normalize();
 861
 862             /* Build a rotation matrix. Manually fill the zeroth- and first-
 863              * order elements, then construct the rotation for the higher
 864              * orders.
 865              */
 866             std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> shrot{};
 867             shrot[0][0] = 1.0f;
 868             shrot[1][1] =  U[0]; shrot[1][2] = -V[0]; shrot[1][3] = -N[0];
 869             shrot[2][1] = -U[1]; shrot[2][2] =  V[1]; shrot[2][3] =  N[1];
 870             shrot[3][1] =  U[2]; shrot[3][2] = -V[2]; shrot[3][3] = -N[2];
 871             AmbiRotator(shrot, static_cast<int>(minu(voice->mAmbiOrder, Device->mAmbiOrder)));
 872
 873             /* Convert the rotation matrix for input ordering and scaling, and
 874              * whether input is 2D or 3D.
 875              */
 876             const uint8_t *index_map{Is2DAmbisonic(voice->mFmtChannels) ?
 877                 GetAmbi2DLayout(voice->mAmbiLayout).data() :
 878                 GetAmbiLayout(voice->mAmbiLayout).data()};
 879
 880             static const uint8_t ChansPerOrder[MaxAmbiOrder+1]{1, 3, 5, 7,};
 881             static const uint8_t OrderOffset[MaxAmbiOrder+1]{0, 1, 4, 9,};
 882             for(size_t c{1};c < num_channels;c++)
 883             {
 884                 const size_t acn{index_map[c]};
 885                 const size_t order{AmbiIndex::OrderFromChannel()[acn]};
 886                 const size_t tocopy{ChansPerOrder[order]};
 887                 const size_t offset{OrderOffset[order]};
 888                 const float scale{scales[acn] * coverage};
 889                 auto in = shrot.cbegin() + offset;
 890
 891                 coeffs = std::array<float,MaxAmbiChannels>{};
 892                 for(size_t x{0};x < tocopy;++x)
 893                     coeffs[offset+x] = in[x][acn] * scale;
 894
 895                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
 896                     voice->mChans[c].mDryParams.Gains.Target);
 897
 898                 for(uint i{0};i < NumSends;i++)
 899                 {
 900                     if(const EffectSlot *Slot{SendSlots[i]})
 901                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 902                             voice->mChans[c].mWetParams[i].Gains.Target);
 903                 }
 904             }
 905         }
 906     }
 907     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
 908     {
 909         /* Direct source channels always play local. Skip the virtual channels
 910          * and write inputs to the matching real outputs.
 911          */
 912         voice->mDirect.Buffer = Device->RealOut.Buffer;
 913
 914         for(size_t c{0};c < num_channels;c++)
 915         {
 916             uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
 917             if(idx != INVALID_CHANNEL_INDEX)
 918                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
 919             else if(DirectChannels == DirectMode::RemixMismatch)
 920             {
 921                 auto match_channel = [chans,c](const InputRemixMap &map) noexcept -> bool
 922                 { return chans[c].channel == map.channel; };
 923                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
 924                     Device->RealOut.RemixMap.cend(), match_channel);
 925                 if(remap != Device->RealOut.RemixMap.cend())
 926                 {
 927                     for(const auto &target : remap->targets)
 928                     {
 929                         idx = GetChannelIdxByName(Device->RealOut, target.channel);
 930                         if(idx != INVALID_CHANNEL_INDEX)
 931                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base *
 932                                 target.mix;
 933                     }
 934                 }
 935             }
 936         }
 937
 938         /* Auxiliary sends still use normal channel panning since they mix to
 939          * B-Format, which can't channel-match.
 940          */
 941         for(size_t c{0};c < num_channels;c++)
 942         {
 943             const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, 0.0f);
 944
 945             for(uint i{0};i < NumSends;i++)
 946             {
 947                 if(const EffectSlot *Slot{SendSlots[i]})
 948                     ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 949                         voice->mChans[c].mWetParams[i].Gains.Target);
 950             }
 951         }
 952     }
 953     else if(Device->mRenderMode == RenderMode::Hrtf)
 954     {
 955         /* Full HRTF rendering. Skip the virtual channels and render to the
 956          * real outputs.
 957          */
 958         voice->mDirect.Buffer = Device->RealOut.Buffer;
 959
 960         if(Distance > std::numeric_limits<float>::epsilon())
 961         {
 962             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 963             const float az{std::atan2(xpos, -zpos)};
 964
 965             /* Get the HRIR coefficients and delays just once, for the given
 966              * source direction.
 967              */
 968             GetHrtfCoeffs(Device->mHrtf.get(), ev, az, Distance, Spread,
 969                 voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
 970                 voice->mChans[0].mDryParams.Hrtf.Target.Delay);
 971             voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
 972
 973             /* Remaining channels use the same results as the first. */
 974             for(size_t c{1};c < num_channels;c++)
 975             {
 976                 /* Skip LFE */
 977                 if(chans[c].channel == LFE) continue;
 978                 voice->mChans[c].mDryParams.Hrtf.Target = voice->mChans[0].mDryParams.Hrtf.Target;
 979             }
 980
 981             /* Calculate the directional coefficients once, which apply to all
 982              * input channels of the source sends.
 983              */
 984             const auto coeffs = CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
 985
 986             for(size_t c{0};c < num_channels;c++)
 987             {
 988                 /* Skip LFE */
 989                 if(chans[c].channel == LFE)
 990                     continue;
 991                 for(uint i{0};i < NumSends;i++)
 992                 {
 993                     if(const EffectSlot *Slot{SendSlots[i]})
 994                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 995                             voice->mChans[c].mWetParams[i].Gains.Target);
 996                 }
 997             }
 998         }
 999         else
1000         {
1001             /* Local sources on HRTF play with each channel panned to its
1002              * relative location around the listener, providing "virtual
1003              * speaker" responses.
1004              */
1005             for(size_t c{0};c < num_channels;c++)
1006             {
1007                 /* Skip LFE */
1008                 if(chans[c].channel == LFE)
1009                     continue;
1010
1011                 /* Get the HRIR coefficients and delays for this channel
1012                  * position.
1013                  */
1014                 GetHrtfCoeffs(Device->mHrtf.get(), chans[c].elevation, chans[c].angle,
1015                     std::numeric_limits<float>::infinity(), Spread,
1016                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1017                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1018                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1019
1020                 /* Normal panning for auxiliary sends. */
1021                 const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, Spread);
1022
1023                 for(uint i{0};i < NumSends;i++)
1024                 {
1025                     if(const EffectSlot *Slot{SendSlots[i]})
1026                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1027                             voice->mChans[c].mWetParams[i].Gains.Target);
1028                 }
1029             }
1030         }
1031
1032         voice->mFlags.set(VoiceHasHrtf);
1033     }
1034     else
1035     {
1036         /* Non-HRTF rendering. Use normal panning to the output. */
1037
1038         if(Distance > std::numeric_limits<float>::epsilon())
1039         {
1040             /* Calculate NFC filter coefficient if needed. */
1041             if(Device->AvgSpeakerDist > 0.0f)
1042             {
1043                 /* Clamp the distance for really close sources, to prevent
1044                  * excessive bass.
1045                  */
1046                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
1047                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1048
1049                 /* Adjust NFC filters. */
1050                 for(size_t c{0};c < num_channels;c++)
1051                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1052
1053                 voice->mFlags.set(VoiceHasNfc);
1054             }
1055
1056             /* Calculate the directional coefficients once, which apply to all
1057              * input channels.
1058              */
1059             auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1060             {
1061                 if(mode != RenderMode::Pairwise)
1062                     return CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
1063                 const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
1064                 const float az{std::atan2(xpos, -zpos)};
1065                 return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, Spread);
1066             };
1067             const auto coeffs = calc_coeffs(Device->mRenderMode);
1068
1069             for(size_t c{0};c < num_channels;c++)
1070             {
1071                 /* Special-case LFE */
1072                 if(chans[c].channel == LFE)
1073                 {
1074                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1075                     {
1076                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1077                         if(idx != INVALID_CHANNEL_INDEX)
1078                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1079                     }
1080                     continue;
1081                 }
1082
1083                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1084                     voice->mChans[c].mDryParams.Gains.Target);
1085                 for(uint i{0};i < NumSends;i++)
1086                 {
1087                     if(const EffectSlot *Slot{SendSlots[i]})
1088                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1089                             voice->mChans[c].mWetParams[i].Gains.Target);
1090                 }
1091             }
1092         }
1093         else
1094         {
1095             if(Device->AvgSpeakerDist > 0.0f)
1096             {
1097                 /* If the source distance is 0, simulate a plane-wave by using
1098                  * infinite distance, which results in a w0 of 0.
1099                  */
1100                 static constexpr float w0{0.0f};
1101                 for(size_t c{0};c < num_channels;c++)
1102                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1103
1104                 voice->mFlags.set(VoiceHasNfc);
1105             }
1106
1107             for(size_t c{0};c < num_channels;c++)
1108             {
1109                 /* Special-case LFE */
1110                 if(chans[c].channel == LFE)
1111                 {
1112                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1113                     {
1114                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1115                         if(idx != INVALID_CHANNEL_INDEX)
1116                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1117                     }
1118                     continue;
1119                 }
1120
1121                 const auto coeffs = CalcAngleCoeffs((Device->mRenderMode == RenderMode::Pairwise)
1122                     ? ScaleAzimuthFront(chans[c].angle, 3.0f) : chans[c].angle,
1123                     chans[c].elevation, Spread);
1124
1125                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1126                     voice->mChans[c].mDryParams.Gains.Target);
1127                 for(uint i{0};i < NumSends;i++)
1128                 {
1129                     if(const EffectSlot *Slot{SendSlots[i]})
1130                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1131                             voice->mChans[c].mWetParams[i].Gains.Target);
1132                 }
1133             }
1134         }
1135     }
1136
1137     {
1138         const float hfNorm{props->Direct.HFReference / Frequency};
1139         const float lfNorm{props->Direct.LFReference / Frequency};
1140
1141         voice->mDirect.FilterType = AF_None;
1142         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1143         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1144
1145         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1146         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1147         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1148         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1149         for(size_t c{1};c < num_channels;c++)
1150         {
1151             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1152             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1153         }
1154     }
1155     for(uint i{0};i < NumSends;i++)
1156     {
1157         const float hfNorm{props->Send[i].HFReference / Frequency};
1158         const float lfNorm{props->Send[i].LFReference / Frequency};
1159
1160         voice->mSend[i].FilterType = AF_None;
1161         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1162         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1163
1164         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1165         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1166         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1167         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1168         for(size_t c{1};c < num_channels;c++)
1169         {
1170             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1171             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1172         }
1173     }
1174 }
1175
1176 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1177 {
1178     const DeviceBase *Device{context->mDevice};
1179     EffectSlot *SendSlots[MAX_SENDS];
1180
1181     voice->mDirect.Buffer = Device->Dry.Buffer;
1182     for(uint i{0};i < Device->NumAuxSends;i++)
1183     {
1184         SendSlots[i] = props->Send[i].Slot;
1185         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1186         {
1187             SendSlots[i] = nullptr;
1188             voice->mSend[i].Buffer = {};
1189         }
1190         else
1191             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1192     }
1193
1194     /* Calculate the stepping value */
1195     const auto Pitch = static_cast<float>(voice->mFrequency) /
1196         static_cast<float>(Device->Frequency) * props->Pitch;
1197     if(Pitch > float{MaxPitch})
1198         voice->mStep = MaxPitch<<MixerFracBits;
1199     else
1200         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1201     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1202
1203     /* Calculate gains */
1204     GainTriplet DryGain;
1205     DryGain.Base  = minf(clampf(props->Gain, props->MinGain, props->MaxGain) * props->Direct.Gain *
1206         context->mParams.Gain, GainMixMax);
1207     DryGain.HF = props->Direct.GainHF;
1208     DryGain.LF = props->Direct.GainLF;
1209     GainTriplet WetGain[MAX_SENDS];
1210     for(uint i{0};i < Device->NumAuxSends;i++)
1211     {
1212         WetGain[i].Base = minf(clampf(props->Gain, props->MinGain, props->MaxGain) *
1213             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1214         WetGain[i].HF = props->Send[i].GainHF;
1215         WetGain[i].LF = props->Send[i].GainLF;
1216     }
1217
1218     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1219         context->mParams, Device);
1220 }
1221
1222 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1223 {
1224     const DeviceBase *Device{context->mDevice};
1225     const uint NumSends{Device->NumAuxSends};
1226
1227     /* Set mixing buffers and get send parameters. */
1228     voice->mDirect.Buffer = Device->Dry.Buffer;
1229     EffectSlot *SendSlots[MAX_SENDS];
1230     uint UseDryAttnForRoom{0};
1231     for(uint i{0};i < NumSends;i++)
1232     {
1233         SendSlots[i] = props->Send[i].Slot;
1234         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1235             SendSlots[i] = nullptr;
1236         else if(!SendSlots[i]->AuxSendAuto)
1237         {
1238             /* If the slot's auxiliary send auto is off, the data sent to the
1239              * effect slot is the same as the dry path, sans filter effects.
1240              */
1241             UseDryAttnForRoom |= 1u<<i;
1242         }
1243
1244         if(!SendSlots[i])
1245             voice->mSend[i].Buffer = {};
1246         else
1247             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1248     }
1249
1250     /* Transform source to listener space (convert to head relative) */
1251     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1252     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1253     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1254     if(!props->HeadRelative)
1255     {
1256         /* Transform source vectors */
1257         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1258         Velocity = context->mParams.Matrix * Velocity;
1259         Direction = context->mParams.Matrix * Direction;
1260     }
1261     else
1262     {
1263         /* Offset the source velocity to be relative of the listener velocity */
1264         Velocity += context->mParams.Velocity;
1265     }
1266
1267     const bool directional{Direction.normalize() > 0.0f};
1268     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1269     const float Distance{ToSource.normalize()};
1270
1271     /* Calculate distance attenuation */
1272     float ClampedDist{Distance};
1273     float DryGainBase{props->Gain};
1274     float WetGainBase{props->Gain};
1275
1276     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1277         : context->mParams.mDistanceModel)
1278     {
1279         case DistanceModel::InverseClamped:
1280             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1281             if(props->MaxDistance < props->RefDistance) break;
1282             /*fall-through*/
1283         case DistanceModel::Inverse:
1284             if(!(props->RefDistance > 0.0f))
1285                 ClampedDist = props->RefDistance;
1286             else
1287             {
1288                 float dist{lerp(props->RefDistance, ClampedDist, props->RolloffFactor)};
1289                 if(dist > 0.0f) DryGainBase *= props->RefDistance / dist;
1290
1291                 dist = lerp(props->RefDistance, ClampedDist, props->RoomRolloffFactor);
1292                 if(dist > 0.0f) WetGainBase *= props->RefDistance / dist;
1293             }
1294             break;
1295
1296         case DistanceModel::LinearClamped:
1297             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1298             if(props->MaxDistance < props->RefDistance) break;
1299             /*fall-through*/
1300         case DistanceModel::Linear:
1301             if(!(props->MaxDistance != props->RefDistance))
1302                 ClampedDist = props->RefDistance;
1303             else
1304             {
1305                 float attn{(ClampedDist-props->RefDistance) /
1306                     (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1307                 DryGainBase *= maxf(1.0f - attn, 0.0f);
1308
1309                 attn = (ClampedDist-props->RefDistance) /
1310                     (props->MaxDistance-props->RefDistance) * props->RoomRolloffFactor;
1311                 WetGainBase *= maxf(1.0f - attn, 0.0f);
1312             }
1313             break;
1314
1315         case DistanceModel::ExponentClamped:
1316             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1317             if(props->MaxDistance < props->RefDistance) break;
1318             /*fall-through*/
1319         case DistanceModel::Exponent:
1320             if(!(ClampedDist > 0.0f && props->RefDistance > 0.0f))
1321                 ClampedDist = props->RefDistance;
1322             else
1323             {
1324                 const float dist_ratio{ClampedDist/props->RefDistance};
1325                 DryGainBase *= std::pow(dist_ratio, -props->RolloffFactor);
1326                 WetGainBase *= std::pow(dist_ratio, -props->RoomRolloffFactor);
1327             }
1328             break;
1329
1330         case DistanceModel::Disable:
1331             break;
1332     }
1333
1334     /* Calculate directional soundcones */
1335     float ConeHF{1.0f}, WetConeHF{1.0f};
1336     if(directional && props->InnerAngle < 360.0f)
1337     {
1338         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1339         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1340
1341         float ConeGain{1.0f};
1342         if(Angle >= props->OuterAngle)
1343         {
1344             ConeGain = props->OuterGain;
1345             ConeHF = lerp(1.0f, props->OuterGainHF, props->DryGainHFAuto);
1346         }
1347         else if(Angle >= props->InnerAngle)
1348         {
1349             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1350             ConeGain = lerp(1.0f, props->OuterGain, scale);
1351             ConeHF = lerp(1.0f, props->OuterGainHF, scale * props->DryGainHFAuto);
1352         }
1353
1354         DryGainBase *= ConeGain;
1355         WetGainBase *= lerp(1.0f, ConeGain, props->WetGainAuto);
1356
1357         WetConeHF = lerp(1.0f, ConeHF, props->WetGainHFAuto);
1358     }
1359
1360     /* Apply gain and frequency filters */
1361     DryGainBase = clampf(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1362     WetGainBase = clampf(WetGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1363
1364     GainTriplet DryGain{};
1365     DryGain.Base = minf(DryGainBase * props->Direct.Gain, GainMixMax);
1366     DryGain.HF = ConeHF * props->Direct.GainHF;
1367     DryGain.LF = props->Direct.GainLF;
1368     GainTriplet WetGain[MAX_SENDS]{};
1369     for(uint i{0};i < NumSends;i++)
1370     {
1371         /* If this effect slot's Auxiliary Send Auto is off, then use the dry
1372          * path distance and cone attenuation, otherwise use the wet (room)
1373          * path distance and cone attenuation. The send filter is used instead
1374          * of the direct filter, regardless.
1375          */
1376         const bool use_room{!(UseDryAttnForRoom&(1u<<i))};
1377         const float gain{use_room ? WetGainBase : DryGainBase};
1378         WetGain[i].Base = minf(gain * props->Send[i].Gain, GainMixMax);
1379         WetGain[i].HF = (use_room ? WetConeHF : ConeHF) * props->Send[i].GainHF;
1380         WetGain[i].LF = props->Send[i].GainLF;
1381     }
1382
1383     /* Distance-based air absorption and initial send decay. */
1384     if(likely(Distance > props->RefDistance))
1385     {
1386         const float distance_base{(Distance-props->RefDistance) * props->RolloffFactor};
1387         const float absorption{distance_base * context->mParams.MetersPerUnit *
1388             props->AirAbsorptionFactor};
1389         if(absorption > std::numeric_limits<float>::epsilon())
1390         {
1391             const float hfattn{std::pow(context->mParams.AirAbsorptionGainHF, absorption)};
1392             DryGain.HF *= hfattn;
1393             for(uint i{0u};i < NumSends;++i)
1394                 WetGain[i].HF *= hfattn;
1395         }
1396
1397         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1398          * adjustment is applied to the send gains.
1399          */
1400         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1401         {
1402             if(!SendSlots[i])
1403                 continue;
1404
1405             auto calc_attenuation = [](float distance, float refdist, float rolloff) noexcept
1406             {
1407                 const float dist{lerp(refdist, distance, rolloff)};
1408                 if(dist > refdist) return refdist / dist;
1409                 return 1.0f;
1410             };
1411
1412             /* The reverb effect's room rolloff factor always applies to an
1413              * inverse distance rolloff model.
1414              */
1415             WetGain[i].Base *= calc_attenuation(Distance, props->RefDistance,
1416                 SendSlots[i]->RoomRolloff);
1417
1418             /* If this effect slot's Auxiliary Send Auto is off, don't apply
1419              * the automatic initial reverb decay (should the reverb's room
1420              * rolloff still apply?).
1421              */
1422             if(!SendSlots[i]->AuxSendAuto)
1423                 continue;
1424
1425             GainTriplet DecayDistance;
1426             /* Calculate the distances to where this effect's decay reaches
1427              * -60dB.
1428              */
1429             DecayDistance.Base = SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec;
1430             DecayDistance.LF = DecayDistance.Base * SendSlots[i]->DecayLFRatio;
1431             DecayDistance.HF = DecayDistance.Base * SendSlots[i]->DecayHFRatio;
1432             if(SendSlots[i]->DecayHFLimit)
1433             {
1434                 const float airAbsorption{SendSlots[i]->AirAbsorptionGainHF};
1435                 if(airAbsorption < 1.0f)
1436                 {
1437                     /* Calculate the distance to where this effect's air
1438                      * absorption reaches -60dB, and limit the effect's HF
1439                      * decay distance (so it doesn't take any longer to decay
1440                      * than the air would allow).
1441                      */
1442                     static constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
1443                     const float absorb_dist{log10_decaygain / std::log10(airAbsorption)};
1444                     DecayDistance.HF = minf(absorb_dist, DecayDistance.HF);
1445                 }
1446             }
1447
1448             const float baseAttn = calc_attenuation(Distance, props->RefDistance,
1449                 props->RolloffFactor);
1450
1451             /* Apply a decay-time transformation to the wet path, based on the
1452              * source distance. The initial decay of the reverb effect is
1453              * calculated and applied to the wet path.
1454              */
1455             const float fact{distance_base / DecayDistance.Base};
1456             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1457             WetGain[i].Base *= gain;
1458
1459             if(gain > 0.0f)
1460             {
1461                 const float hffact{distance_base / DecayDistance.HF};
1462                 const float gainhf{std::pow(ReverbDecayGain, hffact)*(1.0f-baseAttn) + baseAttn};
1463                 WetGain[i].HF *= minf(gainhf/gain, 1.0f);
1464                 const float lffact{distance_base / DecayDistance.LF};
1465                 const float gainlf{std::pow(ReverbDecayGain, lffact)*(1.0f-baseAttn) + baseAttn};
1466                 WetGain[i].LF *= minf(gainlf/gain, 1.0f);
1467             }
1468         }
1469     }
1470
1471
1472     /* Initial source pitch */
1473     float Pitch{props->Pitch};
1474
1475     /* Calculate velocity-based doppler effect */
1476     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1477     if(DopplerFactor > 0.0f)
1478     {
1479         const alu::Vector &lvelocity = context->mParams.Velocity;
1480         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1481         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1482
1483         const float SpeedOfSound{context->mParams.SpeedOfSound};
1484         if(!(vls < SpeedOfSound))
1485         {
1486             /* Listener moving away from the source at the speed of sound.
1487              * Sound waves can't catch it.
1488              */
1489             Pitch = 0.0f;
1490         }
1491         else if(!(vss < SpeedOfSound))
1492         {
1493             /* Source moving toward the listener at the speed of sound. Sound
1494              * waves bunch up to extreme frequencies.
1495              */
1496             Pitch = std::numeric_limits<float>::infinity();
1497         }
1498         else
1499         {
1500             /* Source and listener movement is nominal. Calculate the proper
1501              * doppler shift.
1502              */
1503             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1504         }
1505     }
1506
1507     /* Adjust pitch based on the buffer and output frequencies, and calculate
1508      * fixed-point stepping value.
1509      */
1510     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1511     if(Pitch > float{MaxPitch})
1512         voice->mStep = MaxPitch<<MixerFracBits;
1513     else
1514         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1515     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1516
1517     float spread{0.0f};
1518     if(props->Radius > Distance)
1519         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1520     else if(Distance > 0.0f)
1521         spread = std::asin(props->Radius/Distance) * 2.0f;
1522
1523     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1524         Distance*context->mParams.MetersPerUnit, spread, DryGain, WetGain, SendSlots, props,
1525         context->mParams, Device);
1526 }
1527
1528 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1529 {
1530     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1531     if(!props && !force) return;
1532
1533     if(props)
1534     {
1535         voice->mProps = *props;
1536
1537         AtomicReplaceHead(context->mFreeVoiceProps, props);
1538     }
1539
1540     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1541             && !IsAmbisonic(voice->mFmtChannels))
1542         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1543         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1544         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1545     else
1546         CalcAttnSourceParams(voice, &voice->mProps, context);
1547 }
1548
1549
1550 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1551 {
1552     RingBuffer *ring{context->mAsyncEvents.get()};
1553     auto evt_vec = ring->getWriteVector();
1554     if(evt_vec.first.len < 1) return;
1555
1556     AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
1557         AsyncEvent::SourceStateChange)};
1558     evt->u.srcstate.id = id;
1559     switch(state)
1560     {
1561     case VChangeState::Reset:
1562         evt->u.srcstate.state = AsyncEvent::SrcState::Reset;
1563         break;
1564     case VChangeState::Stop:
1565         evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
1566         break;
1567     case VChangeState::Play:
1568         evt->u.srcstate.state = AsyncEvent::SrcState::Play;
1569         break;
1570     case VChangeState::Pause:
1571         evt->u.srcstate.state = AsyncEvent::SrcState::Pause;
1572         break;
1573     /* Shouldn't happen. */
1574     case VChangeState::Restart:
1575         ASSUME(0);
1576     }
1577
1578     ring->writeAdvance(1);
1579 }
1580
1581 void ProcessVoiceChanges(ContextBase *ctx)
1582 {
1583     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1584     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1585     if(!next) return;
1586
1587     const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
1588     do {
1589         cur = next;
1590
1591         bool sendevt{false};
1592         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1593         {
1594             if(Voice *voice{cur->mVoice})
1595             {
1596                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1597                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1598                 /* A source ID indicates the voice was playing or paused, which
1599                  * gets a reset/stop event.
1600                  */
1601                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1602                 Voice::State oldvstate{Voice::Playing};
1603                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1604                     std::memory_order_relaxed, std::memory_order_acquire);
1605                 voice->mPendingChange.store(false, std::memory_order_release);
1606             }
1607             /* Reset state change events are always sent, even if the voice is
1608              * already stopped or even if there is no voice.
1609              */
1610             sendevt |= (cur->mState == VChangeState::Reset);
1611         }
1612         else if(cur->mState == VChangeState::Pause)
1613         {
1614             Voice *voice{cur->mVoice};
1615             Voice::State oldvstate{Voice::Playing};
1616             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1617                 std::memory_order_release, std::memory_order_acquire);
1618         }
1619         else if(cur->mState == VChangeState::Play)
1620         {
1621             /* NOTE: When playing a voice, sending a source state change event
1622              * depends if there's an old voice to stop and if that stop is
1623              * successful. If there is no old voice, a playing event is always
1624              * sent. If there is an old voice, an event is sent only if the
1625              * voice is already stopped.
1626              */
1627             if(Voice *oldvoice{cur->mOldVoice})
1628             {
1629                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1630                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1631                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1632                 Voice::State oldvstate{Voice::Playing};
1633                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1634                     std::memory_order_relaxed, std::memory_order_acquire);
1635                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1636             }
1637             else
1638                 sendevt = true;
1639
1640             Voice *voice{cur->mVoice};
1641             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1642         }
1643         else if(cur->mState == VChangeState::Restart)
1644         {
1645             /* Restarting a voice never sends a source change event. */
1646             Voice *oldvoice{cur->mOldVoice};
1647             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1648             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1649             /* If there's no sourceID, the old voice finished so don't start
1650              * the new one at its new offset.
1651              */
1652             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1653             {
1654                 /* Otherwise, set the voice to stopping if it's not already (it
1655                  * might already be, if paused), and play the new voice as
1656                  * appropriate.
1657                  */
1658                 Voice::State oldvstate{Voice::Playing};
1659                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1660                     std::memory_order_relaxed, std::memory_order_acquire);
1661
1662                 Voice *voice{cur->mVoice};
1663                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1664                     : Voice::Stopped, std::memory_order_release);
1665             }
1666             oldvoice->mPendingChange.store(false, std::memory_order_release);
1667         }
1668         if(sendevt && (enabledevt&AsyncEvent::SourceStateChange))
1669             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1670
1671         next = cur->mNext.load(std::memory_order_acquire);
1672     } while(next);
1673     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1674 }
1675
1676 void ProcessParamUpdates(ContextBase *ctx, const EffectSlotArray &slots,
1677     const al::span<Voice*> voices)
1678 {
1679     ProcessVoiceChanges(ctx);
1680
1681     IncrementRef(ctx->mUpdateCount);
1682     if LIKELY(!ctx->mHoldUpdates.load(std::memory_order_acquire))
1683     {
1684         bool force{CalcContextParams(ctx)};
1685         auto sorted_slots = const_cast<EffectSlot**>(slots.data() + slots.size());
1686         for(EffectSlot *slot : slots)
1687             force |= CalcEffectSlotParams(slot, sorted_slots, ctx);
1688
1689         for(Voice *voice : voices)
1690         {
1691             /* Only update voices that have a source. */
1692             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1693                 CalcSourceParams(voice, ctx, force);
1694         }
1695     }
1696     IncrementRef(ctx->mUpdateCount);
1697 }
1698
1699 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1700 {
1701     ASSUME(SamplesToDo > 0);
1702
1703     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
1704     {
1705         const EffectSlotArray &auxslots = *ctx->mActiveAuxSlots.load(std::memory_order_acquire);
1706         const al::span<Voice*> voices{ctx->getVoicesSpanAcquired()};
1707
1708         /* Process pending propery updates for objects on the context. */
1709         ProcessParamUpdates(ctx, auxslots, voices);
1710
1711         /* Clear auxiliary effect slot mixing buffers. */
1712         for(EffectSlot *slot : auxslots)
1713         {
1714             for(auto &buffer : slot->Wet.Buffer)
1715                 buffer.fill(0.0f);
1716         }
1717
1718         /* Process voices that have a playing source. */
1719         for(Voice *voice : voices)
1720         {
1721             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1722             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1723                 voice->mix(vstate, ctx, SamplesToDo);
1724         }
1725
1726         /* Process effects. */
1727         if(const size_t num_slots{auxslots.size()})
1728         {
1729             auto slots = auxslots.data();
1730             auto slots_end = slots + num_slots;
1731
1732             /* Sort the slots into extra storage, so that effect slots come
1733              * before their effect slot target (or their targets' target).
1734              */
1735             const al::span<EffectSlot*> sorted_slots{const_cast<EffectSlot**>(slots_end),
1736                 num_slots};
1737             /* Skip sorting if it has already been done. */
1738             if(!sorted_slots[0])
1739             {
1740                 /* First, copy the slots to the sorted list, then partition the
1741                  * sorted list so that all slots without a target slot go to
1742                  * the end.
1743                  */
1744                 std::copy(slots, slots_end, sorted_slots.begin());
1745                 auto split_point = std::partition(sorted_slots.begin(), sorted_slots.end(),
1746                     [](const EffectSlot *slot) noexcept -> bool
1747                     { return slot->Target != nullptr; });
1748                 /* There must be at least one slot without a slot target. */
1749                 assert(split_point != sorted_slots.end());
1750
1751                 /* Simple case: no more than 1 slot has a target slot. Either
1752                  * all slots go right to the output, or the remaining one must
1753                  * target an already-partitioned slot.
1754                  */
1755                 if(split_point - sorted_slots.begin() > 1)
1756                 {
1757                     /* At least two slots target other slots. Starting from the
1758                      * back of the sorted list, continue partitioning the front
1759                      * of the list given each target until all targets are
1760                      * accounted for. This ensures all slots without a target
1761                      * go last, all slots directly targeting those last slots
1762                      * go second-to-last, all slots directly targeting those
1763                      * second-last slots go third-to-last, etc.
1764                      */
1765                     auto next_target = sorted_slots.end();
1766                     do {
1767                         /* This shouldn't happen, but if there's unsorted slots
1768                          * left that don't target any sorted slots, they can't
1769                          * contribute to the output, so leave them.
1770                          */
1771                         if UNLIKELY(next_target == split_point)
1772                             break;
1773
1774                         --next_target;
1775                         split_point = std::partition(sorted_slots.begin(), split_point,
1776                             [next_target](const EffectSlot *slot) noexcept -> bool
1777                             { return slot->Target != *next_target; });
1778                     } while(split_point - sorted_slots.begin() > 1);
1779                 }
1780             }
1781
1782             for(const EffectSlot *slot : sorted_slots)
1783             {
1784                 EffectState *state{slot->mEffectState};
1785                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
1786             }
1787         }
1788
1789         /* Signal the event handler if there are any events to read. */
1790         RingBuffer *ring{ctx->mAsyncEvents.get()};
1791         if(ring->readSpace() > 0)
1792             ctx->mEventSem.post();
1793     }
1794 }
1795
1796
1797 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
1798     const DistanceComp::ChanData *distcomp)
1799 {
1800     ASSUME(SamplesToDo > 0);
1801
1802     for(auto &chanbuffer : Samples)
1803     {
1804         const float gain{distcomp->Gain};
1805         const size_t base{distcomp->Length};
1806         float *distbuf{al::assume_aligned<16>(distcomp->Buffer)};
1807         ++distcomp;
1808
1809         if(base < 1)
1810             continue;
1811
1812         float *inout{al::assume_aligned<16>(chanbuffer.data())};
1813         auto inout_end = inout + SamplesToDo;
1814         if LIKELY(SamplesToDo >= base)
1815         {
1816             auto delay_end = std::rotate(inout, inout_end - base, inout_end);
1817             std::swap_ranges(inout, delay_end, distbuf);
1818         }
1819         else
1820         {
1821             auto delay_start = std::swap_ranges(inout, inout_end, distbuf);
1822             std::rotate(distbuf, delay_start, distbuf + base);
1823         }
1824         std::transform(inout, inout_end, inout, std::bind(std::multiplies<float>{}, _1, gain));
1825     }
1826 }
1827
1828 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
1829     const float quant_scale, const size_t SamplesToDo)
1830 {
1831     ASSUME(SamplesToDo > 0);
1832
1833     /* Dithering. Generate whitenoise (uniform distribution of random values
1834      * between -1 and +1) and add it to the sample values, after scaling up to
1835      * the desired quantization depth amd before rounding.
1836      */
1837     const float invscale{1.0f / quant_scale};
1838     uint seed{*dither_seed};
1839     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
1840     {
1841         float val{sample * quant_scale};
1842         uint rng0{dither_rng(&seed)};
1843         uint rng1{dither_rng(&seed)};
1844         val += static_cast<float>(rng0*(1.0/UINT_MAX) - rng1*(1.0/UINT_MAX));
1845         return fast_roundf(val) * invscale;
1846     };
1847     for(FloatBufferLine &inout : Samples)
1848         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
1849     *dither_seed = seed;
1850 }
1851
1852
1853 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
1854  * chokes on that given the inline specializations.
1855  */
1856 template<typename T>
1857 inline T SampleConv(float) noexcept;
1858
1859 template<> inline float SampleConv(float val) noexcept
1860 { return val; }
1861 template<> inline int32_t SampleConv(float val) noexcept
1862 {
1863     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
1864      * This means a normalized float has at most 25 bits of signed precision.
1865      * When scaling and clamping for a signed 32-bit integer, these following
1866      * values are the best a float can give.
1867      */
1868     return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
1869 }
1870 template<> inline int16_t SampleConv(float val) noexcept
1871 { return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }
1872 template<> inline int8_t SampleConv(float val) noexcept
1873 { return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }
1874
1875 /* Define unsigned output variations. */
1876 template<> inline uint32_t SampleConv(float val) noexcept
1877 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
1878 template<> inline uint16_t SampleConv(float val) noexcept
1879 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
1880 template<> inline uint8_t SampleConv(float val) noexcept
1881 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
1882
1883 template<DevFmtType T>
1884 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
1885     const size_t SamplesToDo, const size_t FrameStep)
1886 {
1887     ASSUME(FrameStep > 0);
1888     ASSUME(SamplesToDo > 0);
1889
1890     DevFmtType_t<T> *outbase{static_cast<DevFmtType_t<T>*>(OutBuffer) + Offset*FrameStep};
1891     size_t c{0};
1892     for(const FloatBufferLine &inbuf : InBuffer)
1893     {
1894         DevFmtType_t<T> *out{outbase++};
1895         auto conv_sample = [FrameStep,&out](const float s) noexcept -> void
1896         {
1897             *out = SampleConv<DevFmtType_t<T>>(s);
1898             out += FrameStep;
1899         };
1900         std::for_each(inbuf.begin(), inbuf.begin()+SamplesToDo, conv_sample);
1901         ++c;
1902     }
1903     if(const size_t extra{FrameStep - c})
1904     {
1905         const auto silence = SampleConv<DevFmtType_t<T>>(0.0f);
1906         for(size_t i{0};i < SamplesToDo;++i)
1907         {
1908             std::fill_n(outbase, extra, silence);
1909             outbase += FrameStep;
1910         }
1911     }
1912 }
1913
1914 } // namespace
1915
1916 uint DeviceBase::renderSamples(const uint numSamples)
1917 {
1918     const uint samplesToDo{minu(numSamples, BufferLineSize)};
1919
1920     /* Clear main mixing buffers. */
1921     for(FloatBufferLine &buffer : MixBuffer)
1922         buffer.fill(0.0f);
1923
1924     /* Increment the mix count at the start (lsb should now be 1). */
1925     IncrementRef(MixCount);
1926
1927     /* Process and mix each context's sources and effects. */
1928     ProcessContexts(this, samplesToDo);
1929
1930     /* Increment the clock time. Every second's worth of samples is converted
1931      * and added to clock base so that large sample counts don't overflow
1932      * during conversion. This also guarantees a stable conversion.
1933      */
1934     SamplesDone += samplesToDo;
1935     ClockBase += std::chrono::seconds{SamplesDone / Frequency};
1936     SamplesDone %= Frequency;
1937
1938     /* Increment the mix count at the end (lsb should now be 0). */
1939     IncrementRef(MixCount);
1940
1941     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
1942      * (Ambisonic decode, UHJ encode, etc).
1943      */
1944     postProcess(samplesToDo);
1945
1946     /* Apply compression, limiting sample amplitude if needed or desired. */
1947     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer.data());
1948
1949     /* Apply delays and attenuation for mismatched speaker distances. */
1950     if(ChannelDelays)
1951         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels.data());
1952
1953     /* Apply dithering. The compressor should have left enough headroom for the
1954      * dither noise to not saturate.
1955      */
1956     if(DitherDepth > 0.0f)
1957         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
1958
1959     return samplesToDo;
1960 }
1961
1962 void DeviceBase::renderSamples(const al::span<float*> outBuffers, const uint numSamples)
1963 {
1964     FPUCtl mixer_mode{};
1965     uint total{0};
1966     while(const uint todo{numSamples - total})
1967     {
1968         const uint samplesToDo{renderSamples(todo)};
1969
1970         auto *srcbuf = RealOut.Buffer.data();
1971         for(auto *dstbuf : outBuffers)
1972         {
1973             std::copy_n(srcbuf->data(), samplesToDo, dstbuf + total);
1974             ++srcbuf;
1975         }
1976
1977         total += samplesToDo;
1978     }
1979 }
1980
1981 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
1982 {
1983     FPUCtl mixer_mode{};
1984     uint total{0};
1985     while(const uint todo{numSamples - total})
1986     {
1987         const uint samplesToDo{renderSamples(todo)};
1988
1989         if LIKELY(outBuffer)
1990         {
1991             /* Finally, interleave and convert samples, writing to the device's
1992              * output buffer.
1993              */
1994             switch(FmtType)
1995             {
1996 #define HANDLE_WRITE(T) case T:                                               \
1997     Write<T>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
1998             HANDLE_WRITE(DevFmtByte)
1999             HANDLE_WRITE(DevFmtUByte)
2000             HANDLE_WRITE(DevFmtShort)
2001             HANDLE_WRITE(DevFmtUShort)
2002             HANDLE_WRITE(DevFmtInt)
2003             HANDLE_WRITE(DevFmtUInt)
2004             HANDLE_WRITE(DevFmtFloat)
2005 #undef HANDLE_WRITE
2006             }
2007         }
2008
2009         total += samplesToDo;
2010     }
2011 }
2012
2013 void DeviceBase::handleDisconnect(const char *msg, ...)
2014 {
2015     if(!Connected.exchange(false, std::memory_order_acq_rel))
2016         return;
2017
2018     AsyncEvent evt{AsyncEvent::Disconnected};
2019
2020     va_list args;
2021     va_start(args, msg);
2022     int msglen{vsnprintf(evt.u.disconnect.msg, sizeof(evt.u.disconnect.msg), msg, args)};
2023     va_end(args);
2024
2025     if(msglen < 0 || static_cast<size_t>(msglen) >= sizeof(evt.u.disconnect.msg))
2026         evt.u.disconnect.msg[sizeof(evt.u.disconnect.msg)-1] = 0;
2027
2028     IncrementRef(MixCount);
2029     for(ContextBase *ctx : *mContexts.load())
2030     {
2031         const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
2032         if((enabledevt&AsyncEvent::Disconnected))
2033         {
2034             RingBuffer *ring{ctx->mAsyncEvents.get()};
2035             auto evt_data = ring->getWriteVector().first;
2036             if(evt_data.len > 0)
2037             {
2038                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2039                 ring->writeAdvance(1);
2040                 ctx->mEventSem.post();
2041             }
2042         }
2043
2044         if(!ctx->mStopVoicesOnDisconnect)
2045         {
2046             ProcessVoiceChanges(ctx);
2047             continue;
2048         }
2049
2050         auto voicelist = ctx->getVoicesSpanAcquired();
2051         auto stop_voice = [](Voice *voice) -> void
2052         {
2053             voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2054             voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2055             voice->mSourceID.store(0u, std::memory_order_relaxed);
2056             voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2057         };
2058         std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2059     }
2060     IncrementRef(MixCount);
2061 }