alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdint>
  33 #include <cstdio>
  34 #include <cstdlib>
  35 #include <functional>
  36 #include <iterator>
  37 #include <limits>
  38 #include <memory>
  39 #include <new>
  40 #include <optional>
  41 #include <utility>
  42
  43 #include "almalloc.h"
  44 #include "alnumbers.h"
  45 #include "alnumeric.h"
  46 #include "alspan.h"
  47 #include "alstring.h"
  48 #include "atomic.h"
  49 #include "core/ambidefs.h"
  50 #include "core/async_event.h"
  51 #include "core/bformatdec.h"
  52 #include "core/bs2b.h"
  53 #include "core/bsinc_defs.h"
  54 #include "core/bsinc_tables.h"
  55 #include "core/bufferline.h"
  56 #include "core/buffer_storage.h"
  57 #include "core/context.h"
  58 #include "core/cpu_caps.h"
  59 #include "core/cubic_tables.h"
  60 #include "core/devformat.h"
  61 #include "core/device.h"
  62 #include "core/effects/base.h"
  63 #include "core/effectslot.h"
  64 #include "core/filters/biquad.h"
  65 #include "core/filters/nfc.h"
  66 #include "core/fpu_ctrl.h"
  67 #include "core/hrtf.h"
  68 #include "core/mastering.h"
  69 #include "core/mixer.h"
  70 #include "core/mixer/defs.h"
  71 #include "core/mixer/hrtfdefs.h"
  72 #include "core/resampler_limits.h"
  73 #include "core/uhjfilter.h"
  74 #include "core/voice.h"
  75 #include "core/voice_change.h"
  76 #include "intrusive_ptr.h"
  77 #include "opthelpers.h"
  78 #include "ringbuffer.h"
  79 #include "strutils.h"
  80 #include "vecmat.h"
  81 #include "vector.h"
  82
/* Forward declarations of the tag types used in this file as template
 * arguments to pick an instruction-set specific implementation (see the
 * MixDirectHrtf_<...> and Resample_<...> selections below). The SIMD tags are
 * only declared when the matching HAVE_* macro is defined.
 */
struct CTag;
#ifdef HAVE_SSE
struct SSETag;
#endif
#ifdef HAVE_SSE2
struct SSE2Tag;
#endif
#ifdef HAVE_SSE4_1
struct SSE4Tag;
#endif
#ifdef HAVE_NEON
struct NEONTag;
#endif
/* Tag types naming the resampling method given to Resample_<...>. */
struct PointTag;
struct LerpTag;
struct CubicTag;
struct BSincTag;
struct FastBSincTag;


/* Compile-time check that MaxResamplerPadding is an even value. */
static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 104
 105
 106 namespace {
 107
 108 using uint = unsigned int;
 109 using namespace std::chrono;
 110 using namespace std::string_view_literals;
 111
 112 float InitConeScale()
 113 {
 114     float ret{1.0f};
 115     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 116     {
 117         if(al::case_compare(*optval, "true"sv) == 0
 118             || strtol(optval->c_str(), nullptr, 0) == 1)
 119             ret *= 0.5f;
 120     }
 121     return ret;
 122 }
 123 /* Cone scalar */
 124 const float ConeScale{InitConeScale()};
 125
/* Localized scalars for mono sources (initialized in aluInit, after
 * configuration is loaded). aluInit sets each one to -1 when the matching
 * CompatFlags::ReverseX/ReverseY/ReverseZ flag is set, and to 1 otherwise.
 */
float XScale{1.0f};
float YScale{1.0f};
float ZScale{1.0f};

/* Source distance scale for NFC filters. aluInit stores the requested scale
 * here after clamping it to the range [0.0001, 10000].
 */
float NfcScale{1.0f};
 135
 136
/* Function pointer type matching the MixDirectHrtf_<...> implementations. */
using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
    const al::span<const FloatBufferLine> InSamples, const al::span<float2> AccumSamples,
    const al::span<float,BufferLineSize> TempBuf, const al::span<HrtfChannelState> ChanState,
    const size_t IrSize, const size_t SamplesToDo);

/* The direct HRTF mixer currently in use (called from DeviceBase::ProcessHrtf).
 * It starts out as the plain C implementation and is replaced in aluInit with
 * whatever SelectHrtfMixer() returns.
 */
HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 143
 144 inline HrtfDirectMixerFunc SelectHrtfMixer()
 145 {
 146 #ifdef HAVE_NEON
 147     if((CPUCapFlags&CPU_CAP_NEON))
 148         return MixDirectHrtf_<NEONTag>;
 149 #endif
 150 #ifdef HAVE_SSE
 151     if((CPUCapFlags&CPU_CAP_SSE))
 152         return MixDirectHrtf_<SSETag>;
 153 #endif
 154
 155     return MixDirectHrtf_<CTag>;
 156 }
 157
 158
 159 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 160 {
 161     size_t si{BSincScaleCount - 1};
 162     float sf{0.0f};
 163
 164     if(increment > MixerFracOne)
 165     {
 166         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 167         sf = std::max(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 168         si = float2uint(sf);
 169         /* The interpolation factor is fit to this diagonally-symmetric curve
 170          * to reduce the transition ripple caused by interpolating different
 171          * scales of the sinc function.
 172          */
 173         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 174     }
 175
 176     state->sf = sf;
 177     state->m = table->m[si];
 178     state->l = (state->m/2) - 1;
 179     state->filter = table->Tab.subspan(table->filterOffset[si]);
 180 }
 181
 182 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 183 {
 184     switch(resampler)
 185     {
 186     case Resampler::Point:
 187         return Resample_<PointTag,CTag>;
 188     case Resampler::Linear:
 189 #ifdef HAVE_NEON
 190         if((CPUCapFlags&CPU_CAP_NEON))
 191             return Resample_<LerpTag,NEONTag>;
 192 #endif
 193 #ifdef HAVE_SSE4_1
 194         if((CPUCapFlags&CPU_CAP_SSE4_1))
 195             return Resample_<LerpTag,SSE4Tag>;
 196 #endif
 197 #ifdef HAVE_SSE2
 198         if((CPUCapFlags&CPU_CAP_SSE2))
 199             return Resample_<LerpTag,SSE2Tag>;
 200 #endif
 201         return Resample_<LerpTag,CTag>;
 202     case Resampler::Spline:
 203     case Resampler::Gaussian:
 204 #ifdef HAVE_NEON
 205         if((CPUCapFlags&CPU_CAP_NEON))
 206             return Resample_<CubicTag,NEONTag>;
 207 #endif
 208 #ifdef HAVE_SSE4_1
 209         if((CPUCapFlags&CPU_CAP_SSE4_1))
 210             return Resample_<CubicTag,SSE4Tag>;
 211 #endif
 212 #ifdef HAVE_SSE2
 213         if((CPUCapFlags&CPU_CAP_SSE2))
 214             return Resample_<CubicTag,SSE2Tag>;
 215 #endif
 216 #ifdef HAVE_SSE
 217         if((CPUCapFlags&CPU_CAP_SSE))
 218             return Resample_<CubicTag,SSETag>;
 219 #endif
 220         return Resample_<CubicTag,CTag>;
 221     case Resampler::BSinc12:
 222     case Resampler::BSinc24:
 223         if(increment > MixerFracOne)
 224         {
 225 #ifdef HAVE_NEON
 226             if((CPUCapFlags&CPU_CAP_NEON))
 227                 return Resample_<BSincTag,NEONTag>;
 228 #endif
 229 #ifdef HAVE_SSE
 230             if((CPUCapFlags&CPU_CAP_SSE))
 231                 return Resample_<BSincTag,SSETag>;
 232 #endif
 233             return Resample_<BSincTag,CTag>;
 234         }
 235         /* fall-through */
 236     case Resampler::FastBSinc12:
 237     case Resampler::FastBSinc24:
 238 #ifdef HAVE_NEON
 239         if((CPUCapFlags&CPU_CAP_NEON))
 240             return Resample_<FastBSincTag,NEONTag>;
 241 #endif
 242 #ifdef HAVE_SSE
 243         if((CPUCapFlags&CPU_CAP_SSE))
 244             return Resample_<FastBSincTag,SSETag>;
 245 #endif
 246         return Resample_<FastBSincTag,CTag>;
 247     }
 248
 249     return Resample_<PointTag,CTag>;
 250 }
 251
 252 } // namespace
 253
 254 void aluInit(CompatFlagBitset flags, const float nfcscale)
 255 {
 256     MixDirectHrtf = SelectHrtfMixer();
 257     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 258     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 259     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 260
 261     NfcScale = std::clamp(nfcscale, 0.0001f, 10000.0f);
 262 }
 263
 264
 265 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 266 {
 267     switch(resampler)
 268     {
 269     case Resampler::Point:
 270     case Resampler::Linear:
 271         break;
 272     case Resampler::Spline:
 273         state->emplace<CubicState>(al::span{gSplineFilter.mTable});
 274         break;
 275     case Resampler::Gaussian:
 276         state->emplace<CubicState>(al::span{gGaussianFilter.mTable});
 277         break;
 278     case Resampler::FastBSinc12:
 279     case Resampler::BSinc12:
 280         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc12);
 281         break;
 282     case Resampler::FastBSinc24:
 283     case Resampler::BSinc24:
 284         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc24);
 285         break;
 286     }
 287     return SelectResampler(resampler, increment);
 288 }
 289
 290
 291 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 292 {
 293     /* HRTF is stereo output only. */
 294     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 295     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 296
 297     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 298         mHrtfState->mTemp, mHrtfState->mChannels, mHrtfState->mIrSize, SamplesToDo);
 299 }
 300
 301 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 302 {
 303     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 304 }
 305
 306 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 307 {
 308     /* Decode with front image stablization. */
 309     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 310     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 311     const size_t cidx{RealOut.ChannelIndex[FrontCenter]};
 312
 313     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer, lidx, ridx, cidx, SamplesToDo);
 314 }
 315
 316 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 317 {
 318     /* UHJ is stereo output only. */
 319     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 320     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 321
 322     /* Encode to stereo-compatible 2-channel UHJ output. */
 323     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 324         {{Dry.Buffer[0].data(), Dry.Buffer[1].data(), Dry.Buffer[2].data()}}, SamplesToDo);
 325 }
 326
 327 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 328 {
 329     /* First, decode the ambisonic mix to the "real" output. */
 330     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 331
 332     /* BS2B is stereo output only. */
 333     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 334     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 335
 336     /* Now apply the BS2B binaural/crossfeed filter. */
 337     Bs2b->cross_feed(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(), SamplesToDo);
 338 }
 339
 340
 341 namespace {
 342
 343 /* This RNG method was created based on the math found in opusdec. It's quick,
 344  * and starting with a seed value of 22222, is suitable for generating
 345  * whitenoise.
 346  */
 347 inline uint dither_rng(uint *seed) noexcept
 348 {
 349     *seed = (*seed * 96314165) + 907633515;
 350     return *seed;
 351 }
 352
 353
 354 /* Ambisonic upsampler function. It's effectively a matrix multiply. It takes
 355  * an 'upsampler' and 'rotator' as the input matrices, and creates a matrix
 356  * that behaves as if the B-Format input was first decoded to a speaker array
 357  * at its input order, encoded back into the higher order mix, then finally
 358  * rotated.
 359  */
 360 void UpsampleBFormatTransform(
 361     const al::span<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> output,
 362     const al::span<const std::array<float,MaxAmbiChannels>> upsampler,
 363     const al::span<const std::array<float,MaxAmbiChannels>,MaxAmbiChannels> rotator,
 364     size_t ambi_order)
 365 {
 366     const size_t num_chans{AmbiChannelsFromOrder(ambi_order)};
 367     for(size_t i{0};i < upsampler.size();++i)
 368         output[i].fill(0.0f);
 369     for(size_t i{0};i < upsampler.size();++i)
 370     {
 371         for(size_t k{0};k < num_chans;++k)
 372         {
 373             const float a{upsampler[i][k]};
 374             /* Write the full number of channels. The compiler will have an
 375              * easier time optimizing if it has a fixed length.
 376              */
 377             std::transform(rotator[k].cbegin(), rotator[k].cend(), output[i].cbegin(),
 378                 output[i].begin(), [a](float rot, float dst) noexcept { return rot*a + dst; });
 379         }
 380     }
 381 }
 382
 383
 384 constexpr auto GetAmbiScales(AmbiScaling scaletype) noexcept
 385 {
 386     switch(scaletype)
 387     {
 388     case AmbiScaling::FuMa: return al::span{AmbiScale::FromFuMa};
 389     case AmbiScaling::SN3D: return al::span{AmbiScale::FromSN3D};
 390     case AmbiScaling::UHJ: return al::span{AmbiScale::FromUHJ};
 391     case AmbiScaling::N3D: break;
 392     }
 393     return al::span{AmbiScale::FromN3D};
 394 }
 395
 396 constexpr auto GetAmbiLayout(AmbiLayout layouttype) noexcept
 397 {
 398     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa};
 399     return al::span{AmbiIndex::FromACN};
 400 }
 401
 402 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 403 {
 404     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa2D};
 405     return al::span{AmbiIndex::FromACN2D};
 406 }
 407
 408
 409 bool CalcContextParams(ContextBase *ctx)
 410 {
 411     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 412     if(!props) return false;
 413
 414     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 415     ctx->mParams.Position = pos;
 416
 417     /* AT then UP */
 418     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 419     N.normalize();
 420     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 421     V.normalize();
 422     /* Build and normalize right-vector */
 423     alu::Vector U{N.cross_product(V)};
 424     U.normalize();
 425
 426     const alu::Matrix rot{
 427         U[0], V[0], -N[0], 0.0,
 428         U[1], V[1], -N[1], 0.0,
 429         U[2], V[2], -N[2], 0.0,
 430          0.0,  0.0,   0.0, 1.0};
 431     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 432
 433     ctx->mParams.Matrix = rot;
 434     ctx->mParams.Velocity = rot * vel;
 435
 436     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 437     ctx->mParams.MetersPerUnit = props->MetersPerUnit;
 438     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 439
 440     ctx->mParams.DopplerFactor = props->DopplerFactor;
 441     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity;
 442
 443     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 444     ctx->mParams.mDistanceModel = props->mDistanceModel;
 445
 446     AtomicReplaceHead(ctx->mFreeContextProps, props);
 447     return true;
 448 }
 449
 450 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 451 {
 452     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 453     if(!props) return false;
 454
 455     /* If the effect slot target changed, clear the first sorted entry to force
 456      * a re-sort.
 457      */
 458     if(slot->Target != props->Target)
 459         *sorted_slots = nullptr;
 460     slot->Gain = props->Gain;
 461     slot->AuxSendAuto = props->AuxSendAuto;
 462     slot->Target = props->Target;
 463     slot->EffectType = props->Type;
 464     slot->mEffectProps = props->Props;
 465     if(auto *reverbprops = std::get_if<ReverbProps>(&props->Props))
 466     {
 467         slot->RoomRolloff = reverbprops->RoomRolloffFactor;
 468         slot->DecayTime = reverbprops->DecayTime;
 469         slot->DecayLFRatio = reverbprops->DecayLFRatio;
 470         slot->DecayHFRatio = reverbprops->DecayHFRatio;
 471         slot->DecayHFLimit = reverbprops->DecayHFLimit;
 472         slot->AirAbsorptionGainHF = reverbprops->AirAbsorptionGainHF;
 473     }
 474     else
 475     {
 476         slot->RoomRolloff = 0.0f;
 477         slot->DecayTime = 0.0f;
 478         slot->DecayLFRatio = 0.0f;
 479         slot->DecayHFRatio = 0.0f;
 480         slot->DecayHFLimit = false;
 481         slot->AirAbsorptionGainHF = 1.0f;
 482     }
 483
 484     EffectState *state{props->State.release()};
 485     EffectState *oldstate{slot->mEffectState.release()};
 486     slot->mEffectState.reset(state);
 487
 488     /* Only release the old state if it won't get deleted, since we can't be
 489      * deleting/freeing anything in the mixer.
 490      */
 491     if(!oldstate->releaseIfNoDelete())
 492     {
 493         /* Otherwise, if it would be deleted send it off with a release event. */
 494         RingBuffer *ring{context->mAsyncEvents.get()};
 495         auto evt_vec = ring->getWriteVector();
 496         if(evt_vec.first.len > 0) LIKELY
 497         {
 498             auto &evt = InitAsyncEvent<AsyncEffectReleaseEvent>(evt_vec.first.buf);
 499             evt.mEffectState = oldstate;
 500             ring->writeAdvance(1);
 501         }
 502         else
 503         {
 504             /* If writing the event failed, the queue was probably full. Store
 505              * the old state in the property object where it can eventually be
 506              * cleaned up sometime later (not ideal, but better than blocking
 507              * or leaking).
 508              */
 509             props->State.reset(oldstate);
 510         }
 511     }
 512
 513     AtomicReplaceHead(context->mFreeEffectSlotProps, props);
 514
 515     const auto output = [slot,context]() -> EffectTarget
 516     {
 517         if(EffectSlot *target{slot->Target})
 518             return EffectTarget{&target->Wet, nullptr};
 519         DeviceBase *device{context->mDevice};
 520         return EffectTarget{&device->Dry, &device->RealOut};
 521     }();
 522     state->update(context, slot, &slot->mEffectProps, output);
 523     return true;
 524 }
 525
 526
 527 /* Scales the azimuth of the given vector by 3 if it's in front. Effectively
 528  * scales +/-30 degrees to +/-90 degrees, leaving > +90 and < -90 alone.
 529  */
 530 inline std::array<float,3> ScaleAzimuthFront3(std::array<float,3> pos)
 531 {
 532     if(pos[2] < 0.0f)
 533     {
 534         /* Normalize the length of the x,z components for a 2D vector of the
 535          * azimuth angle. Negate Z since {0,0,-1} is angle 0.
 536          */
 537         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 538         float x{pos[0] / len2d};
 539         float z{-pos[2] / len2d};
 540
 541         /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
 542         if(z > 0.866025403785f)
 543         {
 544             /* Triple the angle represented by x,z. */
 545             x = x*3.0f - x*x*x*4.0f;
 546             z = z*z*z*4.0f - z*3.0f;
 547
 548             /* Scale the vector back to fit in 3D. */
 549             pos[0] = x * len2d;
 550             pos[2] = -z * len2d;
 551         }
 552         else
 553         {
 554             /* If azimuth >= 30 degrees, clamp to 90 degrees. */
 555             pos[0] = std::copysign(len2d, pos[0]);
 556             pos[2] = 0.0f;
 557         }
 558     }
 559     return pos;
 560 }
 561
 562 /* Scales the azimuth of the given vector by 1.5 (3/2) if it's in front. */
 563 inline std::array<float,3> ScaleAzimuthFront3_2(std::array<float,3> pos)
 564 {
 565     if(pos[2] < 0.0f)
 566     {
 567         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 568         float x{pos[0] / len2d};
 569         float z{-pos[2] / len2d};
 570
 571         /* Z > cos(pi/3) = -60 < azimuth < 60 degrees. */
 572         if(z > 0.5f)
 573         {
 574             /* Halve the angle represented by x,z. */
 575             x = std::copysign(std::sqrt((1.0f - z) * 0.5f), x);
 576             z = std::sqrt((1.0f + z) * 0.5f);
 577
 578             /* Triple the angle represented by x,z. */
 579             x = x*3.0f - x*x*x*4.0f;
 580             z = z*z*z*4.0f - z*3.0f;
 581
 582             /* Scale the vector back to fit in 3D. */
 583             pos[0] = x * len2d;
 584             pos[2] = -z * len2d;
 585         }
 586         else
 587         {
 588             /* If azimuth >= 60 degrees, clamp to 90 degrees. */
 589             pos[0] = std::copysign(len2d, pos[0]);
 590             pos[2] = 0.0f;
 591         }
 592     }
 593     return pos;
 594 }
 595
 596
 597 /* Begin ambisonic rotation helpers.
 598  *
 599  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 600  * matrix. Higher orders, however, are more complicated. The method implemented
 601  * here is a recursive algorithm (the rotation for first-order is used to help
 602  * generate the second-order rotation, which helps generate the third-order
 603  * rotation, etc).
 604  *
 605  * Adapted from
 606  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 607  * provided under the BSD 3-Clause license.
 608  *
 609  * Copyright (c) 2015, Archontis Politis
 610  * Copyright (c) 2019, Christopher Robinson
 611  *
 612  * The u, v, and w coefficients used for generating higher-order rotations are
 613  * precomputed since they're constant. The second-order coefficients are
 614  * followed by the third-order coefficients, etc.
 615  */
 616 constexpr size_t CalcRotatorSize(size_t l) noexcept
 617 {
 618     if(l >= 2)
 619         return (l*2 + 1)*(l*2 + 1) + CalcRotatorSize(l-1);
 620     return 0;
 621 }
 622
 623 struct RotatorCoeffs {
 624     struct CoeffValues {
 625         float u, v, w;
 626     };
 627     std::array<CoeffValues,CalcRotatorSize(MaxAmbiOrder)> mCoeffs{};
 628
 629     RotatorCoeffs()
 630     {
 631         auto coeffs = mCoeffs.begin();
 632
 633         for(int l=2;l <= MaxAmbiOrder;++l)
 634         {
 635             for(int n{-l};n <= l;++n)
 636             {
 637                 for(int m{-l};m <= l;++m)
 638                 {
 639                     /* compute u,v,w terms of Eq.8.1 (Table I)
 640                      *
 641                      * const bool d{m == 0}; // the delta function d_m0
 642                      * const double denom{(std::abs(n) == l) ?
 643                      *     (2*l) * (2*l - 1) : (l*l - n*n)};
 644                      *
 645                      * const int abs_m{std::abs(m)};
 646                      * coeffs->u = std::sqrt((l*l - m*m) / denom);
 647                      * coeffs->v = std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 648                      *     (1.0+d) * (1.0 - 2.0*d) * 0.5;
 649                      * coeffs->w = std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 650                      *     (1.0-d) * -0.5;
 651                      */
 652
 653                     const double denom{static_cast<double>((std::abs(n) == l) ?
 654                           (2*l) * (2*l - 1) : (l*l - n*n))};
 655
 656                     if(m == 0)
 657                     {
 658                         coeffs->u = static_cast<float>(std::sqrt(l * l / denom));
 659                         coeffs->v = static_cast<float>(std::sqrt((l-1) * l / denom) * -1.0);
 660                         coeffs->w = 0.0f;
 661                     }
 662                     else
 663                     {
 664                         const int abs_m{std::abs(m)};
 665                         coeffs->u = static_cast<float>(std::sqrt((l*l - m*m) / denom));
 666                         coeffs->v = static_cast<float>(std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 667                             0.5);
 668                         coeffs->w = static_cast<float>(std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 669                             -0.5);
 670                     }
 671                     ++coeffs;
 672                 }
 673             }
 674         }
 675     }
 676 };
 677 const RotatorCoeffs RotatorCoeffArray{};
 678
/**
 * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 * coefficients, this fills in the coefficients for the higher orders up to and
 * including the given order. The matrix is in ACN layout.
 *
 * Each band l >= 2 is computed from the first-order block (rows/columns 1..3)
 * and the previously computed band l-1, using the precomputed u, v, and w
 * coefficients in RotatorCoeffArray. Nothing is written for order < 2.
 */
void AmbiRotator(AmbiRotateMatrix &matrix, const int order)
{
    /* Don't do anything for < 2nd order. */
    if(order < 2) return;

    /* Helper combining first-order entries with entries of the previous band.
     * The three first-order values are read from rows 3, 1, and 2 (written as
     * 1+2, -1+2, and 0+2) at column i+2. last_band is the index where the
     * previous band (l-1) starts, and y selects the column within that band
     * from a.
     */
    auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
        const AmbiRotateMatrix &R)
    {
        const float ri1{ R[ 1+2][static_cast<size_t>(i+2_z)]};
        const float rim1{R[-1+2][static_cast<size_t>(i+2_z)]};
        const float ri0{ R[ 0+2][static_cast<size_t>(i+2_z)]};

        const size_t y{last_band + static_cast<size_t>(a+l-1)};
        /* The two edge cases (n == -l and n == l) use the first and last rows
         * of the previous band; everything in between uses a single row.
         */
        if(n == -l)
            return ri1*R[last_band][y] + rim1*R[last_band + static_cast<size_t>(l-1_z)*2][y];
        if(n == l)
            return ri1*R[last_band + static_cast<size_t>(l-1_z)*2][y] - rim1*R[last_band][y];
        return ri0*R[last_band + static_cast<size_t>(l-1_z+n)][y];
    };

    /* The U term is P evaluated with i = 0. */
    auto U = [P](const int l, const int m, const int n, const size_t last_band,
        const AmbiRotateMatrix &R)
    {
        return P(0, l, m, n, last_band, R);
    };
    /* The V term combines P with i = 1 and i = -1, with special handling for
     * m == 1 and m == -1 where only one P value is used, scaled by sqrt(2).
     */
    auto V = [P](const int l, const int m, const int n, const size_t last_band,
        const AmbiRotateMatrix &R)
    {
        using namespace al::numbers;
        if(m > 0)
        {
            const bool d{m == 1};
            const float p0{P( 1, l,  m-1, n, last_band, R)};
            const float p1{P(-1, l, -m+1, n, last_band, R)};
            return d ? p0*sqrt2_v<float> : (p0 - p1);
        }
        const bool d{m == -1};
        const float p0{P( 1, l,  m+1, n, last_band, R)};
        const float p1{P(-1, l, -m-1, n, last_band, R)};
        return d ? p1*sqrt2_v<float> : (p0 + p1);
    };
    /* The W term also combines P with i = 1 and i = -1. It must not be called
     * with m == 0; the main loop below skips it there since the stored w
     * coefficient is 0 for m == 0.
     */
    auto W = [P](const int l, const int m, const int n, const size_t last_band,
        const AmbiRotateMatrix &R)
    {
        assert(m != 0);
        if(m > 0)
        {
            const float p0{P( 1, l,  m+1, n, last_band, R)};
            const float p1{P(-1, l, -m-1, n, last_band, R)};
            return p0 + p1;
        }
        const float p0{P( 1, l,  m-1, n, last_band, R)};
        const float p1{P(-1, l, -m+1, n, last_band, R)};
        return p0 - p1;
    };

    // compute rotation matrix of each subsequent band recursively
    /* coeffs walks the precomputed table in the same l, n, m order that the
     * RotatorCoeffs constructor stores it in. band_idx is the index of the
     * first channel of band l (4 for second-order), and last_band the same
     * for band l-1 (1 for first-order).
     */
    auto coeffs = RotatorCoeffArray.mCoeffs.cbegin();
    size_t band_idx{4}, last_band{1};
    for(int l{2};l <= order;++l)
    {
        size_t y{band_idx};
        for(int n{-l};n <= l;++n,++y)
        {
            size_t x{band_idx};
            for(int m{-l};m <= l;++m,++x)
            {
                float r{0.0f};

                // computes Eq.8.1
                /* Terms with a zero coefficient are skipped entirely, so the
                 * corresponding helper isn't evaluated for them.
                 */
                if(const float u{coeffs->u}; u != 0.0f)
                    r += u * U(l, m, n, last_band, matrix);
                if(const float v{coeffs->v}; v != 0.0f)
                    r += v * V(l, m, n, last_band, matrix);
                if(const float w{coeffs->w}; w != 0.0f)
                    r += w * W(l, m, n, last_band, matrix);

                matrix[y][x] = r;
                ++coeffs;
            }
        }
        /* Band l has 2*l + 1 channels; step to the start of the next band. */
        last_band = band_idx;
        band_idx += static_cast<uint>(l)*2_uz + 1;
    }
}
 769 /* End ambisonic rotation helpers. */
 770
 771
/* Sine and cosine values for 30, 45, and 110 degrees, used to build the
 * channel position maps in CalcPanningAndFilters.
 */
constexpr float sin30{0.5f};
constexpr float cos30{0.866025403785f};
constexpr float sin45{al::numbers::sqrt2_v<float>*0.5f};
constexpr float cos45{al::numbers::sqrt2_v<float>*0.5f};
constexpr float sin110{ 0.939692620786f};
constexpr float cos110{-0.342020143326f};
 778
/* Associates an input channel with a 3-component position. In the maps built
 * in CalcPanningAndFilters, FrontCenter is placed at {0, 0, -1}, so -Z is
 * forward there.
 */
struct ChanPosMap {
    Channel channel;
    std::array<float,3> pos;
};
 783
 784
/* A set of three gains, passed to CalcPanningAndFilters as the dry and
 * per-send wet gains. NOTE(review): presumably Base is the full-band gain and
 * HF/LF the high- and low-frequency gains - confirm against the callers.
 */
struct GainTriplet { float Base, HF, LF; };
 786
 787 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 788     const float Distance, const float Spread, const GainTriplet &DryGain,
 789     const al::span<const GainTriplet,MaxSendCount> WetGain,
 790     const al::span<EffectSlot*,MaxSendCount> SendSlots, const VoiceProps *props,
 791     const ContextParams &Context, DeviceBase *Device)
 792 {
 793     static constexpr std::array MonoMap{
 794         ChanPosMap{FrontCenter, std::array{0.0f, 0.0f, -1.0f}}
 795     };
 796     static constexpr std::array RearMap{
 797         ChanPosMap{BackLeft,  std::array{-sin30, 0.0f, cos30}},
 798         ChanPosMap{BackRight, std::array{ sin30, 0.0f, cos30}},
 799     };
 800     static constexpr std::array QuadMap{
 801         ChanPosMap{FrontLeft,  std::array{-sin45, 0.0f, -cos45}},
 802         ChanPosMap{FrontRight, std::array{ sin45, 0.0f, -cos45}},
 803         ChanPosMap{BackLeft,   std::array{-sin45, 0.0f,  cos45}},
 804         ChanPosMap{BackRight,  std::array{ sin45, 0.0f,  cos45}},
 805     };
 806     static constexpr std::array X51Map{
 807         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 808         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 809         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 810         ChanPosMap{LFE, {}},
 811         ChanPosMap{SideLeft,    std::array{-sin110, 0.0f, -cos110}},
 812         ChanPosMap{SideRight,   std::array{ sin110, 0.0f, -cos110}},
 813     };
 814     static constexpr std::array X61Map{
 815         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 816         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 817         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 818         ChanPosMap{LFE, {}},
 819         ChanPosMap{BackCenter,  std::array{ 0.0f, 0.0f, 1.0f}},
 820         ChanPosMap{SideLeft,    std::array{-1.0f, 0.0f, 0.0f}},
 821         ChanPosMap{SideRight,   std::array{ 1.0f, 0.0f, 0.0f}},
 822     };
 823     static constexpr std::array X71Map{
 824         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 825         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 826         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 827         ChanPosMap{LFE, {}},
 828         ChanPosMap{BackLeft,    std::array{-sin30, 0.0f, cos30}},
 829         ChanPosMap{BackRight,   std::array{ sin30, 0.0f, cos30}},
 830         ChanPosMap{SideLeft,    std::array{ -1.0f, 0.0f, 0.0f}},
 831         ChanPosMap{SideRight,   std::array{  1.0f, 0.0f, 0.0f}},
 832     };
 833
 834     std::array StereoMap{
 835         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 836         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 837     };
 838
 839     const auto Frequency = static_cast<float>(Device->Frequency);
 840     const uint NumSends{Device->NumAuxSends};
 841
 842     const size_t num_channels{voice->mChans.size()};
 843     ASSUME(num_channels > 0);
 844
 845     for(auto &chandata : voice->mChans)
 846     {
 847         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 848         chandata.mDryParams.Gains.Target.fill(0.0f);
 849         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 850             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 851     }
 852
 853     const auto getChans = [props,&StereoMap](FmtChannels chanfmt) noexcept
 854         -> std::pair<DirectMode,al::span<const ChanPosMap>>
 855     {
 856         switch(chanfmt)
 857         {
 858         case FmtMono:
 859             /* Mono buffers are never played direct. */
 860             return {DirectMode::Off, al::span{MonoMap}};
 861
 862         case FmtStereo:
 863         case FmtMonoDup:
 864             if(props->DirectChannels == DirectMode::Off)
 865             {
 866                 for(size_t i{0};i < 2;++i)
 867                 {
 868                     /* StereoPan is counter-clockwise in radians. */
 869                     const float a{props->StereoPan[i]};
 870                     StereoMap[i].pos[0] = -std::sin(a);
 871                     StereoMap[i].pos[2] = -std::cos(a);
 872                 }
 873             }
 874             return {props->DirectChannels, al::span{StereoMap}};
 875
 876         case FmtRear: return {props->DirectChannels, al::span{RearMap}};
 877         case FmtQuad: return {props->DirectChannels, al::span{QuadMap}};
 878         case FmtX51: return {props->DirectChannels, al::span{X51Map}};
 879         case FmtX61: return {props->DirectChannels, al::span{X61Map}};
 880         case FmtX71: return {props->DirectChannels, al::span{X71Map}};
 881
 882         case FmtBFormat2D:
 883         case FmtBFormat3D:
 884         case FmtUHJ2:
 885         case FmtUHJ3:
 886         case FmtUHJ4:
 887         case FmtSuperStereo:
 888             return {DirectMode::Off, {}};
 889         }
 890         return {props->DirectChannels, {}};
 891     };
 892     const auto [DirectChannels,chans] = getChans(voice->mFmtChannels);
 893
 894     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 895     if(auto *decoder{voice->mDecoder.get()})
 896         decoder->mWidthControl = std::min(props->EnhWidth, 0.7f);
 897
 898     const float lgain{std::min(1.0f-props->Panning, 1.0f)};
 899     const float rgain{std::min(1.0f+props->Panning, 1.0f)};
 900     const float mingain{std::min(lgain, rgain)};
 901     auto SelectChannelGain = [lgain,rgain,mingain](const Channel chan) noexcept
 902     {
 903         switch(chan)
 904         {
 905         case FrontLeft: return lgain;
 906         case FrontRight: return rgain;
 907         case FrontCenter: break;
 908         case LFE: break;
 909         case BackLeft: return lgain;
 910         case BackRight: return rgain;
 911         case BackCenter: break;
 912         case SideLeft: return lgain;
 913         case SideRight: return rgain;
 914         case TopCenter: break;
 915         case TopFrontLeft: return lgain;
 916         case TopFrontCenter: break;
 917         case TopFrontRight: return rgain;
 918         case TopBackLeft: return lgain;
 919         case TopBackCenter: break;
 920         case TopBackRight: return rgain;
 921         case BottomFrontLeft: return lgain;
 922         case BottomFrontRight: return rgain;
 923         case BottomBackLeft: return lgain;
 924         case BottomBackRight: return rgain;
 925         case Aux0: case Aux1: case Aux2: case Aux3: case Aux4: case Aux5: case Aux6: case Aux7:
 926         case Aux8: case Aux9: case Aux10: case Aux11: case Aux12: case Aux13: case Aux14:
 927         case Aux15: case MaxChannels: break;
 928         }
 929         return mingain;
 930     };
 931
 932     if(IsAmbisonic(voice->mFmtChannels))
 933     {
 934         /* Special handling for B-Format and UHJ sources. */
 935
 936         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 937             && voice->mFmtChannels != FmtSuperStereo)
 938         {
 939             if(!(Distance > std::numeric_limits<float>::epsilon()))
 940             {
 941                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 942                  * is what we want for FOA input. The first channel may have
 943                  * been previously re-adjusted if panned, so reset it.
 944                  */
 945                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 946             }
 947             else
 948             {
 949                 /* Clamp the distance for really close sources, to prevent
 950                  * excessive bass.
 951                  */
 952                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
 953                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 954
 955                 /* Only need to adjust the first channel of a B-Format source. */
 956                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 957             }
 958
 959             voice->mFlags.set(VoiceHasNfc);
 960         }
 961
 962         /* Panning a B-Format sound toward some direction is easy. Just pan the
 963          * first (W) channel as a normal mono sound. The angular spread is used
 964          * as a directional scalar to blend between full coverage and full
 965          * panning.
 966          */
 967         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 968             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 969
 970         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 971         {
 972             if(mode != RenderMode::Pairwise)
 973                 return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, 0.0f);
 974             const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
 975             return CalcDirectionCoeffs(pos, 0.0f);
 976         };
 977         const auto scales = GetAmbiScales(voice->mAmbiScaling);
 978         auto coeffs = calc_coeffs(Device->mRenderMode);
 979
 980         if(!(coverage > 0.0f))
 981         {
 982             ComputePanGains(&Device->Dry, coeffs, DryGain.Base*scales[0],
 983                 voice->mChans[0].mDryParams.Gains.Target);
 984             for(uint i{0};i < NumSends;i++)
 985             {
 986                 if(const EffectSlot *Slot{SendSlots[i]})
 987                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base*scales[0],
 988                         voice->mChans[0].mWetParams[i].Gains.Target);
 989             }
 990         }
 991         else
 992         {
 993             /* Local B-Format sources have their XYZ channels rotated according
 994              * to the orientation.
 995              */
 996             /* AT then UP */
 997             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 998             N.normalize();
 999             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
1000             V.normalize();
1001             if(!props->HeadRelative)
1002             {
1003                 N = Context.Matrix * N;
1004                 V = Context.Matrix * V;
1005             }
1006             /* Build and normalize right-vector */
1007             alu::Vector U{N.cross_product(V)};
1008             U.normalize();
1009
1010             /* Build a rotation matrix. Manually fill the zeroth- and first-
1011              * order elements, then construct the rotation for the higher
1012              * orders.
1013              */
1014             AmbiRotateMatrix &shrot = Device->mAmbiRotateMatrix;
1015             shrot.fill(AmbiRotateMatrix::value_type{});
1016
1017             shrot[0][0] = 1.0f;
1018             shrot[1][1] =  U[0]; shrot[1][2] = -U[1]; shrot[1][3] =  U[2];
1019             shrot[2][1] = -V[0]; shrot[2][2] =  V[1]; shrot[2][3] = -V[2];
1020             shrot[3][1] = -N[0]; shrot[3][2] =  N[1]; shrot[3][3] = -N[2];
1021             AmbiRotator(shrot, static_cast<int>(Device->mAmbiOrder));
1022
1023             /* If the device is higher order than the voice, "upsample" the
1024              * matrix.
1025              *
1026              * NOTE: Starting with second-order, a 2D upsample needs to be
1027              * applied with a 2D source and 3D output, even when they're the
1028              * same order. This is because higher orders have a height offset
1029              * on various channels (i.e. when elevation=0, those height-related
1030              * channels should be non-0).
1031              */
1032             AmbiRotateMatrix &mixmatrix = Device->mAmbiRotateMatrix2;
1033             if(Device->mAmbiOrder > voice->mAmbiOrder
1034                 || (Device->mAmbiOrder >= 2 && !Device->m2DMixing
1035                     && Is2DAmbisonic(voice->mFmtChannels)))
1036             {
1037                 if(voice->mAmbiOrder == 1)
1038                 {
1039                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1040                         al::span{AmbiScale::FirstOrder2DUp} : al::span{AmbiScale::FirstOrderUp};
1041                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1042                 }
1043                 else if(voice->mAmbiOrder == 2)
1044                 {
1045                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1046                         al::span{AmbiScale::SecondOrder2DUp} : al::span{AmbiScale::SecondOrderUp};
1047                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1048                 }
1049                 else if(voice->mAmbiOrder == 3)
1050                 {
1051                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1052                         al::span{AmbiScale::ThirdOrder2DUp} : al::span{AmbiScale::ThirdOrderUp};
1053                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1054                 }
1055                 else if(voice->mAmbiOrder == 4)
1056                 {
1057                     const auto upsampler = al::span{AmbiScale::FourthOrder2DUp};
1058                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1059                 }
1060                 else
1061                     al::unreachable();
1062             }
1063             else
1064                 mixmatrix = shrot;
1065
1066             /* Convert the rotation matrix for input ordering and scaling, and
1067              * whether input is 2D or 3D.
1068              */
1069             const auto index_map = Is2DAmbisonic(voice->mFmtChannels) ?
1070                 GetAmbi2DLayout(voice->mAmbiLayout).subspan(0) :
1071                 GetAmbiLayout(voice->mAmbiLayout).subspan(0);
1072
1073             /* Scale the panned W signal inversely to coverage (full coverage
1074              * means no panned signal), and according to the channel scaling.
1075              */
1076             std::for_each(coeffs.begin(), coeffs.end(),
1077                 [scale=(1.0f-coverage)*scales[0]](float &coeff) noexcept { coeff *= scale; });
1078
1079             for(size_t c{0};c < num_channels;c++)
1080             {
1081                 const size_t acn{index_map[c]};
1082                 const float scale{scales[acn] * coverage};
1083
1084                 /* For channel 0, combine the B-Format signal (scaled according
1085                  * to the coverage amount) with the directional pan. For all
1086                  * other channels, use just the (scaled) B-Format signal.
1087                  */
1088                 std::transform(mixmatrix[acn].cbegin(), mixmatrix[acn].cend(), coeffs.begin(),
1089                     coeffs.begin(), [scale](const float in, const float coeff) noexcept
1090                     { return in*scale + coeff; });
1091
1092                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1093                     voice->mChans[c].mDryParams.Gains.Target);
1094
1095                 for(uint i{0};i < NumSends;i++)
1096                 {
1097                     if(const EffectSlot *Slot{SendSlots[i]})
1098                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1099                             voice->mChans[c].mWetParams[i].Gains.Target);
1100                 }
1101
1102                 coeffs = std::array<float,MaxAmbiChannels>{};
1103             }
1104         }
1105     }
1106     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
1107     {
1108         /* Direct source channels always play local. Skip the virtual channels
1109          * and write inputs to the matching real outputs.
1110          */
1111         voice->mDirect.Buffer = Device->RealOut.Buffer;
1112
1113         for(size_t c{0};c < num_channels;c++)
1114         {
1115             const float pangain{SelectChannelGain(chans[c].channel)};
1116             if(uint idx{Device->channelIdxByName(chans[c].channel)}; idx != InvalidChannelIndex)
1117                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1118             else if(DirectChannels == DirectMode::RemixMismatch)
1119             {
1120                 auto match_channel = [channel=chans[c].channel](const InputRemixMap &map) noexcept
1121                 { return channel == map.channel; };
1122                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
1123                     Device->RealOut.RemixMap.cend(), match_channel);
1124                 if(remap != Device->RealOut.RemixMap.cend())
1125                 {
1126                     for(const auto &target : remap->targets)
1127                     {
1128                         idx = Device->channelIdxByName(target.channel);
1129                         if(idx != InvalidChannelIndex)
1130                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain
1131                                 * target.mix;
1132                     }
1133                 }
1134             }
1135         }
1136
1137         /* Auxiliary sends still use normal channel panning since they mix to
1138          * B-Format, which can't channel-match.
1139          */
1140         for(size_t c{0};c < num_channels;c++)
1141         {
1142             /* Skip LFE */
1143             if(chans[c].channel == LFE)
1144                 continue;
1145
1146             const float pangain{SelectChannelGain(chans[c].channel)};
1147             const auto coeffs = CalcDirectionCoeffs(chans[c].pos, 0.0f);
1148
1149             for(uint i{0};i < NumSends;i++)
1150             {
1151                 if(const EffectSlot *Slot{SendSlots[i]})
1152                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1153                         voice->mChans[c].mWetParams[i].Gains.Target);
1154             }
1155         }
1156     }
1157     else if(Device->mRenderMode == RenderMode::Hrtf)
1158     {
1159         /* Full HRTF rendering. Skip the virtual channels and render to the
1160          * real outputs.
1161          */
1162         voice->mDirect.Buffer = Device->RealOut.Buffer;
1163
1164         if(Distance > std::numeric_limits<float>::epsilon())
1165         {
1166             if(voice->mFmtChannels == FmtMono)
1167             {
1168                 const float src_ev{std::asin(std::clamp(ypos, -1.0f, 1.0f))};
1169                 const float src_az{std::atan2(xpos, -zpos)};
1170
1171                 Device->mHrtf->getCoeffs(src_ev, src_az, Distance*NfcScale, Spread,
1172                     voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
1173                     voice->mChans[0].mDryParams.Hrtf.Target.Delay);
1174                 voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1175
1176                 const auto coeffs = CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1177                 for(uint i{0};i < NumSends;i++)
1178                 {
1179                     if(const EffectSlot *Slot{SendSlots[i]})
1180                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1181                             voice->mChans[0].mWetParams[i].Gains.Target);
1182                 }
1183             }
1184             else for(size_t c{0};c < num_channels;c++)
1185             {
1186                 using namespace al::numbers;
1187
1188                 /* Skip LFE */
1189                 if(chans[c].channel == LFE) continue;
1190                 const float pangain{SelectChannelGain(chans[c].channel)};
1191
1192                 /* Warp the channel position toward the source position as the
1193                  * source spread decreases. With no spread, all channels are at
1194                  * the source position, at full spread (pi*2), each channel is
1195                  * left unchanged.
1196                  */
1197                 const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1198                 std::array pos{
1199                     lerpf(chans[c].pos[0], xpos, a),
1200                     lerpf(chans[c].pos[1], ypos, a),
1201                     lerpf(chans[c].pos[2], zpos, a)};
1202                 const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1203                 if(len < 1.0f)
1204                 {
1205                     pos[0] /= len;
1206                     pos[1] /= len;
1207                     pos[2] /= len;
1208                 }
1209
1210                 const float ev{std::asin(std::clamp(pos[1], -1.0f, 1.0f))};
1211                 const float az{std::atan2(pos[0], -pos[2])};
1212
1213                 Device->mHrtf->getCoeffs(ev, az, Distance*NfcScale, 0.0f,
1214                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1215                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1216                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1217
1218                 const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1219                 for(uint i{0};i < NumSends;i++)
1220                 {
1221                     if(const EffectSlot *Slot{SendSlots[i]})
1222                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1223                             voice->mChans[c].mWetParams[i].Gains.Target);
1224                 }
1225             }
1226         }
1227         else
1228         {
1229             /* With no distance, spread is only meaningful for mono sources
1230              * where it can be 0 or full (non-mono sources are always full
1231              * spread here).
1232              */
1233             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1234
1235             /* Local sources on HRTF play with each channel panned to its
1236              * relative location around the listener, providing "virtual
1237              * speaker" responses.
1238              */
1239             for(size_t c{0};c < num_channels;c++)
1240             {
1241                 /* Skip LFE */
1242                 if(chans[c].channel == LFE)
1243                     continue;
1244                 const float pangain{SelectChannelGain(chans[c].channel)};
1245
1246                 /* Get the HRIR coefficients and delays for this channel
1247                  * position.
1248                  */
1249                 const float ev{std::asin(chans[c].pos[1])};
1250                 const float az{std::atan2(chans[c].pos[0], -chans[c].pos[2])};
1251
1252                 Device->mHrtf->getCoeffs(ev, az, std::numeric_limits<float>::infinity(), spread,
1253                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1254                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1255                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1256
1257                 /* Normal panning for auxiliary sends. */
1258                 const auto coeffs = CalcDirectionCoeffs(chans[c].pos, spread);
1259
1260                 for(uint i{0};i < NumSends;i++)
1261                 {
1262                     if(const EffectSlot *Slot{SendSlots[i]})
1263                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1264                             voice->mChans[c].mWetParams[i].Gains.Target);
1265                 }
1266             }
1267         }
1268
1269         voice->mFlags.set(VoiceHasHrtf);
1270     }
1271     else
1272     {
1273         /* Non-HRTF rendering. Use normal panning to the output. */
1274
1275         if(Distance > std::numeric_limits<float>::epsilon())
1276         {
1277             /* Calculate NFC filter coefficient if needed. */
1278             if(Device->AvgSpeakerDist > 0.0f)
1279             {
1280                 /* Clamp the distance for really close sources, to prevent
1281                  * excessive bass.
1282                  */
1283                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
1284                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1285
1286                 /* Adjust NFC filters. */
1287                 for(size_t c{0};c < num_channels;c++)
1288                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1289
1290                 voice->mFlags.set(VoiceHasNfc);
1291             }
1292
1293             if(voice->mFmtChannels == FmtMono)
1294             {
1295                 auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1296                 {
1297                     if(mode != RenderMode::Pairwise)
1298                         return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1299                     const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
1300                     return CalcDirectionCoeffs(pos, Spread);
1301                 };
1302                 const auto coeffs = calc_coeffs(Device->mRenderMode);
1303
1304                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1305                     voice->mChans[0].mDryParams.Gains.Target);
1306                 for(uint i{0};i < NumSends;i++)
1307                 {
1308                     if(const EffectSlot *Slot{SendSlots[i]})
1309                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1310                             voice->mChans[0].mWetParams[i].Gains.Target);
1311                 }
1312             }
1313             else
1314             {
1315                 using namespace al::numbers;
1316
1317                 for(size_t c{0};c < num_channels;c++)
1318                 {
1319                     const float pangain{SelectChannelGain(chans[c].channel)};
1320
1321                     /* Special-case LFE */
1322                     if(chans[c].channel == LFE)
1323                     {
1324                         if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1325                         {
1326                             const uint idx{Device->channelIdxByName(chans[c].channel)};
1327                             if(idx != InvalidChannelIndex)
1328                                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base
1329                                     * pangain;
1330                         }
1331                         continue;
1332                     }
1333
1334                     /* Warp the channel position toward the source position as
1335                      * the spread decreases. With no spread, all channels are
1336                      * at the source position, at full spread (pi*2), each
1337                      * channel position is left unchanged.
1338                      */
1339                     const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1340                     std::array pos{
1341                         lerpf(chans[c].pos[0], xpos, a),
1342                         lerpf(chans[c].pos[1], ypos, a),
1343                         lerpf(chans[c].pos[2], zpos, a)};
1344                     const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1345                     if(len < 1.0f)
1346                     {
1347                         pos[0] /= len;
1348                         pos[1] /= len;
1349                         pos[2] /= len;
1350                     }
1351
1352                     if(Device->mRenderMode == RenderMode::Pairwise)
1353                         pos = ScaleAzimuthFront3(pos);
1354                     const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1355
1356                     ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1357                         voice->mChans[c].mDryParams.Gains.Target);
1358                     for(uint i{0};i < NumSends;i++)
1359                     {
1360                         if(const EffectSlot *Slot{SendSlots[i]})
1361                             ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1362                                 voice->mChans[c].mWetParams[i].Gains.Target);
1363                     }
1364                 }
1365             }
1366         }
1367         else
1368         {
1369             if(Device->AvgSpeakerDist > 0.0f)
1370             {
1371                 /* If the source distance is 0, simulate a plane-wave by using
1372                  * infinite distance, which results in a w0 of 0.
1373                  */
1374                 static constexpr float w0{0.0f};
1375                 for(size_t c{0};c < num_channels;c++)
1376                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1377
1378                 voice->mFlags.set(VoiceHasNfc);
1379             }
1380
1381             /* With no distance, spread is only meaningful for mono sources
1382              * where it can be 0 or full (non-mono sources are always full
1383              * spread here).
1384              */
1385             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1386             for(size_t c{0};c < num_channels;c++)
1387             {
1388                 const float pangain{SelectChannelGain(chans[c].channel)};
1389
1390                 /* Special-case LFE */
1391                 if(chans[c].channel == LFE)
1392                 {
1393                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1394                     {
1395                         const uint idx{Device->channelIdxByName(chans[c].channel)};
1396                         if(idx != InvalidChannelIndex)
1397                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1398                     }
1399                     continue;
1400                 }
1401
1402                 const auto coeffs = CalcDirectionCoeffs((Device->mRenderMode==RenderMode::Pairwise)
1403                     ? ScaleAzimuthFront3(chans[c].pos) : chans[c].pos, spread);
1404
1405                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1406                     voice->mChans[c].mDryParams.Gains.Target);
1407                 for(uint i{0};i < NumSends;i++)
1408                 {
1409                     if(const EffectSlot *Slot{SendSlots[i]})
1410                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1411                             voice->mChans[c].mWetParams[i].Gains.Target);
1412                 }
1413             }
1414         }
1415     }
1416
1417     {
1418         const float hfNorm{props->Direct.HFReference / Frequency};
1419         const float lfNorm{props->Direct.LFReference / Frequency};
1420
1421         voice->mDirect.FilterType = AF_None;
1422         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1423         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1424
1425         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1426         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1427         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1428         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1429         for(size_t c{1};c < num_channels;c++)
1430         {
1431             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1432             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1433         }
1434     }
1435     for(uint i{0};i < NumSends;i++)
1436     {
1437         const float hfNorm{props->Send[i].HFReference / Frequency};
1438         const float lfNorm{props->Send[i].LFReference / Frequency};
1439
1440         voice->mSend[i].FilterType = AF_None;
1441         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1442         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1443
1444         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1445         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1446         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1447         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1448         for(size_t c{1};c < num_channels;c++)
1449         {
1450             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1451             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1452         }
1453     }
1454 }
1455
1456 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1457 {
1458     DeviceBase *Device{context->mDevice};
1459     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1460
1461     voice->mDirect.Buffer = Device->Dry.Buffer;
1462     for(uint i{0};i < Device->NumAuxSends;i++)
1463     {
1464         SendSlots[i] = props->Send[i].Slot;
1465         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1466         {
1467             SendSlots[i] = nullptr;
1468             voice->mSend[i].Buffer = {};
1469         }
1470         else
1471             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1472     }
1473
1474     /* Calculate the stepping value */
1475     const auto Pitch = static_cast<float>(voice->mFrequency) /
1476         static_cast<float>(Device->Frequency) * props->Pitch;
1477     if(Pitch > float{MaxPitch})
1478         voice->mStep = MaxPitch<<MixerFracBits;
1479     else
1480         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1481     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1482
1483     /* Calculate gains */
1484     GainTriplet DryGain{};
1485     DryGain.Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1486         props->Direct.Gain * context->mParams.Gain, GainMixMax);
1487     DryGain.HF = props->Direct.GainHF;
1488     DryGain.LF = props->Direct.GainLF;
1489
1490     std::array<GainTriplet,MaxSendCount> WetGain{};
1491     for(uint i{0};i < Device->NumAuxSends;i++)
1492     {
1493         WetGain[i].Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1494             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1495         WetGain[i].HF = props->Send[i].GainHF;
1496         WetGain[i].LF = props->Send[i].GainLF;
1497     }
1498
1499     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1500         context->mParams, Device);
1501 }
1502
1503 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1504 {
1505     DeviceBase *Device{context->mDevice};
1506     const uint NumSends{Device->NumAuxSends};
1507
1508     /* Set mixing buffers and get send parameters. */
1509     voice->mDirect.Buffer = Device->Dry.Buffer;
1510     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1511     std::array<float,MaxSendCount> RoomRolloff{};
1512     std::bitset<MaxSendCount> UseDryAttnForRoom{0};
1513     for(uint i{0};i < NumSends;i++)
1514     {
1515         SendSlots[i] = props->Send[i].Slot;
1516         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1517             SendSlots[i] = nullptr;
1518         else if(SendSlots[i]->AuxSendAuto)
1519         {
1520             /* NOTE: Contrary to the EFX docs, the effect's room rolloff factor
1521              * applies to the selected distance model along with the source's
1522              * room rolloff factor, not necessarily the inverse distance model.
1523              *
1524              * Generic Software also applies these rolloff factors regardless
1525              * of any setting. It doesn't seem to use the effect slot's send
1526              * auto for anything, though as far as I understand, it's supposed
1527              * to control whether the send gets the same gain/gainhf as the
1528              * direct path (excluding the filter).
1529              */
1530             RoomRolloff[i] = props->RoomRolloffFactor + SendSlots[i]->RoomRolloff;
1531         }
1532         else
1533             UseDryAttnForRoom.set(i);
1534
1535         if(!SendSlots[i])
1536             voice->mSend[i].Buffer = {};
1537         else
1538             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1539     }
1540
1541     /* Transform source to listener space (convert to head relative) */
1542     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1543     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1544     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1545     if(!props->HeadRelative)
1546     {
1547         /* Transform source vectors */
1548         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1549         Velocity = context->mParams.Matrix * Velocity;
1550         Direction = context->mParams.Matrix * Direction;
1551     }
1552     else
1553     {
1554         /* Offset the source velocity to be relative of the listener velocity */
1555         Velocity += context->mParams.Velocity;
1556     }
1557
1558     const bool directional{Direction.normalize() > 0.0f};
1559     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1560     const float Distance{ToSource.normalize()};
1561
1562     /* Calculate distance attenuation */
1563     float ClampedDist{Distance};
1564     float DryGainBase{props->Gain};
1565     std::array<float,MaxSendCount> WetGainBase{};
1566     WetGainBase.fill(props->Gain);
1567
1568     float DryAttnBase{1.0f};
1569     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1570         : context->mParams.mDistanceModel)
1571     {
1572     case DistanceModel::InverseClamped:
1573         if(props->MaxDistance < props->RefDistance) break;
1574         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1575         /*fall-through*/
1576     case DistanceModel::Inverse:
1577         if(props->RefDistance > 0.0f)
1578         {
1579             float dist{lerpf(props->RefDistance, ClampedDist, props->RolloffFactor)};
1580             if(dist > 0.0f)
1581             {
1582                 DryAttnBase = props->RefDistance / dist;
1583                 DryGainBase *= DryAttnBase;
1584             }
1585
1586             for(size_t i{0};i < NumSends;++i)
1587             {
1588                 dist = lerpf(props->RefDistance, ClampedDist, RoomRolloff[i]);
1589                 if(dist > 0.0f) WetGainBase[i] *= props->RefDistance / dist;
1590             }
1591         }
1592         break;
1593
1594     case DistanceModel::LinearClamped:
1595         if(props->MaxDistance < props->RefDistance) break;
1596         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1597         /*fall-through*/
1598     case DistanceModel::Linear:
1599         if(props->MaxDistance != props->RefDistance)
1600         {
1601             float attn{(ClampedDist-props->RefDistance) /
1602                 (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1603             DryAttnBase = std::max(1.0f - attn, 0.0f);
1604             DryGainBase *= DryAttnBase;
1605
1606             for(size_t i{0};i < NumSends;++i)
1607             {
1608                 attn = (ClampedDist-props->RefDistance) /
1609                     (props->MaxDistance-props->RefDistance) * RoomRolloff[i];
1610                 WetGainBase[i] *= std::max(1.0f - attn, 0.0f);
1611             }
1612         }
1613         break;
1614
1615     case DistanceModel::ExponentClamped:
1616         if(props->MaxDistance < props->RefDistance) break;
1617         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1618         /*fall-through*/
1619     case DistanceModel::Exponent:
1620         if(ClampedDist > 0.0f && props->RefDistance > 0.0f)
1621         {
1622             const float dist_ratio{ClampedDist/props->RefDistance};
1623             DryAttnBase = std::pow(dist_ratio, -props->RolloffFactor);
1624             DryGainBase *= DryAttnBase;
1625             for(size_t i{0};i < NumSends;++i)
1626                 WetGainBase[i] *= std::pow(dist_ratio, -RoomRolloff[i]);
1627         }
1628         break;
1629
1630     case DistanceModel::Disable:
1631         break;
1632     }
1633
1634     /* Calculate directional soundcones */
1635     float ConeHF{1.0f}, WetCone{1.0f}, WetConeHF{1.0f};
1636     if(directional && props->InnerAngle < 360.0f)
1637     {
1638         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1639         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1640
1641         float ConeGain{1.0f};
1642         if(Angle >= props->OuterAngle)
1643         {
1644             ConeGain = props->OuterGain;
1645             if(props->DryGainHFAuto)
1646                 ConeHF = props->OuterGainHF;
1647         }
1648         else if(Angle >= props->InnerAngle)
1649         {
1650             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1651             ConeGain = lerpf(1.0f, props->OuterGain, scale);
1652             if(props->DryGainHFAuto)
1653                 ConeHF = lerpf(1.0f, props->OuterGainHF, scale);
1654         }
1655
1656         DryGainBase *= ConeGain;
1657         if(props->WetGainAuto)
1658             WetCone = ConeGain;
1659         if(props->WetGainHFAuto)
1660             WetConeHF = ConeHF;
1661     }
1662
1663     /* Apply gain and frequency filters */
1664     GainTriplet DryGain{};
1665     DryGainBase = std::clamp(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1666     DryGain.Base = std::min(DryGainBase * props->Direct.Gain, GainMixMax);
1667     DryGain.HF = ConeHF * props->Direct.GainHF;
1668     DryGain.LF = props->Direct.GainLF;
1669
1670     std::array<GainTriplet,MaxSendCount> WetGain{};
1671     for(uint i{0};i < NumSends;i++)
1672     {
1673         WetGainBase[i] = std::clamp(WetGainBase[i]*WetCone, props->MinGain, props->MaxGain) *
1674             context->mParams.Gain;
1675         /* If this effect slot's Auxiliary Send Auto is off, then use the dry
1676          * path distance and cone attenuation, otherwise use the wet (room)
1677          * path distance and cone attenuation. The send filter is used instead
1678          * of the direct filter, regardless.
1679          */
1680         const bool use_room{!UseDryAttnForRoom.test(i)};
1681         const float gain{use_room ? WetGainBase[i] : DryGainBase};
1682         WetGain[i].Base = std::min(gain * props->Send[i].Gain, GainMixMax);
1683         WetGain[i].HF = (use_room ? WetConeHF : ConeHF) * props->Send[i].GainHF;
1684         WetGain[i].LF = props->Send[i].GainLF;
1685     }
1686
1687     /* Distance-based air absorption and initial send decay. */
1688     if(Distance > props->RefDistance) LIKELY
1689     {
1690         const float distance_base{(Distance-props->RefDistance) * props->RolloffFactor};
1691         const float distance_meters{distance_base * context->mParams.MetersPerUnit};
1692         const float dryabsorb{distance_meters * props->AirAbsorptionFactor};
1693         if(dryabsorb > std::numeric_limits<float>::epsilon())
1694             DryGain.HF *= std::pow(context->mParams.AirAbsorptionGainHF, dryabsorb);
1695
1696         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1697          * adjustment is applied to the send gains.
1698          */
1699         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1700         {
1701             if(!SendSlots[i] || !(SendSlots[i]->DecayTime > 0.0f))
1702                 continue;
1703
1704             if(distance_meters > std::numeric_limits<float>::epsilon())
1705                 WetGain[i].HF *= std::pow(SendSlots[i]->AirAbsorptionGainHF, distance_meters);
1706
1707             /* If this effect slot's Auxiliary Send Auto is off, don't apply
1708              * the automatic initial reverb decay.
1709              *
1710              * NOTE: Generic Software applies the initial decay regardless of
1711              * this setting. It doesn't seem to use it for anything, only the
1712              * source's send filter gain auto flag affects this.
1713              */
1714             if(!SendSlots[i]->AuxSendAuto)
1715                 continue;
1716
1717             const float DecayDistance{SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec};
1718
1719             /* Apply a decay-time transformation to the wet path, based on the
1720              * source distance. The initial decay of the reverb effect is
1721              * calculated and applied to the wet path.
1722              *
1723              * FIXME: This is very likely not correct. It more likely should
1724              * work by calculating a rolloff dynamically based on the reverb
1725              * parameters (and source distance?) and add it to the room rolloff
1726              * with the reverb and source rolloff parameters.
1727              */
1728             const float baseAttn{DryAttnBase};
1729             const float fact{distance_base / DecayDistance};
1730             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1731             WetGain[i].Base *= gain;
1732         }
1733     }
1734
1735
1736     /* Initial source pitch */
1737     float Pitch{props->Pitch};
1738
1739     /* Calculate velocity-based doppler effect */
1740     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1741     if(DopplerFactor > 0.0f)
1742     {
1743         const alu::Vector &lvelocity = context->mParams.Velocity;
1744         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1745         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1746
1747         const float SpeedOfSound{context->mParams.SpeedOfSound};
1748         if(!(vls < SpeedOfSound))
1749         {
1750             /* Listener moving away from the source at the speed of sound.
1751              * Sound waves can't catch it.
1752              */
1753             Pitch = 0.0f;
1754         }
1755         else if(!(vss < SpeedOfSound))
1756         {
1757             /* Source moving toward the listener at the speed of sound. Sound
1758              * waves bunch up to extreme frequencies.
1759              */
1760             Pitch = std::numeric_limits<float>::infinity();
1761         }
1762         else
1763         {
1764             /* Source and listener movement is nominal. Calculate the proper
1765              * doppler shift.
1766              */
1767             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1768         }
1769     }
1770
1771     /* Adjust pitch based on the buffer and output frequencies, and calculate
1772      * fixed-point stepping value.
1773      */
1774     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1775     if(Pitch > float{MaxPitch})
1776         voice->mStep = MaxPitch<<MixerFracBits;
1777     else
1778         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1779     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1780
1781     float spread{0.0f};
1782     if(props->Radius > Distance)
1783         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1784     else if(Distance > 0.0f)
1785         spread = std::asin(props->Radius/Distance) * 2.0f;
1786
1787     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1788         Distance, spread, DryGain, WetGain, SendSlots, props, context->mParams, Device);
1789 }
1790
1791 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1792 {
1793     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1794     if(!props && !force) return;
1795
1796     if(props)
1797     {
1798         voice->mProps = static_cast<VoiceProps&>(*props);
1799
1800         AtomicReplaceHead(context->mFreeVoiceProps, props);
1801     }
1802
1803     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1804             && !IsAmbisonic(voice->mFmtChannels))
1805         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1806         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1807         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1808     else
1809         CalcAttnSourceParams(voice, &voice->mProps, context);
1810 }
1811
1812
1813 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1814 {
1815     RingBuffer *ring{context->mAsyncEvents.get()};
1816     auto evt_vec = ring->getWriteVector();
1817     if(evt_vec.first.len < 1) return;
1818
1819     auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec.first.buf);
1820     evt.mId = id;
1821     switch(state)
1822     {
1823     case VChangeState::Reset:
1824         evt.mState = AsyncSrcState::Reset;
1825         break;
1826     case VChangeState::Stop:
1827         evt.mState = AsyncSrcState::Stop;
1828         break;
1829     case VChangeState::Play:
1830         evt.mState = AsyncSrcState::Play;
1831         break;
1832     case VChangeState::Pause:
1833         evt.mState = AsyncSrcState::Pause;
1834         break;
1835     /* Shouldn't happen. */
1836     case VChangeState::Restart:
1837         al::unreachable();
1838     }
1839
1840     ring->writeAdvance(1);
1841 }
1842
1843 void ProcessVoiceChanges(ContextBase *ctx)
1844 {
1845     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1846     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1847     if(!next) return;
1848
1849     const auto enabledevt = ctx->mEnabledEvts.load(std::memory_order_acquire);
1850     do {
1851         cur = next;
1852
1853         bool sendevt{false};
1854         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1855         {
1856             if(Voice *voice{cur->mVoice})
1857             {
1858                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1859                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1860                 /* A source ID indicates the voice was playing or paused, which
1861                  * gets a reset/stop event.
1862                  */
1863                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1864                 Voice::State oldvstate{Voice::Playing};
1865                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1866                     std::memory_order_relaxed, std::memory_order_acquire);
1867                 voice->mPendingChange.store(false, std::memory_order_release);
1868             }
1869             /* Reset state change events are always sent, even if the voice is
1870              * already stopped or even if there is no voice.
1871              */
1872             sendevt |= (cur->mState == VChangeState::Reset);
1873         }
1874         else if(cur->mState == VChangeState::Pause)
1875         {
1876             Voice *voice{cur->mVoice};
1877             Voice::State oldvstate{Voice::Playing};
1878             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1879                 std::memory_order_release, std::memory_order_acquire);
1880         }
1881         else if(cur->mState == VChangeState::Play)
1882         {
1883             /* NOTE: When playing a voice, sending a source state change event
1884              * depends if there's an old voice to stop and if that stop is
1885              * successful. If there is no old voice, a playing event is always
1886              * sent. If there is an old voice, an event is sent only if the
1887              * voice is already stopped.
1888              */
1889             if(Voice *oldvoice{cur->mOldVoice})
1890             {
1891                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1892                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1893                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1894                 Voice::State oldvstate{Voice::Playing};
1895                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1896                     std::memory_order_relaxed, std::memory_order_acquire);
1897                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1898             }
1899             else
1900                 sendevt = true;
1901
1902             Voice *voice{cur->mVoice};
1903             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1904         }
1905         else if(cur->mState == VChangeState::Restart)
1906         {
1907             /* Restarting a voice never sends a source change event. */
1908             Voice *oldvoice{cur->mOldVoice};
1909             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1910             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1911             /* If there's no sourceID, the old voice finished so don't start
1912              * the new one at its new offset.
1913              */
1914             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1915             {
1916                 /* Otherwise, set the voice to stopping if it's not already (it
1917                  * might already be, if paused), and play the new voice as
1918                  * appropriate.
1919                  */
1920                 Voice::State oldvstate{Voice::Playing};
1921                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1922                     std::memory_order_relaxed, std::memory_order_acquire);
1923
1924                 Voice *voice{cur->mVoice};
1925                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1926                     : Voice::Stopped, std::memory_order_release);
1927             }
1928             oldvoice->mPendingChange.store(false, std::memory_order_release);
1929         }
1930         if(sendevt && enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
1931             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1932
1933         next = cur->mNext.load(std::memory_order_acquire);
1934     } while(next);
1935     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1936 }
1937
1938 void ProcessParamUpdates(ContextBase *ctx, const al::span<EffectSlot*> slots,
1939     const al::span<EffectSlot*> sorted_slots, const al::span<Voice*> voices)
1940 {
1941     ProcessVoiceChanges(ctx);
1942
1943     IncrementRef(ctx->mUpdateCount);
1944     if(!ctx->mHoldUpdates.load(std::memory_order_acquire)) LIKELY
1945     {
1946         bool force{CalcContextParams(ctx)};
1947         auto sorted_slot_base = al::to_address(sorted_slots.begin());
1948         for(EffectSlot *slot : slots)
1949             force |= CalcEffectSlotParams(slot, sorted_slot_base, ctx);
1950
1951         for(Voice *voice : voices)
1952         {
1953             /* Only update voices that have a source. */
1954             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1955                 CalcSourceParams(voice, ctx, force);
1956         }
1957     }
1958     IncrementRef(ctx->mUpdateCount);
1959 }
1960
1961 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1962 {
1963     ASSUME(SamplesToDo > 0);
1964
1965     const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
1966         nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
1967         device->Frequency};
1968
1969     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
1970     {
1971         const auto auxslotspan = al::span{*ctx->mActiveAuxSlots.load(std::memory_order_acquire)};
1972         const auto auxslots = auxslotspan.first(auxslotspan.size()>>1);
1973         const auto sorted_slots = auxslotspan.last(auxslotspan.size()>>1);
1974         const al::span<Voice*> voices{ctx->getVoicesSpanAcquired()};
1975
1976         /* Process pending property updates for objects on the context. */
1977         ProcessParamUpdates(ctx, auxslots, sorted_slots, voices);
1978
1979         /* Clear auxiliary effect slot mixing buffers. */
1980         for(EffectSlot *slot : auxslots)
1981         {
1982             for(auto &buffer : slot->Wet.Buffer)
1983                 buffer.fill(0.0f);
1984         }
1985
1986         /* Process voices that have a playing source. */
1987         for(Voice *voice : voices)
1988         {
1989             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1990             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1991                 voice->mix(vstate, ctx, curtime, SamplesToDo);
1992         }
1993
1994         /* Process effects. */
1995         if(!auxslots.empty())
1996         {
1997             /* Sort the slots into extra storage, so that effect slots come
1998              * before their effect slot target (or their targets' target). Skip
1999              * sorting if it has already been done.
2000              */
2001             if(!sorted_slots[0])
2002             {
2003                 /* First, copy the slots to the sorted list, then partition the
2004                  * sorted list so that all slots without a target slot go to
2005                  * the end.
2006                  */
2007                 std::copy(auxslots.begin(), auxslots.end(), sorted_slots.begin());
2008                 auto split_point = std::partition(sorted_slots.begin(), sorted_slots.end(),
2009                     [](const EffectSlot *slot) noexcept -> bool
2010                     { return slot->Target != nullptr; });
2011                 /* There must be at least one slot without a slot target. */
2012                 assert(split_point != sorted_slots.end());
2013
2014                 /* Simple case: no more than 1 slot has a target slot. Either
2015                  * all slots go right to the output, or the remaining one must
2016                  * target an already-partitioned slot.
2017                  */
2018                 if(split_point - sorted_slots.begin() > 1)
2019                 {
2020                     /* At least two slots target other slots. Starting from the
2021                      * back of the sorted list, continue partitioning the front
2022                      * of the list given each target until all targets are
2023                      * accounted for. This ensures all slots without a target
2024                      * go last, all slots directly targeting those last slots
2025                      * go second-to-last, all slots directly targeting those
2026                      * second-last slots go third-to-last, etc.
2027                      */
2028                     auto next_target = sorted_slots.end();
2029                     do {
2030                         /* This shouldn't happen, but if there's unsorted slots
2031                          * left that don't target any sorted slots, they can't
2032                          * contribute to the output, so leave them.
2033                          */
2034                         if(next_target == split_point) UNLIKELY
2035                             break;
2036
2037                         --next_target;
2038                         split_point = std::partition(sorted_slots.begin(), split_point,
2039                             [next_target](const EffectSlot *slot) noexcept -> bool
2040                             { return slot->Target != *next_target; });
2041                     } while(split_point - sorted_slots.begin() > 1);
2042                 }
2043             }
2044
2045             for(const EffectSlot *slot : sorted_slots)
2046             {
2047                 EffectState *state{slot->mEffectState.get()};
2048                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
2049             }
2050         }
2051
2052         /* Signal the event handler if there are any events to read. */
2053         RingBuffer *ring{ctx->mAsyncEvents.get()};
2054         if(ring->readSpace() > 0)
2055             ctx->mEventSem.post();
2056     }
2057 }
2058
2059
2060 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
2061     const al::span<const DistanceComp::ChanData,MaxOutputChannels> chandata)
2062 {
2063     ASSUME(SamplesToDo > 0);
2064
2065     auto distcomp = chandata.begin();
2066     for(auto &chanbuffer : Samples)
2067     {
2068         const float gain{distcomp->Gain};
2069         auto distbuf = al::span{al::assume_aligned<16>(distcomp->Buffer.data()),
2070             distcomp->Buffer.size()};
2071         ++distcomp;
2072
2073         const size_t base{distbuf.size()};
2074         if(base < 1) continue;
2075
2076         const auto inout = al::span{al::assume_aligned<16>(chanbuffer.data()), SamplesToDo};
2077         if(SamplesToDo >= base) LIKELY
2078         {
2079             auto delay_end = std::rotate(inout.begin(), inout.end()-ptrdiff_t(base), inout.end());
2080             std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
2081         }
2082         else
2083         {
2084             auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
2085             std::rotate(distbuf.begin(), delay_start, distbuf.begin()+ptrdiff_t(base));
2086         }
2087         std::transform(inout.begin(), inout.end(), inout.begin(),
2088             [gain](float s) { return s*gain; });
2089     }
2090 }
2091
2092 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
2093     const float quant_scale, const size_t SamplesToDo)
2094 {
2095     static constexpr double invRNGRange{1.0 / std::numeric_limits<uint>::max()};
2096     ASSUME(SamplesToDo > 0);
2097
2098     /* Dithering. Generate whitenoise (uniform distribution of random values
2099      * between -1 and +1) and add it to the sample values, after scaling up to
2100      * the desired quantization depth and before rounding.
2101      */
2102     const float invscale{1.0f / quant_scale};
2103     uint seed{*dither_seed};
2104     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
2105     {
2106         float val{sample * quant_scale};
2107         uint rng0{dither_rng(&seed)};
2108         uint rng1{dither_rng(&seed)};
2109         val += static_cast<float>(rng0*invRNGRange - rng1*invRNGRange);
2110         return fast_roundf(val) * invscale;
2111     };
2112     for(FloatBufferLine &inout : Samples)
2113         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
2114     *dither_seed = seed;
2115 }
2116
2117
2118 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
2119  * chokes on that given the inline specializations.
2120  */
2121 template<typename T>
2122 inline T SampleConv(float) noexcept;
2123
2124 template<> inline float SampleConv(float val) noexcept
2125 { return val; }
2126 template<> inline int32_t SampleConv(float val) noexcept
2127 {
2128     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
2129      * This means a normalized float has at most 25 bits of signed precision.
2130      * When scaling and clamping for a signed 32-bit integer, these following
2131      * values are the best a float can give.
2132      */
2133     return fastf2i(std::clamp(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
2134 }
2135 template<> inline int16_t SampleConv(float val) noexcept
2136 { return static_cast<int16_t>(fastf2i(std::clamp(val*32768.0f, -32768.0f, 32767.0f))); }
2137 template<> inline int8_t SampleConv(float val) noexcept
2138 { return static_cast<int8_t>(fastf2i(std::clamp(val*128.0f, -128.0f, 127.0f))); }
2139
2140 /* Define unsigned output variations. */
2141 template<> inline uint32_t SampleConv(float val) noexcept
2142 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
2143 template<> inline uint16_t SampleConv(float val) noexcept
2144 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
2145 template<> inline uint8_t SampleConv(float val) noexcept
2146 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
2147
2148 template<typename T>
2149 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
2150     const size_t SamplesToDo, const size_t FrameStep)
2151 {
2152     ASSUME(FrameStep > 0);
2153     ASSUME(SamplesToDo > 0);
2154
2155     const auto output = al::span{static_cast<T*>(OutBuffer), (Offset+SamplesToDo)*FrameStep}
2156         .subspan(Offset*FrameStep);
2157     size_t c{0};
2158     for(const FloatBufferLine &inbuf : InBuffer)
2159     {
2160         auto out = output.begin();
2161         auto conv_sample = [FrameStep,c,&out](const float s) noexcept
2162         {
2163             out[c] = SampleConv<T>(s);
2164             out += ptrdiff_t(FrameStep);
2165         };
2166         std::for_each_n(inbuf.cbegin(), SamplesToDo, conv_sample);
2167         ++c;
2168     }
2169     if(const size_t extra{FrameStep - c})
2170     {
2171         const auto silence = SampleConv<T>(0.0f);
2172         for(size_t i{0};i < SamplesToDo;++i)
2173             std::fill_n(&output[i*FrameStep + c], extra, silence);
2174     }
2175 }
2176
2177 } // namespace
2178
2179 uint DeviceBase::renderSamples(const uint numSamples)
2180 {
2181     const uint samplesToDo{std::min(numSamples, uint{BufferLineSize})};
2182
2183     /* Clear main mixing buffers. */
2184     for(FloatBufferLine &buffer : MixBuffer)
2185         buffer.fill(0.0f);
2186
2187     {
2188         const auto mixLock = getWriteMixLock();
2189
2190         /* Process and mix each context's sources and effects. */
2191         ProcessContexts(this, samplesToDo);
2192
2193         /* Every second's worth of samples is converted and added to clock base
2194          * so that large sample counts don't overflow during conversion. This
2195          * also guarantees a stable conversion.
2196          */
2197         auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
2198         auto clockBase = mClockBase.load(std::memory_order_relaxed) +
2199             std::chrono::seconds{samplesDone/Frequency};
2200         mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
2201         mClockBase.store(clockBase, std::memory_order_relaxed);
2202     }
2203
2204     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
2205      * (Ambisonic decode, UHJ encode, etc).
2206      */
2207     postProcess(samplesToDo);
2208
2209     /* Apply compression, limiting sample amplitude if needed or desired. */
2210     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer.data());
2211
2212     /* Apply delays and attenuation for mismatched speaker distances. */
2213     if(ChannelDelays)
2214         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels);
2215
2216     /* Apply dithering. The compressor should have left enough headroom for the
2217      * dither noise to not saturate.
2218      */
2219     if(DitherDepth > 0.0f)
2220         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
2221
2222     return samplesToDo;
2223 }
2224
2225 void DeviceBase::renderSamples(const al::span<float*> outBuffers, const uint numSamples)
2226 {
2227     FPUCtl mixer_mode{};
2228     uint total{0};
2229     while(const uint todo{numSamples - total})
2230     {
2231         const uint samplesToDo{renderSamples(todo)};
2232
2233         auto srcbuf = RealOut.Buffer.cbegin();
2234         for(auto *dstbuf : outBuffers)
2235         {
2236             const auto dst = al::span{dstbuf, numSamples}.subspan(total);
2237             std::copy_n(srcbuf->cbegin(), samplesToDo, dst.begin());
2238             ++srcbuf;
2239         }
2240
2241         total += samplesToDo;
2242     }
2243 }
2244
2245 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
2246 {
2247     FPUCtl mixer_mode{};
2248     uint total{0};
2249     while(const uint todo{numSamples - total})
2250     {
2251         const uint samplesToDo{renderSamples(todo)};
2252
2253         if(outBuffer) LIKELY
2254         {
2255             /* Finally, interleave and convert samples, writing to the device's
2256              * output buffer.
2257              */
2258             switch(FmtType)
2259             {
2260 #define HANDLE_WRITE(T) case T:                                               \
2261     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
2262             HANDLE_WRITE(DevFmtByte)
2263             HANDLE_WRITE(DevFmtUByte)
2264             HANDLE_WRITE(DevFmtShort)
2265             HANDLE_WRITE(DevFmtUShort)
2266             HANDLE_WRITE(DevFmtInt)
2267             HANDLE_WRITE(DevFmtUInt)
2268             HANDLE_WRITE(DevFmtFloat)
2269 #undef HANDLE_WRITE
2270             }
2271         }
2272
2273         total += samplesToDo;
2274     }
2275 }
2276
2277 void DeviceBase::handleDisconnect(const char *msg, ...)
2278 {
2279     const auto mixLock = getWriteMixLock();
2280
2281     if(Connected.exchange(false, std::memory_order_acq_rel))
2282     {
2283         AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
2284         auto &disconnect = std::get<AsyncDisconnectEvent>(evt);
2285
2286         /* NOLINTBEGIN(*-array-to-pointer-decay) */
2287         va_list args, args2;
2288         va_start(args, msg);
2289         va_copy(args2, args);
2290         if(int msglen{vsnprintf(nullptr, 0, msg, args)}; msglen > 0)
2291         {
2292             disconnect.msg.resize(static_cast<uint>(msglen)+1_uz);
2293             vsnprintf(disconnect.msg.data(), disconnect.msg.size(), msg, args2);
2294         }
2295         else
2296             disconnect.msg = "<failed constructing message>";
2297         va_end(args2);
2298         va_end(args);
2299         /* NOLINTEND(*-array-to-pointer-decay) */
2300
2301         while(!disconnect.msg.empty() && disconnect.msg.back() == '\0')
2302             disconnect.msg.pop_back();
2303
2304         for(ContextBase *ctx : *mContexts.load())
2305         {
2306             RingBuffer *ring{ctx->mAsyncEvents.get()};
2307             auto evt_data = ring->getWriteVector().first;
2308             if(evt_data.len > 0)
2309             {
2310                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2311                 ring->writeAdvance(1);
2312                 ctx->mEventSem.post();
2313             }
2314
2315             if(!ctx->mStopVoicesOnDisconnect.load())
2316             {
2317                 ProcessVoiceChanges(ctx);
2318                 continue;
2319             }
2320
2321             auto voicelist = ctx->getVoicesSpanAcquired();
2322             auto stop_voice = [](Voice *voice) -> void
2323             {
2324                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2325                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2326                 voice->mSourceID.store(0u, std::memory_order_relaxed);
2327                 voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2328             };
2329             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2330         }
2331     }
2332 }