alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdint>
  33 #include <cstdio>
  34 #include <cstdlib>
  35 #include <functional>
  36 #include <iterator>
  37 #include <limits>
  38 #include <memory>
  39 #include <new>
  40 #include <optional>
  41 #include <utility>
  42
  43 #include "almalloc.h"
  44 #include "alnumbers.h"
  45 #include "alnumeric.h"
  46 #include "alspan.h"
  47 #include "alstring.h"
  48 #include "atomic.h"
  49 #include "core/ambidefs.h"
  50 #include "core/async_event.h"
  51 #include "core/bformatdec.h"
  52 #include "core/bs2b.h"
  53 #include "core/bsinc_defs.h"
  54 #include "core/bsinc_tables.h"
  55 #include "core/bufferline.h"
  56 #include "core/buffer_storage.h"
  57 #include "core/context.h"
  58 #include "core/cpu_caps.h"
  59 #include "core/cubic_tables.h"
  60 #include "core/devformat.h"
  61 #include "core/device.h"
  62 #include "core/effects/base.h"
  63 #include "core/effectslot.h"
  64 #include "core/filters/biquad.h"
  65 #include "core/filters/nfc.h"
  66 #include "core/fpu_ctrl.h"
  67 #include "core/hrtf.h"
  68 #include "core/mastering.h"
  69 #include "core/mixer.h"
  70 #include "core/mixer/defs.h"
  71 #include "core/mixer/hrtfdefs.h"
  72 #include "core/resampler_limits.h"
  73 #include "core/uhjfilter.h"
  74 #include "core/voice.h"
  75 #include "core/voice_change.h"
  76 #include "intrusive_ptr.h"
  77 #include "opthelpers.h"
  78 #include "ringbuffer.h"
  79 #include "strutils.h"
  80 #include "vecmat.h"
  81 #include "vector.h"
  82
/* Tag types used below purely as template arguments to pick a mixer or
 * resampler specialization (e.g. MixDirectHrtf_<CTag>, Resample_<LerpTag,SSE2Tag>).
 * Only forward declarations appear here. The instruction-set tags are only
 * declared when the matching HAVE_* macro is defined.
 */
struct CTag;
#ifdef HAVE_SSE
struct SSETag;
#endif
#ifdef HAVE_SSE2
struct SSE2Tag;
#endif
#ifdef HAVE_SSE4_1
struct SSE4Tag;
#endif
#ifdef HAVE_NEON
struct NEONTag;
#endif
/* Resampler-type tags, paired with an instruction-set tag in Resample_<>. */
struct PointTag;
struct LerpTag;
struct CubicTag;
struct BSincTag;
struct FastBSincTag;
 101
 102
/* Compile-time check that the resampler padding amount is even. */
static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 104
 105
 106 namespace {
 107
/* Shorthand for unsigned int, used throughout this file. */
using uint = unsigned int;
using namespace std::chrono;
/* Enables the ""sv string_view literal suffix (used by InitConeScale). */
using namespace std::string_view_literals;
 111
 112 float InitConeScale()
 113 {
 114     float ret{1.0f};
 115     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 116     {
 117         if(al::case_compare(*optval, "true"sv) == 0
 118             || strtol(optval->c_str(), nullptr, 0) == 1)
 119             ret *= 0.5f;
 120     }
 121     return ret;
 122 }
 123 /* Cone scalar */
 124 const float ConeScale{InitConeScale()};
 125
/* Localized scalars for mono sources (initialized in aluInit, after
 * configuration is loaded). aluInit sets each one to -1 when the matching
 * CompatFlags::ReverseX/Y/Z flag is set, and to +1 otherwise.
 */
float XScale{1.0f};
float YScale{1.0f};
float ZScale{1.0f};

/* Source distance scale for NFC filters. Set in aluInit from the requested
 * scale, clamped to the range [0.0001, 10000].
 */
float NfcScale{1.0f};
 135
 136
/* Function pointer type shared by the MixDirectHrtf_<> specializations. */
using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
    const al::span<const FloatBufferLine> InSamples, const al::span<float2> AccumSamples,
    const al::span<float,BufferLineSize> TempBuf, const al::span<HrtfChannelState> ChanState,
    const size_t IrSize, const size_t SamplesToDo);

/* The direct HRTF mixer in use. Starts out as the CTag specialization and is
 * replaced in aluInit with the result of SelectHrtfMixer().
 */
HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 143
 144 inline HrtfDirectMixerFunc SelectHrtfMixer()
 145 {
 146 #ifdef HAVE_NEON
 147     if((CPUCapFlags&CPU_CAP_NEON))
 148         return MixDirectHrtf_<NEONTag>;
 149 #endif
 150 #ifdef HAVE_SSE
 151     if((CPUCapFlags&CPU_CAP_SSE))
 152         return MixDirectHrtf_<SSETag>;
 153 #endif
 154
 155     return MixDirectHrtf_<CTag>;
 156 }
 157
 158
 159 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 160 {
 161     size_t si{BSincScaleCount - 1};
 162     float sf{0.0f};
 163
 164     if(increment > MixerFracOne)
 165     {
 166         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 167         sf = std::max(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 168         si = float2uint(sf);
 169         /* The interpolation factor is fit to this diagonally-symmetric curve
 170          * to reduce the transition ripple caused by interpolating different
 171          * scales of the sinc function.
 172          */
 173         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 174     }
 175
 176     state->sf = sf;
 177     state->m = table->m[si];
 178     state->l = (state->m/2) - 1;
 179     state->filter = table->Tab.subspan(table->filterOffset[si]);
 180 }
 181
 182 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 183 {
 184     switch(resampler)
 185     {
 186     case Resampler::Point:
 187         return Resample_<PointTag,CTag>;
 188     case Resampler::Linear:
 189 #ifdef HAVE_NEON
 190         if((CPUCapFlags&CPU_CAP_NEON))
 191             return Resample_<LerpTag,NEONTag>;
 192 #endif
 193 #ifdef HAVE_SSE4_1
 194         if((CPUCapFlags&CPU_CAP_SSE4_1))
 195             return Resample_<LerpTag,SSE4Tag>;
 196 #endif
 197 #ifdef HAVE_SSE2
 198         if((CPUCapFlags&CPU_CAP_SSE2))
 199             return Resample_<LerpTag,SSE2Tag>;
 200 #endif
 201         return Resample_<LerpTag,CTag>;
 202     case Resampler::Spline:
 203     case Resampler::Gaussian:
 204 #ifdef HAVE_NEON
 205         if((CPUCapFlags&CPU_CAP_NEON))
 206             return Resample_<CubicTag,NEONTag>;
 207 #endif
 208 #ifdef HAVE_SSE4_1
 209         if((CPUCapFlags&CPU_CAP_SSE4_1))
 210             return Resample_<CubicTag,SSE4Tag>;
 211 #endif
 212 #ifdef HAVE_SSE2
 213         if((CPUCapFlags&CPU_CAP_SSE2))
 214             return Resample_<CubicTag,SSE2Tag>;
 215 #endif
 216 #ifdef HAVE_SSE
 217         if((CPUCapFlags&CPU_CAP_SSE))
 218             return Resample_<CubicTag,SSETag>;
 219 #endif
 220         return Resample_<CubicTag,CTag>;
 221     case Resampler::BSinc12:
 222     case Resampler::BSinc24:
 223         if(increment > MixerFracOne)
 224         {
 225 #ifdef HAVE_NEON
 226             if((CPUCapFlags&CPU_CAP_NEON))
 227                 return Resample_<BSincTag,NEONTag>;
 228 #endif
 229 #ifdef HAVE_SSE
 230             if((CPUCapFlags&CPU_CAP_SSE))
 231                 return Resample_<BSincTag,SSETag>;
 232 #endif
 233             return Resample_<BSincTag,CTag>;
 234         }
 235         /* fall-through */
 236     case Resampler::FastBSinc12:
 237     case Resampler::FastBSinc24:
 238 #ifdef HAVE_NEON
 239         if((CPUCapFlags&CPU_CAP_NEON))
 240             return Resample_<FastBSincTag,NEONTag>;
 241 #endif
 242 #ifdef HAVE_SSE
 243         if((CPUCapFlags&CPU_CAP_SSE))
 244             return Resample_<FastBSincTag,SSETag>;
 245 #endif
 246         return Resample_<FastBSincTag,CTag>;
 247     }
 248
 249     return Resample_<PointTag,CTag>;
 250 }
 251
 252 } // namespace
 253
 254 void aluInit(CompatFlagBitset flags, const float nfcscale)
 255 {
 256     MixDirectHrtf = SelectHrtfMixer();
 257     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 258     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 259     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 260
 261     NfcScale = std::clamp(nfcscale, 0.0001f, 10000.0f);
 262 }
 263
 264
 265 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 266 {
 267     switch(resampler)
 268     {
 269     case Resampler::Point:
 270     case Resampler::Linear:
 271         break;
 272     case Resampler::Spline:
 273         state->emplace<CubicState>(al::span{gSplineFilter.mTable});
 274         break;
 275     case Resampler::Gaussian:
 276         state->emplace<CubicState>(al::span{gGaussianFilter.mTable});
 277         break;
 278     case Resampler::FastBSinc12:
 279     case Resampler::BSinc12:
 280         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc12);
 281         break;
 282     case Resampler::FastBSinc24:
 283     case Resampler::BSinc24:
 284         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc24);
 285         break;
 286     }
 287     return SelectResampler(resampler, increment);
 288 }
 289
 290
 291 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 292 {
 293     /* HRTF is stereo output only. */
 294     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 295     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 296
 297     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 298         mHrtfState->mTemp, mHrtfState->mChannels, mHrtfState->mIrSize, SamplesToDo);
 299 }
 300
 301 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 302 {
 303     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 304 }
 305
 306 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 307 {
 308     /* Decode with front image stablization. */
 309     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 310     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 311     const size_t cidx{RealOut.ChannelIndex[FrontCenter]};
 312
 313     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer, lidx, ridx, cidx, SamplesToDo);
 314 }
 315
 316 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 317 {
 318     /* UHJ is stereo output only. */
 319     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 320     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 321
 322     /* Encode to stereo-compatible 2-channel UHJ output. */
 323     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 324         {{Dry.Buffer[0].data(), Dry.Buffer[1].data(), Dry.Buffer[2].data()}}, SamplesToDo);
 325 }
 326
 327 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 328 {
 329     /* First, decode the ambisonic mix to the "real" output. */
 330     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 331
 332     /* BS2B is stereo output only. */
 333     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 334     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 335
 336     /* Now apply the BS2B binaural/crossfeed filter. */
 337     Bs2b->cross_feed(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(), SamplesToDo);
 338 }
 339
 340
 341 namespace {
 342
 343 /* This RNG method was created based on the math found in opusdec. It's quick,
 344  * and starting with a seed value of 22222, is suitable for generating
 345  * whitenoise.
 346  */
 347 inline uint dither_rng(uint *seed) noexcept
 348 {
 349     *seed = (*seed * 96314165) + 907633515;
 350     return *seed;
 351 }
 352
 353
 354 /* Ambisonic upsampler function. It's effectively a matrix multiply. It takes
 355  * an 'upsampler' and 'rotator' as the input matrices, and creates a matrix
 356  * that behaves as if the B-Format input was first decoded to a speaker array
 357  * at its input order, encoded back into the higher order mix, then finally
 358  * rotated.
 359  */
 360 void UpsampleBFormatTransform(
 361     const al::span<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> output,
 362     const al::span<const std::array<float,MaxAmbiChannels>> upsampler,
 363     const al::span<const std::array<float,MaxAmbiChannels>,MaxAmbiChannels> rotator,
 364     size_t ambi_order)
 365 {
 366     const size_t num_chans{AmbiChannelsFromOrder(ambi_order)};
 367     for(size_t i{0};i < upsampler.size();++i)
 368         output[i].fill(0.0f);
 369     for(size_t i{0};i < upsampler.size();++i)
 370     {
 371         for(size_t k{0};k < num_chans;++k)
 372         {
 373             const float a{upsampler[i][k]};
 374             /* Write the full number of channels. The compiler will have an
 375              * easier time optimizing if it has a fixed length.
 376              */
 377             std::transform(rotator[k].cbegin(), rotator[k].cend(), output[i].cbegin(),
 378                 output[i].begin(), [a](float rot, float dst) noexcept { return rot*a + dst; });
 379         }
 380     }
 381 }
 382
 383
 384 constexpr auto GetAmbiScales(AmbiScaling scaletype) noexcept
 385 {
 386     switch(scaletype)
 387     {
 388     case AmbiScaling::FuMa: return al::span{AmbiScale::FromFuMa};
 389     case AmbiScaling::SN3D: return al::span{AmbiScale::FromSN3D};
 390     case AmbiScaling::UHJ: return al::span{AmbiScale::FromUHJ};
 391     case AmbiScaling::N3D: break;
 392     }
 393     return al::span{AmbiScale::FromN3D};
 394 }
 395
 396 constexpr auto GetAmbiLayout(AmbiLayout layouttype) noexcept
 397 {
 398     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa};
 399     return al::span{AmbiIndex::FromACN};
 400 }
 401
 402 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 403 {
 404     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa2D};
 405     return al::span{AmbiIndex::FromACN2D};
 406 }
 407
 408
 409 bool CalcContextParams(ContextBase *ctx)
 410 {
 411     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 412     if(!props) return false;
 413
 414     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 415     ctx->mParams.Position = pos;
 416
 417     /* AT then UP */
 418     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 419     N.normalize();
 420     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 421     V.normalize();
 422     /* Build and normalize right-vector */
 423     alu::Vector U{N.cross_product(V)};
 424     U.normalize();
 425
 426     const alu::Matrix rot{
 427         U[0], V[0], -N[0], 0.0,
 428         U[1], V[1], -N[1], 0.0,
 429         U[2], V[2], -N[2], 0.0,
 430          0.0,  0.0,   0.0, 1.0};
 431     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 432
 433     ctx->mParams.Matrix = rot;
 434     ctx->mParams.Velocity = rot * vel;
 435
 436     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 437     ctx->mParams.MetersPerUnit = props->MetersPerUnit
 438 #ifdef ALSOFT_EAX
 439         * props->DistanceFactor
 440 #endif
 441         ;
 442     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 443
 444     ctx->mParams.DopplerFactor = props->DopplerFactor;
 445     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity
 446 #ifdef ALSOFT_EAX
 447         / props->DistanceFactor
 448 #endif
 449         ;
 450
 451     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 452     ctx->mParams.mDistanceModel = props->mDistanceModel;
 453
 454     AtomicReplaceHead(ctx->mFreeContextProps, props);
 455     return true;
 456 }
 457
 458 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 459 {
 460     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 461     if(!props) return false;
 462
 463     /* If the effect slot target changed, clear the first sorted entry to force
 464      * a re-sort.
 465      */
 466     if(slot->Target != props->Target)
 467         *sorted_slots = nullptr;
 468     slot->Gain = props->Gain;
 469     slot->AuxSendAuto = props->AuxSendAuto;
 470     slot->Target = props->Target;
 471     slot->EffectType = props->Type;
 472     slot->mEffectProps = props->Props;
 473
 474     slot->RoomRolloff = 0.0f;
 475     slot->DecayTime = 0.0f;
 476     slot->DecayLFRatio = 0.0f;
 477     slot->DecayHFRatio = 0.0f;
 478     slot->DecayHFLimit = false;
 479     slot->AirAbsorptionGainHF = 1.0f;
 480     if(auto *reverbprops = std::get_if<ReverbProps>(&props->Props))
 481     {
 482         slot->RoomRolloff = reverbprops->RoomRolloffFactor;
 483         slot->AirAbsorptionGainHF = reverbprops->AirAbsorptionGainHF;
 484         /* If this effect slot's Auxiliary Send Auto is off, don't apply the
 485          * automatic send adjustments based on source distance.
 486          */
 487         if(slot->AuxSendAuto)
 488         {
 489             slot->DecayTime = reverbprops->DecayTime;
 490             slot->DecayLFRatio = reverbprops->DecayLFRatio;
 491             slot->DecayHFRatio = reverbprops->DecayHFRatio;
 492             slot->DecayHFLimit = reverbprops->DecayHFLimit;
 493         }
 494     }
 495
 496     EffectState *state{props->State.release()};
 497     EffectState *oldstate{slot->mEffectState.release()};
 498     slot->mEffectState.reset(state);
 499
 500     /* Only release the old state if it won't get deleted, since we can't be
 501      * deleting/freeing anything in the mixer.
 502      */
 503     if(!oldstate->releaseIfNoDelete())
 504     {
 505         /* Otherwise, if it would be deleted send it off with a release event. */
 506         RingBuffer *ring{context->mAsyncEvents.get()};
 507         auto evt_vec = ring->getWriteVector();
 508         if(evt_vec[0].len > 0) LIKELY
 509         {
 510             auto &evt = InitAsyncEvent<AsyncEffectReleaseEvent>(evt_vec[0].buf);
 511             evt.mEffectState = oldstate;
 512             ring->writeAdvance(1);
 513         }
 514         else
 515         {
 516             /* If writing the event failed, the queue was probably full. Store
 517              * the old state in the property object where it can eventually be
 518              * cleaned up sometime later (not ideal, but better than blocking
 519              * or leaking).
 520              */
 521             props->State.reset(oldstate);
 522         }
 523     }
 524
 525     AtomicReplaceHead(context->mFreeEffectSlotProps, props);
 526
 527     const auto output = [slot,context]() -> EffectTarget
 528     {
 529         if(EffectSlot *target{slot->Target})
 530             return EffectTarget{&target->Wet, nullptr};
 531         DeviceBase *device{context->mDevice};
 532         return EffectTarget{&device->Dry, &device->RealOut};
 533     }();
 534     state->update(context, slot, &slot->mEffectProps, output);
 535     return true;
 536 }
 537
 538
 539 /* Scales the azimuth of the given vector by 3 if it's in front. Effectively
 540  * scales +/-30 degrees to +/-90 degrees, leaving > +90 and < -90 alone.
 541  */
 542 inline std::array<float,3> ScaleAzimuthFront3(std::array<float,3> pos)
 543 {
 544     if(pos[2] < 0.0f)
 545     {
 546         /* Normalize the length of the x,z components for a 2D vector of the
 547          * azimuth angle. Negate Z since {0,0,-1} is angle 0.
 548          */
 549         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 550         float x{pos[0] / len2d};
 551         float z{-pos[2] / len2d};
 552
 553         /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
 554         if(z > 0.866025403785f)
 555         {
 556             /* Triple the angle represented by x,z. */
 557             x = x*3.0f - x*x*x*4.0f;
 558             z = z*z*z*4.0f - z*3.0f;
 559
 560             /* Scale the vector back to fit in 3D. */
 561             pos[0] = x * len2d;
 562             pos[2] = -z * len2d;
 563         }
 564         else
 565         {
 566             /* If azimuth >= 30 degrees, clamp to 90 degrees. */
 567             pos[0] = std::copysign(len2d, pos[0]);
 568             pos[2] = 0.0f;
 569         }
 570     }
 571     return pos;
 572 }
 573
 574 /* Scales the azimuth of the given vector by 1.5 (3/2) if it's in front. */
 575 inline std::array<float,3> ScaleAzimuthFront3_2(std::array<float,3> pos)
 576 {
 577     if(pos[2] < 0.0f)
 578     {
 579         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 580         float x{pos[0] / len2d};
 581         float z{-pos[2] / len2d};
 582
 583         /* Z > cos(pi/3) = -60 < azimuth < 60 degrees. */
 584         if(z > 0.5f)
 585         {
 586             /* Halve the angle represented by x,z. */
 587             x = std::copysign(std::sqrt((1.0f - z) * 0.5f), x);
 588             z = std::sqrt((1.0f + z) * 0.5f);
 589
 590             /* Triple the angle represented by x,z. */
 591             x = x*3.0f - x*x*x*4.0f;
 592             z = z*z*z*4.0f - z*3.0f;
 593
 594             /* Scale the vector back to fit in 3D. */
 595             pos[0] = x * len2d;
 596             pos[2] = -z * len2d;
 597         }
 598         else
 599         {
 600             /* If azimuth >= 60 degrees, clamp to 90 degrees. */
 601             pos[0] = std::copysign(len2d, pos[0]);
 602             pos[2] = 0.0f;
 603         }
 604     }
 605     return pos;
 606 }
 607
 608
 609 /* Begin ambisonic rotation helpers.
 610  *
 611  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 612  * matrix. Higher orders, however, are more complicated. The method implemented
 613  * here is a recursive algorithm (the rotation for first-order is used to help
 614  * generate the second-order rotation, which helps generate the third-order
 615  * rotation, etc).
 616  *
 617  * Adapted from
 618  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 619  * provided under the BSD 3-Clause license.
 620  *
 621  * Copyright (c) 2015, Archontis Politis
 622  * Copyright (c) 2019, Christopher Robinson
 623  *
 624  * The u, v, and w coefficients used for generating higher-order rotations are
 625  * precomputed since they're constant. The second-order coefficients are
 626  * followed by the third-order coefficients, etc.
 627  */
 628 constexpr size_t CalcRotatorSize(size_t l) noexcept
 629 {
 630     if(l >= 2)
 631         return (l*2 + 1)*(l*2 + 1) + CalcRotatorSize(l-1);
 632     return 0;
 633 }
 634
 635 struct RotatorCoeffs {
 636     struct CoeffValues {
 637         float u, v, w;
 638     };
 639     std::array<CoeffValues,CalcRotatorSize(MaxAmbiOrder)> mCoeffs{};
 640
 641     RotatorCoeffs()
 642     {
 643         auto coeffs = mCoeffs.begin();
 644
 645         for(int l=2;l <= MaxAmbiOrder;++l)
 646         {
 647             for(int n{-l};n <= l;++n)
 648             {
 649                 for(int m{-l};m <= l;++m)
 650                 {
 651                     /* compute u,v,w terms of Eq.8.1 (Table I)
 652                      *
 653                      * const bool d{m == 0}; // the delta function d_m0
 654                      * const double denom{(std::abs(n) == l) ?
 655                      *     (2*l) * (2*l - 1) : (l*l - n*n)};
 656                      *
 657                      * const int abs_m{std::abs(m)};
 658                      * coeffs->u = std::sqrt((l*l - m*m) / denom);
 659                      * coeffs->v = std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 660                      *     (1.0+d) * (1.0 - 2.0*d) * 0.5;
 661                      * coeffs->w = std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 662                      *     (1.0-d) * -0.5;
 663                      */
 664
 665                     const double denom{static_cast<double>((std::abs(n) == l) ?
 666                           (2*l) * (2*l - 1) : (l*l - n*n))};
 667
 668                     if(m == 0)
 669                     {
 670                         coeffs->u = static_cast<float>(std::sqrt(l * l / denom));
 671                         coeffs->v = static_cast<float>(std::sqrt((l-1) * l / denom) * -1.0);
 672                         coeffs->w = 0.0f;
 673                     }
 674                     else
 675                     {
 676                         const int abs_m{std::abs(m)};
 677                         coeffs->u = static_cast<float>(std::sqrt((l*l - m*m) / denom));
 678                         coeffs->v = static_cast<float>(std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 679                             0.5);
 680                         coeffs->w = static_cast<float>(std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 681                             -0.5);
 682                     }
 683                     ++coeffs;
 684                 }
 685             }
 686         }
 687     }
 688 };
 689 const RotatorCoeffs RotatorCoeffArray{};
 690
 691 /**
 692  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 693  * coefficients, this fills in the coefficients for the higher orders up to and
 694  * including the given order. The matrix is in ACN layout.
 695  */
 696 void AmbiRotator(AmbiRotateMatrix &matrix, const int order)
 697 {
 698     /* Don't do anything for < 2nd order. */
 699     if(order < 2) return;
 700
 701     static constexpr auto P = [](const int i, const int l, const int a, const int n,
 702         const size_t last_band, const AmbiRotateMatrix &R)
 703     {
 704         const float ri1{ R[ 1+2][static_cast<size_t>(i+2_z)]};
 705         const float rim1{R[-1+2][static_cast<size_t>(i+2_z)]};
 706         const float ri0{ R[ 0+2][static_cast<size_t>(i+2_z)]};
 707
 708         const size_t y{last_band + static_cast<size_t>(a+l-1)};
 709         if(n == -l)
 710             return ri1*R[last_band][y] + rim1*R[last_band + static_cast<size_t>(l-1_z)*2][y];
 711         if(n == l)
 712             return ri1*R[last_band + static_cast<size_t>(l-1_z)*2][y] - rim1*R[last_band][y];
 713         return ri0*R[last_band + static_cast<size_t>(l-1_z+n)][y];
 714     };
 715
 716     static constexpr auto U = [](const int l, const int m, const int n, const size_t last_band,
 717         const AmbiRotateMatrix &R)
 718     {
 719         return P(0, l, m, n, last_band, R);
 720     };
 721     static constexpr auto V = [](const int l, const int m, const int n, const size_t last_band,
 722         const AmbiRotateMatrix &R)
 723     {
 724         using namespace al::numbers;
 725         if(m > 0)
 726         {
 727             const bool d{m == 1};
 728             const float p0{P( 1, l,  m-1, n, last_band, R)};
 729             const float p1{P(-1, l, -m+1, n, last_band, R)};
 730             return d ? p0*sqrt2_v<float> : (p0 - p1);
 731         }
 732         const bool d{m == -1};
 733         const float p0{P( 1, l,  m+1, n, last_band, R)};
 734         const float p1{P(-1, l, -m-1, n, last_band, R)};
 735         return d ? p1*sqrt2_v<float> : (p0 + p1);
 736     };
 737     static constexpr auto W = [](const int l, const int m, const int n, const size_t last_band,
 738         const AmbiRotateMatrix &R)
 739     {
 740         assert(m != 0);
 741         if(m > 0)
 742         {
 743             const float p0{P( 1, l,  m+1, n, last_band, R)};
 744             const float p1{P(-1, l, -m-1, n, last_band, R)};
 745             return p0 + p1;
 746         }
 747         const float p0{P( 1, l,  m-1, n, last_band, R)};
 748         const float p1{P(-1, l, -m+1, n, last_band, R)};
 749         return p0 - p1;
 750     };
 751
 752     // compute rotation matrix of each subsequent band recursively
 753     auto coeffs = RotatorCoeffArray.mCoeffs.cbegin();
 754     size_t band_idx{4}, last_band{1};
 755     for(int l{2};l <= order;++l)
 756     {
 757         size_t y{band_idx};
 758         for(int n{-l};n <= l;++n,++y)
 759         {
 760             size_t x{band_idx};
 761             for(int m{-l};m <= l;++m,++x)
 762             {
 763                 float r{0.0f};
 764
 765                 // computes Eq.8.1
 766                 if(const float u{coeffs->u}; u != 0.0f)
 767                     r += u * U(l, m, n, last_band, matrix);
 768                 if(const float v{coeffs->v}; v != 0.0f)
 769                     r += v * V(l, m, n, last_band, matrix);
 770                 if(const float w{coeffs->w}; w != 0.0f)
 771                     r += w * W(l, m, n, last_band, matrix);
 772
 773                 matrix[y][x] = r;
 774                 ++coeffs;
 775             }
 776         }
 777         last_band = band_idx;
 778         band_idx += static_cast<uint>(l)*2_uz + 1;
 779     }
 780 }
 781 /* End ambisonic rotation helpers. */
 782
 783
/* Sine and cosine of 30, 45, and 110 degrees, used for the channel positions
 * in CalcPanningAndFilters.
 */
constexpr float sin30{0.5f};
constexpr float cos30{0.866025403785f};
constexpr float sin45{al::numbers::sqrt2_v<float>*0.5f};
constexpr float cos45{al::numbers::sqrt2_v<float>*0.5f};
constexpr float sin110{ 0.939692620786f};
constexpr float cos110{-0.342020143326f};
 790
/* Pairs a channel label with a 3-component position vector. The maps in
 * CalcPanningAndFilters use {0,0,-1} for FrontCenter, so -Z is front.
 */
struct ChanPosMap {
    Channel channel;
    std::array<float,3> pos;
};
 795
 796
/* A set of three gains, passed to CalcPanningAndFilters for the dry path and
 * for each send. NOTE(review): presumably Base is the full-band gain, with HF
 * and LF being the high- and low-frequency gains -- confirm against the
 * callers.
 */
struct GainTriplet { float Base, HF, LF; };
 798
 799 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 800     const float Distance, const float Spread, const GainTriplet &DryGain,
 801     const al::span<const GainTriplet,MaxSendCount> WetGain,
 802     const al::span<EffectSlot*,MaxSendCount> SendSlots, const VoiceProps *props,
 803     const ContextParams &Context, DeviceBase *Device)
 804 {
 805     static constexpr std::array MonoMap{
 806         ChanPosMap{FrontCenter, std::array{0.0f, 0.0f, -1.0f}}
 807     };
 808     static constexpr std::array RearMap{
 809         ChanPosMap{BackLeft,  std::array{-sin30, 0.0f, cos30}},
 810         ChanPosMap{BackRight, std::array{ sin30, 0.0f, cos30}},
 811     };
 812     static constexpr std::array QuadMap{
 813         ChanPosMap{FrontLeft,  std::array{-sin45, 0.0f, -cos45}},
 814         ChanPosMap{FrontRight, std::array{ sin45, 0.0f, -cos45}},
 815         ChanPosMap{BackLeft,   std::array{-sin45, 0.0f,  cos45}},
 816         ChanPosMap{BackRight,  std::array{ sin45, 0.0f,  cos45}},
 817     };
 818     static constexpr std::array X51Map{
 819         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 820         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 821         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 822         ChanPosMap{LFE, {}},
 823         ChanPosMap{SideLeft,    std::array{-sin110, 0.0f, -cos110}},
 824         ChanPosMap{SideRight,   std::array{ sin110, 0.0f, -cos110}},
 825     };
 826     static constexpr std::array X61Map{
 827         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 828         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 829         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 830         ChanPosMap{LFE, {}},
 831         ChanPosMap{BackCenter,  std::array{ 0.0f, 0.0f, 1.0f}},
 832         ChanPosMap{SideLeft,    std::array{-1.0f, 0.0f, 0.0f}},
 833         ChanPosMap{SideRight,   std::array{ 1.0f, 0.0f, 0.0f}},
 834     };
 835     static constexpr std::array X71Map{
 836         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 837         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 838         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 839         ChanPosMap{LFE, {}},
 840         ChanPosMap{BackLeft,    std::array{-sin30, 0.0f, cos30}},
 841         ChanPosMap{BackRight,   std::array{ sin30, 0.0f, cos30}},
 842         ChanPosMap{SideLeft,    std::array{ -1.0f, 0.0f, 0.0f}},
 843         ChanPosMap{SideRight,   std::array{  1.0f, 0.0f, 0.0f}},
 844     };
 845
 846     std::array StereoMap{
 847         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 848         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 849     };
 850
 851     const auto Frequency = static_cast<float>(Device->Frequency);
 852     const uint NumSends{Device->NumAuxSends};
 853
 854     const size_t num_channels{voice->mChans.size()};
 855     ASSUME(num_channels > 0);
 856
 857     for(auto &chandata : voice->mChans)
 858     {
 859         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 860         chandata.mDryParams.Gains.Target.fill(0.0f);
 861         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 862             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 863     }
 864
 865     const auto getChans = [props,&StereoMap](FmtChannels chanfmt) noexcept
 866         -> std::pair<DirectMode,al::span<const ChanPosMap>>
 867     {
 868         switch(chanfmt)
 869         {
 870         case FmtMono:
 871             /* Mono buffers are never played direct. */
 872             return {DirectMode::Off, al::span{MonoMap}};
 873
 874         case FmtStereo:
 875         case FmtMonoDup:
 876             if(props->DirectChannels == DirectMode::Off)
 877             {
 878                 for(size_t i{0};i < 2;++i)
 879                 {
 880                     /* StereoPan is counter-clockwise in radians. */
 881                     const float a{props->StereoPan[i]};
 882                     StereoMap[i].pos[0] = -std::sin(a);
 883                     StereoMap[i].pos[2] = -std::cos(a);
 884                 }
 885             }
 886             return {props->DirectChannels, al::span{StereoMap}};
 887
 888         case FmtRear: return {props->DirectChannels, al::span{RearMap}};
 889         case FmtQuad: return {props->DirectChannels, al::span{QuadMap}};
 890         case FmtX51: return {props->DirectChannels, al::span{X51Map}};
 891         case FmtX61: return {props->DirectChannels, al::span{X61Map}};
 892         case FmtX71: return {props->DirectChannels, al::span{X71Map}};
 893
 894         case FmtBFormat2D:
 895         case FmtBFormat3D:
 896         case FmtUHJ2:
 897         case FmtUHJ3:
 898         case FmtUHJ4:
 899         case FmtSuperStereo:
 900             return {DirectMode::Off, {}};
 901         }
 902         return {props->DirectChannels, {}};
 903     };
 904     const auto [DirectChannels,chans] = getChans(voice->mFmtChannels);
 905
 906     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 907     if(auto *decoder{voice->mDecoder.get()})
 908         decoder->mWidthControl = std::min(props->EnhWidth, 0.7f);
 909
 910     const float lgain{std::min(1.0f-props->Panning, 1.0f)};
 911     const float rgain{std::min(1.0f+props->Panning, 1.0f)};
 912     const float mingain{std::min(lgain, rgain)};
 913     auto SelectChannelGain = [lgain,rgain,mingain](const Channel chan) noexcept
 914     {
 915         switch(chan)
 916         {
 917         case FrontLeft: return lgain;
 918         case FrontRight: return rgain;
 919         case FrontCenter: break;
 920         case LFE: break;
 921         case BackLeft: return lgain;
 922         case BackRight: return rgain;
 923         case BackCenter: break;
 924         case SideLeft: return lgain;
 925         case SideRight: return rgain;
 926         case TopCenter: break;
 927         case TopFrontLeft: return lgain;
 928         case TopFrontCenter: break;
 929         case TopFrontRight: return rgain;
 930         case TopBackLeft: return lgain;
 931         case TopBackCenter: break;
 932         case TopBackRight: return rgain;
 933         case BottomFrontLeft: return lgain;
 934         case BottomFrontRight: return rgain;
 935         case BottomBackLeft: return lgain;
 936         case BottomBackRight: return rgain;
 937         case Aux0: case Aux1: case Aux2: case Aux3: case Aux4: case Aux5: case Aux6: case Aux7:
 938         case Aux8: case Aux9: case Aux10: case Aux11: case Aux12: case Aux13: case Aux14:
 939         case Aux15: case MaxChannels: break;
 940         }
 941         return mingain;
 942     };
 943
 944     if(IsAmbisonic(voice->mFmtChannels))
 945     {
 946         /* Special handling for B-Format and UHJ sources. */
 947
 948         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 949             && voice->mFmtChannels != FmtSuperStereo)
 950         {
 951             if(!(Distance > std::numeric_limits<float>::epsilon()))
 952             {
 953                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 954                  * is what we want for FOA input. The first channel may have
 955                  * been previously re-adjusted if panned, so reset it.
 956                  */
 957                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 958             }
 959             else
 960             {
 961                 /* Clamp the distance for really close sources, to prevent
 962                  * excessive bass.
 963                  */
 964                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
 965                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 966
 967                 /* Only need to adjust the first channel of a B-Format source. */
 968                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 969             }
 970
 971             voice->mFlags.set(VoiceHasNfc);
 972         }
 973
 974         /* Panning a B-Format sound toward some direction is easy. Just pan the
 975          * first (W) channel as a normal mono sound. The angular spread is used
 976          * as a directional scalar to blend between full coverage and full
 977          * panning.
 978          */
 979         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 980             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 981
 982         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 983         {
 984             if(mode != RenderMode::Pairwise)
 985                 return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, 0.0f);
 986             const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
 987             return CalcDirectionCoeffs(pos, 0.0f);
 988         };
 989         const auto scales = GetAmbiScales(voice->mAmbiScaling);
 990         auto coeffs = calc_coeffs(Device->mRenderMode);
 991
 992         if(!(coverage > 0.0f))
 993         {
 994             ComputePanGains(&Device->Dry, coeffs, DryGain.Base*scales[0],
 995                 voice->mChans[0].mDryParams.Gains.Target);
 996             for(uint i{0};i < NumSends;i++)
 997             {
 998                 if(const EffectSlot *Slot{SendSlots[i]})
 999                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base*scales[0],
1000                         voice->mChans[0].mWetParams[i].Gains.Target);
1001             }
1002         }
1003         else
1004         {
1005             /* Local B-Format sources have their XYZ channels rotated according
1006              * to the orientation.
1007              */
1008             /* AT then UP */
1009             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
1010             N.normalize();
1011             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
1012             V.normalize();
1013             if(!props->HeadRelative)
1014             {
1015                 N = Context.Matrix * N;
1016                 V = Context.Matrix * V;
1017             }
1018             /* Build and normalize right-vector */
1019             alu::Vector U{N.cross_product(V)};
1020             U.normalize();
1021
1022             /* Build a rotation matrix. Manually fill the zeroth- and first-
1023              * order elements, then construct the rotation for the higher
1024              * orders.
1025              */
1026             AmbiRotateMatrix &shrot = Device->mAmbiRotateMatrix;
1027             shrot.fill(AmbiRotateMatrix::value_type{});
1028
1029             shrot[0][0] = 1.0f;
1030             shrot[1][1] =  U[0]; shrot[1][2] = -U[1]; shrot[1][3] =  U[2];
1031             shrot[2][1] = -V[0]; shrot[2][2] =  V[1]; shrot[2][3] = -V[2];
1032             shrot[3][1] = -N[0]; shrot[3][2] =  N[1]; shrot[3][3] = -N[2];
1033             AmbiRotator(shrot, static_cast<int>(Device->mAmbiOrder));
1034
1035             /* If the device is higher order than the voice, "upsample" the
1036              * matrix.
1037              *
1038              * NOTE: Starting with second-order, a 2D upsample needs to be
1039              * applied with a 2D source and 3D output, even when they're the
1040              * same order. This is because higher orders have a height offset
1041              * on various channels (i.e. when elevation=0, those height-related
1042              * channels should be non-0).
1043              */
1044             AmbiRotateMatrix &mixmatrix = Device->mAmbiRotateMatrix2;
1045             if(Device->mAmbiOrder > voice->mAmbiOrder
1046                 || (Device->mAmbiOrder >= 2 && !Device->m2DMixing
1047                     && Is2DAmbisonic(voice->mFmtChannels)))
1048             {
1049                 if(voice->mAmbiOrder == 1)
1050                 {
1051                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1052                         al::span{AmbiScale::FirstOrder2DUp} : al::span{AmbiScale::FirstOrderUp};
1053                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1054                 }
1055                 else if(voice->mAmbiOrder == 2)
1056                 {
1057                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1058                         al::span{AmbiScale::SecondOrder2DUp} : al::span{AmbiScale::SecondOrderUp};
1059                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1060                 }
1061                 else if(voice->mAmbiOrder == 3)
1062                 {
1063                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1064                         al::span{AmbiScale::ThirdOrder2DUp} : al::span{AmbiScale::ThirdOrderUp};
1065                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1066                 }
1067                 else if(voice->mAmbiOrder == 4)
1068                 {
1069                     const auto upsampler = al::span{AmbiScale::FourthOrder2DUp};
1070                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1071                 }
1072                 else
1073                     al::unreachable();
1074             }
1075             else
1076                 mixmatrix = shrot;
1077
1078             /* Convert the rotation matrix for input ordering and scaling, and
1079              * whether input is 2D or 3D.
1080              */
1081             const auto index_map = Is2DAmbisonic(voice->mFmtChannels) ?
1082                 GetAmbi2DLayout(voice->mAmbiLayout).subspan(0) :
1083                 GetAmbiLayout(voice->mAmbiLayout).subspan(0);
1084
1085             /* Scale the panned W signal inversely to coverage (full coverage
1086              * means no panned signal), and according to the channel scaling.
1087              */
1088             std::for_each(coeffs.begin(), coeffs.end(),
1089                 [scale=(1.0f-coverage)*scales[0]](float &coeff) noexcept { coeff *= scale; });
1090
1091             for(size_t c{0};c < num_channels;c++)
1092             {
1093                 const size_t acn{index_map[c]};
1094                 const float scale{scales[acn] * coverage};
1095
1096                 /* For channel 0, combine the B-Format signal (scaled according
1097                  * to the coverage amount) with the directional pan. For all
1098                  * other channels, use just the (scaled) B-Format signal.
1099                  */
1100                 std::transform(mixmatrix[acn].cbegin(), mixmatrix[acn].cend(), coeffs.begin(),
1101                     coeffs.begin(), [scale](const float in, const float coeff) noexcept
1102                     { return in*scale + coeff; });
1103
1104                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1105                     voice->mChans[c].mDryParams.Gains.Target);
1106
1107                 for(uint i{0};i < NumSends;i++)
1108                 {
1109                     if(const EffectSlot *Slot{SendSlots[i]})
1110                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1111                             voice->mChans[c].mWetParams[i].Gains.Target);
1112                 }
1113
1114                 coeffs = std::array<float,MaxAmbiChannels>{};
1115             }
1116         }
1117     }
1118     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
1119     {
1120         /* Direct source channels always play local. Skip the virtual channels
1121          * and write inputs to the matching real outputs.
1122          */
1123         voice->mDirect.Buffer = Device->RealOut.Buffer;
1124
1125         for(size_t c{0};c < num_channels;c++)
1126         {
1127             const float pangain{SelectChannelGain(chans[c].channel)};
1128             if(uint idx{Device->channelIdxByName(chans[c].channel)}; idx != InvalidChannelIndex)
1129                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1130             else if(DirectChannels == DirectMode::RemixMismatch)
1131             {
1132                 auto match_channel = [channel=chans[c].channel](const InputRemixMap &map) noexcept
1133                 { return channel == map.channel; };
1134                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
1135                     Device->RealOut.RemixMap.cend(), match_channel);
1136                 if(remap != Device->RealOut.RemixMap.cend())
1137                 {
1138                     for(const auto &target : remap->targets)
1139                     {
1140                         idx = Device->channelIdxByName(target.channel);
1141                         if(idx != InvalidChannelIndex)
1142                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain
1143                                 * target.mix;
1144                     }
1145                 }
1146             }
1147         }
1148
1149         /* Auxiliary sends still use normal channel panning since they mix to
1150          * B-Format, which can't channel-match.
1151          */
1152         for(size_t c{0};c < num_channels;c++)
1153         {
1154             /* Skip LFE */
1155             if(chans[c].channel == LFE)
1156                 continue;
1157
1158             const float pangain{SelectChannelGain(chans[c].channel)};
1159             const auto coeffs = CalcDirectionCoeffs(chans[c].pos, 0.0f);
1160
1161             for(uint i{0};i < NumSends;i++)
1162             {
1163                 if(const EffectSlot *Slot{SendSlots[i]})
1164                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1165                         voice->mChans[c].mWetParams[i].Gains.Target);
1166             }
1167         }
1168     }
1169     else if(Device->mRenderMode == RenderMode::Hrtf)
1170     {
1171         /* Full HRTF rendering. Skip the virtual channels and render to the
1172          * real outputs.
1173          */
1174         voice->mDirect.Buffer = Device->RealOut.Buffer;
1175
1176         if(Distance > std::numeric_limits<float>::epsilon())
1177         {
1178             if(voice->mFmtChannels == FmtMono)
1179             {
1180                 const float src_ev{std::asin(std::clamp(ypos, -1.0f, 1.0f))};
1181                 const float src_az{std::atan2(xpos, -zpos)};
1182
1183                 Device->mHrtf->getCoeffs(src_ev, src_az, Distance*NfcScale, Spread,
1184                     voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
1185                     voice->mChans[0].mDryParams.Hrtf.Target.Delay);
1186                 voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1187
1188                 const auto coeffs = CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1189                 for(uint i{0};i < NumSends;i++)
1190                 {
1191                     if(const EffectSlot *Slot{SendSlots[i]})
1192                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1193                             voice->mChans[0].mWetParams[i].Gains.Target);
1194                 }
1195             }
1196             else for(size_t c{0};c < num_channels;c++)
1197             {
1198                 /* Skip LFE */
1199                 if(chans[c].channel == LFE) continue;
1200                 const float pangain{SelectChannelGain(chans[c].channel)};
1201
1202                 /* Warp the channel position toward the source position as the
1203                  * source spread decreases. With no spread, all channels are at
1204                  * the source position, at full spread (pi*2), each channel is
1205                  * left unchanged.
1206                  */
1207                 const float a{1.0f - (al::numbers::inv_pi_v<float>/2.0f)*Spread};
1208                 std::array pos{
1209                     lerpf(chans[c].pos[0], xpos, a),
1210                     lerpf(chans[c].pos[1], ypos, a),
1211                     lerpf(chans[c].pos[2], zpos, a)};
1212                 const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1213                 if(len < 1.0f)
1214                 {
1215                     pos[0] /= len;
1216                     pos[1] /= len;
1217                     pos[2] /= len;
1218                 }
1219
1220                 const float ev{std::asin(std::clamp(pos[1], -1.0f, 1.0f))};
1221                 const float az{std::atan2(pos[0], -pos[2])};
1222
1223                 Device->mHrtf->getCoeffs(ev, az, Distance*NfcScale, 0.0f,
1224                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1225                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1226                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1227
1228                 const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1229                 for(uint i{0};i < NumSends;i++)
1230                 {
1231                     if(const EffectSlot *Slot{SendSlots[i]})
1232                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1233                             voice->mChans[c].mWetParams[i].Gains.Target);
1234                 }
1235             }
1236         }
1237         else
1238         {
1239             /* With no distance, spread is only meaningful for mono sources
1240              * where it can be 0 or full (non-mono sources are always full
1241              * spread here).
1242              */
1243             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1244
1245             /* Local sources on HRTF play with each channel panned to its
1246              * relative location around the listener, providing "virtual
1247              * speaker" responses.
1248              */
1249             for(size_t c{0};c < num_channels;c++)
1250             {
1251                 /* Skip LFE */
1252                 if(chans[c].channel == LFE)
1253                     continue;
1254                 const float pangain{SelectChannelGain(chans[c].channel)};
1255
1256                 /* Get the HRIR coefficients and delays for this channel
1257                  * position.
1258                  */
1259                 const float ev{std::asin(chans[c].pos[1])};
1260                 const float az{std::atan2(chans[c].pos[0], -chans[c].pos[2])};
1261
1262                 Device->mHrtf->getCoeffs(ev, az, std::numeric_limits<float>::infinity(), spread,
1263                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1264                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1265                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1266
1267                 /* Normal panning for auxiliary sends. */
1268                 const auto coeffs = CalcDirectionCoeffs(chans[c].pos, spread);
1269
1270                 for(uint i{0};i < NumSends;i++)
1271                 {
1272                     if(const EffectSlot *Slot{SendSlots[i]})
1273                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1274                             voice->mChans[c].mWetParams[i].Gains.Target);
1275                 }
1276             }
1277         }
1278
1279         voice->mFlags.set(VoiceHasHrtf);
1280     }
1281     else
1282     {
1283         /* Non-HRTF rendering. Use normal panning to the output. */
1284
1285         if(Distance > std::numeric_limits<float>::epsilon())
1286         {
1287             /* Calculate NFC filter coefficient if needed. */
1288             if(Device->AvgSpeakerDist > 0.0f)
1289             {
1290                 /* Clamp the distance for really close sources, to prevent
1291                  * excessive bass.
1292                  */
1293                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
1294                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1295
1296                 /* Adjust NFC filters. */
1297                 for(size_t c{0};c < num_channels;c++)
1298                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1299
1300                 voice->mFlags.set(VoiceHasNfc);
1301             }
1302
1303             if(voice->mFmtChannels == FmtMono)
1304             {
1305                 auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1306                 {
1307                     if(mode != RenderMode::Pairwise)
1308                         return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1309                     const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
1310                     return CalcDirectionCoeffs(pos, Spread);
1311                 };
1312                 const auto coeffs = calc_coeffs(Device->mRenderMode);
1313
1314                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1315                     voice->mChans[0].mDryParams.Gains.Target);
1316                 for(uint i{0};i < NumSends;i++)
1317                 {
1318                     if(const EffectSlot *Slot{SendSlots[i]})
1319                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1320                             voice->mChans[0].mWetParams[i].Gains.Target);
1321                 }
1322             }
1323             else for(size_t c{0};c < num_channels;c++)
1324             {
1325                 const auto pangain = SelectChannelGain(chans[c].channel);
1326
1327                 /* Special-case LFE */
1328                 if(chans[c].channel == LFE)
1329                 {
1330                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1331                     {
1332                         const auto idx = uint{Device->channelIdxByName(chans[c].channel)};
1333                         if(idx != InvalidChannelIndex)
1334                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1335                     }
1336                     continue;
1337                 }
1338
1339                 /* Warp the channel position toward the source position as the
1340                  * spread decreases. With no spread, all channels are at the
1341                  * source position, at full spread (pi*2), each channel
1342                  * position is left unchanged.
1343                  */
1344                 const auto a = 1.0f - (al::numbers::inv_pi_v<float>/2.0f)*Spread;
1345                 auto pos = std::array{
1346                     lerpf(chans[c].pos[0], xpos, a),
1347                     lerpf(chans[c].pos[1], ypos, a),
1348                     lerpf(chans[c].pos[2], zpos, a)};
1349                 const auto len = std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2]);
1350                 if(len < 1.0f)
1351                 {
1352                     pos[0] /= len;
1353                     pos[1] /= len;
1354                     pos[2] /= len;
1355                 }
1356
1357                 if(Device->mRenderMode == RenderMode::Pairwise)
1358                     pos = ScaleAzimuthFront3(pos);
1359                 const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1360
1361                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1362                     voice->mChans[c].mDryParams.Gains.Target);
1363                 for(uint i{0};i < NumSends;i++)
1364                 {
1365                     if(const EffectSlot *Slot{SendSlots[i]})
1366                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1367                             voice->mChans[c].mWetParams[i].Gains.Target);
1368                 }
1369             }
1370         }
1371         else
1372         {
1373             if(Device->AvgSpeakerDist > 0.0f)
1374             {
1375                 /* If the source distance is 0, simulate a plane-wave by using
1376                  * infinite distance, which results in a w0 of 0.
1377                  */
1378                 static constexpr float w0{0.0f};
1379                 for(size_t c{0};c < num_channels;c++)
1380                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1381
1382                 voice->mFlags.set(VoiceHasNfc);
1383             }
1384
1385             /* With no distance, spread is only meaningful for mono sources
1386              * where it can be 0 or full (non-mono sources are always full
1387              * spread here).
1388              */
1389             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1390             for(size_t c{0};c < num_channels;c++)
1391             {
1392                 const float pangain{SelectChannelGain(chans[c].channel)};
1393
1394                 /* Special-case LFE */
1395                 if(chans[c].channel == LFE)
1396                 {
1397                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1398                     {
1399                         const uint idx{Device->channelIdxByName(chans[c].channel)};
1400                         if(idx != InvalidChannelIndex)
1401                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1402                     }
1403                     continue;
1404                 }
1405
1406                 const auto coeffs = CalcDirectionCoeffs((Device->mRenderMode==RenderMode::Pairwise)
1407                     ? ScaleAzimuthFront3(chans[c].pos) : chans[c].pos, spread);
1408
1409                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1410                     voice->mChans[c].mDryParams.Gains.Target);
1411                 for(uint i{0};i < NumSends;i++)
1412                 {
1413                     if(const EffectSlot *Slot{SendSlots[i]})
1414                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1415                             voice->mChans[c].mWetParams[i].Gains.Target);
1416                 }
1417             }
1418         }
1419     }
1420
1421     {
1422         const float hfNorm{props->Direct.HFReference / Frequency};
1423         const float lfNorm{props->Direct.LFReference / Frequency};
1424
1425         voice->mDirect.FilterType = AF_None;
1426         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1427         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1428
1429         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1430         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1431         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1432         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1433         for(size_t c{1};c < num_channels;c++)
1434         {
1435             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1436             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1437         }
1438     }
1439     for(uint i{0};i < NumSends;i++)
1440     {
1441         const float hfNorm{props->Send[i].HFReference / Frequency};
1442         const float lfNorm{props->Send[i].LFReference / Frequency};
1443
1444         voice->mSend[i].FilterType = AF_None;
1445         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1446         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1447
1448         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1449         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1450         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1451         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1452         for(size_t c{1};c < num_channels;c++)
1453         {
1454             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1455             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1456         }
1457     }
1458 }
1459
1460 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1461 {
1462     DeviceBase *Device{context->mDevice};
1463     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1464
1465     voice->mDirect.Buffer = Device->Dry.Buffer;
1466     for(uint i{0};i < Device->NumAuxSends;i++)
1467     {
1468         SendSlots[i] = props->Send[i].Slot;
1469         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1470         {
1471             SendSlots[i] = nullptr;
1472             voice->mSend[i].Buffer = {};
1473         }
1474         else
1475             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1476     }
1477
1478     /* Calculate the stepping value */
1479     const auto Pitch = static_cast<float>(voice->mFrequency) /
1480         static_cast<float>(Device->Frequency) * props->Pitch;
1481     if(Pitch > float{MaxPitch})
1482         voice->mStep = MaxPitch<<MixerFracBits;
1483     else
1484         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1485     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1486
1487     /* Calculate gains */
1488     GainTriplet DryGain{};
1489     DryGain.Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1490         props->Direct.Gain * context->mParams.Gain, GainMixMax);
1491     DryGain.HF = props->Direct.GainHF;
1492     DryGain.LF = props->Direct.GainLF;
1493
1494     std::array<GainTriplet,MaxSendCount> WetGain{};
1495     for(uint i{0};i < Device->NumAuxSends;i++)
1496     {
1497         WetGain[i].Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1498             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1499         WetGain[i].HF = props->Send[i].GainHF;
1500         WetGain[i].LF = props->Send[i].GainLF;
1501     }
1502
1503     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1504         context->mParams, Device);
1505 }
1506
1507 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1508 {
1509     DeviceBase *Device{context->mDevice};
1510     const uint NumSends{Device->NumAuxSends};
1511
1512     /* Set mixing buffers and get send parameters. */
1513     voice->mDirect.Buffer = Device->Dry.Buffer;
1514     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1515     std::array<float,MaxSendCount> RoomRolloff{};
1516     for(uint i{0};i < NumSends;i++)
1517     {
1518         SendSlots[i] = props->Send[i].Slot;
1519         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1520         {
1521             SendSlots[i] = nullptr;
1522             voice->mSend[i].Buffer = {};
1523         }
1524         else
1525         {
1526             /* NOTE: Contrary to the EFX docs, the effect's room rolloff factor
1527              * applies to the selected distance model along with the source's
1528              * room rolloff factor, not necessarily the inverse distance model.
1529              */
1530             RoomRolloff[i] = props->RoomRolloffFactor + SendSlots[i]->RoomRolloff;
1531
1532             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1533         }
1534     }
1535
1536     /* Transform source to listener space (convert to head relative) */
1537     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1538     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1539     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1540     if(!props->HeadRelative)
1541     {
1542         /* Transform source vectors */
1543         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1544         Velocity = context->mParams.Matrix * Velocity;
1545         Direction = context->mParams.Matrix * Direction;
1546     }
1547     else
1548     {
1549         /* Offset the source velocity to be relative of the listener velocity */
1550         Velocity += context->mParams.Velocity;
1551     }
1552
1553     const bool directional{Direction.normalize() > 0.0f};
1554     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1555     const float Distance{ToSource.normalize()};
1556
1557     /* Calculate distance attenuation */
1558     float ClampedDist{Distance};
1559     float DryGainBase{props->Gain};
1560     std::array<float,MaxSendCount> WetGainBase{};
1561     WetGainBase.fill(props->Gain);
1562
1563     float DryAttnBase{1.0f};
1564     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1565         : context->mParams.mDistanceModel)
1566     {
1567     case DistanceModel::InverseClamped:
1568         if(props->MaxDistance < props->RefDistance) break;
1569         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1570         /*fall-through*/
1571     case DistanceModel::Inverse:
1572         if(props->RefDistance > 0.0f)
1573         {
1574             float dist{lerpf(props->RefDistance, ClampedDist, props->RolloffFactor)};
1575             if(dist > 0.0f)
1576             {
1577                 DryAttnBase = props->RefDistance / dist;
1578                 DryGainBase *= DryAttnBase;
1579             }
1580
1581             for(size_t i{0};i < NumSends;++i)
1582             {
1583                 dist = lerpf(props->RefDistance, ClampedDist, RoomRolloff[i]);
1584                 if(dist > 0.0f) WetGainBase[i] *= props->RefDistance / dist;
1585             }
1586         }
1587         break;
1588
1589     case DistanceModel::LinearClamped:
1590         if(props->MaxDistance < props->RefDistance) break;
1591         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1592         /*fall-through*/
1593     case DistanceModel::Linear:
1594         if(props->MaxDistance != props->RefDistance)
1595         {
1596             float attn{(ClampedDist-props->RefDistance) /
1597                 (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1598             DryAttnBase = std::max(1.0f - attn, 0.0f);
1599             DryGainBase *= DryAttnBase;
1600
1601             for(size_t i{0};i < NumSends;++i)
1602             {
1603                 attn = (ClampedDist-props->RefDistance) /
1604                     (props->MaxDistance-props->RefDistance) * RoomRolloff[i];
1605                 WetGainBase[i] *= std::max(1.0f - attn, 0.0f);
1606             }
1607         }
1608         break;
1609
1610     case DistanceModel::ExponentClamped:
1611         if(props->MaxDistance < props->RefDistance) break;
1612         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1613         /*fall-through*/
1614     case DistanceModel::Exponent:
1615         if(ClampedDist > 0.0f && props->RefDistance > 0.0f)
1616         {
1617             const float dist_ratio{ClampedDist/props->RefDistance};
1618             DryAttnBase = std::pow(dist_ratio, -props->RolloffFactor);
1619             DryGainBase *= DryAttnBase;
1620             for(size_t i{0};i < NumSends;++i)
1621                 WetGainBase[i] *= std::pow(dist_ratio, -RoomRolloff[i]);
1622         }
1623         break;
1624
1625     case DistanceModel::Disable:
1626         break;
1627     }
1628
1629     /* Calculate directional soundcones */
1630     float ConeHF{1.0f}, WetCone{1.0f}, WetConeHF{1.0f};
1631     if(directional && props->InnerAngle < 360.0f)
1632     {
1633         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1634         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1635
1636         float ConeGain{1.0f};
1637         if(Angle >= props->OuterAngle)
1638         {
1639             ConeGain = props->OuterGain;
1640             if(props->DryGainHFAuto)
1641                 ConeHF = props->OuterGainHF;
1642         }
1643         else if(Angle >= props->InnerAngle)
1644         {
1645             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1646             ConeGain = lerpf(1.0f, props->OuterGain, scale);
1647             if(props->DryGainHFAuto)
1648                 ConeHF = lerpf(1.0f, props->OuterGainHF, scale);
1649         }
1650
1651         DryGainBase *= ConeGain;
1652         if(props->WetGainAuto)
1653             WetCone = ConeGain;
1654         if(props->WetGainHFAuto)
1655             WetConeHF = ConeHF;
1656     }
1657
1658     /* Apply gain and frequency filters */
1659     GainTriplet DryGain{};
1660     DryGainBase = std::clamp(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1661     DryGain.Base = std::min(DryGainBase * props->Direct.Gain, GainMixMax);
1662     DryGain.HF = ConeHF * props->Direct.GainHF;
1663     DryGain.LF = props->Direct.GainLF;
1664
1665     std::array<GainTriplet,MaxSendCount> WetGain{};
1666     for(uint i{0};i < NumSends;i++)
1667     {
1668         const auto gain = std::clamp(WetGainBase[i]*WetCone, props->MinGain, props->MaxGain) *
1669             context->mParams.Gain;
1670         WetGain[i].Base = std::min(gain * props->Send[i].Gain, GainMixMax);
1671         WetGain[i].HF = WetConeHF * props->Send[i].GainHF;
1672         WetGain[i].LF = props->Send[i].GainLF;
1673     }
1674
1675     /* Distance-based air absorption and initial send decay. */
1676     if(Distance > props->RefDistance) LIKELY
1677     {
1678         /* FIXME: In keeping with EAX, the base air absorption gain should be
1679          * taken from the reverb property in the "primary fx slot" when it has
1680          * a reverb effect and the environment flag set, and be applied to the
1681          * direct path and all environment sends, rather than each path using
1682          * the air absorption gain associated with the given slot's effect. At
1683          * this point in the mixer, and even in EFX itself, there's no concept
1684          * of a "primary fx slot" so it's unclear which effect slot should be
1685          * checked.
1686          *
1687          * The HF reference is also intended to be handled the same way, but
1688          * again, there's no concept of a "primary fx slot" here and no way to
1689          * know which effect slot to look at for the reference frequency.
1690          */
1691         const auto distance_units = float{(Distance-props->RefDistance) * props->RolloffFactor};
1692         const auto distance_meters = float{distance_units * context->mParams.MetersPerUnit};
1693         const auto absorb = float{distance_meters * props->AirAbsorptionFactor};
1694         if(absorb > std::numeric_limits<float>::epsilon())
1695             DryGain.HF *= std::pow(context->mParams.AirAbsorptionGainHF, absorb);
1696
1697         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1698          * adjustment is applied to the send gains.
1699          */
1700         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1701         {
1702             if(!SendSlots[i] || !(SendSlots[i]->DecayTime > 0.0f))
1703                 continue;
1704
1705             if(SendSlots[i]->AirAbsorptionGainHF < 1.0f
1706                 && absorb > std::numeric_limits<float>::epsilon())
1707                 WetGain[i].HF *= std::pow(SendSlots[i]->AirAbsorptionGainHF, absorb);
1708
1709             const float DecayDistance{SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec};
1710
1711             /* Apply a decay-time transformation to the wet path, based on the
1712              * source distance. The initial decay of the reverb effect is
1713              * calculated and applied to the wet path.
1714              *
1715              * FIXME: This is very likely not correct. It more likely should
1716              * work by calculating a rolloff dynamically based on the reverb
1717              * parameters (and source distance?) and add it to the room rolloff
1718              * with the reverb and source rolloff parameters.
1719              */
1720             const float baseAttn{DryAttnBase};
1721             const float fact{distance_meters / DecayDistance};
1722             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1723             WetGain[i].Base *= gain;
1724         }
1725     }
1726
1727
1728     /* Initial source pitch */
1729     float Pitch{props->Pitch};
1730
1731     /* Calculate velocity-based doppler effect */
1732     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1733     if(DopplerFactor > 0.0f)
1734     {
1735         const alu::Vector &lvelocity = context->mParams.Velocity;
1736         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1737         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1738
1739         const float SpeedOfSound{context->mParams.SpeedOfSound};
1740         if(!(vls < SpeedOfSound))
1741         {
1742             /* Listener moving away from the source at the speed of sound.
1743              * Sound waves can't catch it.
1744              */
1745             Pitch = 0.0f;
1746         }
1747         else if(!(vss < SpeedOfSound))
1748         {
1749             /* Source moving toward the listener at the speed of sound. Sound
1750              * waves bunch up to extreme frequencies.
1751              */
1752             Pitch = std::numeric_limits<float>::infinity();
1753         }
1754         else
1755         {
1756             /* Source and listener movement is nominal. Calculate the proper
1757              * doppler shift.
1758              */
1759             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1760         }
1761     }
1762
1763     /* Adjust pitch based on the buffer and output frequencies, and calculate
1764      * fixed-point stepping value.
1765      */
1766     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1767     if(Pitch > float{MaxPitch})
1768         voice->mStep = MaxPitch<<MixerFracBits;
1769     else
1770         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1771     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1772
1773     float spread{0.0f};
1774     if(props->Radius > Distance)
1775         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1776     else if(Distance > 0.0f)
1777         spread = std::asin(props->Radius/Distance) * 2.0f;
1778
1779     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1780         Distance, spread, DryGain, WetGain, SendSlots, props, context->mParams, Device);
1781 }
1782
1783 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1784 {
1785     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1786     if(!props && !force) return;
1787
1788     if(props)
1789     {
1790         voice->mProps = static_cast<VoiceProps&>(*props);
1791
1792         AtomicReplaceHead(context->mFreeVoiceProps, props);
1793     }
1794
1795     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1796             && !IsAmbisonic(voice->mFmtChannels))
1797         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1798         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1799         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1800     else
1801         CalcAttnSourceParams(voice, &voice->mProps, context);
1802 }
1803
1804
1805 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1806 {
1807     RingBuffer *ring{context->mAsyncEvents.get()};
1808     auto evt_vec = ring->getWriteVector();
1809     if(evt_vec[0].len < 1) return;
1810
1811     auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec[0].buf);
1812     evt.mId = id;
1813     switch(state)
1814     {
1815     case VChangeState::Reset:
1816         evt.mState = AsyncSrcState::Reset;
1817         break;
1818     case VChangeState::Stop:
1819         evt.mState = AsyncSrcState::Stop;
1820         break;
1821     case VChangeState::Play:
1822         evt.mState = AsyncSrcState::Play;
1823         break;
1824     case VChangeState::Pause:
1825         evt.mState = AsyncSrcState::Pause;
1826         break;
1827     /* Shouldn't happen. */
1828     case VChangeState::Restart:
1829         al::unreachable();
1830     }
1831
1832     ring->writeAdvance(1);
1833 }
1834
1835 void ProcessVoiceChanges(ContextBase *ctx)
1836 {
1837     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1838     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1839     if(!next) return;
1840
1841     const auto enabledevt = ctx->mEnabledEvts.load(std::memory_order_acquire);
1842     do {
1843         cur = next;
1844
1845         bool sendevt{false};
1846         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1847         {
1848             if(Voice *voice{cur->mVoice})
1849             {
1850                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1851                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1852                 /* A source ID indicates the voice was playing or paused, which
1853                  * gets a reset/stop event.
1854                  */
1855                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1856                 Voice::State oldvstate{Voice::Playing};
1857                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1858                     std::memory_order_relaxed, std::memory_order_acquire);
1859                 voice->mPendingChange.store(false, std::memory_order_release);
1860             }
1861             /* Reset state change events are always sent, even if the voice is
1862              * already stopped or even if there is no voice.
1863              */
1864             sendevt |= (cur->mState == VChangeState::Reset);
1865         }
1866         else if(cur->mState == VChangeState::Pause)
1867         {
1868             Voice *voice{cur->mVoice};
1869             Voice::State oldvstate{Voice::Playing};
1870             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1871                 std::memory_order_release, std::memory_order_acquire);
1872         }
1873         else if(cur->mState == VChangeState::Play)
1874         {
1875             /* NOTE: When playing a voice, sending a source state change event
1876              * depends if there's an old voice to stop and if that stop is
1877              * successful. If there is no old voice, a playing event is always
1878              * sent. If there is an old voice, an event is sent only if the
1879              * voice is already stopped.
1880              */
1881             if(Voice *oldvoice{cur->mOldVoice})
1882             {
1883                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1884                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1885                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1886                 Voice::State oldvstate{Voice::Playing};
1887                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1888                     std::memory_order_relaxed, std::memory_order_acquire);
1889                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1890             }
1891             else
1892                 sendevt = true;
1893
1894             Voice *voice{cur->mVoice};
1895             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1896         }
1897         else if(cur->mState == VChangeState::Restart)
1898         {
1899             /* Restarting a voice never sends a source change event. */
1900             Voice *oldvoice{cur->mOldVoice};
1901             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1902             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1903             /* If there's no sourceID, the old voice finished so don't start
1904              * the new one at its new offset.
1905              */
1906             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1907             {
1908                 /* Otherwise, set the voice to stopping if it's not already (it
1909                  * might already be, if paused), and play the new voice as
1910                  * appropriate.
1911                  */
1912                 Voice::State oldvstate{Voice::Playing};
1913                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1914                     std::memory_order_relaxed, std::memory_order_acquire);
1915
1916                 Voice *voice{cur->mVoice};
1917                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1918                     : Voice::Stopped, std::memory_order_release);
1919             }
1920             oldvoice->mPendingChange.store(false, std::memory_order_release);
1921         }
1922         if(sendevt && enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
1923             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1924
1925         next = cur->mNext.load(std::memory_order_acquire);
1926     } while(next);
1927     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1928 }
1929
1930 void ProcessParamUpdates(ContextBase *ctx, const al::span<EffectSlot*> slots,
1931     const al::span<EffectSlot*> sorted_slots, const al::span<Voice*> voices)
1932 {
1933     ProcessVoiceChanges(ctx);
1934
1935     IncrementRef(ctx->mUpdateCount);
1936     if(!ctx->mHoldUpdates.load(std::memory_order_acquire)) LIKELY
1937     {
1938         bool force{CalcContextParams(ctx)};
1939         auto sorted_slot_base = al::to_address(sorted_slots.begin());
1940         for(EffectSlot *slot : slots)
1941             force |= CalcEffectSlotParams(slot, sorted_slot_base, ctx);
1942
1943         for(Voice *voice : voices)
1944         {
1945             /* Only update voices that have a source. */
1946             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1947                 CalcSourceParams(voice, ctx, force);
1948         }
1949     }
1950     IncrementRef(ctx->mUpdateCount);
1951 }
1952
1953 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1954 {
1955     ASSUME(SamplesToDo > 0);
1956
1957     const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
1958         nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
1959         device->Frequency};
1960
1961     auto proc_context = [SamplesToDo,curtime](ContextBase *ctx)
1962     {
1963         const auto auxslotspan = al::span{*ctx->mActiveAuxSlots.load(std::memory_order_acquire)};
1964         const auto auxslots = auxslotspan.first(auxslotspan.size()>>1);
1965         const auto sorted_slots = auxslotspan.last(auxslotspan.size()>>1);
1966         const auto voices = ctx->getVoicesSpanAcquired();
1967
1968         /* Process pending property updates for objects on the context. */
1969         ProcessParamUpdates(ctx, auxslots, sorted_slots, voices);
1970
1971         /* Clear auxiliary effect slot mixing buffers. */
1972         auto clear_wetbuffers = [](EffectSlot *slot)
1973         {
1974             auto clear_buffer = [](const FloatBufferSpan buffer)
1975             { std::fill(buffer.begin(), buffer.end(), 0.0f); };
1976             std::for_each(slot->Wet.Buffer.begin(), slot->Wet.Buffer.end(), clear_buffer);
1977         };
1978         std::for_each(auxslots.begin(), auxslots.end(), clear_wetbuffers);
1979
1980         /* Process voices that have a playing source. */
1981         auto proc_voice = [ctx,curtime,SamplesToDo](Voice *voice)
1982         {
1983             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1984             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1985                 voice->mix(vstate, ctx, curtime, SamplesToDo);
1986         };
1987         std::for_each(voices.begin(), voices.end(), proc_voice);
1988
1989         /* Process effects. */
1990         if(!auxslots.empty())
1991         {
1992             /* Sort the slots into extra storage, so that effect slots come
1993              * before their effect slot target (or their targets' target). Skip
1994              * sorting if it has already been done.
1995              */
1996             if(!sorted_slots[0])
1997             {
1998                 /* First, copy the slots to the sorted list and partition them,
1999                  * so that all slots without a target slot go to the end.
2000                  */
2001                 auto has_target = [](const EffectSlot *slot) noexcept -> bool
2002                 { return slot->Target != nullptr; };
2003                 auto split_point = std::partition_copy(auxslots.rbegin(), auxslots.rend(),
2004                     sorted_slots.begin(), sorted_slots.rbegin(), has_target).first;
2005                 /* There must be at least one slot without a slot target. */
2006                 assert(split_point != sorted_slots.end());
2007
2008                 /* Starting from the back of the sorted list, continue
2009                  * partitioning the front of the list given each target until
2010                  * all targets are accounted for. This ensures all slots
2011                  * without a target go last, all slots directly targeting those
2012                  * last slots go second-to-last, all slots directly targeting
2013                  * those second-last slots go third-to-last, etc.
2014                  */
2015                 auto next_target = sorted_slots.end();
2016                 while(std::distance(sorted_slots.begin(), split_point) > 1)
2017                 {
2018                     /* This shouldn't happen, but if there's unsorted slots
2019                      * left that don't target any sorted slots, they can't
2020                      * contribute to the output, so leave them.
2021                      */
2022                     if(next_target == split_point) UNLIKELY
2023                         break;
2024
2025                     --next_target;
2026                     auto not_next = [next_target](const EffectSlot *slot) noexcept -> bool
2027                     { return slot->Target != *next_target; };
2028                     split_point = std::partition(sorted_slots.begin(), split_point, not_next);
2029                 }
2030             }
2031
2032             auto proc_slot = [SamplesToDo](const EffectSlot *slot)
2033             {
2034                 EffectState *state{slot->mEffectState.get()};
2035                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
2036             };
2037             std::for_each(sorted_slots.begin(), sorted_slots.end(), proc_slot);
2038         }
2039
2040         /* Signal the event handler if there are any events to read. */
2041         if(RingBuffer *ring{ctx->mAsyncEvents.get()}; ring->readSpace() > 0)
2042             ctx->mEventSem.post();
2043     };
2044     const auto contexts = al::span{*device->mContexts.load(std::memory_order_acquire)};
2045     std::for_each(contexts.begin(), contexts.end(), proc_context);
2046 }
2047
2048
2049 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
2050     const al::span<const DistanceComp::ChanData,MaxOutputChannels> chandata)
2051 {
2052     ASSUME(SamplesToDo > 0);
2053
2054     auto distcomp = chandata.begin();
2055     for(auto &chanbuffer : Samples)
2056     {
2057         const float gain{distcomp->Gain};
2058         auto distbuf = al::span{al::assume_aligned<16>(distcomp->Buffer.data()),
2059             distcomp->Buffer.size()};
2060         ++distcomp;
2061
2062         const size_t base{distbuf.size()};
2063         if(base < 1) continue;
2064
2065         const auto inout = al::span{al::assume_aligned<16>(chanbuffer.data()), SamplesToDo};
2066         if(SamplesToDo >= base) LIKELY
2067         {
2068             auto delay_end = std::rotate(inout.begin(), inout.end()-ptrdiff_t(base), inout.end());
2069             std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
2070         }
2071         else
2072         {
2073             auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
2074             std::rotate(distbuf.begin(), delay_start, distbuf.begin()+ptrdiff_t(base));
2075         }
2076         std::transform(inout.begin(), inout.end(), inout.begin(),
2077             [gain](float s) { return s*gain; });
2078     }
2079 }
2080
2081 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
2082     const float quant_scale, const size_t SamplesToDo)
2083 {
2084     static constexpr double invRNGRange{1.0 / std::numeric_limits<uint>::max()};
2085     ASSUME(SamplesToDo > 0);
2086
2087     /* Dithering. Generate whitenoise (uniform distribution of random values
2088      * between -1 and +1) and add it to the sample values, after scaling up to
2089      * the desired quantization depth and before rounding.
2090      */
2091     const float invscale{1.0f / quant_scale};
2092     uint seed{*dither_seed};
2093     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
2094     {
2095         float val{sample * quant_scale};
2096         uint rng0{dither_rng(&seed)};
2097         uint rng1{dither_rng(&seed)};
2098         val += static_cast<float>(rng0*invRNGRange - rng1*invRNGRange);
2099         return fast_roundf(val) * invscale;
2100     };
2101     for(FloatBufferLine &inout : Samples)
2102         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
2103     *dither_seed = seed;
2104 }
2105
2106
2107 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
2108  * chokes on that given the inline specializations.
2109  */
2110 template<typename T>
2111 inline T SampleConv(float) noexcept;
2112
2113 template<> inline float SampleConv(float val) noexcept
2114 { return val; }
2115 template<> inline int32_t SampleConv(float val) noexcept
2116 {
2117     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
2118      * This means a normalized float has at most 25 bits of signed precision.
2119      * When scaling and clamping for a signed 32-bit integer, these following
2120      * values are the best a float can give.
2121      */
2122     return fastf2i(std::clamp(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
2123 }
2124 template<> inline int16_t SampleConv(float val) noexcept
2125 { return static_cast<int16_t>(fastf2i(std::clamp(val*32768.0f, -32768.0f, 32767.0f))); }
2126 template<> inline int8_t SampleConv(float val) noexcept
2127 { return static_cast<int8_t>(fastf2i(std::clamp(val*128.0f, -128.0f, 127.0f))); }
2128
2129 /* Define unsigned output variations. */
2130 template<> inline uint32_t SampleConv(float val) noexcept
2131 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
2132 template<> inline uint16_t SampleConv(float val) noexcept
2133 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
2134 template<> inline uint8_t SampleConv(float val) noexcept
2135 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
2136
2137 template<typename T>
2138 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
2139     const size_t SamplesToDo, const size_t FrameStep)
2140 {
2141     ASSUME(FrameStep > 0);
2142     ASSUME(SamplesToDo > 0);
2143
2144     const auto output = al::span{static_cast<T*>(OutBuffer), (Offset+SamplesToDo)*FrameStep}
2145         .subspan(Offset*FrameStep);
2146     size_t c{0};
2147     for(const FloatBufferLine &inbuf : InBuffer)
2148     {
2149         auto out = output.begin();
2150         auto conv_sample = [FrameStep,c,&out](const float s) noexcept
2151         {
2152             out[c] = SampleConv<T>(s);
2153             out += ptrdiff_t(FrameStep);
2154         };
2155         std::for_each_n(inbuf.cbegin(), SamplesToDo, conv_sample);
2156         ++c;
2157     }
2158     if(const size_t extra{FrameStep - c})
2159     {
2160         const auto silence = SampleConv<T>(0.0f);
2161         for(size_t i{0};i < SamplesToDo;++i)
2162             std::fill_n(&output[i*FrameStep + c], extra, silence);
2163     }
2164 }
2165
2166 template<typename T>
2167 void Write(const al::span<const FloatBufferLine> InBuffer, al::span<void*> OutBuffers,
2168     const size_t Offset, const size_t SamplesToDo)
2169 {
2170     ASSUME(SamplesToDo > 0);
2171
2172     auto srcbuf = InBuffer.cbegin();
2173     for(auto *dstbuf : OutBuffers)
2174     {
2175         const auto src = al::span{*srcbuf}.first(SamplesToDo);
2176         const auto dst = al::span{static_cast<T*>(dstbuf), Offset+SamplesToDo}.subspan(Offset);
2177         std::transform(src.cbegin(), src.end(), dst.begin(), SampleConv<T>);
2178         ++srcbuf;
2179     }
2180 }
2181
2182 } // namespace
2183
2184 uint DeviceBase::renderSamples(const uint numSamples)
2185 {
2186     const uint samplesToDo{std::min(numSamples, uint{BufferLineSize})};
2187
2188     /* Clear main mixing buffers. */
2189     for(FloatBufferLine &buffer : MixBuffer)
2190         buffer.fill(0.0f);
2191
2192     {
2193         const auto mixLock = getWriteMixLock();
2194
2195         /* Process and mix each context's sources and effects. */
2196         ProcessContexts(this, samplesToDo);
2197
2198         /* Every second's worth of samples is converted and added to clock base
2199          * so that large sample counts don't overflow during conversion. This
2200          * also guarantees a stable conversion.
2201          */
2202         auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
2203         auto clockBase = mClockBase.load(std::memory_order_relaxed) +
2204             std::chrono::seconds{samplesDone/Frequency};
2205         mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
2206         mClockBase.store(clockBase, std::memory_order_relaxed);
2207     }
2208
2209     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
2210      * (Ambisonic decode, UHJ encode, etc).
2211      */
2212     postProcess(samplesToDo);
2213
2214     /* Apply compression, limiting sample amplitude if needed or desired. */
2215     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer);
2216
2217     /* Apply delays and attenuation for mismatched speaker distances. */
2218     if(ChannelDelays)
2219         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels);
2220
2221     /* Apply dithering. The compressor should have left enough headroom for the
2222      * dither noise to not saturate.
2223      */
2224     if(DitherDepth > 0.0f)
2225         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
2226
2227     return samplesToDo;
2228 }
2229
2230 void DeviceBase::renderSamples(const al::span<void*> outBuffers, const uint numSamples)
2231 {
2232     FPUCtl mixer_mode{};
2233     uint total{0};
2234     while(const uint todo{numSamples - total})
2235     {
2236         const uint samplesToDo{renderSamples(todo)};
2237
2238         switch(FmtType)
2239         {
2240 #define HANDLE_WRITE(T) case T:                                               \
2241     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffers, total, samplesToDo); break;
2242         HANDLE_WRITE(DevFmtByte)
2243         HANDLE_WRITE(DevFmtUByte)
2244         HANDLE_WRITE(DevFmtShort)
2245         HANDLE_WRITE(DevFmtUShort)
2246         HANDLE_WRITE(DevFmtInt)
2247         HANDLE_WRITE(DevFmtUInt)
2248         HANDLE_WRITE(DevFmtFloat)
2249         }
2250 #undef HANDLE_WRITE
2251
2252         total += samplesToDo;
2253     }
2254 }
2255
2256 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
2257 {
2258     FPUCtl mixer_mode{};
2259     uint total{0};
2260     while(const uint todo{numSamples - total})
2261     {
2262         const uint samplesToDo{renderSamples(todo)};
2263
2264         if(outBuffer) LIKELY
2265         {
2266             /* Finally, interleave and convert samples, writing to the device's
2267              * output buffer.
2268              */
2269             switch(FmtType)
2270             {
2271 #define HANDLE_WRITE(T) case T:                                               \
2272     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
2273             HANDLE_WRITE(DevFmtByte)
2274             HANDLE_WRITE(DevFmtUByte)
2275             HANDLE_WRITE(DevFmtShort)
2276             HANDLE_WRITE(DevFmtUShort)
2277             HANDLE_WRITE(DevFmtInt)
2278             HANDLE_WRITE(DevFmtUInt)
2279             HANDLE_WRITE(DevFmtFloat)
2280 #undef HANDLE_WRITE
2281             }
2282         }
2283
2284         total += samplesToDo;
2285     }
2286 }
2287
2288 void DeviceBase::handleDisconnect(const char *msg, ...)
2289 {
2290     const auto mixLock = getWriteMixLock();
2291
2292     if(Connected.exchange(false, std::memory_order_acq_rel))
2293     {
2294         AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
2295         auto &disconnect = std::get<AsyncDisconnectEvent>(evt);
2296
2297         /* NOLINTBEGIN(*-array-to-pointer-decay) */
2298         va_list args, args2;
2299         va_start(args, msg);
2300         va_copy(args2, args);
2301         if(int msglen{vsnprintf(nullptr, 0, msg, args)}; msglen > 0)
2302         {
2303             disconnect.msg.resize(static_cast<uint>(msglen)+1_uz);
2304             vsnprintf(disconnect.msg.data(), disconnect.msg.size(), msg, args2);
2305         }
2306         else
2307             disconnect.msg = "<failed constructing message>";
2308         va_end(args2);
2309         va_end(args);
2310         /* NOLINTEND(*-array-to-pointer-decay) */
2311
2312         while(!disconnect.msg.empty() && disconnect.msg.back() == '\0')
2313             disconnect.msg.pop_back();
2314
2315         for(ContextBase *ctx : *mContexts.load())
2316         {
2317             RingBuffer *ring{ctx->mAsyncEvents.get()};
2318             auto evt_data = ring->getWriteVector()[0];
2319             if(evt_data.len > 0)
2320             {
2321                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2322                 ring->writeAdvance(1);
2323                 ctx->mEventSem.post();
2324             }
2325
2326             if(!ctx->mStopVoicesOnDisconnect.load())
2327             {
2328                 ProcessVoiceChanges(ctx);
2329                 continue;
2330             }
2331
2332             auto voicelist = ctx->getVoicesSpanAcquired();
2333             auto stop_voice = [](Voice *voice) -> void
2334             {
2335                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2336                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2337                 voice->mSourceID.store(0u, std::memory_order_relaxed);
2338                 voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2339             };
2340             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2341         }
2342     }
2343 }