alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdint>
  33 #include <cstdio>
  34 #include <cstdlib>
  35 #include <functional>
  36 #include <iterator>
  37 #include <limits>
  38 #include <memory>
  39 #include <new>
  40 #include <optional>
  41 #include <utility>
  42
  43 #include "almalloc.h"
  44 #include "alnumbers.h"
  45 #include "alnumeric.h"
  46 #include "alspan.h"
  47 #include "alstring.h"
  48 #include "atomic.h"
  49 #include "core/ambidefs.h"
  50 #include "core/async_event.h"
  51 #include "core/bformatdec.h"
  52 #include "core/bs2b.h"
  53 #include "core/bsinc_defs.h"
  54 #include "core/bsinc_tables.h"
  55 #include "core/bufferline.h"
  56 #include "core/buffer_storage.h"
  57 #include "core/context.h"
  58 #include "core/cpu_caps.h"
  59 #include "core/cubic_tables.h"
  60 #include "core/devformat.h"
  61 #include "core/device.h"
  62 #include "core/effects/base.h"
  63 #include "core/effectslot.h"
  64 #include "core/filters/biquad.h"
  65 #include "core/filters/nfc.h"
  66 #include "core/fpu_ctrl.h"
  67 #include "core/hrtf.h"
  68 #include "core/mastering.h"
  69 #include "core/mixer.h"
  70 #include "core/mixer/defs.h"
  71 #include "core/mixer/hrtfdefs.h"
  72 #include "core/resampler_limits.h"
  73 #include "core/uhjfilter.h"
  74 #include "core/voice.h"
  75 #include "core/voice_change.h"
  76 #include "intrusive_ptr.h"
  77 #include "opthelpers.h"
  78 #include "ringbuffer.h"
  79 #include "strutils.h"
  80 #include "vecmat.h"
  81 #include "vector.h"
  82
  83 struct CTag;
  84 #ifdef HAVE_SSE
  85 struct SSETag;
  86 #endif
  87 #ifdef HAVE_SSE2
  88 struct SSE2Tag;
  89 #endif
  90 #ifdef HAVE_SSE4_1
  91 struct SSE4Tag;
  92 #endif
  93 #ifdef HAVE_NEON
  94 struct NEONTag;
  95 #endif
  96 struct PointTag;
  97 struct LerpTag;
  98 struct CubicTag;
  99 struct BSincTag;
 100 struct FastBSincTag;
 101
 102
 103 static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 104
 105
 106 namespace {
 107
 108 using uint = unsigned int;
 109 using namespace std::chrono;
 110 using namespace std::string_view_literals;
 111
 112 float InitConeScale()
 113 {
 114     float ret{1.0f};
 115     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 116     {
 117         if(al::case_compare(*optval, "true"sv) == 0
 118             || strtol(optval->c_str(), nullptr, 0) == 1)
 119             ret *= 0.5f;
 120     }
 121     return ret;
 122 }
 123 /* Cone scalar */
 124 const float ConeScale{InitConeScale()};
 125
 126 /* Localized scalars for mono sources (initialized in aluInit, after
 127  * configuration is loaded).
 128  */
 129 float XScale{1.0f};
 130 float YScale{1.0f};
 131 float ZScale{1.0f};
 132
 133 /* Source distance scale for NFC filters. */
 134 float NfcScale{1.0f};
 135
 136
 137 using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
 138     const al::span<const FloatBufferLine> InSamples, const al::span<float2> AccumSamples,
 139     const al::span<float,BufferLineSize> TempBuf, const al::span<HrtfChannelState> ChanState,
 140     const size_t IrSize, const size_t SamplesToDo);
 141
 142 HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 143
 144 inline HrtfDirectMixerFunc SelectHrtfMixer()
 145 {
 146 #ifdef HAVE_NEON
 147     if((CPUCapFlags&CPU_CAP_NEON))
 148         return MixDirectHrtf_<NEONTag>;
 149 #endif
 150 #ifdef HAVE_SSE
 151     if((CPUCapFlags&CPU_CAP_SSE))
 152         return MixDirectHrtf_<SSETag>;
 153 #endif
 154
 155     return MixDirectHrtf_<CTag>;
 156 }
 157
 158
 159 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 160 {
 161     size_t si{BSincScaleCount - 1};
 162     float sf{0.0f};
 163
 164     if(increment > MixerFracOne)
 165     {
 166         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 167         sf = std::max(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 168         si = float2uint(sf);
 169         /* The interpolation factor is fit to this diagonally-symmetric curve
 170          * to reduce the transition ripple caused by interpolating different
 171          * scales of the sinc function.
 172          */
 173         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 174     }
 175
 176     state->sf = sf;
 177     state->m = table->m[si];
 178     state->l = (state->m/2) - 1;
 179     state->filter = table->Tab.subspan(table->filterOffset[si]);
 180 }
 181
 182 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 183 {
 184     switch(resampler)
 185     {
 186     case Resampler::Point:
 187         return Resample_<PointTag,CTag>;
 188     case Resampler::Linear:
 189 #ifdef HAVE_NEON
 190         if((CPUCapFlags&CPU_CAP_NEON))
 191             return Resample_<LerpTag,NEONTag>;
 192 #endif
 193 #ifdef HAVE_SSE4_1
 194         if((CPUCapFlags&CPU_CAP_SSE4_1))
 195             return Resample_<LerpTag,SSE4Tag>;
 196 #endif
 197 #ifdef HAVE_SSE2
 198         if((CPUCapFlags&CPU_CAP_SSE2))
 199             return Resample_<LerpTag,SSE2Tag>;
 200 #endif
 201         return Resample_<LerpTag,CTag>;
 202     case Resampler::Spline:
 203     case Resampler::Gaussian:
 204 #ifdef HAVE_NEON
 205         if((CPUCapFlags&CPU_CAP_NEON))
 206             return Resample_<CubicTag,NEONTag>;
 207 #endif
 208 #ifdef HAVE_SSE4_1
 209         if((CPUCapFlags&CPU_CAP_SSE4_1))
 210             return Resample_<CubicTag,SSE4Tag>;
 211 #endif
 212 #ifdef HAVE_SSE2
 213         if((CPUCapFlags&CPU_CAP_SSE2))
 214             return Resample_<CubicTag,SSE2Tag>;
 215 #endif
 216 #ifdef HAVE_SSE
 217         if((CPUCapFlags&CPU_CAP_SSE))
 218             return Resample_<CubicTag,SSETag>;
 219 #endif
 220         return Resample_<CubicTag,CTag>;
 221     case Resampler::BSinc12:
 222     case Resampler::BSinc24:
 223         if(increment > MixerFracOne)
 224         {
 225 #ifdef HAVE_NEON
 226             if((CPUCapFlags&CPU_CAP_NEON))
 227                 return Resample_<BSincTag,NEONTag>;
 228 #endif
 229 #ifdef HAVE_SSE
 230             if((CPUCapFlags&CPU_CAP_SSE))
 231                 return Resample_<BSincTag,SSETag>;
 232 #endif
 233             return Resample_<BSincTag,CTag>;
 234         }
 235         /* fall-through */
 236     case Resampler::FastBSinc12:
 237     case Resampler::FastBSinc24:
 238 #ifdef HAVE_NEON
 239         if((CPUCapFlags&CPU_CAP_NEON))
 240             return Resample_<FastBSincTag,NEONTag>;
 241 #endif
 242 #ifdef HAVE_SSE
 243         if((CPUCapFlags&CPU_CAP_SSE))
 244             return Resample_<FastBSincTag,SSETag>;
 245 #endif
 246         return Resample_<FastBSincTag,CTag>;
 247     }
 248
 249     return Resample_<PointTag,CTag>;
 250 }
 251
 252 } // namespace
 253
 254 void aluInit(CompatFlagBitset flags, const float nfcscale)
 255 {
 256     MixDirectHrtf = SelectHrtfMixer();
 257     XScale = flags.test(CompatFlags::ReverseX) ? -1.0f : 1.0f;
 258     YScale = flags.test(CompatFlags::ReverseY) ? -1.0f : 1.0f;
 259     ZScale = flags.test(CompatFlags::ReverseZ) ? -1.0f : 1.0f;
 260
 261     NfcScale = std::clamp(nfcscale, 0.0001f, 10000.0f);
 262 }
 263
 264
 265 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 266 {
 267     switch(resampler)
 268     {
 269     case Resampler::Point:
 270     case Resampler::Linear:
 271         break;
 272     case Resampler::Spline:
 273         state->emplace<CubicState>(al::span{gSplineFilter.mTable});
 274         break;
 275     case Resampler::Gaussian:
 276         state->emplace<CubicState>(al::span{gGaussianFilter.mTable});
 277         break;
 278     case Resampler::FastBSinc12:
 279     case Resampler::BSinc12:
 280         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc12);
 281         break;
 282     case Resampler::FastBSinc24:
 283     case Resampler::BSinc24:
 284         BsincPrepare(increment, &state->emplace<BsincState>(), &gBSinc24);
 285         break;
 286     }
 287     return SelectResampler(resampler, increment);
 288 }
 289
 290
 291 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 292 {
 293     /* HRTF is stereo output only. */
 294     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 295     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 296
 297     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 298         mHrtfState->mTemp, mHrtfState->mChannels, mHrtfState->mIrSize, SamplesToDo);
 299 }
 300
 301 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 302 {
 303     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 304 }
 305
 306 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 307 {
 308     /* Decode with front image stablization. */
 309     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 310     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 311     const size_t cidx{RealOut.ChannelIndex[FrontCenter]};
 312
 313     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer, lidx, ridx, cidx, SamplesToDo);
 314 }
 315
 316 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 317 {
 318     /* UHJ is stereo output only. */
 319     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 320     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 321
 322     /* Encode to stereo-compatible 2-channel UHJ output. */
 323     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 324         {{Dry.Buffer[0].data(), Dry.Buffer[1].data(), Dry.Buffer[2].data()}}, SamplesToDo);
 325 }
 326
 327 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 328 {
 329     /* First, decode the ambisonic mix to the "real" output. */
 330     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer, SamplesToDo);
 331
 332     /* BS2B is stereo output only. */
 333     const size_t lidx{RealOut.ChannelIndex[FrontLeft]};
 334     const size_t ridx{RealOut.ChannelIndex[FrontRight]};
 335
 336     /* Now apply the BS2B binaural/crossfeed filter. */
 337     Bs2b->cross_feed(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(), SamplesToDo);
 338 }
 339
 340
 341 namespace {
 342
 343 /* This RNG method was created based on the math found in opusdec. It's quick,
 344  * and starting with a seed value of 22222, is suitable for generating
 345  * whitenoise.
 346  */
 347 inline uint dither_rng(uint *seed) noexcept
 348 {
 349     *seed = (*seed * 96314165) + 907633515;
 350     return *seed;
 351 }
 352
 353
 354 /* Ambisonic upsampler function. It's effectively a matrix multiply. It takes
 355  * an 'upsampler' and 'rotator' as the input matrices, and creates a matrix
 356  * that behaves as if the B-Format input was first decoded to a speaker array
 357  * at its input order, encoded back into the higher order mix, then finally
 358  * rotated.
 359  */
 360 void UpsampleBFormatTransform(
 361     const al::span<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> output,
 362     const al::span<const std::array<float,MaxAmbiChannels>> upsampler,
 363     const al::span<const std::array<float,MaxAmbiChannels>,MaxAmbiChannels> rotator,
 364     size_t ambi_order)
 365 {
 366     const size_t num_chans{AmbiChannelsFromOrder(ambi_order)};
 367     for(size_t i{0};i < upsampler.size();++i)
 368         output[i].fill(0.0f);
 369     for(size_t i{0};i < upsampler.size();++i)
 370     {
 371         for(size_t k{0};k < num_chans;++k)
 372         {
 373             const float a{upsampler[i][k]};
 374             /* Write the full number of channels. The compiler will have an
 375              * easier time optimizing if it has a fixed length.
 376              */
 377             std::transform(rotator[k].cbegin(), rotator[k].cend(), output[i].cbegin(),
 378                 output[i].begin(), [a](float rot, float dst) noexcept { return rot*a + dst; });
 379         }
 380     }
 381 }
 382
 383
 384 constexpr auto GetAmbiScales(AmbiScaling scaletype) noexcept
 385 {
 386     switch(scaletype)
 387     {
 388     case AmbiScaling::FuMa: return al::span{AmbiScale::FromFuMa};
 389     case AmbiScaling::SN3D: return al::span{AmbiScale::FromSN3D};
 390     case AmbiScaling::UHJ: return al::span{AmbiScale::FromUHJ};
 391     case AmbiScaling::N3D: break;
 392     }
 393     return al::span{AmbiScale::FromN3D};
 394 }
 395
 396 constexpr auto GetAmbiLayout(AmbiLayout layouttype) noexcept
 397 {
 398     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa};
 399     return al::span{AmbiIndex::FromACN};
 400 }
 401
 402 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 403 {
 404     if(layouttype == AmbiLayout::FuMa) return al::span{AmbiIndex::FromFuMa2D};
 405     return al::span{AmbiIndex::FromACN2D};
 406 }
 407
 408
 409 bool CalcContextParams(ContextBase *ctx)
 410 {
 411     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 412     if(!props) return false;
 413
 414     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 415     ctx->mParams.Position = pos;
 416
 417     /* AT then UP */
 418     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 419     N.normalize();
 420     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 421     V.normalize();
 422     /* Build and normalize right-vector */
 423     alu::Vector U{N.cross_product(V)};
 424     U.normalize();
 425
 426     const alu::Matrix rot{
 427         U[0], V[0], -N[0], 0.0,
 428         U[1], V[1], -N[1], 0.0,
 429         U[2], V[2], -N[2], 0.0,
 430          0.0,  0.0,   0.0, 1.0};
 431     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 432
 433     ctx->mParams.Matrix = rot;
 434     ctx->mParams.Velocity = rot * vel;
 435
 436     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 437     ctx->mParams.MetersPerUnit = props->MetersPerUnit
 438 #ifdef ALSOFT_EAX
 439         * props->DistanceFactor
 440 #endif
 441         ;
 442     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 443
 444     ctx->mParams.DopplerFactor = props->DopplerFactor;
 445     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity
 446 #ifdef ALSOFT_EAX
 447         / props->DistanceFactor
 448 #endif
 449         ;
 450
 451     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 452     ctx->mParams.mDistanceModel = props->mDistanceModel;
 453
 454     AtomicReplaceHead(ctx->mFreeContextProps, props);
 455     return true;
 456 }
 457
 458 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 459 {
 460     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 461     if(!props) return false;
 462
 463     /* If the effect slot target changed, clear the first sorted entry to force
 464      * a re-sort.
 465      */
 466     if(slot->Target != props->Target)
 467         *sorted_slots = nullptr;
 468     slot->Gain = props->Gain;
 469     slot->AuxSendAuto = props->AuxSendAuto;
 470     slot->Target = props->Target;
 471     slot->EffectType = props->Type;
 472     slot->mEffectProps = props->Props;
 473
 474     slot->RoomRolloff = 0.0f;
 475     slot->DecayTime = 0.0f;
 476     slot->DecayLFRatio = 0.0f;
 477     slot->DecayHFRatio = 0.0f;
 478     slot->DecayHFLimit = false;
 479     slot->AirAbsorptionGainHF = 1.0f;
 480     if(auto *reverbprops = std::get_if<ReverbProps>(&props->Props))
 481     {
 482         slot->RoomRolloff = reverbprops->RoomRolloffFactor;
 483         slot->AirAbsorptionGainHF = reverbprops->AirAbsorptionGainHF;
 484         /* If this effect slot's Auxiliary Send Auto is off, don't apply the
 485          * automatic send adjustments based on source distance.
 486          */
 487         if(slot->AuxSendAuto)
 488         {
 489             slot->DecayTime = reverbprops->DecayTime;
 490             slot->DecayLFRatio = reverbprops->DecayLFRatio;
 491             slot->DecayHFRatio = reverbprops->DecayHFRatio;
 492             slot->DecayHFLimit = reverbprops->DecayHFLimit;
 493         }
 494     }
 495
 496     EffectState *state{props->State.release()};
 497     EffectState *oldstate{slot->mEffectState.release()};
 498     slot->mEffectState.reset(state);
 499
 500     /* Only release the old state if it won't get deleted, since we can't be
 501      * deleting/freeing anything in the mixer.
 502      */
 503     if(!oldstate->releaseIfNoDelete())
 504     {
 505         /* Otherwise, if it would be deleted send it off with a release event. */
 506         RingBuffer *ring{context->mAsyncEvents.get()};
 507         auto evt_vec = ring->getWriteVector();
 508         if(evt_vec.first.len > 0) LIKELY
 509         {
 510             auto &evt = InitAsyncEvent<AsyncEffectReleaseEvent>(evt_vec.first.buf);
 511             evt.mEffectState = oldstate;
 512             ring->writeAdvance(1);
 513         }
 514         else
 515         {
 516             /* If writing the event failed, the queue was probably full. Store
 517              * the old state in the property object where it can eventually be
 518              * cleaned up sometime later (not ideal, but better than blocking
 519              * or leaking).
 520              */
 521             props->State.reset(oldstate);
 522         }
 523     }
 524
 525     AtomicReplaceHead(context->mFreeEffectSlotProps, props);
 526
 527     const auto output = [slot,context]() -> EffectTarget
 528     {
 529         if(EffectSlot *target{slot->Target})
 530             return EffectTarget{&target->Wet, nullptr};
 531         DeviceBase *device{context->mDevice};
 532         return EffectTarget{&device->Dry, &device->RealOut};
 533     }();
 534     state->update(context, slot, &slot->mEffectProps, output);
 535     return true;
 536 }
 537
 538
 539 /* Scales the azimuth of the given vector by 3 if it's in front. Effectively
 540  * scales +/-30 degrees to +/-90 degrees, leaving > +90 and < -90 alone.
 541  */
 542 inline std::array<float,3> ScaleAzimuthFront3(std::array<float,3> pos)
 543 {
 544     if(pos[2] < 0.0f)
 545     {
 546         /* Normalize the length of the x,z components for a 2D vector of the
 547          * azimuth angle. Negate Z since {0,0,-1} is angle 0.
 548          */
 549         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 550         float x{pos[0] / len2d};
 551         float z{-pos[2] / len2d};
 552
 553         /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
 554         if(z > 0.866025403785f)
 555         {
 556             /* Triple the angle represented by x,z. */
 557             x = x*3.0f - x*x*x*4.0f;
 558             z = z*z*z*4.0f - z*3.0f;
 559
 560             /* Scale the vector back to fit in 3D. */
 561             pos[0] = x * len2d;
 562             pos[2] = -z * len2d;
 563         }
 564         else
 565         {
 566             /* If azimuth >= 30 degrees, clamp to 90 degrees. */
 567             pos[0] = std::copysign(len2d, pos[0]);
 568             pos[2] = 0.0f;
 569         }
 570     }
 571     return pos;
 572 }
 573
 574 /* Scales the azimuth of the given vector by 1.5 (3/2) if it's in front. */
 575 inline std::array<float,3> ScaleAzimuthFront3_2(std::array<float,3> pos)
 576 {
 577     if(pos[2] < 0.0f)
 578     {
 579         const float len2d{std::sqrt(pos[0]*pos[0] + pos[2]*pos[2])};
 580         float x{pos[0] / len2d};
 581         float z{-pos[2] / len2d};
 582
 583         /* Z > cos(pi/3) = -60 < azimuth < 60 degrees. */
 584         if(z > 0.5f)
 585         {
 586             /* Halve the angle represented by x,z. */
 587             x = std::copysign(std::sqrt((1.0f - z) * 0.5f), x);
 588             z = std::sqrt((1.0f + z) * 0.5f);
 589
 590             /* Triple the angle represented by x,z. */
 591             x = x*3.0f - x*x*x*4.0f;
 592             z = z*z*z*4.0f - z*3.0f;
 593
 594             /* Scale the vector back to fit in 3D. */
 595             pos[0] = x * len2d;
 596             pos[2] = -z * len2d;
 597         }
 598         else
 599         {
 600             /* If azimuth >= 60 degrees, clamp to 90 degrees. */
 601             pos[0] = std::copysign(len2d, pos[0]);
 602             pos[2] = 0.0f;
 603         }
 604     }
 605     return pos;
 606 }
 607
 608
 609 /* Begin ambisonic rotation helpers.
 610  *
 611  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 612  * matrix. Higher orders, however, are more complicated. The method implemented
 613  * here is a recursive algorithm (the rotation for first-order is used to help
 614  * generate the second-order rotation, which helps generate the third-order
 615  * rotation, etc).
 616  *
 617  * Adapted from
 618  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 619  * provided under the BSD 3-Clause license.
 620  *
 621  * Copyright (c) 2015, Archontis Politis
 622  * Copyright (c) 2019, Christopher Robinson
 623  *
 624  * The u, v, and w coefficients used for generating higher-order rotations are
 625  * precomputed since they're constant. The second-order coefficients are
 626  * followed by the third-order coefficients, etc.
 627  */
 628 constexpr size_t CalcRotatorSize(size_t l) noexcept
 629 {
 630     if(l >= 2)
 631         return (l*2 + 1)*(l*2 + 1) + CalcRotatorSize(l-1);
 632     return 0;
 633 }
 634
 635 struct RotatorCoeffs {
 636     struct CoeffValues {
 637         float u, v, w;
 638     };
 639     std::array<CoeffValues,CalcRotatorSize(MaxAmbiOrder)> mCoeffs{};
 640
 641     RotatorCoeffs()
 642     {
 643         auto coeffs = mCoeffs.begin();
 644
 645         for(int l=2;l <= MaxAmbiOrder;++l)
 646         {
 647             for(int n{-l};n <= l;++n)
 648             {
 649                 for(int m{-l};m <= l;++m)
 650                 {
 651                     /* compute u,v,w terms of Eq.8.1 (Table I)
 652                      *
 653                      * const bool d{m == 0}; // the delta function d_m0
 654                      * const double denom{(std::abs(n) == l) ?
 655                      *     (2*l) * (2*l - 1) : (l*l - n*n)};
 656                      *
 657                      * const int abs_m{std::abs(m)};
 658                      * coeffs->u = std::sqrt((l*l - m*m) / denom);
 659                      * coeffs->v = std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 660                      *     (1.0+d) * (1.0 - 2.0*d) * 0.5;
 661                      * coeffs->w = std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 662                      *     (1.0-d) * -0.5;
 663                      */
 664
 665                     const double denom{static_cast<double>((std::abs(n) == l) ?
 666                           (2*l) * (2*l - 1) : (l*l - n*n))};
 667
 668                     if(m == 0)
 669                     {
 670                         coeffs->u = static_cast<float>(std::sqrt(l * l / denom));
 671                         coeffs->v = static_cast<float>(std::sqrt((l-1) * l / denom) * -1.0);
 672                         coeffs->w = 0.0f;
 673                     }
 674                     else
 675                     {
 676                         const int abs_m{std::abs(m)};
 677                         coeffs->u = static_cast<float>(std::sqrt((l*l - m*m) / denom));
 678                         coeffs->v = static_cast<float>(std::sqrt((l+abs_m-1) * (l+abs_m) / denom) *
 679                             0.5);
 680                         coeffs->w = static_cast<float>(std::sqrt((l-abs_m-1) * (l-abs_m) / denom) *
 681                             -0.5);
 682                     }
 683                     ++coeffs;
 684                 }
 685             }
 686         }
 687     }
 688 };
 689 const RotatorCoeffs RotatorCoeffArray{};
 690
 691 /**
 692  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 693  * coefficients, this fills in the coefficients for the higher orders up to and
 694  * including the given order. The matrix is in ACN layout.
 695  */
 696 void AmbiRotator(AmbiRotateMatrix &matrix, const int order)
 697 {
 698     /* Don't do anything for < 2nd order. */
 699     if(order < 2) return;
 700
 701     auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
 702         const AmbiRotateMatrix &R)
 703     {
 704         const float ri1{ R[ 1+2][static_cast<size_t>(i+2_z)]};
 705         const float rim1{R[-1+2][static_cast<size_t>(i+2_z)]};
 706         const float ri0{ R[ 0+2][static_cast<size_t>(i+2_z)]};
 707
 708         const size_t y{last_band + static_cast<size_t>(a+l-1)};
 709         if(n == -l)
 710             return ri1*R[last_band][y] + rim1*R[last_band + static_cast<size_t>(l-1_z)*2][y];
 711         if(n == l)
 712             return ri1*R[last_band + static_cast<size_t>(l-1_z)*2][y] - rim1*R[last_band][y];
 713         return ri0*R[last_band + static_cast<size_t>(l-1_z+n)][y];
 714     };
 715
 716     auto U = [P](const int l, const int m, const int n, const size_t last_band,
 717         const AmbiRotateMatrix &R)
 718     {
 719         return P(0, l, m, n, last_band, R);
 720     };
 721     auto V = [P](const int l, const int m, const int n, const size_t last_band,
 722         const AmbiRotateMatrix &R)
 723     {
 724         using namespace al::numbers;
 725         if(m > 0)
 726         {
 727             const bool d{m == 1};
 728             const float p0{P( 1, l,  m-1, n, last_band, R)};
 729             const float p1{P(-1, l, -m+1, n, last_band, R)};
 730             return d ? p0*sqrt2_v<float> : (p0 - p1);
 731         }
 732         const bool d{m == -1};
 733         const float p0{P( 1, l,  m+1, n, last_band, R)};
 734         const float p1{P(-1, l, -m-1, n, last_band, R)};
 735         return d ? p1*sqrt2_v<float> : (p0 + p1);
 736     };
 737     auto W = [P](const int l, const int m, const int n, const size_t last_band,
 738         const AmbiRotateMatrix &R)
 739     {
 740         assert(m != 0);
 741         if(m > 0)
 742         {
 743             const float p0{P( 1, l,  m+1, n, last_band, R)};
 744             const float p1{P(-1, l, -m-1, n, last_band, R)};
 745             return p0 + p1;
 746         }
 747         const float p0{P( 1, l,  m-1, n, last_band, R)};
 748         const float p1{P(-1, l, -m+1, n, last_band, R)};
 749         return p0 - p1;
 750     };
 751
 752     // compute rotation matrix of each subsequent band recursively
 753     auto coeffs = RotatorCoeffArray.mCoeffs.cbegin();
 754     size_t band_idx{4}, last_band{1};
 755     for(int l{2};l <= order;++l)
 756     {
 757         size_t y{band_idx};
 758         for(int n{-l};n <= l;++n,++y)
 759         {
 760             size_t x{band_idx};
 761             for(int m{-l};m <= l;++m,++x)
 762             {
 763                 float r{0.0f};
 764
 765                 // computes Eq.8.1
 766                 if(const float u{coeffs->u}; u != 0.0f)
 767                     r += u * U(l, m, n, last_band, matrix);
 768                 if(const float v{coeffs->v}; v != 0.0f)
 769                     r += v * V(l, m, n, last_band, matrix);
 770                 if(const float w{coeffs->w}; w != 0.0f)
 771                     r += w * W(l, m, n, last_band, matrix);
 772
 773                 matrix[y][x] = r;
 774                 ++coeffs;
 775             }
 776         }
 777         last_band = band_idx;
 778         band_idx += static_cast<uint>(l)*2_uz + 1;
 779     }
 780 }
 781 /* End ambisonic rotation helpers. */
 782
 783
 784 constexpr float sin30{0.5f};
 785 constexpr float cos30{0.866025403785f};
 786 constexpr float sin45{al::numbers::sqrt2_v<float>*0.5f};
 787 constexpr float cos45{al::numbers::sqrt2_v<float>*0.5f};
 788 constexpr float sin110{ 0.939692620786f};
 789 constexpr float cos110{-0.342020143326f};
 790
 791 struct ChanPosMap {
 792     Channel channel;
 793     std::array<float,3> pos;
 794 };
 795
 796
 797 struct GainTriplet { float Base, HF, LF; };
 798
 799 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 800     const float Distance, const float Spread, const GainTriplet &DryGain,
 801     const al::span<const GainTriplet,MaxSendCount> WetGain,
 802     const al::span<EffectSlot*,MaxSendCount> SendSlots, const VoiceProps *props,
 803     const ContextParams &Context, DeviceBase *Device)
 804 {
 805     static constexpr std::array MonoMap{
 806         ChanPosMap{FrontCenter, std::array{0.0f, 0.0f, -1.0f}}
 807     };
 808     static constexpr std::array RearMap{
 809         ChanPosMap{BackLeft,  std::array{-sin30, 0.0f, cos30}},
 810         ChanPosMap{BackRight, std::array{ sin30, 0.0f, cos30}},
 811     };
 812     static constexpr std::array QuadMap{
 813         ChanPosMap{FrontLeft,  std::array{-sin45, 0.0f, -cos45}},
 814         ChanPosMap{FrontRight, std::array{ sin45, 0.0f, -cos45}},
 815         ChanPosMap{BackLeft,   std::array{-sin45, 0.0f,  cos45}},
 816         ChanPosMap{BackRight,  std::array{ sin45, 0.0f,  cos45}},
 817     };
 818     static constexpr std::array X51Map{
 819         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 820         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 821         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 822         ChanPosMap{LFE, {}},
 823         ChanPosMap{SideLeft,    std::array{-sin110, 0.0f, -cos110}},
 824         ChanPosMap{SideRight,   std::array{ sin110, 0.0f, -cos110}},
 825     };
 826     static constexpr std::array X61Map{
 827         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 828         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 829         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 830         ChanPosMap{LFE, {}},
 831         ChanPosMap{BackCenter,  std::array{ 0.0f, 0.0f, 1.0f}},
 832         ChanPosMap{SideLeft,    std::array{-1.0f, 0.0f, 0.0f}},
 833         ChanPosMap{SideRight,   std::array{ 1.0f, 0.0f, 0.0f}},
 834     };
 835     static constexpr std::array X71Map{
 836         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 837         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 838         ChanPosMap{FrontCenter, std::array{  0.0f, 0.0f, -1.0f}},
 839         ChanPosMap{LFE, {}},
 840         ChanPosMap{BackLeft,    std::array{-sin30, 0.0f, cos30}},
 841         ChanPosMap{BackRight,   std::array{ sin30, 0.0f, cos30}},
 842         ChanPosMap{SideLeft,    std::array{ -1.0f, 0.0f, 0.0f}},
 843         ChanPosMap{SideRight,   std::array{  1.0f, 0.0f, 0.0f}},
 844     };
 845
 846     std::array StereoMap{
 847         ChanPosMap{FrontLeft,   std::array{-sin30, 0.0f, -cos30}},
 848         ChanPosMap{FrontRight,  std::array{ sin30, 0.0f, -cos30}},
 849     };
 850
 851     const auto Frequency = static_cast<float>(Device->Frequency);
 852     const uint NumSends{Device->NumAuxSends};
 853
 854     const size_t num_channels{voice->mChans.size()};
 855     ASSUME(num_channels > 0);
 856
 857     for(auto &chandata : voice->mChans)
 858     {
 859         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 860         chandata.mDryParams.Gains.Target.fill(0.0f);
 861         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 862             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 863     }
 864
 865     const auto getChans = [props,&StereoMap](FmtChannels chanfmt) noexcept
 866         -> std::pair<DirectMode,al::span<const ChanPosMap>>
 867     {
 868         switch(chanfmt)
 869         {
 870         case FmtMono:
 871             /* Mono buffers are never played direct. */
 872             return {DirectMode::Off, al::span{MonoMap}};
 873
 874         case FmtStereo:
 875         case FmtMonoDup:
 876             if(props->DirectChannels == DirectMode::Off)
 877             {
 878                 for(size_t i{0};i < 2;++i)
 879                 {
 880                     /* StereoPan is counter-clockwise in radians. */
 881                     const float a{props->StereoPan[i]};
 882                     StereoMap[i].pos[0] = -std::sin(a);
 883                     StereoMap[i].pos[2] = -std::cos(a);
 884                 }
 885             }
 886             return {props->DirectChannels, al::span{StereoMap}};
 887
 888         case FmtRear: return {props->DirectChannels, al::span{RearMap}};
 889         case FmtQuad: return {props->DirectChannels, al::span{QuadMap}};
 890         case FmtX51: return {props->DirectChannels, al::span{X51Map}};
 891         case FmtX61: return {props->DirectChannels, al::span{X61Map}};
 892         case FmtX71: return {props->DirectChannels, al::span{X71Map}};
 893
 894         case FmtBFormat2D:
 895         case FmtBFormat3D:
 896         case FmtUHJ2:
 897         case FmtUHJ3:
 898         case FmtUHJ4:
 899         case FmtSuperStereo:
 900             return {DirectMode::Off, {}};
 901         }
 902         return {props->DirectChannels, {}};
 903     };
 904     const auto [DirectChannels,chans] = getChans(voice->mFmtChannels);
 905
 906     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 907     if(auto *decoder{voice->mDecoder.get()})
 908         decoder->mWidthControl = std::min(props->EnhWidth, 0.7f);
 909
 910     const float lgain{std::min(1.0f-props->Panning, 1.0f)};
 911     const float rgain{std::min(1.0f+props->Panning, 1.0f)};
 912     const float mingain{std::min(lgain, rgain)};
 913     auto SelectChannelGain = [lgain,rgain,mingain](const Channel chan) noexcept
 914     {
 915         switch(chan)
 916         {
 917         case FrontLeft: return lgain;
 918         case FrontRight: return rgain;
 919         case FrontCenter: break;
 920         case LFE: break;
 921         case BackLeft: return lgain;
 922         case BackRight: return rgain;
 923         case BackCenter: break;
 924         case SideLeft: return lgain;
 925         case SideRight: return rgain;
 926         case TopCenter: break;
 927         case TopFrontLeft: return lgain;
 928         case TopFrontCenter: break;
 929         case TopFrontRight: return rgain;
 930         case TopBackLeft: return lgain;
 931         case TopBackCenter: break;
 932         case TopBackRight: return rgain;
 933         case BottomFrontLeft: return lgain;
 934         case BottomFrontRight: return rgain;
 935         case BottomBackLeft: return lgain;
 936         case BottomBackRight: return rgain;
 937         case Aux0: case Aux1: case Aux2: case Aux3: case Aux4: case Aux5: case Aux6: case Aux7:
 938         case Aux8: case Aux9: case Aux10: case Aux11: case Aux12: case Aux13: case Aux14:
 939         case Aux15: case MaxChannels: break;
 940         }
 941         return mingain;
 942     };
 943
 944     if(IsAmbisonic(voice->mFmtChannels))
 945     {
 946         /* Special handling for B-Format and UHJ sources. */
 947
 948         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 949             && voice->mFmtChannels != FmtSuperStereo)
 950         {
 951             if(!(Distance > std::numeric_limits<float>::epsilon()))
 952             {
 953                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 954                  * is what we want for FOA input. The first channel may have
 955                  * been previously re-adjusted if panned, so reset it.
 956                  */
 957                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 958             }
 959             else
 960             {
 961                 /* Clamp the distance for really close sources, to prevent
 962                  * excessive bass.
 963                  */
 964                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
 965                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 966
 967                 /* Only need to adjust the first channel of a B-Format source. */
 968                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 969             }
 970
 971             voice->mFlags.set(VoiceHasNfc);
 972         }
 973
 974         /* Panning a B-Format sound toward some direction is easy. Just pan the
 975          * first (W) channel as a normal mono sound. The angular spread is used
 976          * as a directional scalar to blend between full coverage and full
 977          * panning.
 978          */
 979         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 980             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 981
 982         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 983         {
 984             if(mode != RenderMode::Pairwise)
 985                 return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, 0.0f);
 986             const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
 987             return CalcDirectionCoeffs(pos, 0.0f);
 988         };
 989         const auto scales = GetAmbiScales(voice->mAmbiScaling);
 990         auto coeffs = calc_coeffs(Device->mRenderMode);
 991
 992         if(!(coverage > 0.0f))
 993         {
 994             ComputePanGains(&Device->Dry, coeffs, DryGain.Base*scales[0],
 995                 voice->mChans[0].mDryParams.Gains.Target);
 996             for(uint i{0};i < NumSends;i++)
 997             {
 998                 if(const EffectSlot *Slot{SendSlots[i]})
 999                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base*scales[0],
1000                         voice->mChans[0].mWetParams[i].Gains.Target);
1001             }
1002         }
1003         else
1004         {
1005             /* Local B-Format sources have their XYZ channels rotated according
1006              * to the orientation.
1007              */
1008             /* AT then UP */
1009             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
1010             N.normalize();
1011             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
1012             V.normalize();
1013             if(!props->HeadRelative)
1014             {
1015                 N = Context.Matrix * N;
1016                 V = Context.Matrix * V;
1017             }
1018             /* Build and normalize right-vector */
1019             alu::Vector U{N.cross_product(V)};
1020             U.normalize();
1021
1022             /* Build a rotation matrix. Manually fill the zeroth- and first-
1023              * order elements, then construct the rotation for the higher
1024              * orders.
1025              */
1026             AmbiRotateMatrix &shrot = Device->mAmbiRotateMatrix;
1027             shrot.fill(AmbiRotateMatrix::value_type{});
1028
1029             shrot[0][0] = 1.0f;
1030             shrot[1][1] =  U[0]; shrot[1][2] = -U[1]; shrot[1][3] =  U[2];
1031             shrot[2][1] = -V[0]; shrot[2][2] =  V[1]; shrot[2][3] = -V[2];
1032             shrot[3][1] = -N[0]; shrot[3][2] =  N[1]; shrot[3][3] = -N[2];
1033             AmbiRotator(shrot, static_cast<int>(Device->mAmbiOrder));
1034
1035             /* If the device is higher order than the voice, "upsample" the
1036              * matrix.
1037              *
1038              * NOTE: Starting with second-order, a 2D upsample needs to be
1039              * applied with a 2D source and 3D output, even when they're the
1040              * same order. This is because higher orders have a height offset
1041              * on various channels (i.e. when elevation=0, those height-related
1042              * channels should be non-0).
1043              */
1044             AmbiRotateMatrix &mixmatrix = Device->mAmbiRotateMatrix2;
1045             if(Device->mAmbiOrder > voice->mAmbiOrder
1046                 || (Device->mAmbiOrder >= 2 && !Device->m2DMixing
1047                     && Is2DAmbisonic(voice->mFmtChannels)))
1048             {
1049                 if(voice->mAmbiOrder == 1)
1050                 {
1051                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1052                         al::span{AmbiScale::FirstOrder2DUp} : al::span{AmbiScale::FirstOrderUp};
1053                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1054                 }
1055                 else if(voice->mAmbiOrder == 2)
1056                 {
1057                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1058                         al::span{AmbiScale::SecondOrder2DUp} : al::span{AmbiScale::SecondOrderUp};
1059                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1060                 }
1061                 else if(voice->mAmbiOrder == 3)
1062                 {
1063                     const auto upsampler = Is2DAmbisonic(voice->mFmtChannels) ?
1064                         al::span{AmbiScale::ThirdOrder2DUp} : al::span{AmbiScale::ThirdOrderUp};
1065                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1066                 }
1067                 else if(voice->mAmbiOrder == 4)
1068                 {
1069                     const auto upsampler = al::span{AmbiScale::FourthOrder2DUp};
1070                     UpsampleBFormatTransform(mixmatrix, upsampler, shrot, Device->mAmbiOrder);
1071                 }
1072                 else
1073                     al::unreachable();
1074             }
1075             else
1076                 mixmatrix = shrot;
1077
1078             /* Convert the rotation matrix for input ordering and scaling, and
1079              * whether input is 2D or 3D.
1080              */
1081             const auto index_map = Is2DAmbisonic(voice->mFmtChannels) ?
1082                 GetAmbi2DLayout(voice->mAmbiLayout).subspan(0) :
1083                 GetAmbiLayout(voice->mAmbiLayout).subspan(0);
1084
1085             /* Scale the panned W signal inversely to coverage (full coverage
1086              * means no panned signal), and according to the channel scaling.
1087              */
1088             std::for_each(coeffs.begin(), coeffs.end(),
1089                 [scale=(1.0f-coverage)*scales[0]](float &coeff) noexcept { coeff *= scale; });
1090
1091             for(size_t c{0};c < num_channels;c++)
1092             {
1093                 const size_t acn{index_map[c]};
1094                 const float scale{scales[acn] * coverage};
1095
1096                 /* For channel 0, combine the B-Format signal (scaled according
1097                  * to the coverage amount) with the directional pan. For all
1098                  * other channels, use just the (scaled) B-Format signal.
1099                  */
1100                 std::transform(mixmatrix[acn].cbegin(), mixmatrix[acn].cend(), coeffs.begin(),
1101                     coeffs.begin(), [scale](const float in, const float coeff) noexcept
1102                     { return in*scale + coeff; });
1103
1104                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1105                     voice->mChans[c].mDryParams.Gains.Target);
1106
1107                 for(uint i{0};i < NumSends;i++)
1108                 {
1109                     if(const EffectSlot *Slot{SendSlots[i]})
1110                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1111                             voice->mChans[c].mWetParams[i].Gains.Target);
1112                 }
1113
1114                 coeffs = std::array<float,MaxAmbiChannels>{};
1115             }
1116         }
1117     }
1118     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
1119     {
1120         /* Direct source channels always play local. Skip the virtual channels
1121          * and write inputs to the matching real outputs.
1122          */
1123         voice->mDirect.Buffer = Device->RealOut.Buffer;
1124
1125         for(size_t c{0};c < num_channels;c++)
1126         {
1127             const float pangain{SelectChannelGain(chans[c].channel)};
1128             if(uint idx{Device->channelIdxByName(chans[c].channel)}; idx != InvalidChannelIndex)
1129                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1130             else if(DirectChannels == DirectMode::RemixMismatch)
1131             {
1132                 auto match_channel = [channel=chans[c].channel](const InputRemixMap &map) noexcept
1133                 { return channel == map.channel; };
1134                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
1135                     Device->RealOut.RemixMap.cend(), match_channel);
1136                 if(remap != Device->RealOut.RemixMap.cend())
1137                 {
1138                     for(const auto &target : remap->targets)
1139                     {
1140                         idx = Device->channelIdxByName(target.channel);
1141                         if(idx != InvalidChannelIndex)
1142                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain
1143                                 * target.mix;
1144                     }
1145                 }
1146             }
1147         }
1148
1149         /* Auxiliary sends still use normal channel panning since they mix to
1150          * B-Format, which can't channel-match.
1151          */
1152         for(size_t c{0};c < num_channels;c++)
1153         {
1154             /* Skip LFE */
1155             if(chans[c].channel == LFE)
1156                 continue;
1157
1158             const float pangain{SelectChannelGain(chans[c].channel)};
1159             const auto coeffs = CalcDirectionCoeffs(chans[c].pos, 0.0f);
1160
1161             for(uint i{0};i < NumSends;i++)
1162             {
1163                 if(const EffectSlot *Slot{SendSlots[i]})
1164                     ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1165                         voice->mChans[c].mWetParams[i].Gains.Target);
1166             }
1167         }
1168     }
1169     else if(Device->mRenderMode == RenderMode::Hrtf)
1170     {
1171         /* Full HRTF rendering. Skip the virtual channels and render to the
1172          * real outputs.
1173          */
1174         voice->mDirect.Buffer = Device->RealOut.Buffer;
1175
1176         if(Distance > std::numeric_limits<float>::epsilon())
1177         {
1178             if(voice->mFmtChannels == FmtMono)
1179             {
1180                 const float src_ev{std::asin(std::clamp(ypos, -1.0f, 1.0f))};
1181                 const float src_az{std::atan2(xpos, -zpos)};
1182
1183                 Device->mHrtf->getCoeffs(src_ev, src_az, Distance*NfcScale, Spread,
1184                     voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
1185                     voice->mChans[0].mDryParams.Hrtf.Target.Delay);
1186                 voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1187
1188                 const auto coeffs = CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1189                 for(uint i{0};i < NumSends;i++)
1190                 {
1191                     if(const EffectSlot *Slot{SendSlots[i]})
1192                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1193                             voice->mChans[0].mWetParams[i].Gains.Target);
1194                 }
1195             }
1196             else for(size_t c{0};c < num_channels;c++)
1197             {
1198                 using namespace al::numbers;
1199
1200                 /* Skip LFE */
1201                 if(chans[c].channel == LFE) continue;
1202                 const float pangain{SelectChannelGain(chans[c].channel)};
1203
1204                 /* Warp the channel position toward the source position as the
1205                  * source spread decreases. With no spread, all channels are at
1206                  * the source position, at full spread (pi*2), each channel is
1207                  * left unchanged.
1208                  */
1209                 const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1210                 std::array pos{
1211                     lerpf(chans[c].pos[0], xpos, a),
1212                     lerpf(chans[c].pos[1], ypos, a),
1213                     lerpf(chans[c].pos[2], zpos, a)};
1214                 const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1215                 if(len < 1.0f)
1216                 {
1217                     pos[0] /= len;
1218                     pos[1] /= len;
1219                     pos[2] /= len;
1220                 }
1221
1222                 const float ev{std::asin(std::clamp(pos[1], -1.0f, 1.0f))};
1223                 const float az{std::atan2(pos[0], -pos[2])};
1224
1225                 Device->mHrtf->getCoeffs(ev, az, Distance*NfcScale, 0.0f,
1226                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1227                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1228                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1229
1230                 const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1231                 for(uint i{0};i < NumSends;i++)
1232                 {
1233                     if(const EffectSlot *Slot{SendSlots[i]})
1234                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1235                             voice->mChans[c].mWetParams[i].Gains.Target);
1236                 }
1237             }
1238         }
1239         else
1240         {
1241             /* With no distance, spread is only meaningful for mono sources
1242              * where it can be 0 or full (non-mono sources are always full
1243              * spread here).
1244              */
1245             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1246
1247             /* Local sources on HRTF play with each channel panned to its
1248              * relative location around the listener, providing "virtual
1249              * speaker" responses.
1250              */
1251             for(size_t c{0};c < num_channels;c++)
1252             {
1253                 /* Skip LFE */
1254                 if(chans[c].channel == LFE)
1255                     continue;
1256                 const float pangain{SelectChannelGain(chans[c].channel)};
1257
1258                 /* Get the HRIR coefficients and delays for this channel
1259                  * position.
1260                  */
1261                 const float ev{std::asin(chans[c].pos[1])};
1262                 const float az{std::atan2(chans[c].pos[0], -chans[c].pos[2])};
1263
1264                 Device->mHrtf->getCoeffs(ev, az, std::numeric_limits<float>::infinity(), spread,
1265                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1266                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1267                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base * pangain;
1268
1269                 /* Normal panning for auxiliary sends. */
1270                 const auto coeffs = CalcDirectionCoeffs(chans[c].pos, spread);
1271
1272                 for(uint i{0};i < NumSends;i++)
1273                 {
1274                     if(const EffectSlot *Slot{SendSlots[i]})
1275                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1276                             voice->mChans[c].mWetParams[i].Gains.Target);
1277                 }
1278             }
1279         }
1280
1281         voice->mFlags.set(VoiceHasHrtf);
1282     }
1283     else
1284     {
1285         /* Non-HRTF rendering. Use normal panning to the output. */
1286
1287         if(Distance > std::numeric_limits<float>::epsilon())
1288         {
1289             /* Calculate NFC filter coefficient if needed. */
1290             if(Device->AvgSpeakerDist > 0.0f)
1291             {
1292                 /* Clamp the distance for really close sources, to prevent
1293                  * excessive bass.
1294                  */
1295                 const float mdist{std::max(Distance*NfcScale, Device->AvgSpeakerDist/4.0f)};
1296                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1297
1298                 /* Adjust NFC filters. */
1299                 for(size_t c{0};c < num_channels;c++)
1300                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1301
1302                 voice->mFlags.set(VoiceHasNfc);
1303             }
1304
1305             if(voice->mFmtChannels == FmtMono)
1306             {
1307                 auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1308                 {
1309                     if(mode != RenderMode::Pairwise)
1310                         return CalcDirectionCoeffs(std::array{xpos, ypos, zpos}, Spread);
1311                     const auto pos = ScaleAzimuthFront3_2(std::array{xpos, ypos, zpos});
1312                     return CalcDirectionCoeffs(pos, Spread);
1313                 };
1314                 const auto coeffs = calc_coeffs(Device->mRenderMode);
1315
1316                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base,
1317                     voice->mChans[0].mDryParams.Gains.Target);
1318                 for(uint i{0};i < NumSends;i++)
1319                 {
1320                     if(const EffectSlot *Slot{SendSlots[i]})
1321                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base,
1322                             voice->mChans[0].mWetParams[i].Gains.Target);
1323                 }
1324             }
1325             else
1326             {
1327                 using namespace al::numbers;
1328
1329                 for(size_t c{0};c < num_channels;c++)
1330                 {
1331                     const float pangain{SelectChannelGain(chans[c].channel)};
1332
1333                     /* Special-case LFE */
1334                     if(chans[c].channel == LFE)
1335                     {
1336                         if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1337                         {
1338                             const uint idx{Device->channelIdxByName(chans[c].channel)};
1339                             if(idx != InvalidChannelIndex)
1340                                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base
1341                                     * pangain;
1342                         }
1343                         continue;
1344                     }
1345
1346                     /* Warp the channel position toward the source position as
1347                      * the spread decreases. With no spread, all channels are
1348                      * at the source position, at full spread (pi*2), each
1349                      * channel position is left unchanged.
1350                      */
1351                     const float a{1.0f - (inv_pi_v<float>/2.0f)*Spread};
1352                     std::array pos{
1353                         lerpf(chans[c].pos[0], xpos, a),
1354                         lerpf(chans[c].pos[1], ypos, a),
1355                         lerpf(chans[c].pos[2], zpos, a)};
1356                     const float len{std::sqrt(pos[0]*pos[0] + pos[1]*pos[1] + pos[2]*pos[2])};
1357                     if(len < 1.0f)
1358                     {
1359                         pos[0] /= len;
1360                         pos[1] /= len;
1361                         pos[2] /= len;
1362                     }
1363
1364                     if(Device->mRenderMode == RenderMode::Pairwise)
1365                         pos = ScaleAzimuthFront3(pos);
1366                     const auto coeffs = CalcDirectionCoeffs(pos, 0.0f);
1367
1368                     ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1369                         voice->mChans[c].mDryParams.Gains.Target);
1370                     for(uint i{0};i < NumSends;i++)
1371                     {
1372                         if(const EffectSlot *Slot{SendSlots[i]})
1373                             ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1374                                 voice->mChans[c].mWetParams[i].Gains.Target);
1375                     }
1376                 }
1377             }
1378         }
1379         else
1380         {
1381             if(Device->AvgSpeakerDist > 0.0f)
1382             {
1383                 /* If the source distance is 0, simulate a plane-wave by using
1384                  * infinite distance, which results in a w0 of 0.
1385                  */
1386                 static constexpr float w0{0.0f};
1387                 for(size_t c{0};c < num_channels;c++)
1388                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1389
1390                 voice->mFlags.set(VoiceHasNfc);
1391             }
1392
1393             /* With no distance, spread is only meaningful for mono sources
1394              * where it can be 0 or full (non-mono sources are always full
1395              * spread here).
1396              */
1397             const float spread{Spread * float(voice->mFmtChannels == FmtMono)};
1398             for(size_t c{0};c < num_channels;c++)
1399             {
1400                 const float pangain{SelectChannelGain(chans[c].channel)};
1401
1402                 /* Special-case LFE */
1403                 if(chans[c].channel == LFE)
1404                 {
1405                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1406                     {
1407                         const uint idx{Device->channelIdxByName(chans[c].channel)};
1408                         if(idx != InvalidChannelIndex)
1409                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base * pangain;
1410                     }
1411                     continue;
1412                 }
1413
1414                 const auto coeffs = CalcDirectionCoeffs((Device->mRenderMode==RenderMode::Pairwise)
1415                     ? ScaleAzimuthFront3(chans[c].pos) : chans[c].pos, spread);
1416
1417                 ComputePanGains(&Device->Dry, coeffs, DryGain.Base * pangain,
1418                     voice->mChans[c].mDryParams.Gains.Target);
1419                 for(uint i{0};i < NumSends;i++)
1420                 {
1421                     if(const EffectSlot *Slot{SendSlots[i]})
1422                         ComputePanGains(&Slot->Wet, coeffs, WetGain[i].Base * pangain,
1423                             voice->mChans[c].mWetParams[i].Gains.Target);
1424                 }
1425             }
1426         }
1427     }
1428
1429     {
1430         const float hfNorm{props->Direct.HFReference / Frequency};
1431         const float lfNorm{props->Direct.LFReference / Frequency};
1432
1433         voice->mDirect.FilterType = AF_None;
1434         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1435         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1436
1437         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1438         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1439         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1440         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1441         for(size_t c{1};c < num_channels;c++)
1442         {
1443             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1444             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1445         }
1446     }
1447     for(uint i{0};i < NumSends;i++)
1448     {
1449         const float hfNorm{props->Send[i].HFReference / Frequency};
1450         const float lfNorm{props->Send[i].LFReference / Frequency};
1451
1452         voice->mSend[i].FilterType = AF_None;
1453         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1454         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1455
1456         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1457         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1458         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1459         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1460         for(size_t c{1};c < num_channels;c++)
1461         {
1462             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1463             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1464         }
1465     }
1466 }
1467
1468 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1469 {
1470     DeviceBase *Device{context->mDevice};
1471     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1472
1473     voice->mDirect.Buffer = Device->Dry.Buffer;
1474     for(uint i{0};i < Device->NumAuxSends;i++)
1475     {
1476         SendSlots[i] = props->Send[i].Slot;
1477         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1478         {
1479             SendSlots[i] = nullptr;
1480             voice->mSend[i].Buffer = {};
1481         }
1482         else
1483             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1484     }
1485
1486     /* Calculate the stepping value */
1487     const auto Pitch = static_cast<float>(voice->mFrequency) /
1488         static_cast<float>(Device->Frequency) * props->Pitch;
1489     if(Pitch > float{MaxPitch})
1490         voice->mStep = MaxPitch<<MixerFracBits;
1491     else
1492         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1493     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1494
1495     /* Calculate gains */
1496     GainTriplet DryGain{};
1497     DryGain.Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1498         props->Direct.Gain * context->mParams.Gain, GainMixMax);
1499     DryGain.HF = props->Direct.GainHF;
1500     DryGain.LF = props->Direct.GainLF;
1501
1502     std::array<GainTriplet,MaxSendCount> WetGain{};
1503     for(uint i{0};i < Device->NumAuxSends;i++)
1504     {
1505         WetGain[i].Base = std::min(std::clamp(props->Gain, props->MinGain, props->MaxGain) *
1506             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1507         WetGain[i].HF = props->Send[i].GainHF;
1508         WetGain[i].LF = props->Send[i].GainLF;
1509     }
1510
1511     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1512         context->mParams, Device);
1513 }
1514
1515 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1516 {
1517     DeviceBase *Device{context->mDevice};
1518     const uint NumSends{Device->NumAuxSends};
1519
1520     /* Set mixing buffers and get send parameters. */
1521     voice->mDirect.Buffer = Device->Dry.Buffer;
1522     std::array<EffectSlot*,MaxSendCount> SendSlots{};
1523     std::array<float,MaxSendCount> RoomRolloff{};
1524     for(uint i{0};i < NumSends;i++)
1525     {
1526         SendSlots[i] = props->Send[i].Slot;
1527         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1528         {
1529             SendSlots[i] = nullptr;
1530             voice->mSend[i].Buffer = {};
1531         }
1532         else
1533         {
1534             /* NOTE: Contrary to the EFX docs, the effect's room rolloff factor
1535              * applies to the selected distance model along with the source's
1536              * room rolloff factor, not necessarily the inverse distance model.
1537              */
1538             RoomRolloff[i] = props->RoomRolloffFactor + SendSlots[i]->RoomRolloff;
1539
1540             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1541         }
1542     }
1543
1544     /* Transform source to listener space (convert to head relative) */
1545     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1546     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1547     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1548     if(!props->HeadRelative)
1549     {
1550         /* Transform source vectors */
1551         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1552         Velocity = context->mParams.Matrix * Velocity;
1553         Direction = context->mParams.Matrix * Direction;
1554     }
1555     else
1556     {
1557         /* Offset the source velocity to be relative of the listener velocity */
1558         Velocity += context->mParams.Velocity;
1559     }
1560
1561     const bool directional{Direction.normalize() > 0.0f};
1562     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1563     const float Distance{ToSource.normalize()};
1564
1565     /* Calculate distance attenuation */
1566     float ClampedDist{Distance};
1567     float DryGainBase{props->Gain};
1568     std::array<float,MaxSendCount> WetGainBase{};
1569     WetGainBase.fill(props->Gain);
1570
1571     float DryAttnBase{1.0f};
1572     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1573         : context->mParams.mDistanceModel)
1574     {
1575     case DistanceModel::InverseClamped:
1576         if(props->MaxDistance < props->RefDistance) break;
1577         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1578         /*fall-through*/
1579     case DistanceModel::Inverse:
1580         if(props->RefDistance > 0.0f)
1581         {
1582             float dist{lerpf(props->RefDistance, ClampedDist, props->RolloffFactor)};
1583             if(dist > 0.0f)
1584             {
1585                 DryAttnBase = props->RefDistance / dist;
1586                 DryGainBase *= DryAttnBase;
1587             }
1588
1589             for(size_t i{0};i < NumSends;++i)
1590             {
1591                 dist = lerpf(props->RefDistance, ClampedDist, RoomRolloff[i]);
1592                 if(dist > 0.0f) WetGainBase[i] *= props->RefDistance / dist;
1593             }
1594         }
1595         break;
1596
1597     case DistanceModel::LinearClamped:
1598         if(props->MaxDistance < props->RefDistance) break;
1599         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1600         /*fall-through*/
1601     case DistanceModel::Linear:
1602         if(props->MaxDistance != props->RefDistance)
1603         {
1604             float attn{(ClampedDist-props->RefDistance) /
1605                 (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1606             DryAttnBase = std::max(1.0f - attn, 0.0f);
1607             DryGainBase *= DryAttnBase;
1608
1609             for(size_t i{0};i < NumSends;++i)
1610             {
1611                 attn = (ClampedDist-props->RefDistance) /
1612                     (props->MaxDistance-props->RefDistance) * RoomRolloff[i];
1613                 WetGainBase[i] *= std::max(1.0f - attn, 0.0f);
1614             }
1615         }
1616         break;
1617
1618     case DistanceModel::ExponentClamped:
1619         if(props->MaxDistance < props->RefDistance) break;
1620         ClampedDist = std::clamp(ClampedDist, props->RefDistance, props->MaxDistance);
1621         /*fall-through*/
1622     case DistanceModel::Exponent:
1623         if(ClampedDist > 0.0f && props->RefDistance > 0.0f)
1624         {
1625             const float dist_ratio{ClampedDist/props->RefDistance};
1626             DryAttnBase = std::pow(dist_ratio, -props->RolloffFactor);
1627             DryGainBase *= DryAttnBase;
1628             for(size_t i{0};i < NumSends;++i)
1629                 WetGainBase[i] *= std::pow(dist_ratio, -RoomRolloff[i]);
1630         }
1631         break;
1632
1633     case DistanceModel::Disable:
1634         break;
1635     }
1636
1637     /* Calculate directional soundcones */
1638     float ConeHF{1.0f}, WetCone{1.0f}, WetConeHF{1.0f};
1639     if(directional && props->InnerAngle < 360.0f)
1640     {
1641         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1642         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1643
1644         float ConeGain{1.0f};
1645         if(Angle >= props->OuterAngle)
1646         {
1647             ConeGain = props->OuterGain;
1648             if(props->DryGainHFAuto)
1649                 ConeHF = props->OuterGainHF;
1650         }
1651         else if(Angle >= props->InnerAngle)
1652         {
1653             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1654             ConeGain = lerpf(1.0f, props->OuterGain, scale);
1655             if(props->DryGainHFAuto)
1656                 ConeHF = lerpf(1.0f, props->OuterGainHF, scale);
1657         }
1658
1659         DryGainBase *= ConeGain;
1660         if(props->WetGainAuto)
1661             WetCone = ConeGain;
1662         if(props->WetGainHFAuto)
1663             WetConeHF = ConeHF;
1664     }
1665
1666     /* Apply gain and frequency filters */
1667     GainTriplet DryGain{};
1668     DryGainBase = std::clamp(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1669     DryGain.Base = std::min(DryGainBase * props->Direct.Gain, GainMixMax);
1670     DryGain.HF = ConeHF * props->Direct.GainHF;
1671     DryGain.LF = props->Direct.GainLF;
1672
1673     std::array<GainTriplet,MaxSendCount> WetGain{};
1674     for(uint i{0};i < NumSends;i++)
1675     {
1676         const auto gain = std::clamp(WetGainBase[i]*WetCone, props->MinGain, props->MaxGain) *
1677             context->mParams.Gain;
1678         WetGain[i].Base = std::min(gain * props->Send[i].Gain, GainMixMax);
1679         WetGain[i].HF = WetConeHF * props->Send[i].GainHF;
1680         WetGain[i].LF = props->Send[i].GainLF;
1681     }
1682
1683     /* Distance-based air absorption and initial send decay. */
1684     if(Distance > props->RefDistance) LIKELY
1685     {
1686         /* FIXME: In keeping with EAX, the base air absorption gain should be
1687          * taken from the reverb property in the "primary fx slot" when it has
1688          * a reverb effect and the environment flag set, and be applied to the
1689          * direct path and all environment sends, rather than each path using
1690          * the air absorption gain associated with the given slot's effect. At
1691          * this point in the mixer, and even in EFX itself, there's no concept
1692          * of a "primary fx slot" so it's unclear which effect slot should be
1693          * checked.
1694          *
1695          * The HF reference is also intended to be handled the same way, but
1696          * again, there's no concept of a "primary fx slot" here and no way to
1697          * know which effect slot to look at for the reference frequency.
1698          */
1699         const auto distance_units = float{(Distance-props->RefDistance) * props->RolloffFactor};
1700         const auto distance_meters = float{distance_units * context->mParams.MetersPerUnit};
1701         const auto absorb = float{distance_meters * props->AirAbsorptionFactor};
1702         if(absorb > std::numeric_limits<float>::epsilon())
1703             DryGain.HF *= std::pow(context->mParams.AirAbsorptionGainHF, absorb);
1704
1705         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1706          * adjustment is applied to the send gains.
1707          */
1708         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1709         {
1710             if(!SendSlots[i] || !(SendSlots[i]->DecayTime > 0.0f))
1711                 continue;
1712
1713             if(SendSlots[i]->AirAbsorptionGainHF < 1.0f
1714                 && absorb > std::numeric_limits<float>::epsilon())
1715                 WetGain[i].HF *= std::pow(SendSlots[i]->AirAbsorptionGainHF, absorb);
1716
1717             const float DecayDistance{SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec};
1718
1719             /* Apply a decay-time transformation to the wet path, based on the
1720              * source distance. The initial decay of the reverb effect is
1721              * calculated and applied to the wet path.
1722              *
1723              * FIXME: This is very likely not correct. It more likely should
1724              * work by calculating a rolloff dynamically based on the reverb
1725              * parameters (and source distance?) and add it to the room rolloff
1726              * with the reverb and source rolloff parameters.
1727              */
1728             const float baseAttn{DryAttnBase};
1729             const float fact{distance_meters / DecayDistance};
1730             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1731             WetGain[i].Base *= gain;
1732         }
1733     }
1734
1735
1736     /* Initial source pitch */
1737     float Pitch{props->Pitch};
1738
1739     /* Calculate velocity-based doppler effect */
1740     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1741     if(DopplerFactor > 0.0f)
1742     {
1743         const alu::Vector &lvelocity = context->mParams.Velocity;
1744         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1745         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1746
1747         const float SpeedOfSound{context->mParams.SpeedOfSound};
1748         if(!(vls < SpeedOfSound))
1749         {
1750             /* Listener moving away from the source at the speed of sound.
1751              * Sound waves can't catch it.
1752              */
1753             Pitch = 0.0f;
1754         }
1755         else if(!(vss < SpeedOfSound))
1756         {
1757             /* Source moving toward the listener at the speed of sound. Sound
1758              * waves bunch up to extreme frequencies.
1759              */
1760             Pitch = std::numeric_limits<float>::infinity();
1761         }
1762         else
1763         {
1764             /* Source and listener movement is nominal. Calculate the proper
1765              * doppler shift.
1766              */
1767             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1768         }
1769     }
1770
1771     /* Adjust pitch based on the buffer and output frequencies, and calculate
1772      * fixed-point stepping value.
1773      */
1774     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1775     if(Pitch > float{MaxPitch})
1776         voice->mStep = MaxPitch<<MixerFracBits;
1777     else
1778         voice->mStep = std::max(fastf2u(Pitch * MixerFracOne), 1u);
1779     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1780
1781     float spread{0.0f};
1782     if(props->Radius > Distance)
1783         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1784     else if(Distance > 0.0f)
1785         spread = std::asin(props->Radius/Distance) * 2.0f;
1786
1787     CalcPanningAndFilters(voice, ToSource[0]*XScale, ToSource[1]*YScale, ToSource[2]*ZScale,
1788         Distance, spread, DryGain, WetGain, SendSlots, props, context->mParams, Device);
1789 }
1790
1791 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1792 {
1793     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1794     if(!props && !force) return;
1795
1796     if(props)
1797     {
1798         voice->mProps = static_cast<VoiceProps&>(*props);
1799
1800         AtomicReplaceHead(context->mFreeVoiceProps, props);
1801     }
1802
1803     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1804             && !IsAmbisonic(voice->mFmtChannels))
1805         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1806         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1807         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1808     else
1809         CalcAttnSourceParams(voice, &voice->mProps, context);
1810 }
1811
1812
1813 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1814 {
1815     RingBuffer *ring{context->mAsyncEvents.get()};
1816     auto evt_vec = ring->getWriteVector();
1817     if(evt_vec.first.len < 1) return;
1818
1819     auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec.first.buf);
1820     evt.mId = id;
1821     switch(state)
1822     {
1823     case VChangeState::Reset:
1824         evt.mState = AsyncSrcState::Reset;
1825         break;
1826     case VChangeState::Stop:
1827         evt.mState = AsyncSrcState::Stop;
1828         break;
1829     case VChangeState::Play:
1830         evt.mState = AsyncSrcState::Play;
1831         break;
1832     case VChangeState::Pause:
1833         evt.mState = AsyncSrcState::Pause;
1834         break;
1835     /* Shouldn't happen. */
1836     case VChangeState::Restart:
1837         al::unreachable();
1838     }
1839
1840     ring->writeAdvance(1);
1841 }
1842
1843 void ProcessVoiceChanges(ContextBase *ctx)
1844 {
1845     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1846     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1847     if(!next) return;
1848
1849     const auto enabledevt = ctx->mEnabledEvts.load(std::memory_order_acquire);
1850     do {
1851         cur = next;
1852
1853         bool sendevt{false};
1854         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1855         {
1856             if(Voice *voice{cur->mVoice})
1857             {
1858                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1859                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1860                 /* A source ID indicates the voice was playing or paused, which
1861                  * gets a reset/stop event.
1862                  */
1863                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1864                 Voice::State oldvstate{Voice::Playing};
1865                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1866                     std::memory_order_relaxed, std::memory_order_acquire);
1867                 voice->mPendingChange.store(false, std::memory_order_release);
1868             }
1869             /* Reset state change events are always sent, even if the voice is
1870              * already stopped or even if there is no voice.
1871              */
1872             sendevt |= (cur->mState == VChangeState::Reset);
1873         }
1874         else if(cur->mState == VChangeState::Pause)
1875         {
1876             Voice *voice{cur->mVoice};
1877             Voice::State oldvstate{Voice::Playing};
1878             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1879                 std::memory_order_release, std::memory_order_acquire);
1880         }
1881         else if(cur->mState == VChangeState::Play)
1882         {
1883             /* NOTE: When playing a voice, sending a source state change event
1884              * depends if there's an old voice to stop and if that stop is
1885              * successful. If there is no old voice, a playing event is always
1886              * sent. If there is an old voice, an event is sent only if the
1887              * voice is already stopped.
1888              */
1889             if(Voice *oldvoice{cur->mOldVoice})
1890             {
1891                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1892                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1893                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1894                 Voice::State oldvstate{Voice::Playing};
1895                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1896                     std::memory_order_relaxed, std::memory_order_acquire);
1897                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1898             }
1899             else
1900                 sendevt = true;
1901
1902             Voice *voice{cur->mVoice};
1903             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1904         }
1905         else if(cur->mState == VChangeState::Restart)
1906         {
1907             /* Restarting a voice never sends a source change event. */
1908             Voice *oldvoice{cur->mOldVoice};
1909             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1910             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1911             /* If there's no sourceID, the old voice finished so don't start
1912              * the new one at its new offset.
1913              */
1914             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1915             {
1916                 /* Otherwise, set the voice to stopping if it's not already (it
1917                  * might already be, if paused), and play the new voice as
1918                  * appropriate.
1919                  */
1920                 Voice::State oldvstate{Voice::Playing};
1921                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1922                     std::memory_order_relaxed, std::memory_order_acquire);
1923
1924                 Voice *voice{cur->mVoice};
1925                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1926                     : Voice::Stopped, std::memory_order_release);
1927             }
1928             oldvoice->mPendingChange.store(false, std::memory_order_release);
1929         }
1930         if(sendevt && enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
1931             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1932
1933         next = cur->mNext.load(std::memory_order_acquire);
1934     } while(next);
1935     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1936 }
1937
1938 void ProcessParamUpdates(ContextBase *ctx, const al::span<EffectSlot*> slots,
1939     const al::span<EffectSlot*> sorted_slots, const al::span<Voice*> voices)
1940 {
1941     ProcessVoiceChanges(ctx);
1942
1943     IncrementRef(ctx->mUpdateCount);
1944     if(!ctx->mHoldUpdates.load(std::memory_order_acquire)) LIKELY
1945     {
1946         bool force{CalcContextParams(ctx)};
1947         auto sorted_slot_base = al::to_address(sorted_slots.begin());
1948         for(EffectSlot *slot : slots)
1949             force |= CalcEffectSlotParams(slot, sorted_slot_base, ctx);
1950
1951         for(Voice *voice : voices)
1952         {
1953             /* Only update voices that have a source. */
1954             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1955                 CalcSourceParams(voice, ctx, force);
1956         }
1957     }
1958     IncrementRef(ctx->mUpdateCount);
1959 }
1960
1961 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1962 {
1963     ASSUME(SamplesToDo > 0);
1964
1965     const nanoseconds curtime{device->mClockBase.load(std::memory_order_relaxed) +
1966         nanoseconds{seconds{device->mSamplesDone.load(std::memory_order_relaxed)}}/
1967         device->Frequency};
1968
1969     auto proc_context = [SamplesToDo,curtime](ContextBase *ctx)
1970     {
1971         const auto auxslotspan = al::span{*ctx->mActiveAuxSlots.load(std::memory_order_acquire)};
1972         const auto auxslots = auxslotspan.first(auxslotspan.size()>>1);
1973         const auto sorted_slots = auxslotspan.last(auxslotspan.size()>>1);
1974         const auto voices = ctx->getVoicesSpanAcquired();
1975
1976         /* Process pending property updates for objects on the context. */
1977         ProcessParamUpdates(ctx, auxslots, sorted_slots, voices);
1978
1979         /* Clear auxiliary effect slot mixing buffers. */
1980         auto clear_wetbuffers = [](EffectSlot *slot)
1981         {
1982             auto clear_buffer = [](const FloatBufferSpan buffer)
1983             { std::fill(buffer.begin(), buffer.end(), 0.0f); };
1984             std::for_each(slot->Wet.Buffer.begin(), slot->Wet.Buffer.end(), clear_buffer);
1985         };
1986         std::for_each(auxslots.begin(), auxslots.end(), clear_wetbuffers);
1987
1988         /* Process voices that have a playing source. */
1989         auto proc_voice = [ctx,curtime,SamplesToDo](Voice *voice)
1990         {
1991             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1992             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1993                 voice->mix(vstate, ctx, curtime, SamplesToDo);
1994         };
1995         std::for_each(voices.begin(), voices.end(), proc_voice);
1996
1997         /* Process effects. */
1998         if(!auxslots.empty())
1999         {
2000             /* Sort the slots into extra storage, so that effect slots come
2001              * before their effect slot target (or their targets' target). Skip
2002              * sorting if it has already been done.
2003              */
2004             if(!sorted_slots[0])
2005             {
2006                 /* First, copy the slots to the sorted list and partition them,
2007                  * so that all slots without a target slot go to the end.
2008                  */
2009                 auto has_target = [](const EffectSlot *slot) noexcept -> bool
2010                 { return slot->Target != nullptr; };
2011                 auto split_point = std::partition_copy(auxslots.rbegin(), auxslots.rend(),
2012                     sorted_slots.begin(), sorted_slots.rbegin(), has_target).first;
2013                 /* There must be at least one slot without a slot target. */
2014                 assert(split_point != sorted_slots.end());
2015
2016                 /* Starting from the back of the sorted list, continue
2017                  * partitioning the front of the list given each target until
2018                  * all targets are accounted for. This ensures all slots
2019                  * without a target go last, all slots directly targeting those
2020                  * last slots go second-to-last, all slots directly targeting
2021                  * those second-last slots go third-to-last, etc.
2022                  */
2023                 auto next_target = sorted_slots.end();
2024                 while(std::distance(sorted_slots.begin(), split_point) > 1)
2025                 {
2026                     /* This shouldn't happen, but if there's unsorted slots
2027                      * left that don't target any sorted slots, they can't
2028                      * contribute to the output, so leave them.
2029                      */
2030                     if(next_target == split_point) UNLIKELY
2031                         break;
2032
2033                     --next_target;
2034                     auto not_next = [next_target](const EffectSlot *slot) noexcept -> bool
2035                     { return slot->Target != *next_target; };
2036                     split_point = std::partition(sorted_slots.begin(), split_point, not_next);
2037                 }
2038             }
2039
2040             auto proc_slot = [SamplesToDo](const EffectSlot *slot)
2041             {
2042                 EffectState *state{slot->mEffectState.get()};
2043                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
2044             };
2045             std::for_each(sorted_slots.begin(), sorted_slots.end(), proc_slot);
2046         }
2047
2048         /* Signal the event handler if there are any events to read. */
2049         if(RingBuffer *ring{ctx->mAsyncEvents.get()}; ring->readSpace() > 0)
2050             ctx->mEventSem.post();
2051     };
2052     const auto contexts = al::span{*device->mContexts.load(std::memory_order_acquire)};
2053     std::for_each(contexts.begin(), contexts.end(), proc_context);
2054 }
2055
2056
2057 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
2058     const al::span<const DistanceComp::ChanData,MaxOutputChannels> chandata)
2059 {
2060     ASSUME(SamplesToDo > 0);
2061
2062     auto distcomp = chandata.begin();
2063     for(auto &chanbuffer : Samples)
2064     {
2065         const float gain{distcomp->Gain};
2066         auto distbuf = al::span{al::assume_aligned<16>(distcomp->Buffer.data()),
2067             distcomp->Buffer.size()};
2068         ++distcomp;
2069
2070         const size_t base{distbuf.size()};
2071         if(base < 1) continue;
2072
2073         const auto inout = al::span{al::assume_aligned<16>(chanbuffer.data()), SamplesToDo};
2074         if(SamplesToDo >= base) LIKELY
2075         {
2076             auto delay_end = std::rotate(inout.begin(), inout.end()-ptrdiff_t(base), inout.end());
2077             std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
2078         }
2079         else
2080         {
2081             auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
2082             std::rotate(distbuf.begin(), delay_start, distbuf.begin()+ptrdiff_t(base));
2083         }
2084         std::transform(inout.begin(), inout.end(), inout.begin(),
2085             [gain](float s) { return s*gain; });
2086     }
2087 }
2088
2089 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
2090     const float quant_scale, const size_t SamplesToDo)
2091 {
2092     static constexpr double invRNGRange{1.0 / std::numeric_limits<uint>::max()};
2093     ASSUME(SamplesToDo > 0);
2094
2095     /* Dithering. Generate whitenoise (uniform distribution of random values
2096      * between -1 and +1) and add it to the sample values, after scaling up to
2097      * the desired quantization depth and before rounding.
2098      */
2099     const float invscale{1.0f / quant_scale};
2100     uint seed{*dither_seed};
2101     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
2102     {
2103         float val{sample * quant_scale};
2104         uint rng0{dither_rng(&seed)};
2105         uint rng1{dither_rng(&seed)};
2106         val += static_cast<float>(rng0*invRNGRange - rng1*invRNGRange);
2107         return fast_roundf(val) * invscale;
2108     };
2109     for(FloatBufferLine &inout : Samples)
2110         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
2111     *dither_seed = seed;
2112 }
2113
2114
2115 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
2116  * chokes on that given the inline specializations.
2117  */
2118 template<typename T>
2119 inline T SampleConv(float) noexcept;
2120
2121 template<> inline float SampleConv(float val) noexcept
2122 { return val; }
2123 template<> inline int32_t SampleConv(float val) noexcept
2124 {
2125     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
2126      * This means a normalized float has at most 25 bits of signed precision.
2127      * When scaling and clamping for a signed 32-bit integer, these following
2128      * values are the best a float can give.
2129      */
2130     return fastf2i(std::clamp(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
2131 }
2132 template<> inline int16_t SampleConv(float val) noexcept
2133 { return static_cast<int16_t>(fastf2i(std::clamp(val*32768.0f, -32768.0f, 32767.0f))); }
2134 template<> inline int8_t SampleConv(float val) noexcept
2135 { return static_cast<int8_t>(fastf2i(std::clamp(val*128.0f, -128.0f, 127.0f))); }
2136
2137 /* Define unsigned output variations. */
2138 template<> inline uint32_t SampleConv(float val) noexcept
2139 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
2140 template<> inline uint16_t SampleConv(float val) noexcept
2141 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
2142 template<> inline uint8_t SampleConv(float val) noexcept
2143 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
2144
2145 template<typename T>
2146 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
2147     const size_t SamplesToDo, const size_t FrameStep)
2148 {
2149     ASSUME(FrameStep > 0);
2150     ASSUME(SamplesToDo > 0);
2151
2152     const auto output = al::span{static_cast<T*>(OutBuffer), (Offset+SamplesToDo)*FrameStep}
2153         .subspan(Offset*FrameStep);
2154     size_t c{0};
2155     for(const FloatBufferLine &inbuf : InBuffer)
2156     {
2157         auto out = output.begin();
2158         auto conv_sample = [FrameStep,c,&out](const float s) noexcept
2159         {
2160             out[c] = SampleConv<T>(s);
2161             out += ptrdiff_t(FrameStep);
2162         };
2163         std::for_each_n(inbuf.cbegin(), SamplesToDo, conv_sample);
2164         ++c;
2165     }
2166     if(const size_t extra{FrameStep - c})
2167     {
2168         const auto silence = SampleConv<T>(0.0f);
2169         for(size_t i{0};i < SamplesToDo;++i)
2170             std::fill_n(&output[i*FrameStep + c], extra, silence);
2171     }
2172 }
2173
2174 template<typename T>
2175 void Write(const al::span<const FloatBufferLine> InBuffer, al::span<void*> OutBuffers,
2176     const size_t Offset, const size_t SamplesToDo)
2177 {
2178     ASSUME(SamplesToDo > 0);
2179
2180     auto srcbuf = InBuffer.cbegin();
2181     for(auto *dstbuf : OutBuffers)
2182     {
2183         const auto src = al::span{*srcbuf}.first(SamplesToDo);
2184         const auto dst = al::span{static_cast<T*>(dstbuf), Offset+SamplesToDo}.subspan(Offset);
2185         std::transform(src.cbegin(), src.end(), dst.begin(), SampleConv<T>);
2186         ++srcbuf;
2187     }
2188 }
2189
2190 } // namespace
2191
2192 uint DeviceBase::renderSamples(const uint numSamples)
2193 {
2194     const uint samplesToDo{std::min(numSamples, uint{BufferLineSize})};
2195
2196     /* Clear main mixing buffers. */
2197     for(FloatBufferLine &buffer : MixBuffer)
2198         buffer.fill(0.0f);
2199
2200     {
2201         const auto mixLock = getWriteMixLock();
2202
2203         /* Process and mix each context's sources and effects. */
2204         ProcessContexts(this, samplesToDo);
2205
2206         /* Every second's worth of samples is converted and added to clock base
2207          * so that large sample counts don't overflow during conversion. This
2208          * also guarantees a stable conversion.
2209          */
2210         auto samplesDone = mSamplesDone.load(std::memory_order_relaxed) + samplesToDo;
2211         auto clockBase = mClockBase.load(std::memory_order_relaxed) +
2212             std::chrono::seconds{samplesDone/Frequency};
2213         mSamplesDone.store(samplesDone%Frequency, std::memory_order_relaxed);
2214         mClockBase.store(clockBase, std::memory_order_relaxed);
2215     }
2216
2217     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
2218      * (Ambisonic decode, UHJ encode, etc).
2219      */
2220     postProcess(samplesToDo);
2221
2222     /* Apply compression, limiting sample amplitude if needed or desired. */
2223     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer);
2224
2225     /* Apply delays and attenuation for mismatched speaker distances. */
2226     if(ChannelDelays)
2227         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels);
2228
2229     /* Apply dithering. The compressor should have left enough headroom for the
2230      * dither noise to not saturate.
2231      */
2232     if(DitherDepth > 0.0f)
2233         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
2234
2235     return samplesToDo;
2236 }
2237
2238 void DeviceBase::renderSamples(const al::span<void*> outBuffers, const uint numSamples)
2239 {
2240     FPUCtl mixer_mode{};
2241     uint total{0};
2242     while(const uint todo{numSamples - total})
2243     {
2244         const uint samplesToDo{renderSamples(todo)};
2245
2246         switch(FmtType)
2247         {
2248 #define HANDLE_WRITE(T) case T:                                               \
2249     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffers, total, samplesToDo); break;
2250         HANDLE_WRITE(DevFmtByte)
2251         HANDLE_WRITE(DevFmtUByte)
2252         HANDLE_WRITE(DevFmtShort)
2253         HANDLE_WRITE(DevFmtUShort)
2254         HANDLE_WRITE(DevFmtInt)
2255         HANDLE_WRITE(DevFmtUInt)
2256         HANDLE_WRITE(DevFmtFloat)
2257         }
2258 #undef HANDLE_WRITE
2259
2260         total += samplesToDo;
2261     }
2262 }
2263
2264 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
2265 {
2266     FPUCtl mixer_mode{};
2267     uint total{0};
2268     while(const uint todo{numSamples - total})
2269     {
2270         const uint samplesToDo{renderSamples(todo)};
2271
2272         if(outBuffer) LIKELY
2273         {
2274             /* Finally, interleave and convert samples, writing to the device's
2275              * output buffer.
2276              */
2277             switch(FmtType)
2278             {
2279 #define HANDLE_WRITE(T) case T:                                               \
2280     Write<DevFmtType_t<T>>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
2281             HANDLE_WRITE(DevFmtByte)
2282             HANDLE_WRITE(DevFmtUByte)
2283             HANDLE_WRITE(DevFmtShort)
2284             HANDLE_WRITE(DevFmtUShort)
2285             HANDLE_WRITE(DevFmtInt)
2286             HANDLE_WRITE(DevFmtUInt)
2287             HANDLE_WRITE(DevFmtFloat)
2288 #undef HANDLE_WRITE
2289             }
2290         }
2291
2292         total += samplesToDo;
2293     }
2294 }
2295
2296 void DeviceBase::handleDisconnect(const char *msg, ...)
2297 {
2298     const auto mixLock = getWriteMixLock();
2299
2300     if(Connected.exchange(false, std::memory_order_acq_rel))
2301     {
2302         AsyncEvent evt{std::in_place_type<AsyncDisconnectEvent>};
2303         auto &disconnect = std::get<AsyncDisconnectEvent>(evt);
2304
2305         /* NOLINTBEGIN(*-array-to-pointer-decay) */
2306         va_list args, args2;
2307         va_start(args, msg);
2308         va_copy(args2, args);
2309         if(int msglen{vsnprintf(nullptr, 0, msg, args)}; msglen > 0)
2310         {
2311             disconnect.msg.resize(static_cast<uint>(msglen)+1_uz);
2312             vsnprintf(disconnect.msg.data(), disconnect.msg.size(), msg, args2);
2313         }
2314         else
2315             disconnect.msg = "<failed constructing message>";
2316         va_end(args2);
2317         va_end(args);
2318         /* NOLINTEND(*-array-to-pointer-decay) */
2319
2320         while(!disconnect.msg.empty() && disconnect.msg.back() == '\0')
2321             disconnect.msg.pop_back();
2322
2323         for(ContextBase *ctx : *mContexts.load())
2324         {
2325             RingBuffer *ring{ctx->mAsyncEvents.get()};
2326             auto evt_data = ring->getWriteVector().first;
2327             if(evt_data.len > 0)
2328             {
2329                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2330                 ring->writeAdvance(1);
2331                 ctx->mEventSem.post();
2332             }
2333
2334             if(!ctx->mStopVoicesOnDisconnect.load())
2335             {
2336                 ProcessVoiceChanges(ctx);
2337                 continue;
2338             }
2339
2340             auto voicelist = ctx->getVoicesSpanAcquired();
2341             auto stop_voice = [](Voice *voice) -> void
2342             {
2343                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2344                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2345                 voice->mSourceID.store(0u, std::memory_order_relaxed);
2346                 voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2347             };
2348             std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2349         }
2350     }
2351 }