alc/alu.cpp

   1 /**
   2  * OpenAL cross platform audio library
   3  * Copyright (C) 1999-2007 by authors.
   4  * This library is free software; you can redistribute it and/or
   5  *  modify it under the terms of the GNU Library General Public
   6  *  License as published by the Free Software Foundation; either
   7  *  version 2 of the License, or (at your option) any later version.
   8  *
   9  * This library is distributed in the hope that it will be useful,
  10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  *  Library General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU Library General Public
  15  *  License along with this library; if not, write to the
  16  *  Free Software Foundation, Inc.,
  17  *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18  * Or go to http://www.gnu.org/copyleft/lgpl.html
  19  */
  20
  21 #include "config.h"
  22
  23 #include "alu.h"
  24
  25 #include <algorithm>
  26 #include <array>
  27 #include <atomic>
  28 #include <cassert>
  29 #include <chrono>
  30 #include <climits>
  31 #include <cstdarg>
  32 #include <cstdio>
  33 #include <cstdlib>
  34 #include <functional>
  35 #include <iterator>
  36 #include <limits>
  37 #include <memory>
  38 #include <new>
  39 #include <stdint.h>
  40 #include <utility>
  41
  42 #include "almalloc.h"
  43 #include "alnumbers.h"
  44 #include "alnumeric.h"
  45 #include "alspan.h"
  46 #include "alstring.h"
  47 #include "atomic.h"
  48 #include "core/ambidefs.h"
  49 #include "core/async_event.h"
  50 #include "core/bformatdec.h"
  51 #include "core/bs2b.h"
  52 #include "core/bsinc_defs.h"
  53 #include "core/bsinc_tables.h"
  54 #include "core/bufferline.h"
  55 #include "core/buffer_storage.h"
  56 #include "core/context.h"
  57 #include "core/cpu_caps.h"
  58 #include "core/devformat.h"
  59 #include "core/device.h"
  60 #include "core/effects/base.h"
  61 #include "core/effectslot.h"
  62 #include "core/filters/biquad.h"
  63 #include "core/filters/nfc.h"
  64 #include "core/fpu_ctrl.h"
  65 #include "core/hrtf.h"
  66 #include "core/mastering.h"
  67 #include "core/mixer.h"
  68 #include "core/mixer/defs.h"
  69 #include "core/mixer/hrtfdefs.h"
  70 #include "core/resampler_limits.h"
  71 #include "core/uhjfilter.h"
  72 #include "core/voice.h"
  73 #include "core/voice_change.h"
  74 #include "intrusive_ptr.h"
  75 #include "opthelpers.h"
  76 #include "ringbuffer.h"
  77 #include "strutils.h"
  78 #include "threads.h"
  79 #include "vecmat.h"
  80 #include "vector.h"
  81
  82 struct CTag;
  83 #ifdef HAVE_SSE
  84 struct SSETag;
  85 #endif
  86 #ifdef HAVE_SSE2
  87 struct SSE2Tag;
  88 #endif
  89 #ifdef HAVE_SSE4_1
  90 struct SSE4Tag;
  91 #endif
  92 #ifdef HAVE_NEON
  93 struct NEONTag;
  94 #endif
  95 struct PointTag;
  96 struct LerpTag;
  97 struct CubicTag;
  98 struct BSincTag;
  99 struct FastBSincTag;
 100
 101
 102 static_assert(!(MaxResamplerPadding&1), "MaxResamplerPadding is not a multiple of two");
 103
 104
 105 namespace {
 106
 107 using uint = unsigned int;
 108
 109 constexpr uint MaxPitch{10};
 110
 111 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
 112 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
 113     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
 114
 115 using namespace std::placeholders;
 116
 117 float InitConeScale()
 118 {
 119     float ret{1.0f};
 120     if(auto optval = al::getenv("__ALSOFT_HALF_ANGLE_CONES"))
 121     {
 122         if(al::strcasecmp(optval->c_str(), "true") == 0
 123             || strtol(optval->c_str(), nullptr, 0) == 1)
 124             ret *= 0.5f;
 125     }
 126     return ret;
 127 }
 128
 129 float InitZScale()
 130 {
 131     float ret{1.0f};
 132     if(auto optval = al::getenv("__ALSOFT_REVERSE_Z"))
 133     {
 134         if(al::strcasecmp(optval->c_str(), "true") == 0
 135             || strtol(optval->c_str(), nullptr, 0) == 1)
 136             ret *= -1.0f;
 137     }
 138     return ret;
 139 }
 140
 141 } // namespace
 142
 143 /* Cone scalar */
 144 const float ConeScale{InitConeScale()};
 145
 146 /* Localized Z scalar for mono sources */
 147 const float ZScale{InitZScale()};
 148
 149 namespace {
 150
 151 struct ChanMap {
 152     Channel channel;
 153     float angle;
 154     float elevation;
 155 };
 156
 157 using HrtfDirectMixerFunc = void(*)(const FloatBufferSpan LeftOut, const FloatBufferSpan RightOut,
 158     const al::span<const FloatBufferLine> InSamples, float2 *AccumSamples, float *TempBuf,
 159     HrtfChannelState *ChanState, const size_t IrSize, const size_t BufferSize);
 160
 161 HrtfDirectMixerFunc MixDirectHrtf{MixDirectHrtf_<CTag>};
 162
 163 inline HrtfDirectMixerFunc SelectHrtfMixer(void)
 164 {
 165 #ifdef HAVE_NEON
 166     if((CPUCapFlags&CPU_CAP_NEON))
 167         return MixDirectHrtf_<NEONTag>;
 168 #endif
 169 #ifdef HAVE_SSE
 170     if((CPUCapFlags&CPU_CAP_SSE))
 171         return MixDirectHrtf_<SSETag>;
 172 #endif
 173
 174     return MixDirectHrtf_<CTag>;
 175 }
 176
 177
 178 inline void BsincPrepare(const uint increment, BsincState *state, const BSincTable *table)
 179 {
 180     size_t si{BSincScaleCount - 1};
 181     float sf{0.0f};
 182
 183     if(increment > MixerFracOne)
 184     {
 185         sf = MixerFracOne/static_cast<float>(increment) - table->scaleBase;
 186         sf = maxf(0.0f, BSincScaleCount*sf*table->scaleRange - 1.0f);
 187         si = float2uint(sf);
 188         /* The interpolation factor is fit to this diagonally-symmetric curve
 189          * to reduce the transition ripple caused by interpolating different
 190          * scales of the sinc function.
 191          */
 192         sf = 1.0f - std::cos(std::asin(sf - static_cast<float>(si)));
 193     }
 194
 195     state->sf = sf;
 196     state->m = table->m[si];
 197     state->l = (state->m/2) - 1;
 198     state->filter = table->Tab + table->filterOffset[si];
 199 }
 200
 201 inline ResamplerFunc SelectResampler(Resampler resampler, uint increment)
 202 {
 203     switch(resampler)
 204     {
 205     case Resampler::Point:
 206         return Resample_<PointTag,CTag>;
 207     case Resampler::Linear:
 208 #ifdef HAVE_NEON
 209         if((CPUCapFlags&CPU_CAP_NEON))
 210             return Resample_<LerpTag,NEONTag>;
 211 #endif
 212 #ifdef HAVE_SSE4_1
 213         if((CPUCapFlags&CPU_CAP_SSE4_1))
 214             return Resample_<LerpTag,SSE4Tag>;
 215 #endif
 216 #ifdef HAVE_SSE2
 217         if((CPUCapFlags&CPU_CAP_SSE2))
 218             return Resample_<LerpTag,SSE2Tag>;
 219 #endif
 220         return Resample_<LerpTag,CTag>;
 221     case Resampler::Cubic:
 222         return Resample_<CubicTag,CTag>;
 223     case Resampler::BSinc12:
 224     case Resampler::BSinc24:
 225         if(increment <= MixerFracOne)
 226         {
 227             /* fall-through */
 228         case Resampler::FastBSinc12:
 229         case Resampler::FastBSinc24:
 230 #ifdef HAVE_NEON
 231             if((CPUCapFlags&CPU_CAP_NEON))
 232                 return Resample_<FastBSincTag,NEONTag>;
 233 #endif
 234 #ifdef HAVE_SSE
 235             if((CPUCapFlags&CPU_CAP_SSE))
 236                 return Resample_<FastBSincTag,SSETag>;
 237 #endif
 238             return Resample_<FastBSincTag,CTag>;
 239         }
 240 #ifdef HAVE_NEON
 241         if((CPUCapFlags&CPU_CAP_NEON))
 242             return Resample_<BSincTag,NEONTag>;
 243 #endif
 244 #ifdef HAVE_SSE
 245         if((CPUCapFlags&CPU_CAP_SSE))
 246             return Resample_<BSincTag,SSETag>;
 247 #endif
 248         return Resample_<BSincTag,CTag>;
 249     }
 250
 251     return Resample_<PointTag,CTag>;
 252 }
 253
 254 } // namespace
 255
 256 void aluInit(void)
 257 {
 258     MixDirectHrtf = SelectHrtfMixer();
 259 }
 260
 261
 262 ResamplerFunc PrepareResampler(Resampler resampler, uint increment, InterpState *state)
 263 {
 264     switch(resampler)
 265     {
 266     case Resampler::Point:
 267     case Resampler::Linear:
 268     case Resampler::Cubic:
 269         break;
 270     case Resampler::FastBSinc12:
 271     case Resampler::BSinc12:
 272         BsincPrepare(increment, &state->bsinc, &bsinc12);
 273         break;
 274     case Resampler::FastBSinc24:
 275     case Resampler::BSinc24:
 276         BsincPrepare(increment, &state->bsinc, &bsinc24);
 277         break;
 278     }
 279     return SelectResampler(resampler, increment);
 280 }
 281
 282
 283 void DeviceBase::ProcessHrtf(const size_t SamplesToDo)
 284 {
 285     /* HRTF is stereo output only. */
 286     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 287     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 288
 289     MixDirectHrtf(RealOut.Buffer[lidx], RealOut.Buffer[ridx], Dry.Buffer, HrtfAccumData,
 290         mHrtfState->mTemp.data(), mHrtfState->mChannels.data(), mHrtfState->mIrSize, SamplesToDo);
 291 }
 292
 293 void DeviceBase::ProcessAmbiDec(const size_t SamplesToDo)
 294 {
 295     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 296 }
 297
 298 void DeviceBase::ProcessAmbiDecStablized(const size_t SamplesToDo)
 299 {
 300     /* Decode with front image stablization. */
 301     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 302     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 303     const uint cidx{RealOut.ChannelIndex[FrontCenter]};
 304
 305     AmbiDecoder->processStablize(RealOut.Buffer, Dry.Buffer.data(), lidx, ridx, cidx,
 306         SamplesToDo);
 307 }
 308
 309 void DeviceBase::ProcessUhj(const size_t SamplesToDo)
 310 {
 311     /* UHJ is stereo output only. */
 312     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 313     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 314
 315     /* Encode to stereo-compatible 2-channel UHJ output. */
 316     mUhjEncoder->encode(RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 317         Dry.Buffer.data(), SamplesToDo);
 318 }
 319
 320 void DeviceBase::ProcessBs2b(const size_t SamplesToDo)
 321 {
 322     /* First, decode the ambisonic mix to the "real" output. */
 323     AmbiDecoder->process(RealOut.Buffer, Dry.Buffer.data(), SamplesToDo);
 324
 325     /* BS2B is stereo output only. */
 326     const uint lidx{RealOut.ChannelIndex[FrontLeft]};
 327     const uint ridx{RealOut.ChannelIndex[FrontRight]};
 328
 329     /* Now apply the BS2B binaural/crossfeed filter. */
 330     bs2b_cross_feed(Bs2b.get(), RealOut.Buffer[lidx].data(), RealOut.Buffer[ridx].data(),
 331         SamplesToDo);
 332 }
 333
 334
 335 namespace {
 336
 337 /* This RNG method was created based on the math found in opusdec. It's quick,
 338  * and starting with a seed value of 22222, is suitable for generating
 339  * whitenoise.
 340  */
 341 inline uint dither_rng(uint *seed) noexcept
 342 {
 343     *seed = (*seed * 96314165) + 907633515;
 344     return *seed;
 345 }
 346
 347
 348 inline auto& GetAmbiScales(AmbiScaling scaletype) noexcept
 349 {
 350     switch(scaletype)
 351     {
 352     case AmbiScaling::FuMa: return AmbiScale::FromFuMa();
 353     case AmbiScaling::SN3D: return AmbiScale::FromSN3D();
 354     case AmbiScaling::UHJ: return AmbiScale::FromUHJ();
 355     case AmbiScaling::N3D: break;
 356     }
 357     return AmbiScale::FromN3D();
 358 }
 359
 360 inline auto& GetAmbiLayout(AmbiLayout layouttype) noexcept
 361 {
 362     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa();
 363     return AmbiIndex::FromACN();
 364 }
 365
 366 inline auto& GetAmbi2DLayout(AmbiLayout layouttype) noexcept
 367 {
 368     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D();
 369     return AmbiIndex::FromACN2D();
 370 }
 371
 372
 373 bool CalcContextParams(ContextBase *ctx)
 374 {
 375     ContextProps *props{ctx->mParams.ContextUpdate.exchange(nullptr, std::memory_order_acq_rel)};
 376     if(!props) return false;
 377
 378     const alu::Vector pos{props->Position[0], props->Position[1], props->Position[2], 1.0f};
 379     ctx->mParams.Position = pos;
 380
 381     /* AT then UP */
 382     alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 383     N.normalize();
 384     alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 385     V.normalize();
 386     /* Build and normalize right-vector */
 387     alu::Vector U{N.cross_product(V)};
 388     U.normalize();
 389
 390     const alu::Matrix rot{
 391         U[0], V[0], -N[0], 0.0,
 392         U[1], V[1], -N[1], 0.0,
 393         U[2], V[2], -N[2], 0.0,
 394          0.0,  0.0,   0.0, 1.0};
 395     const alu::Vector vel{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0};
 396
 397     ctx->mParams.Matrix = rot;
 398     ctx->mParams.Velocity = rot * vel;
 399
 400     ctx->mParams.Gain = props->Gain * ctx->mGainBoost;
 401     ctx->mParams.MetersPerUnit = props->MetersPerUnit;
 402     ctx->mParams.AirAbsorptionGainHF = props->AirAbsorptionGainHF;
 403
 404     ctx->mParams.DopplerFactor = props->DopplerFactor;
 405     ctx->mParams.SpeedOfSound = props->SpeedOfSound * props->DopplerVelocity;
 406
 407     ctx->mParams.SourceDistanceModel = props->SourceDistanceModel;
 408     ctx->mParams.mDistanceModel = props->mDistanceModel;
 409
 410     AtomicReplaceHead(ctx->mFreeContextProps, props);
 411     return true;
 412 }
 413
 414 bool CalcEffectSlotParams(EffectSlot *slot, EffectSlot **sorted_slots, ContextBase *context)
 415 {
 416     EffectSlotProps *props{slot->Update.exchange(nullptr, std::memory_order_acq_rel)};
 417     if(!props) return false;
 418
 419     /* If the effect slot target changed, clear the first sorted entry to force
 420      * a re-sort.
 421      */
 422     if(slot->Target != props->Target)
 423         *sorted_slots = nullptr;
 424     slot->Gain = props->Gain;
 425     slot->AuxSendAuto = props->AuxSendAuto;
 426     slot->Target = props->Target;
 427     slot->EffectType = props->Type;
 428     slot->mEffectProps = props->Props;
 429     if(props->Type == EffectSlotType::Reverb || props->Type == EffectSlotType::EAXReverb)
 430     {
 431         slot->RoomRolloff = props->Props.Reverb.RoomRolloffFactor;
 432         slot->DecayTime = props->Props.Reverb.DecayTime;
 433         slot->DecayLFRatio = props->Props.Reverb.DecayLFRatio;
 434         slot->DecayHFRatio = props->Props.Reverb.DecayHFRatio;
 435         slot->DecayHFLimit = props->Props.Reverb.DecayHFLimit;
 436         slot->AirAbsorptionGainHF = props->Props.Reverb.AirAbsorptionGainHF;
 437     }
 438     else
 439     {
 440         slot->RoomRolloff = 0.0f;
 441         slot->DecayTime = 0.0f;
 442         slot->DecayLFRatio = 0.0f;
 443         slot->DecayHFRatio = 0.0f;
 444         slot->DecayHFLimit = false;
 445         slot->AirAbsorptionGainHF = 1.0f;
 446     }
 447
 448     EffectState *state{props->State.release()};
 449     EffectState *oldstate{slot->mEffectState};
 450     slot->mEffectState = state;
 451
 452     /* Only release the old state if it won't get deleted, since we can't be
 453      * deleting/freeing anything in the mixer.
 454      */
 455     if(!oldstate->releaseIfNoDelete())
 456     {
 457         /* Otherwise, if it would be deleted send it off with a release event. */
 458         RingBuffer *ring{context->mAsyncEvents.get()};
 459         auto evt_vec = ring->getWriteVector();
 460         if LIKELY(evt_vec.first.len > 0)
 461         {
 462             AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 463                 AsyncEvent::ReleaseEffectState)};
 464             evt->u.mEffectState = oldstate;
 465             ring->writeAdvance(1);
 466         }
 467         else
 468         {
 469             /* If writing the event failed, the queue was probably full. Store
 470              * the old state in the property object where it can eventually be
 471              * cleaned up sometime later (not ideal, but better than blocking
 472              * or leaking).
 473              */
 474             props->State.reset(oldstate);
 475         }
 476     }
 477
 478     AtomicReplaceHead(context->mFreeEffectslotProps, props);
 479
 480     EffectTarget output;
 481     if(EffectSlot *target{slot->Target})
 482         output = EffectTarget{&target->Wet, nullptr};
 483     else
 484     {
 485         DeviceBase *device{context->mDevice};
 486         output = EffectTarget{&device->Dry, &device->RealOut};
 487     }
 488     state->update(context, slot, &slot->mEffectProps, output);
 489     return true;
 490 }
 491
 492
 493 /* Scales the given azimuth toward the side (+/- pi/2 radians) for positions in
 494  * front.
 495  */
 496 inline float ScaleAzimuthFront(float azimuth, float scale)
 497 {
 498     const float abs_azi{std::fabs(azimuth)};
 499     if(!(abs_azi >= al::numbers::pi_v<float>*0.5f))
 500         return std::copysign(minf(abs_azi*scale, al::numbers::pi_v<float>*0.5f), azimuth);
 501     return azimuth;
 502 }
 503
 504 /* Wraps the given value in radians to stay between [-pi,+pi] */
 505 inline float WrapRadians(float r)
 506 {
 507     static constexpr float Pi{al::numbers::pi_v<float>};
 508     static constexpr float Pi2{Pi*2.0f};
 509     if(r >  Pi) return std::fmod(Pi+r, Pi2) - Pi;
 510     if(r < -Pi) return Pi - std::fmod(Pi-r, Pi2);
 511     return r;
 512 }
 513
 514 /* Begin ambisonic rotation helpers.
 515  *
 516  * Rotating first-order B-Format just needs a straight-forward X/Y/Z rotation
 517  * matrix. Higher orders, however, are more complicated. The method implemented
 518  * here is a recursive algorithm (the rotation for first-order is used to help
 519  * generate the second-order rotation, which helps generate the third-order
 520  * rotation, etc).
 521  *
 522  * Adapted from
 523  * <https://github.com/polarch/Spherical-Harmonic-Transform/blob/master/getSHrotMtx.m>,
 524  * provided under the BSD 3-Clause license.
 525  *
 526  * Copyright (c) 2015, Archontis Politis
 527  * Copyright (c) 2019, Christopher Robinson
 528  *
 529  * The u, v, and w coefficients used for generating higher-order rotations are
 530  * precomputed since they're constant. The second-order coefficients are
 531  * followed by the third-order coefficients, etc.
 532  */
 533 struct RotatorCoeffs {
 534     float u, v, w;
 535
 536     template<size_t N0, size_t N1>
 537     static std::array<RotatorCoeffs,N0+N1> ConcatArrays(const std::array<RotatorCoeffs,N0> &lhs,
 538         const std::array<RotatorCoeffs,N1> &rhs)
 539     {
 540         std::array<RotatorCoeffs,N0+N1> ret;
 541         auto iter = std::copy(lhs.cbegin(), lhs.cend(), ret.begin());
 542         std::copy(rhs.cbegin(), rhs.cend(), iter);
 543         return ret;
 544     }
 545
 546     template<int l, int num_elems=l*2+1>
 547     static std::array<RotatorCoeffs,num_elems*num_elems> GenCoeffs()
 548     {
 549         std::array<RotatorCoeffs,num_elems*num_elems> ret{};
 550         auto coeffs = ret.begin();
 551
 552         for(int m{-l};m <= l;++m)
 553         {
 554             for(int n{-l};n <= l;++n)
 555             {
 556                 // compute u,v,w terms of Eq.8.1 (Table I)
 557                 const bool d{m == 0}; // the delta function d_m0
 558                 const float denom{static_cast<float>((std::abs(n) == l) ?
 559                     (2*l) * (2*l - 1) : (l*l - n*n))};
 560
 561                 const int abs_m{std::abs(m)};
 562                 coeffs->u = std::sqrt(static_cast<float>(l*l - m*m)/denom);
 563                 coeffs->v = std::sqrt(static_cast<float>(l+abs_m-1) * static_cast<float>(l+abs_m) /
 564                     denom) * (1.0f+d) * (1.0f - 2.0f*d) * 0.5f;
 565                 coeffs->w = std::sqrt(static_cast<float>(l-abs_m-1) * static_cast<float>(l-abs_m) /
 566                     denom) * (1.0f-d) * -0.5f;
 567                 ++coeffs;
 568             }
 569         }
 570
 571         return ret;
 572     }
 573 };
 574 const auto RotatorCoeffArray = RotatorCoeffs::ConcatArrays(RotatorCoeffs::GenCoeffs<2>(),
 575     RotatorCoeffs::GenCoeffs<3>());
 576
 577 /**
 578  * Given the matrix, pre-filled with the (zeroth- and) first-order rotation
 579  * coefficients, this fills in the coefficients for the higher orders up to and
 580  * including the given order. The matrix is in ACN layout.
 581  */
 582 void AmbiRotator(std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &matrix,
 583     const int order)
 584 {
 585     /* Don't do anything for < 2nd order. */
 586     if(order < 2) return;
 587
 588     auto P = [](const int i, const int l, const int a, const int n, const size_t last_band,
 589         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 590     {
 591         const float ri1{ R[static_cast<uint>(i+2)][ 1+2]};
 592         const float rim1{R[static_cast<uint>(i+2)][-1+2]};
 593         const float ri0{ R[static_cast<uint>(i+2)][ 0+2]};
 594
 595         auto vec = R[static_cast<uint>(a+l-1) + last_band].cbegin() + last_band;
 596         if(n == -l)
 597             return ri1*vec[0] + rim1*vec[static_cast<uint>(l-1)*size_t{2}];
 598         if(n == l)
 599             return ri1*vec[static_cast<uint>(l-1)*size_t{2}] - rim1*vec[0];
 600         return ri0*vec[static_cast<uint>(n+l-1)];
 601     };
 602
 603     auto U = [P](const int l, const int m, const int n, const size_t last_band,
 604         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 605     {
 606         return P(0, l, m, n, last_band, R);
 607     };
 608     auto V = [P](const int l, const int m, const int n, const size_t last_band,
 609         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 610     {
 611         using namespace al::numbers;
 612         if(m > 0)
 613         {
 614             const bool d{m == 1};
 615             const float p0{P( 1, l,  m-1, n, last_band, R)};
 616             const float p1{P(-1, l, -m+1, n, last_band, R)};
 617             return d ? p0*sqrt2_v<float> : (p0 - p1);
 618         }
 619         const bool d{m == -1};
 620         const float p0{P( 1, l,  m+1, n, last_band, R)};
 621         const float p1{P(-1, l, -m-1, n, last_band, R)};
 622         return d ? p1*sqrt2_v<float> : (p0 + p1);
 623     };
 624     auto W = [P](const int l, const int m, const int n, const size_t last_band,
 625         const std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> &R)
 626     {
 627         assert(m != 0);
 628         if(m > 0)
 629         {
 630             const float p0{P( 1, l,  m+1, n, last_band, R)};
 631             const float p1{P(-1, l, -m-1, n, last_band, R)};
 632             return p0 + p1;
 633         }
 634         const float p0{P( 1, l,  m-1, n, last_band, R)};
 635         const float p1{P(-1, l, -m+1, n, last_band, R)};
 636         return p0 - p1;
 637     };
 638
 639     // compute rotation matrix of each subsequent band recursively
 640     auto coeffs = RotatorCoeffArray.cbegin();
 641     size_t band_idx{4}, last_band{1};
 642     for(int l{2};l <= order;++l)
 643     {
 644         size_t y{band_idx};
 645         for(int m{-l};m <= l;++m,++y)
 646         {
 647             size_t x{band_idx};
 648             for(int n{-l};n <= l;++n,++x)
 649             {
 650                 float r{0.0f};
 651
 652                 // computes Eq.8.1
 653                 const float u{coeffs->u};
 654                 if(u != 0.0f) r += u * U(l, m, n, last_band, matrix);
 655                 const float v{coeffs->v};
 656                 if(v != 0.0f) r += v * V(l, m, n, last_band, matrix);
 657                 const float w{coeffs->w};
 658                 if(w != 0.0f) r += w * W(l, m, n, last_band, matrix);
 659
 660                 matrix[y][x] = r;
 661                 ++coeffs;
 662             }
 663         }
 664         last_band = band_idx;
 665         band_idx += static_cast<uint>(l)*size_t{2} + 1;
 666     }
 667 }
 668 /* End ambisonic rotation helpers. */
 669
 670
 671 constexpr float Deg2Rad(float x) noexcept
 672 { return static_cast<float>(al::numbers::pi / 180.0 * x); }
 673
 674 struct GainTriplet { float Base, HF, LF; };
 675
 676 void CalcPanningAndFilters(Voice *voice, const float xpos, const float ypos, const float zpos,
 677     const float Distance, const float Spread, const GainTriplet &DryGain,
 678     const al::span<const GainTriplet,MAX_SENDS> WetGain, EffectSlot *(&SendSlots)[MAX_SENDS],
 679     const VoiceProps *props, const ContextParams &Context, const DeviceBase *Device)
 680 {
 681     static constexpr ChanMap MonoMap[1]{
 682         { FrontCenter, 0.0f, 0.0f }
 683     }, RearMap[2]{
 684         { BackLeft,  Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 685         { BackRight, Deg2Rad( 150.0f), Deg2Rad(0.0f) }
 686     }, QuadMap[4]{
 687         { FrontLeft,  Deg2Rad( -45.0f), Deg2Rad(0.0f) },
 688         { FrontRight, Deg2Rad(  45.0f), Deg2Rad(0.0f) },
 689         { BackLeft,   Deg2Rad(-135.0f), Deg2Rad(0.0f) },
 690         { BackRight,  Deg2Rad( 135.0f), Deg2Rad(0.0f) }
 691     }, X51Map[6]{
 692         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 693         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 694         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 695         { LFE, 0.0f, 0.0f },
 696         { SideLeft,    Deg2Rad(-110.0f), Deg2Rad(0.0f) },
 697         { SideRight,   Deg2Rad( 110.0f), Deg2Rad(0.0f) }
 698     }, X61Map[7]{
 699         { FrontLeft,   Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 700         { FrontRight,  Deg2Rad( 30.0f), Deg2Rad(0.0f) },
 701         { FrontCenter, Deg2Rad(  0.0f), Deg2Rad(0.0f) },
 702         { LFE, 0.0f, 0.0f },
 703         { BackCenter,  Deg2Rad(180.0f), Deg2Rad(0.0f) },
 704         { SideLeft,    Deg2Rad(-90.0f), Deg2Rad(0.0f) },
 705         { SideRight,   Deg2Rad( 90.0f), Deg2Rad(0.0f) }
 706     }, X71Map[8]{
 707         { FrontLeft,   Deg2Rad( -30.0f), Deg2Rad(0.0f) },
 708         { FrontRight,  Deg2Rad(  30.0f), Deg2Rad(0.0f) },
 709         { FrontCenter, Deg2Rad(   0.0f), Deg2Rad(0.0f) },
 710         { LFE, 0.0f, 0.0f },
 711         { BackLeft,    Deg2Rad(-150.0f), Deg2Rad(0.0f) },
 712         { BackRight,   Deg2Rad( 150.0f), Deg2Rad(0.0f) },
 713         { SideLeft,    Deg2Rad( -90.0f), Deg2Rad(0.0f) },
 714         { SideRight,   Deg2Rad(  90.0f), Deg2Rad(0.0f) }
 715     };
 716
 717     ChanMap StereoMap[2]{
 718         { FrontLeft,  Deg2Rad(-30.0f), Deg2Rad(0.0f) },
 719         { FrontRight, Deg2Rad( 30.0f), Deg2Rad(0.0f) }
 720     };
 721
 722     const auto Frequency = static_cast<float>(Device->Frequency);
 723     const uint NumSends{Device->NumAuxSends};
 724
 725     const size_t num_channels{voice->mChans.size()};
 726     ASSUME(num_channels > 0);
 727
 728     for(auto &chandata : voice->mChans)
 729     {
 730         chandata.mDryParams.Hrtf.Target = HrtfFilter{};
 731         chandata.mDryParams.Gains.Target.fill(0.0f);
 732         std::for_each(chandata.mWetParams.begin(), chandata.mWetParams.begin()+NumSends,
 733             [](SendParams &params) -> void { params.Gains.Target.fill(0.0f); });
 734     }
 735
 736     DirectMode DirectChannels{props->DirectChannels};
 737     const ChanMap *chans{nullptr};
 738     switch(voice->mFmtChannels)
 739     {
 740     case FmtMono:
 741         chans = MonoMap;
 742         /* Mono buffers are never played direct. */
 743         DirectChannels = DirectMode::Off;
 744         break;
 745
 746     case FmtStereo:
 747         if(DirectChannels == DirectMode::Off)
 748         {
 749             /* Convert counter-clockwise to clock-wise, and wrap between
 750              * [-pi,+pi].
 751              */
 752             StereoMap[0].angle = WrapRadians(-props->StereoPan[0]);
 753             StereoMap[1].angle = WrapRadians(-props->StereoPan[1]);
 754         }
 755         chans = StereoMap;
 756         break;
 757
 758     case FmtRear: chans = RearMap; break;
 759     case FmtQuad: chans = QuadMap; break;
 760     case FmtX51: chans = X51Map; break;
 761     case FmtX61: chans = X61Map; break;
 762     case FmtX71: chans = X71Map; break;
 763
 764     case FmtBFormat2D:
 765     case FmtBFormat3D:
 766     case FmtUHJ2:
 767     case FmtUHJ3:
 768     case FmtUHJ4:
 769     case FmtSuperStereo:
 770         DirectChannels = DirectMode::Off;
 771         break;
 772     }
 773
 774     voice->mFlags.reset(VoiceHasHrtf).reset(VoiceHasNfc);
 775     if(auto *decoder{voice->mDecoder.get()})
 776         decoder->mWidthControl = props->EnhWidth;
 777
 778     if(IsAmbisonic(voice->mFmtChannels))
 779     {
 780         /* Special handling for B-Format and UHJ sources. */
 781
 782         if(Device->AvgSpeakerDist > 0.0f && voice->mFmtChannels != FmtUHJ2
 783             && voice->mFmtChannels != FmtSuperStereo)
 784         {
 785             if(!(Distance > std::numeric_limits<float>::epsilon()))
 786             {
 787                 /* NOTE: The NFCtrlFilters were created with a w0 of 0, which
 788                  * is what we want for FOA input. The first channel may have
 789                  * been previously re-adjusted if panned, so reset it.
 790                  */
 791                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(0.0f);
 792             }
 793             else
 794             {
 795                 /* Clamp the distance for really close sources, to prevent
 796                  * excessive bass.
 797                  */
 798                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
 799                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
 800
 801                 /* Only need to adjust the first channel of a B-Format source. */
 802                 voice->mChans[0].mDryParams.NFCtrlFilter.adjust(w0);
 803             }
 804
 805             voice->mFlags.set(VoiceHasNfc);
 806         }
 807
 808         /* Panning a B-Format sound toward some direction is easy. Just pan the
 809          * first (W) channel as a normal mono sound. The angular spread is used
 810          * as a directional scalar to blend between full coverage and full
 811          * panning.
 812          */
 813         const float coverage{!(Distance > std::numeric_limits<float>::epsilon()) ? 1.0f :
 814             (al::numbers::inv_pi_v<float>/2.0f * Spread)};
 815
 816         auto calc_coeffs = [xpos,ypos,zpos](RenderMode mode)
 817         {
 818             if(mode != RenderMode::Pairwise)
 819                 return CalcDirectionCoeffs({xpos, ypos, zpos}, 0.0f);
 820
 821             /* Clamp Y, in case rounding errors caused it to end up outside
 822              * of -1...+1.
 823              */
 824             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 825             /* Negate Z for right-handed coords with -Z in front. */
 826             const float az{std::atan2(xpos, -zpos)};
 827
 828             /* A scalar of 1.5 for plain stereo results in +/-60 degrees
 829              * being moved to +/-90 degrees for direct right and left
 830              * speaker responses.
 831              */
 832             return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, 0.0f);
 833         };
 834         auto coeffs = calc_coeffs(Device->mRenderMode);
 835         std::transform(coeffs.begin()+1, coeffs.end(), coeffs.begin()+1,
 836             std::bind(std::multiplies<float>{}, _1, 1.0f-coverage));
 837
 838         /* NOTE: W needs to be scaled according to channel scaling. */
 839         auto&& scales = GetAmbiScales(voice->mAmbiScaling);
 840         ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base*scales[0],
 841             voice->mChans[0].mDryParams.Gains.Target);
 842         for(uint i{0};i < NumSends;i++)
 843         {
 844             if(const EffectSlot *Slot{SendSlots[i]})
 845                 ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base*scales[0],
 846                     voice->mChans[0].mWetParams[i].Gains.Target);
 847         }
 848
 849         if(coverage > 0.0f)
 850         {
 851             /* Local B-Format sources have their XYZ channels rotated according
 852              * to the orientation.
 853              */
 854             /* AT then UP */
 855             alu::Vector N{props->OrientAt[0], props->OrientAt[1], props->OrientAt[2], 0.0f};
 856             N.normalize();
 857             alu::Vector V{props->OrientUp[0], props->OrientUp[1], props->OrientUp[2], 0.0f};
 858             V.normalize();
 859             if(!props->HeadRelative)
 860             {
 861                 N = Context.Matrix * N;
 862                 V = Context.Matrix * V;
 863             }
 864             /* Build and normalize right-vector */
 865             alu::Vector U{N.cross_product(V)};
 866             U.normalize();
 867
 868             /* Build a rotation matrix. Manually fill the zeroth- and first-
 869              * order elements, then construct the rotation for the higher
 870              * orders.
 871              */
 872             std::array<std::array<float,MaxAmbiChannels>,MaxAmbiChannels> shrot{};
 873             shrot[0][0] = 1.0f;
 874             shrot[1][1] =  U[0]; shrot[1][2] = -V[0]; shrot[1][3] = -N[0];
 875             shrot[2][1] = -U[1]; shrot[2][2] =  V[1]; shrot[2][3] =  N[1];
 876             shrot[3][1] =  U[2]; shrot[3][2] = -V[2]; shrot[3][3] = -N[2];
 877             AmbiRotator(shrot, static_cast<int>(minu(voice->mAmbiOrder, Device->mAmbiOrder)));
 878
 879             /* Convert the rotation matrix for input ordering and scaling, and
 880              * whether input is 2D or 3D.
 881              */
 882             const uint8_t *index_map{Is2DAmbisonic(voice->mFmtChannels) ?
 883                 GetAmbi2DLayout(voice->mAmbiLayout).data() :
 884                 GetAmbiLayout(voice->mAmbiLayout).data()};
 885
 886             static const uint8_t ChansPerOrder[MaxAmbiOrder+1]{1, 3, 5, 7,};
 887             static const uint8_t OrderOffset[MaxAmbiOrder+1]{0, 1, 4, 9,};
 888             for(size_t c{1};c < num_channels;c++)
 889             {
 890                 const size_t acn{index_map[c]};
 891                 const size_t order{AmbiIndex::OrderFromChannel()[acn]};
 892                 const size_t tocopy{ChansPerOrder[order]};
 893                 const size_t offset{OrderOffset[order]};
 894                 const float scale{scales[acn] * coverage};
 895                 auto in = shrot.cbegin() + offset;
 896
 897                 coeffs = std::array<float,MaxAmbiChannels>{};
 898                 for(size_t x{0};x < tocopy;++x)
 899                     coeffs[offset+x] = in[x][acn] * scale;
 900
 901                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
 902                     voice->mChans[c].mDryParams.Gains.Target);
 903
 904                 for(uint i{0};i < NumSends;i++)
 905                 {
 906                     if(const EffectSlot *Slot{SendSlots[i]})
 907                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 908                             voice->mChans[c].mWetParams[i].Gains.Target);
 909                 }
 910             }
 911         }
 912     }
 913     else if(DirectChannels != DirectMode::Off && !Device->RealOut.RemixMap.empty())
 914     {
 915         /* Direct source channels always play local. Skip the virtual channels
 916          * and write inputs to the matching real outputs.
 917          */
 918         voice->mDirect.Buffer = Device->RealOut.Buffer;
 919
 920         for(size_t c{0};c < num_channels;c++)
 921         {
 922             uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
 923             if(idx != INVALID_CHANNEL_INDEX)
 924                 voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
 925             else if(DirectChannels == DirectMode::RemixMismatch)
 926             {
 927                 auto match_channel = [chans,c](const InputRemixMap &map) noexcept -> bool
 928                 { return chans[c].channel == map.channel; };
 929                 auto remap = std::find_if(Device->RealOut.RemixMap.cbegin(),
 930                     Device->RealOut.RemixMap.cend(), match_channel);
 931                 if(remap != Device->RealOut.RemixMap.cend())
 932                 {
 933                     for(const auto &target : remap->targets)
 934                     {
 935                         idx = GetChannelIdxByName(Device->RealOut, target.channel);
 936                         if(idx != INVALID_CHANNEL_INDEX)
 937                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base *
 938                                 target.mix;
 939                     }
 940                 }
 941             }
 942         }
 943
 944         /* Auxiliary sends still use normal channel panning since they mix to
 945          * B-Format, which can't channel-match.
 946          */
 947         for(size_t c{0};c < num_channels;c++)
 948         {
 949             const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, 0.0f);
 950
 951             for(uint i{0};i < NumSends;i++)
 952             {
 953                 if(const EffectSlot *Slot{SendSlots[i]})
 954                     ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
 955                         voice->mChans[c].mWetParams[i].Gains.Target);
 956             }
 957         }
 958     }
 959     else if(Device->mRenderMode == RenderMode::Hrtf)
 960     {
 961         /* Full HRTF rendering. Skip the virtual channels and render to the
 962          * real outputs.
 963          */
 964         voice->mDirect.Buffer = Device->RealOut.Buffer;
 965
 966         if(Distance > std::numeric_limits<float>::epsilon())
 967         {
 968             const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
 969             const float az{std::atan2(xpos, -zpos)};
 970
 971             /* Get the HRIR coefficients and delays just once, for the given
 972              * source direction.
 973              */
 974             GetHrtfCoeffs(Device->mHrtf.get(), ev, az, Distance, Spread,
 975                 voice->mChans[0].mDryParams.Hrtf.Target.Coeffs,
 976                 voice->mChans[0].mDryParams.Hrtf.Target.Delay);
 977             voice->mChans[0].mDryParams.Hrtf.Target.Gain = DryGain.Base;
 978
 979             /* Remaining channels use the same results as the first. */
 980             for(size_t c{1};c < num_channels;c++)
 981             {
 982                 /* Skip LFE */
 983                 if(chans[c].channel == LFE) continue;
 984                 voice->mChans[c].mDryParams.Hrtf.Target = voice->mChans[0].mDryParams.Hrtf.Target;
 985             }
 986
 987             /* Calculate the directional coefficients once, which apply to all
 988              * input channels of the source sends.
 989              */
 990             const auto coeffs = CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
 991
 992             for(size_t c{0};c < num_channels;c++)
 993             {
 994                 /* Skip LFE */
 995                 if(chans[c].channel == LFE)
 996                     continue;
 997                 for(uint i{0};i < NumSends;i++)
 998                 {
 999                     if(const EffectSlot *Slot{SendSlots[i]})
1000                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1001                             voice->mChans[c].mWetParams[i].Gains.Target);
1002                 }
1003             }
1004         }
1005         else
1006         {
1007             /* Local sources on HRTF play with each channel panned to its
1008              * relative location around the listener, providing "virtual
1009              * speaker" responses.
1010              */
1011             for(size_t c{0};c < num_channels;c++)
1012             {
1013                 /* Skip LFE */
1014                 if(chans[c].channel == LFE)
1015                     continue;
1016
1017                 /* Get the HRIR coefficients and delays for this channel
1018                  * position.
1019                  */
1020                 GetHrtfCoeffs(Device->mHrtf.get(), chans[c].elevation, chans[c].angle,
1021                     std::numeric_limits<float>::infinity(), Spread,
1022                     voice->mChans[c].mDryParams.Hrtf.Target.Coeffs,
1023                     voice->mChans[c].mDryParams.Hrtf.Target.Delay);
1024                 voice->mChans[c].mDryParams.Hrtf.Target.Gain = DryGain.Base;
1025
1026                 /* Normal panning for auxiliary sends. */
1027                 const auto coeffs = CalcAngleCoeffs(chans[c].angle, chans[c].elevation, Spread);
1028
1029                 for(uint i{0};i < NumSends;i++)
1030                 {
1031                     if(const EffectSlot *Slot{SendSlots[i]})
1032                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1033                             voice->mChans[c].mWetParams[i].Gains.Target);
1034                 }
1035             }
1036         }
1037
1038         voice->mFlags.set(VoiceHasHrtf);
1039     }
1040     else
1041     {
1042         /* Non-HRTF rendering. Use normal panning to the output. */
1043
1044         if(Distance > std::numeric_limits<float>::epsilon())
1045         {
1046             /* Calculate NFC filter coefficient if needed. */
1047             if(Device->AvgSpeakerDist > 0.0f)
1048             {
1049                 /* Clamp the distance for really close sources, to prevent
1050                  * excessive bass.
1051                  */
1052                 const float mdist{maxf(Distance, Device->AvgSpeakerDist/4.0f)};
1053                 const float w0{SpeedOfSoundMetersPerSec / (mdist * Frequency)};
1054
1055                 /* Adjust NFC filters. */
1056                 for(size_t c{0};c < num_channels;c++)
1057                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1058
1059                 voice->mFlags.set(VoiceHasNfc);
1060             }
1061
1062             /* Calculate the directional coefficients once, which apply to all
1063              * input channels.
1064              */
1065             auto calc_coeffs = [xpos,ypos,zpos,Spread](RenderMode mode)
1066             {
1067                 if(mode != RenderMode::Pairwise)
1068                     return CalcDirectionCoeffs({xpos, ypos, zpos}, Spread);
1069                 const float ev{std::asin(clampf(ypos, -1.0f, 1.0f))};
1070                 const float az{std::atan2(xpos, -zpos)};
1071                 return CalcAngleCoeffs(ScaleAzimuthFront(az, 1.5f), ev, Spread);
1072             };
1073             const auto coeffs = calc_coeffs(Device->mRenderMode);
1074
1075             for(size_t c{0};c < num_channels;c++)
1076             {
1077                 /* Special-case LFE */
1078                 if(chans[c].channel == LFE)
1079                 {
1080                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1081                     {
1082                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1083                         if(idx != INVALID_CHANNEL_INDEX)
1084                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1085                     }
1086                     continue;
1087                 }
1088
1089                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1090                     voice->mChans[c].mDryParams.Gains.Target);
1091                 for(uint i{0};i < NumSends;i++)
1092                 {
1093                     if(const EffectSlot *Slot{SendSlots[i]})
1094                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1095                             voice->mChans[c].mWetParams[i].Gains.Target);
1096                 }
1097             }
1098         }
1099         else
1100         {
1101             if(Device->AvgSpeakerDist > 0.0f)
1102             {
1103                 /* If the source distance is 0, simulate a plane-wave by using
1104                  * infinite distance, which results in a w0 of 0.
1105                  */
1106                 static constexpr float w0{0.0f};
1107                 for(size_t c{0};c < num_channels;c++)
1108                     voice->mChans[c].mDryParams.NFCtrlFilter.adjust(w0);
1109
1110                 voice->mFlags.set(VoiceHasNfc);
1111             }
1112
1113             for(size_t c{0};c < num_channels;c++)
1114             {
1115                 /* Special-case LFE */
1116                 if(chans[c].channel == LFE)
1117                 {
1118                     if(Device->Dry.Buffer.data() == Device->RealOut.Buffer.data())
1119                     {
1120                         const uint idx{GetChannelIdxByName(Device->RealOut, chans[c].channel)};
1121                         if(idx != INVALID_CHANNEL_INDEX)
1122                             voice->mChans[c].mDryParams.Gains.Target[idx] = DryGain.Base;
1123                     }
1124                     continue;
1125                 }
1126
1127                 const auto coeffs = CalcAngleCoeffs((Device->mRenderMode == RenderMode::Pairwise)
1128                     ? ScaleAzimuthFront(chans[c].angle, 3.0f) : chans[c].angle,
1129                     chans[c].elevation, Spread);
1130
1131                 ComputePanGains(&Device->Dry, coeffs.data(), DryGain.Base,
1132                     voice->mChans[c].mDryParams.Gains.Target);
1133                 for(uint i{0};i < NumSends;i++)
1134                 {
1135                     if(const EffectSlot *Slot{SendSlots[i]})
1136                         ComputePanGains(&Slot->Wet, coeffs.data(), WetGain[i].Base,
1137                             voice->mChans[c].mWetParams[i].Gains.Target);
1138                 }
1139             }
1140         }
1141     }
1142
1143     {
1144         const float hfNorm{props->Direct.HFReference / Frequency};
1145         const float lfNorm{props->Direct.LFReference / Frequency};
1146
1147         voice->mDirect.FilterType = AF_None;
1148         if(DryGain.HF != 1.0f) voice->mDirect.FilterType |= AF_LowPass;
1149         if(DryGain.LF != 1.0f) voice->mDirect.FilterType |= AF_HighPass;
1150
1151         auto &lowpass = voice->mChans[0].mDryParams.LowPass;
1152         auto &highpass = voice->mChans[0].mDryParams.HighPass;
1153         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, DryGain.HF, 1.0f);
1154         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, DryGain.LF, 1.0f);
1155         for(size_t c{1};c < num_channels;c++)
1156         {
1157             voice->mChans[c].mDryParams.LowPass.copyParamsFrom(lowpass);
1158             voice->mChans[c].mDryParams.HighPass.copyParamsFrom(highpass);
1159         }
1160     }
1161     for(uint i{0};i < NumSends;i++)
1162     {
1163         const float hfNorm{props->Send[i].HFReference / Frequency};
1164         const float lfNorm{props->Send[i].LFReference / Frequency};
1165
1166         voice->mSend[i].FilterType = AF_None;
1167         if(WetGain[i].HF != 1.0f) voice->mSend[i].FilterType |= AF_LowPass;
1168         if(WetGain[i].LF != 1.0f) voice->mSend[i].FilterType |= AF_HighPass;
1169
1170         auto &lowpass = voice->mChans[0].mWetParams[i].LowPass;
1171         auto &highpass = voice->mChans[0].mWetParams[i].HighPass;
1172         lowpass.setParamsFromSlope(BiquadType::HighShelf, hfNorm, WetGain[i].HF, 1.0f);
1173         highpass.setParamsFromSlope(BiquadType::LowShelf, lfNorm, WetGain[i].LF, 1.0f);
1174         for(size_t c{1};c < num_channels;c++)
1175         {
1176             voice->mChans[c].mWetParams[i].LowPass.copyParamsFrom(lowpass);
1177             voice->mChans[c].mWetParams[i].HighPass.copyParamsFrom(highpass);
1178         }
1179     }
1180 }
1181
1182 void CalcNonAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1183 {
1184     const DeviceBase *Device{context->mDevice};
1185     EffectSlot *SendSlots[MAX_SENDS];
1186
1187     voice->mDirect.Buffer = Device->Dry.Buffer;
1188     for(uint i{0};i < Device->NumAuxSends;i++)
1189     {
1190         SendSlots[i] = props->Send[i].Slot;
1191         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1192         {
1193             SendSlots[i] = nullptr;
1194             voice->mSend[i].Buffer = {};
1195         }
1196         else
1197             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1198     }
1199
1200     /* Calculate the stepping value */
1201     const auto Pitch = static_cast<float>(voice->mFrequency) /
1202         static_cast<float>(Device->Frequency) * props->Pitch;
1203     if(Pitch > float{MaxPitch})
1204         voice->mStep = MaxPitch<<MixerFracBits;
1205     else
1206         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1207     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1208
1209     /* Calculate gains */
1210     GainTriplet DryGain;
1211     DryGain.Base  = minf(clampf(props->Gain, props->MinGain, props->MaxGain) * props->Direct.Gain *
1212         context->mParams.Gain, GainMixMax);
1213     DryGain.HF = props->Direct.GainHF;
1214     DryGain.LF = props->Direct.GainLF;
1215     GainTriplet WetGain[MAX_SENDS];
1216     for(uint i{0};i < Device->NumAuxSends;i++)
1217     {
1218         WetGain[i].Base = minf(clampf(props->Gain, props->MinGain, props->MaxGain) *
1219             props->Send[i].Gain * context->mParams.Gain, GainMixMax);
1220         WetGain[i].HF = props->Send[i].GainHF;
1221         WetGain[i].LF = props->Send[i].GainLF;
1222     }
1223
1224     CalcPanningAndFilters(voice, 0.0f, 0.0f, -1.0f, 0.0f, 0.0f, DryGain, WetGain, SendSlots, props,
1225         context->mParams, Device);
1226 }
1227
1228 void CalcAttnSourceParams(Voice *voice, const VoiceProps *props, const ContextBase *context)
1229 {
1230     const DeviceBase *Device{context->mDevice};
1231     const uint NumSends{Device->NumAuxSends};
1232
1233     /* Set mixing buffers and get send parameters. */
1234     voice->mDirect.Buffer = Device->Dry.Buffer;
1235     EffectSlot *SendSlots[MAX_SENDS];
1236     uint UseDryAttnForRoom{0};
1237     for(uint i{0};i < NumSends;i++)
1238     {
1239         SendSlots[i] = props->Send[i].Slot;
1240         if(!SendSlots[i] || SendSlots[i]->EffectType == EffectSlotType::None)
1241             SendSlots[i] = nullptr;
1242         else if(!SendSlots[i]->AuxSendAuto)
1243         {
1244             /* If the slot's auxiliary send auto is off, the data sent to the
1245              * effect slot is the same as the dry path, sans filter effects.
1246              */
1247             UseDryAttnForRoom |= 1u<<i;
1248         }
1249
1250         if(!SendSlots[i])
1251             voice->mSend[i].Buffer = {};
1252         else
1253             voice->mSend[i].Buffer = SendSlots[i]->Wet.Buffer;
1254     }
1255
1256     /* Transform source to listener space (convert to head relative) */
1257     alu::Vector Position{props->Position[0], props->Position[1], props->Position[2], 1.0f};
1258     alu::Vector Velocity{props->Velocity[0], props->Velocity[1], props->Velocity[2], 0.0f};
1259     alu::Vector Direction{props->Direction[0], props->Direction[1], props->Direction[2], 0.0f};
1260     if(!props->HeadRelative)
1261     {
1262         /* Transform source vectors */
1263         Position = context->mParams.Matrix * (Position - context->mParams.Position);
1264         Velocity = context->mParams.Matrix * Velocity;
1265         Direction = context->mParams.Matrix * Direction;
1266     }
1267     else
1268     {
1269         /* Offset the source velocity to be relative of the listener velocity */
1270         Velocity += context->mParams.Velocity;
1271     }
1272
1273     const bool directional{Direction.normalize() > 0.0f};
1274     alu::Vector ToSource{Position[0], Position[1], Position[2], 0.0f};
1275     const float Distance{ToSource.normalize()};
1276
1277     /* Calculate distance attenuation */
1278     float ClampedDist{Distance};
1279     float DryGainBase{props->Gain};
1280     float WetGainBase{props->Gain};
1281
1282     switch(context->mParams.SourceDistanceModel ? props->mDistanceModel
1283         : context->mParams.mDistanceModel)
1284     {
1285         case DistanceModel::InverseClamped:
1286             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1287             if(props->MaxDistance < props->RefDistance) break;
1288             /*fall-through*/
1289         case DistanceModel::Inverse:
1290             if(!(props->RefDistance > 0.0f))
1291                 ClampedDist = props->RefDistance;
1292             else
1293             {
1294                 float dist{lerp(props->RefDistance, ClampedDist, props->RolloffFactor)};
1295                 if(dist > 0.0f) DryGainBase *= props->RefDistance / dist;
1296
1297                 dist = lerp(props->RefDistance, ClampedDist, props->RoomRolloffFactor);
1298                 if(dist > 0.0f) WetGainBase *= props->RefDistance / dist;
1299             }
1300             break;
1301
1302         case DistanceModel::LinearClamped:
1303             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1304             if(props->MaxDistance < props->RefDistance) break;
1305             /*fall-through*/
1306         case DistanceModel::Linear:
1307             if(!(props->MaxDistance != props->RefDistance))
1308                 ClampedDist = props->RefDistance;
1309             else
1310             {
1311                 float attn{(ClampedDist-props->RefDistance) /
1312                     (props->MaxDistance-props->RefDistance) * props->RolloffFactor};
1313                 DryGainBase *= maxf(1.0f - attn, 0.0f);
1314
1315                 attn = (ClampedDist-props->RefDistance) /
1316                     (props->MaxDistance-props->RefDistance) * props->RoomRolloffFactor;
1317                 WetGainBase *= maxf(1.0f - attn, 0.0f);
1318             }
1319             break;
1320
1321         case DistanceModel::ExponentClamped:
1322             ClampedDist = clampf(ClampedDist, props->RefDistance, props->MaxDistance);
1323             if(props->MaxDistance < props->RefDistance) break;
1324             /*fall-through*/
1325         case DistanceModel::Exponent:
1326             if(!(ClampedDist > 0.0f && props->RefDistance > 0.0f))
1327                 ClampedDist = props->RefDistance;
1328             else
1329             {
1330                 const float dist_ratio{ClampedDist/props->RefDistance};
1331                 DryGainBase *= std::pow(dist_ratio, -props->RolloffFactor);
1332                 WetGainBase *= std::pow(dist_ratio, -props->RoomRolloffFactor);
1333             }
1334             break;
1335
1336         case DistanceModel::Disable:
1337             break;
1338     }
1339
1340     /* Calculate directional soundcones */
1341     float ConeHF{1.0f}, WetConeHF{1.0f};
1342     if(directional && props->InnerAngle < 360.0f)
1343     {
1344         static constexpr float Rad2Deg{static_cast<float>(180.0 / al::numbers::pi)};
1345         const float Angle{Rad2Deg*2.0f * std::acos(-Direction.dot_product(ToSource)) * ConeScale};
1346
1347         float ConeGain{1.0f};
1348         if(Angle >= props->OuterAngle)
1349         {
1350             ConeGain = props->OuterGain;
1351             ConeHF = lerp(1.0f, props->OuterGainHF, props->DryGainHFAuto);
1352         }
1353         else if(Angle >= props->InnerAngle)
1354         {
1355             const float scale{(Angle-props->InnerAngle) / (props->OuterAngle-props->InnerAngle)};
1356             ConeGain = lerp(1.0f, props->OuterGain, scale);
1357             ConeHF = lerp(1.0f, props->OuterGainHF, scale * props->DryGainHFAuto);
1358         }
1359
1360         DryGainBase *= ConeGain;
1361         WetGainBase *= lerp(1.0f, ConeGain, props->WetGainAuto);
1362
1363         WetConeHF = lerp(1.0f, ConeHF, props->WetGainHFAuto);
1364     }
1365
1366     /* Apply gain and frequency filters */
1367     DryGainBase = clampf(DryGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1368     WetGainBase = clampf(WetGainBase, props->MinGain, props->MaxGain) * context->mParams.Gain;
1369
1370     GainTriplet DryGain{};
1371     DryGain.Base = minf(DryGainBase * props->Direct.Gain, GainMixMax);
1372     DryGain.HF = ConeHF * props->Direct.GainHF;
1373     DryGain.LF = props->Direct.GainLF;
1374     GainTriplet WetGain[MAX_SENDS]{};
1375     for(uint i{0};i < NumSends;i++)
1376     {
1377         /* If this effect slot's Auxiliary Send Auto is off, then use the dry
1378          * path distance and cone attenuation, otherwise use the wet (room)
1379          * path distance and cone attenuation. The send filter is used instead
1380          * of the direct filter, regardless.
1381          */
1382         const bool use_room{!(UseDryAttnForRoom&(1u<<i))};
1383         const float gain{use_room ? WetGainBase : DryGainBase};
1384         WetGain[i].Base = minf(gain * props->Send[i].Gain, GainMixMax);
1385         WetGain[i].HF = (use_room ? WetConeHF : ConeHF) * props->Send[i].GainHF;
1386         WetGain[i].LF = props->Send[i].GainLF;
1387     }
1388
1389     /* Distance-based air absorption and initial send decay. */
1390     if(likely(Distance > props->RefDistance))
1391     {
1392         const float distance_base{(Distance-props->RefDistance) * props->RolloffFactor};
1393         const float absorption{distance_base * context->mParams.MetersPerUnit *
1394             props->AirAbsorptionFactor};
1395         if(absorption > std::numeric_limits<float>::epsilon())
1396         {
1397             const float hfattn{std::pow(context->mParams.AirAbsorptionGainHF, absorption)};
1398             DryGain.HF *= hfattn;
1399             for(uint i{0u};i < NumSends;++i)
1400                 WetGain[i].HF *= hfattn;
1401         }
1402
1403         /* If the source's Auxiliary Send Filter Gain Auto is off, no extra
1404          * adjustment is applied to the send gains.
1405          */
1406         for(uint i{props->WetGainAuto ? 0u : NumSends};i < NumSends;++i)
1407         {
1408             if(!SendSlots[i])
1409                 continue;
1410
1411             auto calc_attenuation = [](float distance, float refdist, float rolloff) noexcept
1412             {
1413                 const float dist{lerp(refdist, distance, rolloff)};
1414                 if(dist > refdist) return refdist / dist;
1415                 return 1.0f;
1416             };
1417
1418             /* The reverb effect's room rolloff factor always applies to an
1419              * inverse distance rolloff model.
1420              */
1421             WetGain[i].Base *= calc_attenuation(Distance, props->RefDistance,
1422                 SendSlots[i]->RoomRolloff);
1423
1424             /* If this effect slot's Auxiliary Send Auto is off, don't apply
1425              * the automatic initial reverb decay (should the reverb's room
1426              * rolloff still apply?).
1427              */
1428             if(!SendSlots[i]->AuxSendAuto)
1429                 continue;
1430
1431             GainTriplet DecayDistance;
1432             /* Calculate the distances to where this effect's decay reaches
1433              * -60dB.
1434              */
1435             DecayDistance.Base = SendSlots[i]->DecayTime * SpeedOfSoundMetersPerSec;
1436             DecayDistance.LF = DecayDistance.Base * SendSlots[i]->DecayLFRatio;
1437             DecayDistance.HF = DecayDistance.Base * SendSlots[i]->DecayHFRatio;
1438             if(SendSlots[i]->DecayHFLimit)
1439             {
1440                 const float airAbsorption{SendSlots[i]->AirAbsorptionGainHF};
1441                 if(airAbsorption < 1.0f)
1442                 {
1443                     /* Calculate the distance to where this effect's air
1444                      * absorption reaches -60dB, and limit the effect's HF
1445                      * decay distance (so it doesn't take any longer to decay
1446                      * than the air would allow).
1447                      */
1448                     static constexpr float log10_decaygain{-3.0f/*std::log10(ReverbDecayGain)*/};
1449                     const float absorb_dist{log10_decaygain / std::log10(airAbsorption)};
1450                     DecayDistance.HF = minf(absorb_dist, DecayDistance.HF);
1451                 }
1452             }
1453
1454             const float baseAttn = calc_attenuation(Distance, props->RefDistance,
1455                 props->RolloffFactor);
1456
1457             /* Apply a decay-time transformation to the wet path, based on the
1458              * source distance. The initial decay of the reverb effect is
1459              * calculated and applied to the wet path.
1460              */
1461             const float fact{distance_base / DecayDistance.Base};
1462             const float gain{std::pow(ReverbDecayGain, fact)*(1.0f-baseAttn) + baseAttn};
1463             WetGain[i].Base *= gain;
1464
1465             if(gain > 0.0f)
1466             {
1467                 const float hffact{distance_base / DecayDistance.HF};
1468                 const float gainhf{std::pow(ReverbDecayGain, hffact)*(1.0f-baseAttn) + baseAttn};
1469                 WetGain[i].HF *= minf(gainhf/gain, 1.0f);
1470                 const float lffact{distance_base / DecayDistance.LF};
1471                 const float gainlf{std::pow(ReverbDecayGain, lffact)*(1.0f-baseAttn) + baseAttn};
1472                 WetGain[i].LF *= minf(gainlf/gain, 1.0f);
1473             }
1474         }
1475     }
1476
1477
1478     /* Initial source pitch */
1479     float Pitch{props->Pitch};
1480
1481     /* Calculate velocity-based doppler effect */
1482     float DopplerFactor{props->DopplerFactor * context->mParams.DopplerFactor};
1483     if(DopplerFactor > 0.0f)
1484     {
1485         const alu::Vector &lvelocity = context->mParams.Velocity;
1486         float vss{Velocity.dot_product(ToSource) * -DopplerFactor};
1487         float vls{lvelocity.dot_product(ToSource) * -DopplerFactor};
1488
1489         const float SpeedOfSound{context->mParams.SpeedOfSound};
1490         if(!(vls < SpeedOfSound))
1491         {
1492             /* Listener moving away from the source at the speed of sound.
1493              * Sound waves can't catch it.
1494              */
1495             Pitch = 0.0f;
1496         }
1497         else if(!(vss < SpeedOfSound))
1498         {
1499             /* Source moving toward the listener at the speed of sound. Sound
1500              * waves bunch up to extreme frequencies.
1501              */
1502             Pitch = std::numeric_limits<float>::infinity();
1503         }
1504         else
1505         {
1506             /* Source and listener movement is nominal. Calculate the proper
1507              * doppler shift.
1508              */
1509             Pitch *= (SpeedOfSound-vls) / (SpeedOfSound-vss);
1510         }
1511     }
1512
1513     /* Adjust pitch based on the buffer and output frequencies, and calculate
1514      * fixed-point stepping value.
1515      */
1516     Pitch *= static_cast<float>(voice->mFrequency) / static_cast<float>(Device->Frequency);
1517     if(Pitch > float{MaxPitch})
1518         voice->mStep = MaxPitch<<MixerFracBits;
1519     else
1520         voice->mStep = maxu(fastf2u(Pitch * MixerFracOne), 1);
1521     voice->mResampler = PrepareResampler(props->mResampler, voice->mStep, &voice->mResampleState);
1522
1523     float spread{0.0f};
1524     if(props->Radius > Distance)
1525         spread = al::numbers::pi_v<float>*2.0f - Distance/props->Radius*al::numbers::pi_v<float>;
1526     else if(Distance > 0.0f)
1527         spread = std::asin(props->Radius/Distance) * 2.0f;
1528
1529     CalcPanningAndFilters(voice, ToSource[0], ToSource[1], ToSource[2]*ZScale,
1530         Distance*context->mParams.MetersPerUnit, spread, DryGain, WetGain, SendSlots, props,
1531         context->mParams, Device);
1532 }
1533
1534 void CalcSourceParams(Voice *voice, ContextBase *context, bool force)
1535 {
1536     VoicePropsItem *props{voice->mUpdate.exchange(nullptr, std::memory_order_acq_rel)};
1537     if(!props && !force) return;
1538
1539     if(props)
1540     {
1541         voice->mProps = *props;
1542
1543         AtomicReplaceHead(context->mFreeVoiceProps, props);
1544     }
1545
1546     if((voice->mProps.DirectChannels != DirectMode::Off && voice->mFmtChannels != FmtMono
1547             && !IsAmbisonic(voice->mFmtChannels))
1548         || voice->mProps.mSpatializeMode == SpatializeMode::Off
1549         || (voice->mProps.mSpatializeMode==SpatializeMode::Auto && voice->mFmtChannels != FmtMono))
1550         CalcNonAttnSourceParams(voice, &voice->mProps, context);
1551     else
1552         CalcAttnSourceParams(voice, &voice->mProps, context);
1553 }
1554
1555
1556 void SendSourceStateEvent(ContextBase *context, uint id, VChangeState state)
1557 {
1558     RingBuffer *ring{context->mAsyncEvents.get()};
1559     auto evt_vec = ring->getWriteVector();
1560     if(evt_vec.first.len < 1) return;
1561
1562     AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
1563         AsyncEvent::SourceStateChange)};
1564     evt->u.srcstate.id = id;
1565     switch(state)
1566     {
1567     case VChangeState::Reset:
1568         evt->u.srcstate.state = AsyncEvent::SrcState::Reset;
1569         break;
1570     case VChangeState::Stop:
1571         evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
1572         break;
1573     case VChangeState::Play:
1574         evt->u.srcstate.state = AsyncEvent::SrcState::Play;
1575         break;
1576     case VChangeState::Pause:
1577         evt->u.srcstate.state = AsyncEvent::SrcState::Pause;
1578         break;
1579     /* Shouldn't happen. */
1580     case VChangeState::Restart:
1581         ASSUME(0);
1582     }
1583
1584     ring->writeAdvance(1);
1585 }
1586
1587 void ProcessVoiceChanges(ContextBase *ctx)
1588 {
1589     VoiceChange *cur{ctx->mCurrentVoiceChange.load(std::memory_order_acquire)};
1590     VoiceChange *next{cur->mNext.load(std::memory_order_acquire)};
1591     if(!next) return;
1592
1593     const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
1594     do {
1595         cur = next;
1596
1597         bool sendevt{false};
1598         if(cur->mState == VChangeState::Reset || cur->mState == VChangeState::Stop)
1599         {
1600             if(Voice *voice{cur->mVoice})
1601             {
1602                 voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1603                 voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1604                 /* A source ID indicates the voice was playing or paused, which
1605                  * gets a reset/stop event.
1606                  */
1607                 sendevt = voice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u;
1608                 Voice::State oldvstate{Voice::Playing};
1609                 voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1610                     std::memory_order_relaxed, std::memory_order_acquire);
1611                 voice->mPendingChange.store(false, std::memory_order_release);
1612             }
1613             /* Reset state change events are always sent, even if the voice is
1614              * already stopped or even if there is no voice.
1615              */
1616             sendevt |= (cur->mState == VChangeState::Reset);
1617         }
1618         else if(cur->mState == VChangeState::Pause)
1619         {
1620             Voice *voice{cur->mVoice};
1621             Voice::State oldvstate{Voice::Playing};
1622             sendevt = voice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1623                 std::memory_order_release, std::memory_order_acquire);
1624         }
1625         else if(cur->mState == VChangeState::Play)
1626         {
1627             /* NOTE: When playing a voice, sending a source state change event
1628              * depends if there's an old voice to stop and if that stop is
1629              * successful. If there is no old voice, a playing event is always
1630              * sent. If there is an old voice, an event is sent only if the
1631              * voice is already stopped.
1632              */
1633             if(Voice *oldvoice{cur->mOldVoice})
1634             {
1635                 oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1636                 oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1637                 oldvoice->mSourceID.store(0u, std::memory_order_relaxed);
1638                 Voice::State oldvstate{Voice::Playing};
1639                 sendevt = !oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1640                     std::memory_order_relaxed, std::memory_order_acquire);
1641                 oldvoice->mPendingChange.store(false, std::memory_order_release);
1642             }
1643             else
1644                 sendevt = true;
1645
1646             Voice *voice{cur->mVoice};
1647             voice->mPlayState.store(Voice::Playing, std::memory_order_release);
1648         }
1649         else if(cur->mState == VChangeState::Restart)
1650         {
1651             /* Restarting a voice never sends a source change event. */
1652             Voice *oldvoice{cur->mOldVoice};
1653             oldvoice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
1654             oldvoice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1655             /* If there's no sourceID, the old voice finished so don't start
1656              * the new one at its new offset.
1657              */
1658             if(oldvoice->mSourceID.exchange(0u, std::memory_order_relaxed) != 0u)
1659             {
1660                 /* Otherwise, set the voice to stopping if it's not already (it
1661                  * might already be, if paused), and play the new voice as
1662                  * appropriate.
1663                  */
1664                 Voice::State oldvstate{Voice::Playing};
1665                 oldvoice->mPlayState.compare_exchange_strong(oldvstate, Voice::Stopping,
1666                     std::memory_order_relaxed, std::memory_order_acquire);
1667
1668                 Voice *voice{cur->mVoice};
1669                 voice->mPlayState.store((oldvstate == Voice::Playing) ? Voice::Playing
1670                     : Voice::Stopped, std::memory_order_release);
1671             }
1672             oldvoice->mPendingChange.store(false, std::memory_order_release);
1673         }
1674         if(sendevt && (enabledevt&AsyncEvent::SourceStateChange))
1675             SendSourceStateEvent(ctx, cur->mSourceID, cur->mState);
1676
1677         next = cur->mNext.load(std::memory_order_acquire);
1678     } while(next);
1679     ctx->mCurrentVoiceChange.store(cur, std::memory_order_release);
1680 }
1681
1682 void ProcessParamUpdates(ContextBase *ctx, const EffectSlotArray &slots,
1683     const al::span<Voice*> voices)
1684 {
1685     ProcessVoiceChanges(ctx);
1686
1687     IncrementRef(ctx->mUpdateCount);
1688     if LIKELY(!ctx->mHoldUpdates.load(std::memory_order_acquire))
1689     {
1690         bool force{CalcContextParams(ctx)};
1691         auto sorted_slots = const_cast<EffectSlot**>(slots.data() + slots.size());
1692         for(EffectSlot *slot : slots)
1693             force |= CalcEffectSlotParams(slot, sorted_slots, ctx);
1694
1695         for(Voice *voice : voices)
1696         {
1697             /* Only update voices that have a source. */
1698             if(voice->mSourceID.load(std::memory_order_relaxed) != 0)
1699                 CalcSourceParams(voice, ctx, force);
1700         }
1701     }
1702     IncrementRef(ctx->mUpdateCount);
1703 }
1704
1705 void ProcessContexts(DeviceBase *device, const uint SamplesToDo)
1706 {
1707     ASSUME(SamplesToDo > 0);
1708
1709     for(ContextBase *ctx : *device->mContexts.load(std::memory_order_acquire))
1710     {
1711         const EffectSlotArray &auxslots = *ctx->mActiveAuxSlots.load(std::memory_order_acquire);
1712         const al::span<Voice*> voices{ctx->getVoicesSpanAcquired()};
1713
1714         /* Process pending propery updates for objects on the context. */
1715         ProcessParamUpdates(ctx, auxslots, voices);
1716
1717         /* Clear auxiliary effect slot mixing buffers. */
1718         for(EffectSlot *slot : auxslots)
1719         {
1720             for(auto &buffer : slot->Wet.Buffer)
1721                 buffer.fill(0.0f);
1722         }
1723
1724         /* Process voices that have a playing source. */
1725         for(Voice *voice : voices)
1726         {
1727             const Voice::State vstate{voice->mPlayState.load(std::memory_order_acquire)};
1728             if(vstate != Voice::Stopped && vstate != Voice::Pending)
1729                 voice->mix(vstate, ctx, SamplesToDo);
1730         }
1731
1732         /* Process effects. */
1733         if(const size_t num_slots{auxslots.size()})
1734         {
1735             auto slots = auxslots.data();
1736             auto slots_end = slots + num_slots;
1737
1738             /* Sort the slots into extra storage, so that effect slots come
1739              * before their effect slot target (or their targets' target).
1740              */
1741             const al::span<EffectSlot*> sorted_slots{const_cast<EffectSlot**>(slots_end),
1742                 num_slots};
1743             /* Skip sorting if it has already been done. */
1744             if(!sorted_slots[0])
1745             {
1746                 /* First, copy the slots to the sorted list, then partition the
1747                  * sorted list so that all slots without a target slot go to
1748                  * the end.
1749                  */
1750                 std::copy(slots, slots_end, sorted_slots.begin());
1751                 auto split_point = std::partition(sorted_slots.begin(), sorted_slots.end(),
1752                     [](const EffectSlot *slot) noexcept -> bool
1753                     { return slot->Target != nullptr; });
1754                 /* There must be at least one slot without a slot target. */
1755                 assert(split_point != sorted_slots.end());
1756
1757                 /* Simple case: no more than 1 slot has a target slot. Either
1758                  * all slots go right to the output, or the remaining one must
1759                  * target an already-partitioned slot.
1760                  */
1761                 if(split_point - sorted_slots.begin() > 1)
1762                 {
1763                     /* At least two slots target other slots. Starting from the
1764                      * back of the sorted list, continue partitioning the front
1765                      * of the list given each target until all targets are
1766                      * accounted for. This ensures all slots without a target
1767                      * go last, all slots directly targeting those last slots
1768                      * go second-to-last, all slots directly targeting those
1769                      * second-last slots go third-to-last, etc.
1770                      */
1771                     auto next_target = sorted_slots.end();
1772                     do {
1773                         /* This shouldn't happen, but if there's unsorted slots
1774                          * left that don't target any sorted slots, they can't
1775                          * contribute to the output, so leave them.
1776                          */
1777                         if UNLIKELY(next_target == split_point)
1778                             break;
1779
1780                         --next_target;
1781                         split_point = std::partition(sorted_slots.begin(), split_point,
1782                             [next_target](const EffectSlot *slot) noexcept -> bool
1783                             { return slot->Target != *next_target; });
1784                     } while(split_point - sorted_slots.begin() > 1);
1785                 }
1786             }
1787
1788             for(const EffectSlot *slot : sorted_slots)
1789             {
1790                 EffectState *state{slot->mEffectState};
1791                 state->process(SamplesToDo, slot->Wet.Buffer, state->mOutTarget);
1792             }
1793         }
1794
1795         /* Signal the event handler if there are any events to read. */
1796         RingBuffer *ring{ctx->mAsyncEvents.get()};
1797         if(ring->readSpace() > 0)
1798             ctx->mEventSem.post();
1799     }
1800 }
1801
1802
1803 void ApplyDistanceComp(const al::span<FloatBufferLine> Samples, const size_t SamplesToDo,
1804     const DistanceComp::ChanData *distcomp)
1805 {
1806     ASSUME(SamplesToDo > 0);
1807
1808     for(auto &chanbuffer : Samples)
1809     {
1810         const float gain{distcomp->Gain};
1811         const size_t base{distcomp->Length};
1812         float *distbuf{al::assume_aligned<16>(distcomp->Buffer)};
1813         ++distcomp;
1814
1815         if(base < 1)
1816             continue;
1817
1818         float *inout{al::assume_aligned<16>(chanbuffer.data())};
1819         auto inout_end = inout + SamplesToDo;
1820         if LIKELY(SamplesToDo >= base)
1821         {
1822             auto delay_end = std::rotate(inout, inout_end - base, inout_end);
1823             std::swap_ranges(inout, delay_end, distbuf);
1824         }
1825         else
1826         {
1827             auto delay_start = std::swap_ranges(inout, inout_end, distbuf);
1828             std::rotate(distbuf, delay_start, distbuf + base);
1829         }
1830         std::transform(inout, inout_end, inout, std::bind(std::multiplies<float>{}, _1, gain));
1831     }
1832 }
1833
1834 void ApplyDither(const al::span<FloatBufferLine> Samples, uint *dither_seed,
1835     const float quant_scale, const size_t SamplesToDo)
1836 {
1837     ASSUME(SamplesToDo > 0);
1838
1839     /* Dithering. Generate whitenoise (uniform distribution of random values
1840      * between -1 and +1) and add it to the sample values, after scaling up to
1841      * the desired quantization depth amd before rounding.
1842      */
1843     const float invscale{1.0f / quant_scale};
1844     uint seed{*dither_seed};
1845     auto dither_sample = [&seed,invscale,quant_scale](const float sample) noexcept -> float
1846     {
1847         float val{sample * quant_scale};
1848         uint rng0{dither_rng(&seed)};
1849         uint rng1{dither_rng(&seed)};
1850         val += static_cast<float>(rng0*(1.0/UINT_MAX) - rng1*(1.0/UINT_MAX));
1851         return fast_roundf(val) * invscale;
1852     };
1853     for(FloatBufferLine &inout : Samples)
1854         std::transform(inout.begin(), inout.begin()+SamplesToDo, inout.begin(), dither_sample);
1855     *dither_seed = seed;
1856 }
1857
1858
1859 /* Base template left undefined. Should be marked =delete, but Clang 3.8.1
1860  * chokes on that given the inline specializations.
1861  */
1862 template<typename T>
1863 inline T SampleConv(float) noexcept;
1864
1865 template<> inline float SampleConv(float val) noexcept
1866 { return val; }
1867 template<> inline int32_t SampleConv(float val) noexcept
1868 {
1869     /* Floats have a 23-bit mantissa, plus an implied 1 bit and a sign bit.
1870      * This means a normalized float has at most 25 bits of signed precision.
1871      * When scaling and clamping for a signed 32-bit integer, these following
1872      * values are the best a float can give.
1873      */
1874     return fastf2i(clampf(val*2147483648.0f, -2147483648.0f, 2147483520.0f));
1875 }
1876 template<> inline int16_t SampleConv(float val) noexcept
1877 { return static_cast<int16_t>(fastf2i(clampf(val*32768.0f, -32768.0f, 32767.0f))); }
1878 template<> inline int8_t SampleConv(float val) noexcept
1879 { return static_cast<int8_t>(fastf2i(clampf(val*128.0f, -128.0f, 127.0f))); }
1880
1881 /* Define unsigned output variations. */
1882 template<> inline uint32_t SampleConv(float val) noexcept
1883 { return static_cast<uint32_t>(SampleConv<int32_t>(val)) + 2147483648u; }
1884 template<> inline uint16_t SampleConv(float val) noexcept
1885 { return static_cast<uint16_t>(SampleConv<int16_t>(val) + 32768); }
1886 template<> inline uint8_t SampleConv(float val) noexcept
1887 { return static_cast<uint8_t>(SampleConv<int8_t>(val) + 128); }
1888
1889 template<DevFmtType T>
1890 void Write(const al::span<const FloatBufferLine> InBuffer, void *OutBuffer, const size_t Offset,
1891     const size_t SamplesToDo, const size_t FrameStep)
1892 {
1893     ASSUME(FrameStep > 0);
1894     ASSUME(SamplesToDo > 0);
1895
1896     DevFmtType_t<T> *outbase{static_cast<DevFmtType_t<T>*>(OutBuffer) + Offset*FrameStep};
1897     size_t c{0};
1898     for(const FloatBufferLine &inbuf : InBuffer)
1899     {
1900         DevFmtType_t<T> *out{outbase++};
1901         auto conv_sample = [FrameStep,&out](const float s) noexcept -> void
1902         {
1903             *out = SampleConv<DevFmtType_t<T>>(s);
1904             out += FrameStep;
1905         };
1906         std::for_each(inbuf.begin(), inbuf.begin()+SamplesToDo, conv_sample);
1907         ++c;
1908     }
1909     if(const size_t extra{FrameStep - c})
1910     {
1911         const auto silence = SampleConv<DevFmtType_t<T>>(0.0f);
1912         for(size_t i{0};i < SamplesToDo;++i)
1913         {
1914             std::fill_n(outbase, extra, silence);
1915             outbase += FrameStep;
1916         }
1917     }
1918 }
1919
1920 } // namespace
1921
1922 uint DeviceBase::renderSamples(const uint numSamples)
1923 {
1924     const uint samplesToDo{minu(numSamples, BufferLineSize)};
1925
1926     /* Clear main mixing buffers. */
1927     for(FloatBufferLine &buffer : MixBuffer)
1928         buffer.fill(0.0f);
1929
1930     /* Increment the mix count at the start (lsb should now be 1). */
1931     IncrementRef(MixCount);
1932
1933     /* Process and mix each context's sources and effects. */
1934     ProcessContexts(this, samplesToDo);
1935
1936     /* Increment the clock time. Every second's worth of samples is converted
1937      * and added to clock base so that large sample counts don't overflow
1938      * during conversion. This also guarantees a stable conversion.
1939      */
1940     SamplesDone += samplesToDo;
1941     ClockBase += std::chrono::seconds{SamplesDone / Frequency};
1942     SamplesDone %= Frequency;
1943
1944     /* Increment the mix count at the end (lsb should now be 0). */
1945     IncrementRef(MixCount);
1946
1947     /* Apply any needed post-process for finalizing the Dry mix to the RealOut
1948      * (Ambisonic decode, UHJ encode, etc).
1949      */
1950     postProcess(samplesToDo);
1951
1952     /* Apply compression, limiting sample amplitude if needed or desired. */
1953     if(Limiter) Limiter->process(samplesToDo, RealOut.Buffer.data());
1954
1955     /* Apply delays and attenuation for mismatched speaker distances. */
1956     if(ChannelDelays)
1957         ApplyDistanceComp(RealOut.Buffer, samplesToDo, ChannelDelays->mChannels.data());
1958
1959     /* Apply dithering. The compressor should have left enough headroom for the
1960      * dither noise to not saturate.
1961      */
1962     if(DitherDepth > 0.0f)
1963         ApplyDither(RealOut.Buffer, &DitherSeed, DitherDepth, samplesToDo);
1964
1965     return samplesToDo;
1966 }
1967
1968 void DeviceBase::renderSamples(const al::span<float*> outBuffers, const uint numSamples)
1969 {
1970     FPUCtl mixer_mode{};
1971     uint total{0};
1972     while(const uint todo{numSamples - total})
1973     {
1974         const uint samplesToDo{renderSamples(todo)};
1975
1976         auto *srcbuf = RealOut.Buffer.data();
1977         for(auto *dstbuf : outBuffers)
1978         {
1979             std::copy_n(srcbuf->data(), samplesToDo, dstbuf + total);
1980             ++srcbuf;
1981         }
1982
1983         total += samplesToDo;
1984     }
1985 }
1986
1987 void DeviceBase::renderSamples(void *outBuffer, const uint numSamples, const size_t frameStep)
1988 {
1989     FPUCtl mixer_mode{};
1990     uint total{0};
1991     while(const uint todo{numSamples - total})
1992     {
1993         const uint samplesToDo{renderSamples(todo)};
1994
1995         if LIKELY(outBuffer)
1996         {
1997             /* Finally, interleave and convert samples, writing to the device's
1998              * output buffer.
1999              */
2000             switch(FmtType)
2001             {
2002 #define HANDLE_WRITE(T) case T:                                               \
2003     Write<T>(RealOut.Buffer, outBuffer, total, samplesToDo, frameStep); break;
2004             HANDLE_WRITE(DevFmtByte)
2005             HANDLE_WRITE(DevFmtUByte)
2006             HANDLE_WRITE(DevFmtShort)
2007             HANDLE_WRITE(DevFmtUShort)
2008             HANDLE_WRITE(DevFmtInt)
2009             HANDLE_WRITE(DevFmtUInt)
2010             HANDLE_WRITE(DevFmtFloat)
2011 #undef HANDLE_WRITE
2012             }
2013         }
2014
2015         total += samplesToDo;
2016     }
2017 }
2018
2019 void DeviceBase::handleDisconnect(const char *msg, ...)
2020 {
2021     if(!Connected.exchange(false, std::memory_order_acq_rel))
2022         return;
2023
2024     AsyncEvent evt{AsyncEvent::Disconnected};
2025
2026     va_list args;
2027     va_start(args, msg);
2028     int msglen{vsnprintf(evt.u.disconnect.msg, sizeof(evt.u.disconnect.msg), msg, args)};
2029     va_end(args);
2030
2031     if(msglen < 0 || static_cast<size_t>(msglen) >= sizeof(evt.u.disconnect.msg))
2032         evt.u.disconnect.msg[sizeof(evt.u.disconnect.msg)-1] = 0;
2033
2034     IncrementRef(MixCount);
2035     for(ContextBase *ctx : *mContexts.load())
2036     {
2037         const uint enabledevt{ctx->mEnabledEvts.load(std::memory_order_acquire)};
2038         if((enabledevt&AsyncEvent::Disconnected))
2039         {
2040             RingBuffer *ring{ctx->mAsyncEvents.get()};
2041             auto evt_data = ring->getWriteVector().first;
2042             if(evt_data.len > 0)
2043             {
2044                 al::construct_at(reinterpret_cast<AsyncEvent*>(evt_data.buf), evt);
2045                 ring->writeAdvance(1);
2046                 ctx->mEventSem.post();
2047             }
2048         }
2049
2050         if(!ctx->mStopVoicesOnDisconnect)
2051         {
2052             ProcessVoiceChanges(ctx);
2053             continue;
2054         }
2055
2056         auto voicelist = ctx->getVoicesSpanAcquired();
2057         auto stop_voice = [](Voice *voice) -> void
2058         {
2059             voice->mCurrentBuffer.store(nullptr, std::memory_order_relaxed);
2060             voice->mLoopBuffer.store(nullptr, std::memory_order_relaxed);
2061             voice->mSourceID.store(0u, std::memory_order_relaxed);
2062             voice->mPlayState.store(Voice::Stopped, std::memory_order_release);
2063         };
2064         std::for_each(voicelist.begin(), voicelist.end(), stop_voice);
2065     }
2066     IncrementRef(MixCount);
2067 }