core/voice.cpp

   1
   2 #include "config.h"
   3
   4 #include "voice.h"
   5
   6 #include <algorithm>
   7 #include <array>
   8 #include <atomic>
   9 #include <cassert>
  10 #include <climits>
  11 #include <cstdint>
  12 #include <cstdlib>
  13 #include <iterator>
  14 #include <memory>
  15 #include <new>
  16 #include <optional>
  17 #include <utility>
  18 #include <vector>
  19
  20 #include "alnumeric.h"
  21 #include "alspan.h"
  22 #include "alstring.h"
  23 #include "ambidefs.h"
  24 #include "async_event.h"
  25 #include "buffer_storage.h"
  26 #include "context.h"
  27 #include "cpu_caps.h"
  28 #include "devformat.h"
  29 #include "device.h"
  30 #include "filters/biquad.h"
  31 #include "filters/nfc.h"
  32 #include "filters/splitter.h"
  33 #include "fmt_traits.h"
  34 #include "logging.h"
  35 #include "mixer.h"
  36 #include "mixer/defs.h"
  37 #include "mixer/hrtfdefs.h"
  38 #include "opthelpers.h"
  39 #include "resampler_limits.h"
  40 #include "ringbuffer.h"
  41 #include "vector.h"
  42 #include "voice_change.h"
  43
  44 struct CTag;
  45 #ifdef HAVE_SSE
  46 struct SSETag;
  47 #endif
  48 #ifdef HAVE_NEON
  49 struct NEONTag;
  50 #endif
  51
  52
  53 static_assert(!(DeviceBase::MixerLineSize&3), "MixerLineSize must be a multiple of 4");
  54 static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4");
  55
  56 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
  57 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
  58     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
  59
  60 namespace {
  61
  62 using uint = unsigned int;
  63 using namespace std::chrono;
  64 using namespace std::string_view_literals;
  65
  66 using HrtfMixerFunc = void(*)(const al::span<const float> InSamples,
  67     const al::span<float2> AccumSamples, const uint IrSize, const MixHrtfFilter *hrtfparams,
  68     const size_t SamplesToDo);
  69 using HrtfMixerBlendFunc = void(*)(const al::span<const float> InSamples,
  70     const al::span<float2> AccumSamples, const uint IrSize, const HrtfFilter *oldparams,
  71     const MixHrtfFilter *newparams, const size_t SamplesToDo);
  72
  73 HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
  74 HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
  75
  76 inline MixerOutFunc SelectMixer()
  77 {
  78 #ifdef HAVE_NEON
  79     if((CPUCapFlags&CPU_CAP_NEON))
  80         return Mix_<NEONTag>;
  81 #endif
  82 #ifdef HAVE_SSE
  83     if((CPUCapFlags&CPU_CAP_SSE))
  84         return Mix_<SSETag>;
  85 #endif
  86     return Mix_<CTag>;
  87 }
  88
  89 inline MixerOneFunc SelectMixerOne()
  90 {
  91 #ifdef HAVE_NEON
  92     if((CPUCapFlags&CPU_CAP_NEON))
  93         return Mix_<NEONTag>;
  94 #endif
  95 #ifdef HAVE_SSE
  96     if((CPUCapFlags&CPU_CAP_SSE))
  97         return Mix_<SSETag>;
  98 #endif
  99     return Mix_<CTag>;
 100 }
 101
 102 inline HrtfMixerFunc SelectHrtfMixer()
 103 {
 104 #ifdef HAVE_NEON
 105     if((CPUCapFlags&CPU_CAP_NEON))
 106         return MixHrtf_<NEONTag>;
 107 #endif
 108 #ifdef HAVE_SSE
 109     if((CPUCapFlags&CPU_CAP_SSE))
 110         return MixHrtf_<SSETag>;
 111 #endif
 112     return MixHrtf_<CTag>;
 113 }
 114
 115 inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
 116 {
 117 #ifdef HAVE_NEON
 118     if((CPUCapFlags&CPU_CAP_NEON))
 119         return MixHrtfBlend_<NEONTag>;
 120 #endif
 121 #ifdef HAVE_SSE
 122     if((CPUCapFlags&CPU_CAP_SSE))
 123         return MixHrtfBlend_<SSETag>;
 124 #endif
 125     return MixHrtfBlend_<CTag>;
 126 }
 127
 128 } // namespace
 129
 130 void Voice::InitMixer(std::optional<std::string> resopt)
 131 {
 132     if(resopt)
 133     {
 134         struct ResamplerEntry {
 135             const std::string_view name;
 136             const Resampler resampler;
 137         };
 138         constexpr std::array ResamplerList{
 139             ResamplerEntry{"none"sv, Resampler::Point},
 140             ResamplerEntry{"point"sv, Resampler::Point},
 141             ResamplerEntry{"linear"sv, Resampler::Linear},
 142             ResamplerEntry{"spline"sv, Resampler::Spline},
 143             ResamplerEntry{"gaussian"sv, Resampler::Gaussian},
 144             ResamplerEntry{"bsinc12"sv, Resampler::BSinc12},
 145             ResamplerEntry{"fast_bsinc12"sv, Resampler::FastBSinc12},
 146             ResamplerEntry{"bsinc24"sv, Resampler::BSinc24},
 147             ResamplerEntry{"fast_bsinc24"sv, Resampler::FastBSinc24},
 148         };
 149
 150         std::string_view resampler{*resopt};
 151         if(al::case_compare(resampler, "cubic"sv) == 0
 152             || al::case_compare(resampler, "sinc4"sv) == 0
 153             || al::case_compare(resampler, "sinc8"sv) == 0)
 154         {
 155             WARN("Resampler option \"%s\" is deprecated, using gaussian\n", resopt->c_str());
 156             resampler = "gaussian"sv;
 157         }
 158         else if(al::case_compare(resampler, "bsinc"sv) == 0)
 159         {
 160             WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", resopt->c_str());
 161             resampler = "bsinc12"sv;
 162         }
 163
 164         auto iter = std::find_if(ResamplerList.begin(), ResamplerList.end(),
 165             [resampler](const ResamplerEntry &entry) -> bool
 166             { return al::case_compare(resampler, entry.name) == 0; });
 167         if(iter == ResamplerList.end())
 168             ERR("Invalid resampler: %s\n", resopt->c_str());
 169         else
 170             ResamplerDefault = iter->resampler;
 171     }
 172
 173     MixSamplesOut = SelectMixer();
 174     MixSamplesOne = SelectMixerOne();
 175     MixHrtfBlendSamples = SelectHrtfBlendMixer();
 176     MixHrtfSamples = SelectHrtfMixer();
 177 }
 178
 179
 180 namespace {
 181
 182 /* IMA ADPCM Stepsize table */
 183 constexpr std::array<int,89> IMAStep_size{{
 184        7,    8,    9,   10,   11,   12,   13,   14,   16,   17,   19,
 185       21,   23,   25,   28,   31,   34,   37,   41,   45,   50,   55,
 186       60,   66,   73,   80,   88,   97,  107,  118,  130,  143,  157,
 187      173,  190,  209,  230,  253,  279,  307,  337,  371,  408,  449,
 188      494,  544,  598,  658,  724,  796,  876,  963, 1060, 1166, 1282,
 189     1411, 1552, 1707, 1878, 2066, 2272, 2499, 2749, 3024, 3327, 3660,
 190     4026, 4428, 4871, 5358, 5894, 6484, 7132, 7845, 8630, 9493,10442,
 191    11487,12635,13899,15289,16818,18500,20350,22358,24633,27086,29794,
 192    32767
 193 }};
 194
 195 /* IMA4 ADPCM Codeword decode table */
 196 constexpr std::array<int,16> IMA4Codeword{{
 197     1, 3, 5, 7, 9, 11, 13, 15,
 198    -1,-3,-5,-7,-9,-11,-13,-15,
 199 }};
 200
 201 /* IMA4 ADPCM Step index adjust decode table */
 202 constexpr std::array<int,16>IMA4Index_adjust{{
 203    -1,-1,-1,-1, 2, 4, 6, 8,
 204    -1,-1,-1,-1, 2, 4, 6, 8
 205 }};
 206
 207 /* MSADPCM Adaption table */
 208 constexpr std::array<int,16> MSADPCMAdaption{{
 209     230, 230, 230, 230, 307, 409, 512, 614,
 210     768, 614, 512, 409, 307, 230, 230, 230
 211 }};
 212
 213 /* MSADPCM Adaption Coefficient tables */
 214 constexpr std::array MSADPCMAdaptionCoeff{
 215     std::array{256,    0},
 216     std::array{512, -256},
 217     std::array{  0,    0},
 218     std::array{192,   64},
 219     std::array{240,    0},
 220     std::array{460, -208},
 221     std::array{392, -232}
 222 };
 223
 224
 225 void SendSourceStoppedEvent(ContextBase *context, uint id)
 226 {
 227     RingBuffer *ring{context->mAsyncEvents.get()};
 228     auto evt_vec = ring->getWriteVector();
 229     if(evt_vec.first.len < 1) return;
 230
 231     auto &evt = InitAsyncEvent<AsyncSourceStateEvent>(evt_vec.first.buf);
 232     evt.mId = id;
 233     evt.mState = AsyncSrcState::Stop;
 234
 235     ring->writeAdvance(1);
 236 }
 237
 238
 239 al::span<const float> DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter,
 240     const al::span<float,BufferLineSize> dst, const al::span<const float> src, int type)
 241 {
 242     switch(type)
 243     {
 244     case AF_None:
 245         lpfilter.clear();
 246         hpfilter.clear();
 247         break;
 248
 249     case AF_LowPass:
 250         lpfilter.process(src, dst);
 251         hpfilter.clear();
 252         return dst.first(src.size());
 253     case AF_HighPass:
 254         lpfilter.clear();
 255         hpfilter.process(src, dst);
 256         return dst.first(src.size());
 257
 258     case AF_BandPass:
 259         DualBiquad{lpfilter, hpfilter}.process(src, dst);
 260         return dst.first(src.size());
 261     }
 262     return src;
 263 }
 264
 265
 266 template<FmtType Type>
 267 inline void LoadSamples(const al::span<float> dstSamples, const al::span<const std::byte> srcData,
 268     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
 269     const size_t samplesPerBlock [[maybe_unused]]) noexcept
 270 {
 271     using TypeTraits = al::FmtTypeTraits<Type>;
 272     using SampleType = typename TypeTraits::Type;
 273     static constexpr size_t sampleSize{sizeof(SampleType)};
 274     assert(srcChan < srcStep);
 275     auto converter = TypeTraits{};
 276
 277     al::span<const SampleType> src{reinterpret_cast<const SampleType*>(srcData.data()),
 278         srcData.size()/sampleSize};
 279     auto ssrc = src.cbegin() + ptrdiff_t(srcOffset*srcStep);
 280     std::generate(dstSamples.begin(), dstSamples.end(), [&ssrc,srcChan,srcStep,converter]
 281     {
 282         auto ret = converter(ssrc[srcChan]);
 283         ssrc += ptrdiff_t(srcStep);
 284         return ret;
 285     });
 286 }
 287
 288 template<>
 289 inline void LoadSamples<FmtIMA4>(al::span<float> dstSamples, al::span<const std::byte> src,
 290     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
 291     const size_t samplesPerBlock) noexcept
 292 {
 293     static constexpr int MaxStepIndex{static_cast<int>(IMAStep_size.size()) - 1};
 294
 295     assert(srcStep > 0 || srcStep <= 2);
 296     assert(srcChan < srcStep);
 297     assert(samplesPerBlock > 1);
 298     const size_t blockBytes{((samplesPerBlock-1)/2 + 4)*srcStep};
 299
 300     /* Skip to the ADPCM block containing the srcOffset sample. */
 301     src = src.subspan(srcOffset/samplesPerBlock*blockBytes);
 302     /* Calculate how many samples need to be skipped in the block. */
 303     size_t skip{srcOffset % samplesPerBlock};
 304
 305     /* NOTE: This could probably be optimized better. */
 306     while(!dstSamples.empty())
 307     {
 308         auto nibbleData = src.cbegin();
 309         src = src.subspan(blockBytes);
 310
 311         /* Each IMA4 block starts with a signed 16-bit sample, and a signed
 312          * 16-bit table index. The table index needs to be clamped.
 313          */
 314         int sample{int(nibbleData[srcChan*4]) | (int(nibbleData[srcChan*4 + 1]) << 8)};
 315         int index{int(nibbleData[srcChan*4 + 2]) | (int(nibbleData[srcChan*4 + 3]) << 8)};
 316         nibbleData += ptrdiff_t((srcStep+srcChan)*4);
 317
 318         sample = (sample^0x8000) - 32768;
 319         index = std::clamp((index^0x8000) - 32768, 0, MaxStepIndex);
 320
 321         if(skip == 0)
 322         {
 323             dstSamples[0] = static_cast<float>(sample) / 32768.0f;
 324             dstSamples = dstSamples.subspan<1>();
 325             if(dstSamples.empty()) return;
 326         }
 327         else
 328             --skip;
 329
 330         auto decode_sample = [&sample,&index](const uint nibble)
 331         {
 332             sample += IMA4Codeword[nibble] * IMAStep_size[static_cast<uint>(index)] / 8;
 333             sample = std::clamp(sample, -32768, 32767);
 334
 335             index += IMA4Index_adjust[nibble];
 336             index = std::clamp(index, 0, MaxStepIndex);
 337
 338             return sample;
 339         };
 340
 341         /* The rest of the block is arranged as a series of nibbles, contained
 342          * in 4 *bytes* per channel interleaved. So every 8 nibbles we need to
 343          * skip 4 bytes per channel to get the next nibbles for this channel.
 344          *
 345          * First, decode the samples that we need to skip in the block (will
 346          * always be less than the block size). They need to be decoded despite
 347          * being ignored for proper state on the remaining samples.
 348          */
 349         size_t nibbleOffset{0};
 350         const size_t startOffset{skip + 1};
 351         for(;skip;--skip)
 352         {
 353             const size_t byteShift{(nibbleOffset&1) * 4};
 354             const size_t wordOffset{(nibbleOffset>>1) & ~3_uz};
 355             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
 356             ++nibbleOffset;
 357
 358             std::ignore = decode_sample(uint(nibbleData[byteOffset]>>byteShift) & 15u);
 359         }
 360
 361         /* Second, decode the rest of the block and write to the output, until
 362          * the end of the block or the end of output.
 363          */
 364         const size_t todo{std::min(samplesPerBlock-startOffset, dstSamples.size())};
 365         std::generate_n(dstSamples.begin(), todo, [&]
 366         {
 367             const size_t byteShift{(nibbleOffset&1) * 4};
 368             const size_t wordOffset{(nibbleOffset>>1) & ~3_uz};
 369             const size_t byteOffset{wordOffset*srcStep + ((nibbleOffset>>1)&3u)};
 370             ++nibbleOffset;
 371
 372             const int result{decode_sample(uint(nibbleData[byteOffset]>>byteShift) & 15u)};
 373             return static_cast<float>(result) / 32768.0f;
 374         });
 375         dstSamples = dstSamples.subspan(todo);
 376     }
 377 }
 378
 379 template<>
 380 inline void LoadSamples<FmtMSADPCM>(al::span<float> dstSamples, al::span<const std::byte> src,
 381     const size_t srcChan, const size_t srcOffset, const size_t srcStep,
 382     const size_t samplesPerBlock) noexcept
 383 {
 384     assert(srcStep > 0 || srcStep <= 2);
 385     assert(srcChan < srcStep);
 386     assert(samplesPerBlock > 2);
 387     const size_t blockBytes{((samplesPerBlock-2)/2 + 7)*srcStep};
 388
 389     src = src.subspan(srcOffset/samplesPerBlock*blockBytes);
 390     size_t skip{srcOffset % samplesPerBlock};
 391
 392     while(!dstSamples.empty())
 393     {
 394         auto input = src.cbegin();
 395         src = src.subspan(blockBytes);
 396
 397         /* Each MS ADPCM block starts with an 8-bit block predictor, used to
 398          * dictate how the two sample history values are mixed with the decoded
 399          * sample, and an initial signed 16-bit delta value which scales the
 400          * nibble sample value. This is followed by the two initial 16-bit
 401          * sample history values.
 402          */
 403         const uint8_t blockpred{std::min(uint8_t(input[srcChan]), uint8_t{6})};
 404         input += ptrdiff_t(srcStep);
 405         int delta{int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1]) << 8)};
 406         input += ptrdiff_t(srcStep*2);
 407
 408         std::array<int,2> sampleHistory{};
 409         sampleHistory[0] = int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1])<<8);
 410         input += ptrdiff_t(srcStep*2);
 411         sampleHistory[1] = int(input[2*srcChan + 0]) | (int(input[2*srcChan + 1])<<8);
 412         input += ptrdiff_t(srcStep*2);
 413
 414         const al::span coeffs{MSADPCMAdaptionCoeff[blockpred]};
 415         delta = (delta^0x8000) - 32768;
 416         sampleHistory[0] = (sampleHistory[0]^0x8000) - 32768;
 417         sampleHistory[1] = (sampleHistory[1]^0x8000) - 32768;
 418
 419         /* The second history sample is "older", so it's the first to be
 420          * written out.
 421          */
 422         if(skip == 0)
 423         {
 424             dstSamples[0] = static_cast<float>(sampleHistory[1]) / 32768.0f;
 425             dstSamples = dstSamples.subspan<1>();
 426             if(dstSamples.empty()) return;
 427             dstSamples[0] = static_cast<float>(sampleHistory[0]) / 32768.0f;
 428             dstSamples = dstSamples.subspan<1>();
 429             if(dstSamples.empty()) return;
 430         }
 431         else if(skip == 1)
 432         {
 433             --skip;
 434             dstSamples[0] = static_cast<float>(sampleHistory[0]) / 32768.0f;
 435             dstSamples = dstSamples.subspan<1>();
 436             if(dstSamples.empty()) return;
 437         }
 438         else
 439             skip -= 2;
 440
 441         auto decode_sample = [&sampleHistory,&delta,coeffs](const int nibble)
 442         {
 443             int pred{(sampleHistory[0]*coeffs[0] + sampleHistory[1]*coeffs[1]) / 256};
 444             pred += ((nibble^0x08) - 0x08) * delta;
 445             pred  = std::clamp(pred, -32768, 32767);
 446
 447             sampleHistory[1] = sampleHistory[0];
 448             sampleHistory[0] = pred;
 449
 450             delta = (MSADPCMAdaption[static_cast<uint>(nibble)] * delta) / 256;
 451             delta = std::max(16, delta);
 452
 453             return pred;
 454         };
 455
 456         /* The rest of the block is a series of nibbles, interleaved per-
 457          * channel. First, skip samples.
 458          */
 459         const size_t startOffset{skip + 2};
 460         size_t nibbleOffset{srcChan};
 461         for(;skip;--skip)
 462         {
 463             const size_t byteOffset{nibbleOffset>>1};
 464             const size_t byteShift{((nibbleOffset&1)^1) * 4};
 465             nibbleOffset += srcStep;
 466
 467             std::ignore = decode_sample(int(input[byteOffset]>>byteShift) & 15);
 468         }
 469
 470         /* Now decode the rest of the block, until the end of the block or the
 471          * dst buffer is filled.
 472          */
 473         const size_t todo{std::min(samplesPerBlock-startOffset, dstSamples.size())};
 474         std::generate_n(dstSamples.begin(), todo, [&]
 475         {
 476             const size_t byteOffset{nibbleOffset>>1};
 477             const size_t byteShift{((nibbleOffset&1)^1) * 4};
 478             nibbleOffset += srcStep;
 479
 480             const int sample{decode_sample(int(input[byteOffset]>>byteShift) & 15)};
 481             return static_cast<float>(sample) / 32768.0f;
 482         });
 483         dstSamples = dstSamples.subspan(todo);
 484     }
 485 }
 486
 487 void LoadSamples(const al::span<float> dstSamples, const al::span<const std::byte> src,
 488     const size_t srcChan, const size_t srcOffset, const FmtType srcType, const size_t srcStep,
 489     const size_t samplesPerBlock) noexcept
 490 {
 491 #define HANDLE_FMT(T) case T:                                                 \
 492     LoadSamples<T>(dstSamples, src, srcChan, srcOffset, srcStep,              \
 493         samplesPerBlock);                                                     \
 494     break
 495
 496     switch(srcType)
 497     {
 498     HANDLE_FMT(FmtUByte);
 499     HANDLE_FMT(FmtShort);
 500     HANDLE_FMT(FmtInt);
 501     HANDLE_FMT(FmtFloat);
 502     HANDLE_FMT(FmtDouble);
 503     HANDLE_FMT(FmtMulaw);
 504     HANDLE_FMT(FmtAlaw);
 505     HANDLE_FMT(FmtIMA4);
 506     HANDLE_FMT(FmtMSADPCM);
 507     }
 508 #undef HANDLE_FMT
 509 }
 510
 511 void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 512     const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 513     const size_t srcStep, al::span<float> voiceSamples)
 514 {
 515     if(!bufferLoopItem)
 516     {
 517         float lastSample{0.0f};
 518         /* Load what's left to play from the buffer */
 519         if(buffer->mSampleLen > dataPosInt) LIKELY
 520         {
 521             const size_t buffer_remaining{buffer->mSampleLen - dataPosInt};
 522             const size_t remaining{std::min(voiceSamples.size(), buffer_remaining)};
 523             LoadSamples(voiceSamples.first(remaining), buffer->mSamples, srcChannel, dataPosInt,
 524                 sampleType, srcStep, buffer->mBlockAlign);
 525             lastSample = voiceSamples[remaining-1];
 526             voiceSamples = voiceSamples.subspan(remaining);
 527         }
 528
 529         if(const size_t toFill{voiceSamples.size()})
 530             std::fill_n(voiceSamples.begin(), toFill, lastSample);
 531     }
 532     else
 533     {
 534         const size_t loopStart{buffer->mLoopStart};
 535         const size_t loopEnd{buffer->mLoopEnd};
 536         ASSUME(loopEnd > loopStart);
 537
 538         const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt
 539             : (((dataPosInt-loopStart)%(loopEnd-loopStart)) + loopStart)};
 540
 541         /* Load what's left of this loop iteration */
 542         const size_t remaining{std::min(voiceSamples.size(), loopEnd-dataPosInt)};
 543         LoadSamples(voiceSamples.first(remaining), buffer->mSamples, srcChannel, intPos,
 544             sampleType, srcStep, buffer->mBlockAlign);
 545         voiceSamples = voiceSamples.subspan(remaining);
 546
 547         /* Load repeats of the loop to fill the buffer. */
 548         const size_t loopSize{loopEnd - loopStart};
 549         while(const size_t toFill{std::min(voiceSamples.size(), loopSize)})
 550         {
 551             LoadSamples(voiceSamples.first(toFill), buffer->mSamples, srcChannel, loopStart,
 552                 sampleType, srcStep, buffer->mBlockAlign);
 553             voiceSamples = voiceSamples.subspan(toFill);
 554         }
 555     }
 556 }
 557
 558 void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt,
 559     const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel,
 560     const size_t srcStep, al::span<float> voiceSamples)
 561 {
 562     float lastSample{0.0f};
 563     if(numCallbackSamples > dataPosInt) LIKELY
 564     {
 565         const size_t remaining{std::min(voiceSamples.size(), numCallbackSamples-dataPosInt)};
 566         LoadSamples(voiceSamples.first(remaining), buffer->mSamples, srcChannel, dataPosInt,
 567             sampleType, srcStep, buffer->mBlockAlign);
 568         lastSample = voiceSamples[remaining-1];
 569         voiceSamples = voiceSamples.subspan(remaining);
 570     }
 571
 572     if(const size_t toFill{voiceSamples.size()})
 573         std::fill_n(voiceSamples.begin(), toFill, lastSample);
 574 }
 575
 576 void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 577     size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 578     const size_t srcStep, al::span<float> voiceSamples)
 579 {
 580     float lastSample{0.0f};
 581     /* Crawl the buffer queue to fill in the temp buffer */
 582     while(buffer && !voiceSamples.empty())
 583     {
 584         if(dataPosInt >= buffer->mSampleLen)
 585         {
 586             dataPosInt -= buffer->mSampleLen;
 587             buffer = buffer->mNext.load(std::memory_order_acquire);
 588             if(!buffer) buffer = bufferLoopItem;
 589             continue;
 590         }
 591
 592         const size_t remaining{std::min(voiceSamples.size(), buffer->mSampleLen-dataPosInt)};
 593         LoadSamples(voiceSamples.first(remaining), buffer->mSamples, srcChannel, dataPosInt,
 594             sampleType, srcStep, buffer->mBlockAlign);
 595
 596         lastSample = voiceSamples[remaining-1];
 597         voiceSamples = voiceSamples.subspan(remaining);
 598         if(voiceSamples.empty())
 599             break;
 600
 601         dataPosInt = 0;
 602         buffer = buffer->mNext.load(std::memory_order_acquire);
 603         if(!buffer) buffer = bufferLoopItem;
 604     }
 605     if(const size_t toFill{voiceSamples.size()})
 606         std::fill_n(voiceSamples.begin(), toFill, lastSample);
 607 }
 608
 609
 610 void DoHrtfMix(const al::span<const float> samples, DirectParams &parms, const float TargetGain,
 611     const size_t Counter, size_t OutPos, const bool IsPlaying, DeviceBase *Device)
 612 {
 613     const uint IrSize{Device->mIrSize};
 614     const auto HrtfSamples = al::span{Device->ExtraSampleData};
 615     const auto AccumSamples = al::span{Device->HrtfAccumData};
 616
 617     /* Copy the HRTF history and new input samples into a temp buffer. */
 618     auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
 619         HrtfSamples.begin());
 620     std::copy_n(samples.begin(), samples.size(), src_iter);
 621     /* Copy the last used samples back into the history buffer for later. */
 622     if(IsPlaying) LIKELY
 623     {
 624         const auto endsamples = HrtfSamples.subspan(samples.size(), parms.Hrtf.History.size());
 625         std::copy_n(endsamples.cbegin(), endsamples.size(), parms.Hrtf.History.begin());
 626     }
 627
 628     /* If fading and this is the first mixing pass, fade between the IRs. */
 629     size_t fademix{0};
 630     if(Counter && OutPos == 0)
 631     {
 632         fademix = std::min(samples.size(), Counter);
 633
 634         float gain{TargetGain};
 635
 636         /* The new coefficients need to fade in completely since they're
 637          * replacing the old ones. To keep the gain fading consistent,
 638          * interpolate between the old and new target gains given how much of
 639          * the fade time this mix handles.
 640          */
 641         if(Counter > fademix)
 642         {
 643             const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
 644             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 645         }
 646
 647         MixHrtfFilter hrtfparams{
 648             parms.Hrtf.Target.Coeffs,
 649             parms.Hrtf.Target.Delay,
 650             0.0f, gain / static_cast<float>(fademix)};
 651         MixHrtfBlendSamples(HrtfSamples, AccumSamples.subspan(OutPos), IrSize, &parms.Hrtf.Old,
 652             &hrtfparams, fademix);
 653
 654         /* Update the old parameters with the result. */
 655         parms.Hrtf.Old = parms.Hrtf.Target;
 656         parms.Hrtf.Old.Gain = gain;
 657         OutPos += fademix;
 658     }
 659
 660     if(fademix < samples.size())
 661     {
 662         const size_t todo{samples.size() - fademix};
 663         float gain{TargetGain};
 664
 665         /* Interpolate the target gain if the gain fading lasts longer than
 666          * this mix.
 667          */
 668         if(Counter > samples.size())
 669         {
 670             const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
 671             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 672         }
 673
 674         MixHrtfFilter hrtfparams{
 675             parms.Hrtf.Target.Coeffs,
 676             parms.Hrtf.Target.Delay,
 677             parms.Hrtf.Old.Gain,
 678             (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
 679         MixHrtfSamples(HrtfSamples.subspan(fademix), AccumSamples.subspan(OutPos), IrSize,
 680             &hrtfparams, todo);
 681
 682         /* Store the now-current gain for next time. */
 683         parms.Hrtf.Old.Gain = gain;
 684     }
 685 }
 686
 687 void DoNfcMix(const al::span<const float> samples, al::span<FloatBufferLine> OutBuffer,
 688     DirectParams &parms, const al::span<const float,MaxOutputChannels> OutGains,
 689     const uint Counter, const uint OutPos, DeviceBase *Device)
 690 {
 691     using FilterProc = void (NfcFilter::*)(const al::span<const float>, const al::span<float>);
 692     static constexpr std::array<FilterProc,MaxAmbiOrder+1> NfcProcess{{
 693         nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3}};
 694
 695     auto CurrentGains = al::span{parms.Gains.Current}.subspan(0);
 696     auto TargetGains = OutGains.subspan(0);
 697     MixSamples(samples, OutBuffer.first(1), CurrentGains, TargetGains, Counter, OutPos);
 698     OutBuffer = OutBuffer.subspan(1);
 699     CurrentGains = CurrentGains.subspan(1);
 700     TargetGains = TargetGains.subspan(1);
 701
 702     const auto nfcsamples = al::span{Device->ExtraSampleData}.subspan(samples.size());
 703     size_t order{1};
 704     while(const size_t chancount{Device->NumChannelsPerOrder[order]})
 705     {
 706         (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples);
 707         MixSamples(nfcsamples, OutBuffer.first(chancount), CurrentGains, TargetGains, Counter,
 708             OutPos);
 709         OutBuffer = OutBuffer.subspan(chancount);
 710         CurrentGains = CurrentGains.subspan(chancount);
 711         TargetGains = TargetGains.subspan(chancount);
 712         if(++order == MaxAmbiOrder+1)
 713             break;
 714     }
 715 }
 716
 717 } // namespace
 718
 719 void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime,
 720     const uint SamplesToDo)
 721 {
 722     static constexpr std::array<float,MaxOutputChannels> SilentTarget{};
 723
 724     ASSUME(SamplesToDo > 0);
 725
 726     DeviceBase *Device{Context->mDevice};
 727     const uint NumSends{Device->NumAuxSends};
 728
 729     /* Get voice info */
 730     int DataPosInt{mPosition.load(std::memory_order_relaxed)};
 731     uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
 732     VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
 733     VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
 734     const uint increment{mStep};
 735     if(increment < 1) UNLIKELY
 736     {
 737         /* If the voice is supposed to be stopping but can't be mixed, just
 738          * stop it before bailing.
 739          */
 740         if(vstate == Stopping)
 741             mPlayState.store(Stopped, std::memory_order_release);
 742         return;
 743     }
 744
 745     /* If the static voice's current position is beyond the buffer loop end
 746      * position, disable looping.
 747      */
 748     if(mFlags.test(VoiceIsStatic) && BufferLoopItem)
 749     {
 750         if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd)
 751             BufferLoopItem = nullptr;
 752     }
 753
 754     uint OutPos{0u};
 755
 756     /* Check if we're doing a delayed start, and we start in this update. */
 757     if(mStartTime > deviceTime) UNLIKELY
 758     {
 759         /* If the voice is supposed to be stopping but hasn't actually started
 760          * yet, make sure its stopped.
 761          */
 762         if(vstate == Stopping)
 763         {
 764             mPlayState.store(Stopped, std::memory_order_release);
 765             return;
 766         }
 767
 768         /* If the start time is too far ahead, don't bother. */
 769         auto diff = mStartTime - deviceTime;
 770         if(diff >= seconds{1})
 771             return;
 772
 773         /* Get the number of samples ahead of the current time that output
 774          * should start at. Skip this update if it's beyond the output sample
 775          * count.
 776          *
 777          * Round the start position to a multiple of 4, which some mixers want.
 778          * This makes the start time accurate to 4 samples. This could be made
 779          * sample-accurate by forcing non-SIMD functions on the first run.
 780          */
 781         seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()};
 782         sampleOffset = (sampleOffset+2) & ~seconds::rep{3};
 783         if(sampleOffset >= SamplesToDo)
 784             return;
 785
 786         OutPos = static_cast<uint>(sampleOffset);
 787     }
 788
 789     /* Calculate the number of samples to mix, and the number of (resampled)
 790      * samples that need to be loaded (mixing samples and decoder padding).
 791      */
 792     const uint samplesToMix{SamplesToDo - OutPos};
 793     const uint samplesToLoad{samplesToMix + mDecoderPadding};
 794
 795     /* Get a span of pointers to hold the floating point, deinterlaced,
 796      * resampled buffer data to be mixed.
 797      */
 798     std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers;
 799     const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()};
 800     {
 801         const uint channelStep{(samplesToLoad+3u)&~3u};
 802         auto base = Device->mSampleData.end() - MixingSamples.size()*channelStep;
 803         std::generate(MixingSamples.begin(), MixingSamples.end(), [&base,channelStep]
 804         {
 805             const auto ret = base;
 806             base += channelStep;
 807             return al::to_address(ret);
 808         });
 809     }
 810
 811     /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels
 812      * (3rd channel is generated from decoding). MonoDup only has 1 buffer
 813      * channel, but 2 mixing channels (2nd channel is just duplicated).
 814      */
 815     const size_t realChannels{(mFmtChannels == FmtMonoDup) ? 1u
 816         : (mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u
 817         : MixingSamples.size()};
 818     for(size_t chan{0};chan < realChannels;++chan)
 819     {
 820         static constexpr uint ResBufSize{std::tuple_size_v<decltype(DeviceBase::mResampleData)>};
 821         static constexpr uint srcSizeMax{ResBufSize - MaxResamplerEdge};
 822
 823         const al::span prevSamples{mPrevSamples[chan]};
 824         std::copy(prevSamples.cbegin(), prevSamples.cend(), Device->mResampleData.begin());
 825         const auto resampleBuffer = al::span{Device->mResampleData}.subspan<MaxResamplerEdge>();
 826         int intPos{DataPosInt};
 827         uint fracPos{DataPosFrac};
 828
 829         /* Load samples for this channel from the available buffer(s), with
 830          * resampling.
 831          */
 832         for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;)
 833         {
 834             /* Calculate the number of dst samples that can be loaded this
 835              * iteration, given the available resampler buffer size, and the
 836              * number of src samples that are needed to load it.
 837              */
 838             auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize)
 839             {
 840                 /* If ext=true, calculate the last written dst pos from the dst
 841                  * count, convert to the last read src pos, then add one to get
 842                  * the src count.
 843                  *
 844                  * If ext=false, convert the dst count to src count directly.
 845                  *
 846                  * Without this, the src count could be short by one when
 847                  * increment < 1.0, or not have a full src at the end when
 848                  * increment > 1.0.
 849                  */
 850                 const bool ext{increment <= MixerFracOne};
 851                 uint64_t dataSize64{dstBufferSize - ext};
 852                 dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits;
 853                 /* Also include resampler padding. */
 854                 dataSize64 += ext + MaxResamplerEdge;
 855
 856                 if(dataSize64 <= srcSizeMax)
 857                     return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64));
 858
 859                 /* If the source size got saturated, we can't fill the desired
 860                  * dst size. Figure out how many dst samples we can fill.
 861                  */
 862                 dataSize64 = srcSizeMax - MaxResamplerEdge;
 863                 dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment;
 864                 if(dataSize64 < dstBufferSize)
 865                 {
 866                     /* Some resamplers require the destination being 16-byte
 867                      * aligned, so limit to a multiple of 4 samples to maintain
 868                      * alignment if we need to do another iteration after this.
 869                      */
 870                     dstBufferSize = static_cast<uint>(dataSize64) & ~3u;
 871                 }
 872                 return std::make_pair(dstBufferSize, srcSizeMax);
 873             };
 874             const auto [dstBufferSize, srcBufferSize] = calc_buffer_sizes(
 875                 samplesToLoad - samplesLoaded);
 876
 877             size_t srcSampleDelay{0};
 878             if(intPos < 0) UNLIKELY
 879             {
 880                 /* If the current position is negative, there's that many
 881                  * silent samples to load before using the buffer.
 882                  */
 883                 srcSampleDelay = static_cast<uint>(-intPos);
 884                 if(srcSampleDelay >= srcBufferSize)
 885                 {
 886                     /* If the number of silent source samples exceeds the
 887                      * number to load, the output will be silent.
 888                      */
 889                     std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f);
 890                     std::fill_n(resampleBuffer.begin(), srcBufferSize, 0.0f);
 891                     goto skip_resample;
 892                 }
 893
 894                 std::fill_n(resampleBuffer.begin(), srcSampleDelay, 0.0f);
 895             }
 896
 897             /* Load the necessary samples from the given buffer(s). */
 898             if(!BufferListItem) UNLIKELY
 899             {
 900                 const uint avail{std::min(srcBufferSize, MaxResamplerEdge)};
 901                 const uint tofill{std::max(srcBufferSize, MaxResamplerEdge)};
 902                 const auto srcbuf = resampleBuffer.first(tofill);
 903
 904                 /* When loading from a voice that ended prematurely, only take
 905                  * the samples that get closest to 0 amplitude. This helps
 906                  * certain sounds fade out better.
 907                  */
 908                 auto srciter = std::min_element(srcbuf.begin(), srcbuf.begin()+ptrdiff_t(avail),
 909                     [](const float l, const float r) { return std::abs(l) < std::abs(r); });
 910
 911                 std::fill(srciter+1, srcbuf.end(), *srciter);
 912             }
 913             else if(mFlags.test(VoiceIsStatic))
 914             {
 915                 const auto uintPos = static_cast<uint>(std::max(intPos, 0));
 916                 const auto bufferSamples = resampleBuffer.subspan(srcSampleDelay,
 917                     srcBufferSize-srcSampleDelay);
 918                 LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 919                     mFrameStep, bufferSamples);
 920             }
 921             else if(mFlags.test(VoiceIsCallback))
 922             {
 923                 const auto uintPos = static_cast<uint>(std::max(intPos, 0));
 924                 const uint callbackBase{mCallbackBlockBase * mSamplesPerBlock};
 925                 const size_t bufferOffset{uintPos - callbackBase};
 926                 const size_t needSamples{bufferOffset + srcBufferSize - srcSampleDelay};
 927                 const size_t needBlocks{(needSamples + mSamplesPerBlock-1) / mSamplesPerBlock};
 928                 if(!mFlags.test(VoiceCallbackStopped) && needBlocks > mNumCallbackBlocks)
 929                 {
 930                     const size_t byteOffset{mNumCallbackBlocks*size_t{mBytesPerBlock}};
 931                     const size_t needBytes{(needBlocks-mNumCallbackBlocks)*size_t{mBytesPerBlock}};
 932
 933                     const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
 934                         &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
 935                     if(gotBytes < 0)
 936                         mFlags.set(VoiceCallbackStopped);
 937                     else if(static_cast<uint>(gotBytes) < needBytes)
 938                     {
 939                         mFlags.set(VoiceCallbackStopped);
 940                         mNumCallbackBlocks += static_cast<uint>(gotBytes) / mBytesPerBlock;
 941                     }
 942                     else
 943                         mNumCallbackBlocks = static_cast<uint>(needBlocks);
 944                 }
 945                 const size_t numSamples{size_t{mNumCallbackBlocks} * mSamplesPerBlock};
 946                 const auto bufferSamples = resampleBuffer.subspan(srcSampleDelay,
 947                     srcBufferSize-srcSampleDelay);
 948                 LoadBufferCallback(BufferListItem, bufferOffset, numSamples, mFmtType, chan,
 949                     mFrameStep, bufferSamples);
 950             }
 951             else
 952             {
 953                 const auto uintPos = static_cast<uint>(std::max(intPos, 0));
 954                 const auto bufferSamples = resampleBuffer.subspan(srcSampleDelay,
 955                     srcBufferSize-srcSampleDelay);
 956                 LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 957                     mFrameStep, bufferSamples);
 958             }
 959
 960             /* If there's a matching sample step and no phase offset, use a
 961              * simple copy for resampling.
 962              */
 963             if(increment == MixerFracOne && fracPos == 0)
 964                 std::copy_n(resampleBuffer.cbegin(), dstBufferSize,
 965                     MixingSamples[chan]+samplesLoaded);
 966             else
 967                 mResampler(&mResampleState, Device->mResampleData, fracPos, increment,
 968                     {MixingSamples[chan]+samplesLoaded, dstBufferSize});
 969
 970             /* Store the last source samples used for next time. */
 971             if(vstate == Playing) LIKELY
 972             {
 973                 /* Only store samples for the end of the mix, excluding what
 974                  * gets loaded for decoder padding.
 975                  */
 976                 const uint loadEnd{samplesLoaded + dstBufferSize};
 977                 if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) LIKELY
 978                 {
 979                     const size_t dstOffset{samplesToMix - samplesLoaded};
 980                     const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits};
 981                     std::copy_n(Device->mResampleData.cbegin()+srcOffset, prevSamples.size(),
 982                         prevSamples.begin());
 983                 }
 984             }
 985
 986         skip_resample:
 987             samplesLoaded += dstBufferSize;
 988             if(samplesLoaded < samplesToLoad)
 989             {
 990                 fracPos += dstBufferSize*increment;
 991                 const uint srcOffset{fracPos >> MixerFracBits};
 992                 fracPos &= MixerFracMask;
 993                 intPos += static_cast<int>(srcOffset);
 994
 995                 /* If more samples need to be loaded, copy the back of the
 996                  * resampleBuffer to the front to reuse it. prevSamples isn't
 997                  * reliable since it's only updated for the end of the mix.
 998                  */
 999                 std::copy_n(Device->mResampleData.cbegin()+srcOffset, MaxResamplerPadding,
1000                     Device->mResampleData.begin());
1001             }
1002         }
1003     }
1004     if(mFmtChannels == FmtMonoDup)
1005     {
1006         /* NOTE: a mono source shouldn't have a decoder or the VoiceIsAmbisonic
1007          * flag, so aliasing instead of copying to the second channel shouldn't
1008          * be a problem.
1009          */
1010         MixingSamples[1] = MixingSamples[0];
1011     }
1012     else for(auto &samples : MixingSamples.subspan(realChannels))
1013         std::fill_n(samples, samplesToLoad, 0.0f);
1014
1015     if(mDecoder)
1016         mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing));
1017
1018     if(mFlags.test(VoiceIsAmbisonic))
1019     {
1020         auto voiceSamples = MixingSamples.begin();
1021         for(auto &chandata : mChans)
1022         {
1023             chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix},
1024                 chandata.mAmbiHFScale, chandata.mAmbiLFScale);
1025             ++voiceSamples;
1026         }
1027     }
1028
1029     const uint Counter{mFlags.test(VoiceIsFading) ? std::min(samplesToMix, 64u) : 0u};
1030     if(!Counter)
1031     {
1032         /* No fading, just overwrite the old/current params. */
1033         for(auto &chandata : mChans)
1034         {
1035             {
1036                 DirectParams &parms = chandata.mDryParams;
1037                 if(!mFlags.test(VoiceHasHrtf))
1038                     parms.Gains.Current = parms.Gains.Target;
1039                 else
1040                     parms.Hrtf.Old = parms.Hrtf.Target;
1041             }
1042             for(uint send{0};send < NumSends;++send)
1043             {
1044                 if(mSend[send].Buffer.empty())
1045                     continue;
1046
1047                 SendParams &parms = chandata.mWetParams[send];
1048                 parms.Gains.Current = parms.Gains.Target;
1049             }
1050         }
1051     }
1052
1053     auto voiceSamples = MixingSamples.begin();
1054     for(auto &chandata : mChans)
1055     {
1056         /* Now filter and mix to the appropriate outputs. */
1057         const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
1058         {
1059             DirectParams &parms = chandata.mDryParams;
1060             const auto samples = DoFilters(parms.LowPass, parms.HighPass, FilterBuf,
1061                 {*voiceSamples, samplesToMix}, mDirect.FilterType);
1062
1063             if(mFlags.test(VoiceHasHrtf))
1064             {
1065                 const float TargetGain{parms.Hrtf.Target.Gain * float(vstate == Playing)};
1066                 DoHrtfMix(samples, parms, TargetGain, Counter, OutPos, (vstate == Playing),
1067                     Device);
1068             }
1069             else
1070             {
1071                 const auto TargetGains = (vstate == Playing) ? al::span{parms.Gains.Target}
1072                     : al::span{SilentTarget};
1073                 if(mFlags.test(VoiceHasNfc))
1074                     DoNfcMix(samples, mDirect.Buffer, parms, TargetGains, Counter, OutPos, Device);
1075                 else
1076                     MixSamples(samples, mDirect.Buffer, parms.Gains.Current, TargetGains, Counter,
1077                         OutPos);
1078             }
1079         }
1080
1081         for(uint send{0};send < NumSends;++send)
1082         {
1083             if(mSend[send].Buffer.empty())
1084                 continue;
1085
1086             SendParams &parms = chandata.mWetParams[send];
1087             const auto samples = DoFilters(parms.LowPass, parms.HighPass, FilterBuf,
1088                 {*voiceSamples, samplesToMix}, mSend[send].FilterType);
1089
1090             const auto TargetGains = (vstate == Playing) ? al::span{parms.Gains.Target}
1091                 : al::span{SilentTarget};
1092             MixSamples(samples, mSend[send].Buffer, parms.Gains.Current, TargetGains, Counter,
1093                 OutPos);
1094         }
1095
1096         ++voiceSamples;
1097     }
1098
1099     mFlags.set(VoiceIsFading);
1100
1101     /* Don't update positions and buffers if we were stopping. */
1102     if(vstate == Stopping) UNLIKELY
1103     {
1104         mPlayState.store(Stopped, std::memory_order_release);
1105         return;
1106     }
1107
1108     /* Update voice positions and buffers as needed. */
1109     DataPosFrac += increment*samplesToMix;
1110     DataPosInt  += static_cast<int>(DataPosFrac>>MixerFracBits);
1111     DataPosFrac &= MixerFracMask;
1112
1113     uint buffers_done{0u};
1114     if(BufferListItem && DataPosInt > 0) LIKELY
1115     {
1116         if(mFlags.test(VoiceIsStatic))
1117         {
1118             if(BufferLoopItem)
1119             {
1120                 /* Handle looping static source */
1121                 const uint LoopStart{BufferListItem->mLoopStart};
1122                 const uint LoopEnd{BufferListItem->mLoopEnd};
1123                 uint DataPosUInt{static_cast<uint>(DataPosInt)};
1124                 if(DataPosUInt >= LoopEnd)
1125                 {
1126                     assert(LoopEnd > LoopStart);
1127                     DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
1128                     DataPosInt = static_cast<int>(DataPosUInt);
1129                 }
1130             }
1131             else
1132             {
1133                 /* Handle non-looping static source */
1134                 if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen)
1135                     BufferListItem = nullptr;
1136             }
1137         }
1138         else if(mFlags.test(VoiceIsCallback))
1139         {
1140             /* Handle callback buffer source */
1141             const uint currentBlock{static_cast<uint>(DataPosInt) / mSamplesPerBlock};
1142             const uint blocksDone{currentBlock - mCallbackBlockBase};
1143             if(blocksDone < mNumCallbackBlocks)
1144             {
1145                 const size_t byteOffset{blocksDone*size_t{mBytesPerBlock}};
1146                 const size_t byteEnd{mNumCallbackBlocks*size_t{mBytesPerBlock}};
1147                 const al::span data{BufferListItem->mSamples};
1148                 std::copy(data.cbegin()+ptrdiff_t(byteOffset), data.cbegin()+ptrdiff_t(byteEnd),
1149                     data.begin());
1150                 mNumCallbackBlocks -= blocksDone;
1151                 mCallbackBlockBase += blocksDone;
1152             }
1153             else
1154             {
1155                 BufferListItem = nullptr;
1156                 mNumCallbackBlocks = 0;
1157                 mCallbackBlockBase += blocksDone;
1158             }
1159         }
1160         else
1161         {
1162             /* Handle streaming source */
1163             do {
1164                 if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt))
1165                     break;
1166
1167                 DataPosInt -= static_cast<int>(BufferListItem->mSampleLen);
1168
1169                 ++buffers_done;
1170                 BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
1171                 if(!BufferListItem) BufferListItem = BufferLoopItem;
1172             } while(BufferListItem);
1173         }
1174     }
1175
1176     /* Capture the source ID in case it gets reset for stopping. */
1177     const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
1178
1179     /* Update voice info */
1180     mPosition.store(DataPosInt, std::memory_order_relaxed);
1181     mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
1182     mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
1183     if(!BufferListItem)
1184     {
1185         mLoopBuffer.store(nullptr, std::memory_order_relaxed);
1186         mSourceID.store(0u, std::memory_order_relaxed);
1187     }
1188     std::atomic_thread_fence(std::memory_order_release);
1189
1190     /* Send any events now, after the position/buffer info was updated. */
1191     const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
1192     if(buffers_done > 0 && enabledevt.test(al::to_underlying(AsyncEnableBits::BufferCompleted)))
1193     {
1194         RingBuffer *ring{Context->mAsyncEvents.get()};
1195         auto evt_vec = ring->getWriteVector();
1196         if(evt_vec.first.len > 0)
1197         {
1198             auto &evt = InitAsyncEvent<AsyncBufferCompleteEvent>(evt_vec.first.buf);
1199             evt.mId = SourceID;
1200             evt.mCount = buffers_done;
1201             ring->writeAdvance(1);
1202         }
1203     }
1204
1205     if(!BufferListItem)
1206     {
1207         /* If the voice just ended, set it to Stopping so the next render
1208          * ensures any residual noise fades to 0 amplitude.
1209          */
1210         mPlayState.store(Stopping, std::memory_order_release);
1211         if(enabledevt.test(al::to_underlying(AsyncEnableBits::SourceState)))
1212             SendSourceStoppedEvent(Context, SourceID);
1213     }
1214 }
1215
1216 void Voice::prepare(DeviceBase *device)
1217 {
1218     /* Even if storing really high order ambisonics, we only mix channels for
1219      * orders up to the device order. The rest are simply dropped.
1220      */
1221     uint num_channels{(mFmtChannels == FmtMonoDup) ? 2
1222         : (mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 3
1223         : ChannelsFromFmt(mFmtChannels, std::min(mAmbiOrder, device->mAmbiOrder))};
1224     if(num_channels > device->MixerChannelsMax) UNLIKELY
1225     {
1226         ERR("Unexpected channel count: %u (limit: %zu, %s : %d)\n", num_channels,
1227             device->MixerChannelsMax, NameFromFormat(mFmtChannels), mAmbiOrder);
1228         num_channels = device->MixerChannelsMax;
1229     }
1230     if(mChans.capacity() > 2 && num_channels < mChans.capacity())
1231     {
1232         decltype(mChans){}.swap(mChans);
1233         decltype(mPrevSamples){}.swap(mPrevSamples);
1234     }
1235     mChans.reserve(std::max(2u, num_channels));
1236     mChans.resize(num_channels);
1237     mPrevSamples.reserve(std::max(2u, num_channels));
1238     mPrevSamples.resize(num_channels);
1239
1240     mDecoder = nullptr;
1241     mDecoderPadding = 0;
1242     if(mFmtChannels == FmtSuperStereo)
1243     {
1244         switch(UhjDecodeQuality)
1245         {
1246         case UhjQualityType::IIR:
1247             mDecoder = std::make_unique<UhjStereoDecoderIIR>();
1248             mDecoderPadding = UhjStereoDecoderIIR::sInputPadding;
1249             break;
1250         case UhjQualityType::FIR256:
1251             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>();
1252             mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding;
1253             break;
1254         case UhjQualityType::FIR512:
1255             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>();
1256             mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding;
1257             break;
1258         }
1259     }
1260     else if(IsUHJ(mFmtChannels))
1261     {
1262         switch(UhjDecodeQuality)
1263         {
1264         case UhjQualityType::IIR:
1265             mDecoder = std::make_unique<UhjDecoderIIR>();
1266             mDecoderPadding = UhjDecoderIIR::sInputPadding;
1267             break;
1268         case UhjQualityType::FIR256:
1269             mDecoder = std::make_unique<UhjDecoder<UhjLength256>>();
1270             mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding;
1271             break;
1272         case UhjQualityType::FIR512:
1273             mDecoder = std::make_unique<UhjDecoder<UhjLength512>>();
1274             mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding;
1275             break;
1276         }
1277     }
1278
1279     /* Clear the stepping value explicitly so the mixer knows not to mix this
1280      * until the update gets applied.
1281      */
1282     mStep = 0;
1283
1284     /* Make sure the sample history is cleared. */
1285     std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{});
1286
1287     if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder)
1288     {
1289         /* 2-channel UHJ needs different shelf filters. However, we can't just
1290          * use different shelf filters after mixing it, given any old speaker
1291          * setup the user has. To make this work, we apply the expected shelf
1292          * filters for decoding UHJ2 to quad (only needs LF scaling), and act
1293          * as if those 4 quad channels are encoded right back into B-Format.
1294          *
1295          * This isn't perfect, but without an entirely separate and limited
1296          * UHJ2 path, it's better than nothing.
1297          *
1298          * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is
1299          * identity, so don't mess with it).
1300          */
1301         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1302         for(auto &chandata : mChans)
1303         {
1304             chandata.mAmbiHFScale = 1.0f;
1305             chandata.mAmbiLFScale = 1.0f;
1306             chandata.mAmbiSplitter = splitter;
1307             chandata.mDryParams = DirectParams{};
1308             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1309             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1310         }
1311         mChans[0].mAmbiLFScale = DecoderBase::sWLFScale;
1312         mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale;
1313         mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale;
1314         mFlags.set(VoiceIsAmbisonic);
1315     }
1316     /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher
1317      * order than the voice. No HF scaling is necessary to mix it.
1318      */
1319     else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
1320     {
1321         auto OrdersSpan = Is2DAmbisonic(mFmtChannels)
1322             ? al::span<const uint8_t>{AmbiIndex::OrderFrom2DChannel}
1323             : al::span<const uint8_t>{AmbiIndex::OrderFromChannel};
1324         auto OrderFromChan = OrdersSpan.cbegin();
1325         const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
1326             device->m2DMixing);
1327
1328         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1329         for(auto &chandata : mChans)
1330         {
1331             chandata.mAmbiHFScale = scales[*(OrderFromChan++)];
1332             chandata.mAmbiLFScale = 1.0f;
1333             chandata.mAmbiSplitter = splitter;
1334             chandata.mDryParams = DirectParams{};
1335             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1336             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1337         }
1338         mFlags.set(VoiceIsAmbisonic);
1339     }
1340     else
1341     {
1342         for(auto &chandata : mChans)
1343         {
1344             chandata.mDryParams = DirectParams{};
1345             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1346             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1347         }
1348         mFlags.reset(VoiceIsAmbisonic);
1349     }
1350 }