core/voice.cpp

   1
   2 #include "config.h"
   3
   4 #include "voice.h"
   5
   6 #include <algorithm>
   7 #include <array>
   8 #include <atomic>
   9 #include <cassert>
  10 #include <climits>
  11 #include <cstdint>
  12 #include <iterator>
  13 #include <memory>
  14 #include <new>
  15 #include <stdlib.h>
  16 #include <utility>
  17 #include <vector>
  18
  19 #include "albyte.h"
  20 #include "alnumeric.h"
  21 #include "aloptional.h"
  22 #include "alspan.h"
  23 #include "alstring.h"
  24 #include "ambidefs.h"
  25 #include "async_event.h"
  26 #include "buffer_storage.h"
  27 #include "context.h"
  28 #include "cpu_caps.h"
  29 #include "devformat.h"
  30 #include "device.h"
  31 #include "filters/biquad.h"
  32 #include "filters/nfc.h"
  33 #include "filters/splitter.h"
  34 #include "fmt_traits.h"
  35 #include "logging.h"
  36 #include "mixer.h"
  37 #include "mixer/defs.h"
  38 #include "mixer/hrtfdefs.h"
  39 #include "opthelpers.h"
  40 #include "resampler_limits.h"
  41 #include "ringbuffer.h"
  42 #include "vector.h"
  43 #include "voice_change.h"
  44
  45 struct CTag;
  46 #ifdef HAVE_SSE
  47 struct SSETag;
  48 #endif
  49 #ifdef HAVE_NEON
  50 struct NEONTag;
  51 #endif
  52
  53
  54 static_assert(!(sizeof(DeviceBase::MixerBufferLine)&15),
  55     "DeviceBase::MixerBufferLine must be a multiple of 16 bytes");
  56 static_assert(!(MaxResamplerEdge&3), "MaxResamplerEdge is not a multiple of 4");
  57
  58 static_assert((BufferLineSize-1)/MaxPitch > 0, "MaxPitch is too large for BufferLineSize!");
  59 static_assert((INT_MAX>>MixerFracBits)/MaxPitch > BufferLineSize,
  60     "MaxPitch and/or BufferLineSize are too large for MixerFracBits!");
  61
  62 Resampler ResamplerDefault{Resampler::Cubic};
  63
  64 namespace {
  65
  66 using uint = unsigned int;
  67 using namespace std::chrono;
  68
  69 using HrtfMixerFunc = void(*)(const float *InSamples, float2 *AccumSamples, const uint IrSize,
  70     const MixHrtfFilter *hrtfparams, const size_t BufferSize);
  71 using HrtfMixerBlendFunc = void(*)(const float *InSamples, float2 *AccumSamples,
  72     const uint IrSize, const HrtfFilter *oldparams, const MixHrtfFilter *newparams,
  73     const size_t BufferSize);
  74
  75 HrtfMixerFunc MixHrtfSamples{MixHrtf_<CTag>};
  76 HrtfMixerBlendFunc MixHrtfBlendSamples{MixHrtfBlend_<CTag>};
  77
  78 inline MixerOutFunc SelectMixer()
  79 {
  80 #ifdef HAVE_NEON
  81     if((CPUCapFlags&CPU_CAP_NEON))
  82         return Mix_<NEONTag>;
  83 #endif
  84 #ifdef HAVE_SSE
  85     if((CPUCapFlags&CPU_CAP_SSE))
  86         return Mix_<SSETag>;
  87 #endif
  88     return Mix_<CTag>;
  89 }
  90
  91 inline MixerOneFunc SelectMixerOne()
  92 {
  93 #ifdef HAVE_NEON
  94     if((CPUCapFlags&CPU_CAP_NEON))
  95         return Mix_<NEONTag>;
  96 #endif
  97 #ifdef HAVE_SSE
  98     if((CPUCapFlags&CPU_CAP_SSE))
  99         return Mix_<SSETag>;
 100 #endif
 101     return Mix_<CTag>;
 102 }
 103
 104 inline HrtfMixerFunc SelectHrtfMixer()
 105 {
 106 #ifdef HAVE_NEON
 107     if((CPUCapFlags&CPU_CAP_NEON))
 108         return MixHrtf_<NEONTag>;
 109 #endif
 110 #ifdef HAVE_SSE
 111     if((CPUCapFlags&CPU_CAP_SSE))
 112         return MixHrtf_<SSETag>;
 113 #endif
 114     return MixHrtf_<CTag>;
 115 }
 116
 117 inline HrtfMixerBlendFunc SelectHrtfBlendMixer()
 118 {
 119 #ifdef HAVE_NEON
 120     if((CPUCapFlags&CPU_CAP_NEON))
 121         return MixHrtfBlend_<NEONTag>;
 122 #endif
 123 #ifdef HAVE_SSE
 124     if((CPUCapFlags&CPU_CAP_SSE))
 125         return MixHrtfBlend_<SSETag>;
 126 #endif
 127     return MixHrtfBlend_<CTag>;
 128 }
 129
 130 } // namespace
 131
 132 void Voice::InitMixer(al::optional<std::string> resampler)
 133 {
 134     if(resampler)
 135     {
 136         struct ResamplerEntry {
 137             const char name[16];
 138             const Resampler resampler;
 139         };
 140         constexpr ResamplerEntry ResamplerList[]{
 141             { "none", Resampler::Point },
 142             { "point", Resampler::Point },
 143             { "linear", Resampler::Linear },
 144             { "cubic", Resampler::Cubic },
 145             { "bsinc12", Resampler::BSinc12 },
 146             { "fast_bsinc12", Resampler::FastBSinc12 },
 147             { "bsinc24", Resampler::BSinc24 },
 148             { "fast_bsinc24", Resampler::FastBSinc24 },
 149         };
 150
 151         const char *str{resampler->c_str()};
 152         if(al::strcasecmp(str, "bsinc") == 0)
 153         {
 154             WARN("Resampler option \"%s\" is deprecated, using bsinc12\n", str);
 155             str = "bsinc12";
 156         }
 157         else if(al::strcasecmp(str, "sinc4") == 0 || al::strcasecmp(str, "sinc8") == 0)
 158         {
 159             WARN("Resampler option \"%s\" is deprecated, using cubic\n", str);
 160             str = "cubic";
 161         }
 162
 163         auto iter = std::find_if(std::begin(ResamplerList), std::end(ResamplerList),
 164             [str](const ResamplerEntry &entry) -> bool
 165             { return al::strcasecmp(str, entry.name) == 0; });
 166         if(iter == std::end(ResamplerList))
 167             ERR("Invalid resampler: %s\n", str);
 168         else
 169             ResamplerDefault = iter->resampler;
 170     }
 171
 172     MixSamplesOut = SelectMixer();
 173     MixSamplesOne = SelectMixerOne();
 174     MixHrtfBlendSamples = SelectHrtfBlendMixer();
 175     MixHrtfSamples = SelectHrtfMixer();
 176 }
 177
 178
 179 namespace {
 180
 181 void SendSourceStoppedEvent(ContextBase *context, uint id)
 182 {
 183     RingBuffer *ring{context->mAsyncEvents.get()};
 184     auto evt_vec = ring->getWriteVector();
 185     if(evt_vec.first.len < 1) return;
 186
 187     AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 188         AsyncEvent::SourceStateChange)};
 189     evt->u.srcstate.id = id;
 190     evt->u.srcstate.state = AsyncEvent::SrcState::Stop;
 191
 192     ring->writeAdvance(1);
 193 }
 194
 195
 196 const float *DoFilters(BiquadFilter &lpfilter, BiquadFilter &hpfilter, float *dst,
 197     const al::span<const float> src, int type)
 198 {
 199     switch(type)
 200     {
 201     case AF_None:
 202         lpfilter.clear();
 203         hpfilter.clear();
 204         break;
 205
 206     case AF_LowPass:
 207         lpfilter.process(src, dst);
 208         hpfilter.clear();
 209         return dst;
 210     case AF_HighPass:
 211         lpfilter.clear();
 212         hpfilter.process(src, dst);
 213         return dst;
 214
 215     case AF_BandPass:
 216         DualBiquad{lpfilter, hpfilter}.process(src, dst);
 217         return dst;
 218     }
 219     return src.data();
 220 }
 221
 222
 223 template<FmtType Type>
 224 inline void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
 225     const size_t srcOffset, const size_t srcStep, const size_t samples) noexcept
 226 {
 227     constexpr size_t sampleSize{sizeof(typename al::FmtTypeTraits<Type>::Type)};
 228     auto s = src + (srcOffset*srcStep + srcChan)*sampleSize;
 229
 230     al::LoadSampleArray<Type>(dstSamples, s, srcStep, samples);
 231 }
 232
 233 void LoadSamples(float *dstSamples, const al::byte *src, const size_t srcChan,
 234     const size_t srcOffset, const FmtType srcType, const size_t srcStep, const size_t samples)
 235     noexcept
 236 {
 237 #define HANDLE_FMT(T) case T:                                                 \
 238     LoadSamples<T>(dstSamples, src, srcChan, srcOffset, srcStep, samples);    \
 239     break
 240
 241     switch(srcType)
 242     {
 243     HANDLE_FMT(FmtUByte);
 244     HANDLE_FMT(FmtShort);
 245     HANDLE_FMT(FmtFloat);
 246     HANDLE_FMT(FmtDouble);
 247     HANDLE_FMT(FmtMulaw);
 248     HANDLE_FMT(FmtAlaw);
 249     }
 250 #undef HANDLE_FMT
 251 }
 252
 253 void LoadBufferStatic(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 254     const size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 255     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
 256     float *voiceSamples)
 257 {
 258     if(!bufferLoopItem)
 259     {
 260         /* Load what's left to play from the buffer */
 261         if(buffer->mSampleLen > dataPosInt) [[likely]]
 262         {
 263             const size_t buffer_remaining{buffer->mSampleLen - dataPosInt};
 264             const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer_remaining)};
 265             LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 266                 sampleType, srcStep, remaining);
 267             samplesLoaded += remaining;
 268         }
 269
 270         if(const size_t toFill{samplesToLoad - samplesLoaded})
 271         {
 272             auto srcsamples = voiceSamples + samplesLoaded;
 273             std::fill_n(srcsamples, toFill, *(srcsamples-1));
 274         }
 275     }
 276     else
 277     {
 278         const size_t loopStart{buffer->mLoopStart};
 279         const size_t loopEnd{buffer->mLoopEnd};
 280         ASSUME(loopEnd > loopStart);
 281
 282         const size_t intPos{(dataPosInt < loopEnd) ? dataPosInt
 283             : (((dataPosInt-loopStart)%(loopEnd-loopStart)) + loopStart)};
 284
 285         /* Load what's left of this loop iteration */
 286         const size_t remaining{minz(samplesToLoad-samplesLoaded, loopEnd-dataPosInt)};
 287         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, intPos, sampleType,
 288             srcStep, remaining);
 289         samplesLoaded += remaining;
 290
 291         /* Load repeats of the loop to fill the buffer. */
 292         const size_t loopSize{loopEnd - loopStart};
 293         while(const size_t toFill{minz(samplesToLoad - samplesLoaded, loopSize)})
 294         {
 295             LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, loopStart,
 296                 sampleType, srcStep, toFill);
 297             samplesLoaded += toFill;
 298         }
 299     }
 300 }
 301
 302 void LoadBufferCallback(VoiceBufferItem *buffer, const size_t dataPosInt,
 303     const size_t numCallbackSamples, const FmtType sampleType, const size_t srcChannel,
 304     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad, float *voiceSamples)
 305 {
 306     /* Load what's left to play from the buffer */
 307     if(numCallbackSamples > dataPosInt) [[likely]]
 308     {
 309         const size_t remaining{minz(samplesToLoad-samplesLoaded, numCallbackSamples-dataPosInt)};
 310         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 311             sampleType, srcStep, remaining);
 312         samplesLoaded += remaining;
 313     }
 314
 315     if(const size_t toFill{samplesToLoad - samplesLoaded})
 316     {
 317         auto srcsamples = voiceSamples + samplesLoaded;
 318         std::fill_n(srcsamples, toFill, *(srcsamples-1));
 319     }
 320 }
 321
 322 void LoadBufferQueue(VoiceBufferItem *buffer, VoiceBufferItem *bufferLoopItem,
 323     size_t dataPosInt, const FmtType sampleType, const size_t srcChannel,
 324     const size_t srcStep, size_t samplesLoaded, const size_t samplesToLoad,
 325     float *voiceSamples)
 326 {
 327     /* Crawl the buffer queue to fill in the temp buffer */
 328     while(buffer && samplesLoaded != samplesToLoad)
 329     {
 330         if(dataPosInt >= buffer->mSampleLen)
 331         {
 332             dataPosInt -= buffer->mSampleLen;
 333             buffer = buffer->mNext.load(std::memory_order_acquire);
 334             if(!buffer) buffer = bufferLoopItem;
 335             continue;
 336         }
 337
 338         const size_t remaining{minz(samplesToLoad-samplesLoaded, buffer->mSampleLen-dataPosInt)};
 339         LoadSamples(voiceSamples+samplesLoaded, buffer->mSamples, srcChannel, dataPosInt,
 340             sampleType, srcStep, remaining);
 341
 342         samplesLoaded += remaining;
 343         if(samplesLoaded == samplesToLoad)
 344             break;
 345
 346         dataPosInt = 0;
 347         buffer = buffer->mNext.load(std::memory_order_acquire);
 348         if(!buffer) buffer = bufferLoopItem;
 349     }
 350     if(const size_t toFill{samplesToLoad - samplesLoaded})
 351     {
 352         auto srcsamples = voiceSamples + samplesLoaded;
 353         std::fill_n(srcsamples, toFill, *(srcsamples-1));
 354     }
 355 }
 356
 357
 358 void DoHrtfMix(const float *samples, const uint DstBufferSize, DirectParams &parms,
 359     const float TargetGain, const uint Counter, uint OutPos, const bool IsPlaying,
 360     DeviceBase *Device)
 361 {
 362     const uint IrSize{Device->mIrSize};
 363     auto &HrtfSamples = Device->HrtfSourceData;
 364     auto &AccumSamples = Device->HrtfAccumData;
 365
 366     /* Copy the HRTF history and new input samples into a temp buffer. */
 367     auto src_iter = std::copy(parms.Hrtf.History.begin(), parms.Hrtf.History.end(),
 368         std::begin(HrtfSamples));
 369     std::copy_n(samples, DstBufferSize, src_iter);
 370     /* Copy the last used samples back into the history buffer for later. */
 371     if(IsPlaying) [[likely]]
 372         std::copy_n(std::begin(HrtfSamples) + DstBufferSize, parms.Hrtf.History.size(),
 373             parms.Hrtf.History.begin());
 374
 375     /* If fading and this is the first mixing pass, fade between the IRs. */
 376     uint fademix{0u};
 377     if(Counter && OutPos == 0)
 378     {
 379         fademix = minu(DstBufferSize, Counter);
 380
 381         float gain{TargetGain};
 382
 383         /* The new coefficients need to fade in completely since they're
 384          * replacing the old ones. To keep the gain fading consistent,
 385          * interpolate between the old and new target gains given how much of
 386          * the fade time this mix handles.
 387          */
 388         if(Counter > fademix)
 389         {
 390             const float a{static_cast<float>(fademix) / static_cast<float>(Counter)};
 391             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 392         }
 393
 394         MixHrtfFilter hrtfparams{
 395             parms.Hrtf.Target.Coeffs,
 396             parms.Hrtf.Target.Delay,
 397             0.0f, gain / static_cast<float>(fademix)};
 398         MixHrtfBlendSamples(HrtfSamples, AccumSamples+OutPos, IrSize, &parms.Hrtf.Old, &hrtfparams,
 399             fademix);
 400
 401         /* Update the old parameters with the result. */
 402         parms.Hrtf.Old = parms.Hrtf.Target;
 403         parms.Hrtf.Old.Gain = gain;
 404         OutPos += fademix;
 405     }
 406
 407     if(fademix < DstBufferSize)
 408     {
 409         const uint todo{DstBufferSize - fademix};
 410         float gain{TargetGain};
 411
 412         /* Interpolate the target gain if the gain fading lasts longer than
 413          * this mix.
 414          */
 415         if(Counter > DstBufferSize)
 416         {
 417             const float a{static_cast<float>(todo) / static_cast<float>(Counter-fademix)};
 418             gain = lerpf(parms.Hrtf.Old.Gain, TargetGain, a);
 419         }
 420
 421         MixHrtfFilter hrtfparams{
 422             parms.Hrtf.Target.Coeffs,
 423             parms.Hrtf.Target.Delay,
 424             parms.Hrtf.Old.Gain,
 425             (gain - parms.Hrtf.Old.Gain) / static_cast<float>(todo)};
 426         MixHrtfSamples(HrtfSamples+fademix, AccumSamples+OutPos, IrSize, &hrtfparams, todo);
 427
 428         /* Store the now-current gain for next time. */
 429         parms.Hrtf.Old.Gain = gain;
 430     }
 431 }
 432
 433 void DoNfcMix(const al::span<const float> samples, FloatBufferLine *OutBuffer, DirectParams &parms,
 434     const float *TargetGains, const uint Counter, const uint OutPos, DeviceBase *Device)
 435 {
 436     using FilterProc = void (NfcFilter::*)(const al::span<const float>, float*);
 437     static constexpr FilterProc NfcProcess[MaxAmbiOrder+1]{
 438         nullptr, &NfcFilter::process1, &NfcFilter::process2, &NfcFilter::process3};
 439
 440     float *CurrentGains{parms.Gains.Current.data()};
 441     MixSamples(samples, {OutBuffer, 1u}, CurrentGains, TargetGains, Counter, OutPos);
 442     ++OutBuffer;
 443     ++CurrentGains;
 444     ++TargetGains;
 445
 446     const al::span<float> nfcsamples{Device->NfcSampleData, samples.size()};
 447     size_t order{1};
 448     while(const size_t chancount{Device->NumChannelsPerOrder[order]})
 449     {
 450         (parms.NFCtrlFilter.*NfcProcess[order])(samples, nfcsamples.data());
 451         MixSamples(nfcsamples, {OutBuffer, chancount}, CurrentGains, TargetGains, Counter, OutPos);
 452         OutBuffer += chancount;
 453         CurrentGains += chancount;
 454         TargetGains += chancount;
 455         if(++order == MaxAmbiOrder+1)
 456             break;
 457     }
 458 }
 459
 460 } // namespace
 461
 462 void Voice::mix(const State vstate, ContextBase *Context, const nanoseconds deviceTime,
 463     const uint SamplesToDo)
 464 {
 465     static constexpr std::array<float,MAX_OUTPUT_CHANNELS> SilentTarget{};
 466
 467     ASSUME(SamplesToDo > 0);
 468
 469     DeviceBase *Device{Context->mDevice};
 470     const uint NumSends{Device->NumAuxSends};
 471
 472     /* Get voice info */
 473     int DataPosInt{mPosition.load(std::memory_order_relaxed)};
 474     uint DataPosFrac{mPositionFrac.load(std::memory_order_relaxed)};
 475     VoiceBufferItem *BufferListItem{mCurrentBuffer.load(std::memory_order_relaxed)};
 476     VoiceBufferItem *BufferLoopItem{mLoopBuffer.load(std::memory_order_relaxed)};
 477     const uint increment{mStep};
 478     if(increment < 1) [[unlikely]]
 479     {
 480         /* If the voice is supposed to be stopping but can't be mixed, just
 481          * stop it before bailing.
 482          */
 483         if(vstate == Stopping)
 484             mPlayState.store(Stopped, std::memory_order_release);
 485         return;
 486     }
 487
 488     /* If the static voice's current position is beyond the buffer loop end
 489      * position, disable looping.
 490      */
 491     if(mFlags.test(VoiceIsStatic) && BufferLoopItem)
 492     {
 493         if(DataPosInt >= 0 && static_cast<uint>(DataPosInt) >= BufferListItem->mLoopEnd)
 494             BufferLoopItem = nullptr;
 495     }
 496
 497     uint OutPos{0u};
 498
 499     /* Check if we're doing a delayed start, and we start in this update. */
 500     if(mStartTime > deviceTime)
 501     {
 502         /* If the start time is too far ahead, don't bother. */
 503         auto diff = mStartTime - deviceTime;
 504         if(diff >= seconds{1})
 505             return;
 506
 507         /* Get the number of samples ahead of the current time that output
 508          * should start at. Skip this update if it's beyond the output sample
 509          * count.
 510          *
 511          * Round the start position to a multiple of 4, which some mixers want.
 512          * This makes the start time accurate to 4 samples. This could be made
 513          * sample-accurate by forcing non-SIMD functions on the first run.
 514          */
 515         seconds::rep sampleOffset{duration_cast<seconds>(diff * Device->Frequency).count()};
 516         sampleOffset = (sampleOffset+2) & ~seconds::rep{3};
 517         if(sampleOffset >= SamplesToDo)
 518             return;
 519
 520         OutPos = static_cast<uint>(sampleOffset);
 521     }
 522
 523     /* Calculate the number of samples to mix, and the number of (resampled)
 524      * samples that need to be loaded (mixing samples and decoder padding).
 525      */
 526     const uint samplesToMix{SamplesToDo - OutPos};
 527     const uint samplesToLoad{samplesToMix + mDecoderPadding};
 528
 529     /* Get a span of pointers to hold the floating point, deinterlaced,
 530      * resampled buffer data.
 531      */
 532     std::array<float*,DeviceBase::MixerChannelsMax> SamplePointers;
 533     const al::span<float*> MixingSamples{SamplePointers.data(), mChans.size()};
 534     auto get_bufferline = [](DeviceBase::MixerBufferLine &bufline) noexcept -> float*
 535     { return bufline.data(); };
 536     std::transform(Device->mSampleData.end() - mChans.size(), Device->mSampleData.end(),
 537         MixingSamples.begin(), get_bufferline);
 538
 539     /* If there's a matching sample step and no phase offset, use a simple copy
 540      * for resampling.
 541      */
 542     const ResamplerFunc Resample{(increment == MixerFracOne && DataPosFrac == 0)
 543         ? ResamplerFunc{[](const InterpState*, const float *RESTRICT src, uint, const uint,
 544             const al::span<float> dst) { std::copy_n(src, dst.size(), dst.begin()); }}
 545         : mResampler};
 546
 547     /* UHJ2 and SuperStereo only have 2 buffer channels, but 3 mixing channels
 548      * (3rd channel is generated from decoding).
 549      */
 550     const size_t realChannels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 2u
 551         : MixingSamples.size()};
 552     for(size_t chan{0};chan < realChannels;++chan)
 553     {
 554         const auto prevSamples = al::as_span(mPrevSamples[chan]);
 555         const auto resampleBuffer = std::copy(prevSamples.cbegin(), prevSamples.cend(),
 556             Device->mResampleData.begin()) - MaxResamplerEdge;
 557         const uint callbackBase{static_cast<uint>(maxi(DataPosInt, 0))};
 558         int intPos{DataPosInt};
 559         uint fracPos{DataPosFrac};
 560
 561         /* Load samples for this channel from the available buffer(s), with
 562          * resampling.
 563          */
 564         for(uint samplesLoaded{0};samplesLoaded < samplesToLoad;)
 565         {
 566             using ResampleBufferType = decltype(DeviceBase::mResampleData);
 567             static constexpr uint srcSizeMax{ResampleBufferType{}.size() - MaxResamplerEdge};
 568
 569             /* Calculate the number of dst samples that can be loaded this
 570              * iteration, given the available resampler buffer size.
 571              */
 572             auto calc_buffer_sizes = [fracPos,increment](uint dstBufferSize)
 573             {
 574                 /* If ext=true, calculate the last written dst pos from the dst
 575                  * count, convert to the last read src pos, then add one to get
 576                  * the src count.
 577                  *
 578                  * If ext=false, convert the dst count to src count directly.
 579                  *
 580                  * Without this, the src count could be short by one when
 581                  * increment < 1.0, or not have a full src at the end when
 582                  * increment > 1.0.
 583                  */
 584                 const bool ext{increment <= MixerFracOne};
 585                 uint64_t dataSize64{dstBufferSize - ext};
 586                 dataSize64 = (dataSize64*increment + fracPos) >> MixerFracBits;
 587                 /* Also include resampler padding. */
 588                 dataSize64 += ext + MaxResamplerEdge;
 589
 590                 if(dataSize64 <= srcSizeMax)
 591                     return std::make_pair(dstBufferSize, static_cast<uint>(dataSize64));
 592
 593                 /* If the source size got saturated, we can't fill the desired
 594                  * dst size. Figure out how many dst samples we can fill.
 595                  */
 596                 dataSize64 = srcSizeMax - MaxResamplerEdge;
 597                 dataSize64 = ((dataSize64<<MixerFracBits) - fracPos) / increment;
 598                 if(dataSize64 < dstBufferSize)
 599                 {
 600                     /* Some resamplers require the destination being 16-byte
 601                      * aligned, so limit to a multiple of 4 samples to maintain
 602                      * alignment.
 603                      */
 604                     dstBufferSize = static_cast<uint>(dataSize64) & ~3u;
 605                 }
 606                 return std::make_pair(dstBufferSize, srcSizeMax);
 607             };
 608             const auto bufferSizes = calc_buffer_sizes(samplesToLoad - samplesLoaded);
 609             const auto dstBufferSize = bufferSizes.first;
 610             const auto srcBufferSize = bufferSizes.second;
 611
 612             /* Load the necessary samples from the given buffer(s). */
 613             if(!BufferListItem)
 614             {
 615                 const uint avail{minu(srcBufferSize, MaxResamplerEdge)};
 616                 const uint tofill{maxu(srcBufferSize, MaxResamplerEdge)};
 617
 618                 /* When loading from a voice that ended prematurely, only take
 619                  * the samples that get closest to 0 amplitude. This helps
 620                  * certain sounds fade out better.
 621                  */
 622                 auto abs_lt = [](const float lhs, const float rhs) noexcept -> bool
 623                 { return std::abs(lhs) < std::abs(rhs); };
 624                 auto srciter = std::min_element(resampleBuffer, resampleBuffer+avail, abs_lt);
 625
 626                 std::fill(srciter+1, resampleBuffer+tofill, *srciter);
 627             }
 628             else
 629             {
 630                 size_t srcSampleDelay{0};
 631                 if(intPos < 0) [[unlikely]]
 632                 {
 633                     /* If the current position is negative, there's that many
 634                      * silent samples to load before using the buffer.
 635                      */
 636                     srcSampleDelay = static_cast<uint>(-intPos);
 637                     if(srcSampleDelay >= srcBufferSize)
 638                     {
 639                         /* If the number of silent source samples exceeds the
 640                          * number to load, the output will be silent.
 641                          */
 642                         std::fill_n(MixingSamples[chan]+samplesLoaded, dstBufferSize, 0.0f);
 643                         std::fill_n(resampleBuffer, srcBufferSize, 0.0f);
 644                         goto skip_resample;
 645                     }
 646
 647                     std::fill_n(resampleBuffer, srcSampleDelay, 0.0f);
 648                 }
 649                 const uint uintPos{static_cast<uint>(maxi(intPos, 0))};
 650
 651                 if(mFlags.test(VoiceIsStatic))
 652                     LoadBufferStatic(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 653                         mFrameStep, srcSampleDelay, srcBufferSize, resampleBuffer);
 654                 else if(mFlags.test(VoiceIsCallback))
 655                 {
 656                     const size_t bufferOffset{uintPos - callbackBase};
 657                     const size_t getTotal{bufferOffset + srcBufferSize - srcSampleDelay};
 658                     if(!mFlags.test(VoiceCallbackStopped) && getTotal > mNumCallbackSamples)
 659                     {
 660                         const size_t byteOffset{mNumCallbackSamples*mFrameSize};
 661                         const size_t needBytes{getTotal*mFrameSize - byteOffset};
 662
 663                         const int gotBytes{BufferListItem->mCallback(BufferListItem->mUserData,
 664                             &BufferListItem->mSamples[byteOffset], static_cast<int>(needBytes))};
 665                         if(gotBytes < 0)
 666                             mFlags.set(VoiceCallbackStopped);
 667                         else if(static_cast<uint>(gotBytes) < needBytes)
 668                         {
 669                             mFlags.set(VoiceCallbackStopped);
 670                             mNumCallbackSamples += static_cast<uint>(gotBytes) / mFrameSize;
 671                         }
 672                         else
 673                             mNumCallbackSamples = static_cast<uint>(getTotal);
 674                     }
 675                     LoadBufferCallback(BufferListItem, bufferOffset, mNumCallbackSamples,
 676                         mFmtType, chan, mFrameStep, srcSampleDelay, srcBufferSize, resampleBuffer);
 677                 }
 678                 else
 679                     LoadBufferQueue(BufferListItem, BufferLoopItem, uintPos, mFmtType, chan,
 680                         mFrameStep, srcSampleDelay, srcBufferSize, resampleBuffer);
 681             }
 682
 683             Resample(&mResampleState, resampleBuffer, fracPos, increment,
 684                 {MixingSamples[chan]+samplesLoaded, dstBufferSize});
 685
 686             /* Store the last source samples used for next time. */
 687             if(vstate == Playing) [[likely]]
 688             {
 689                 /* Only store samples for the end of the mix, excluding what
 690                  * gets loaded for decoder padding.
 691                  */
 692                 const uint loadEnd{samplesLoaded + dstBufferSize};
 693                 if(samplesToMix > samplesLoaded && samplesToMix <= loadEnd) [[likely]]
 694                 {
 695                     const size_t dstOffset{samplesToMix - samplesLoaded};
 696                     const size_t srcOffset{(dstOffset*increment + fracPos) >> MixerFracBits};
 697                     std::copy_n(resampleBuffer-MaxResamplerEdge+srcOffset, prevSamples.size(),
 698                         prevSamples.begin());
 699                 }
 700             }
 701
 702         skip_resample:
 703             samplesLoaded += dstBufferSize;
 704             if(samplesLoaded < samplesToLoad)
 705             {
 706                 fracPos += dstBufferSize*increment;
 707                 const uint srcOffset{fracPos >> MixerFracBits};
 708                 fracPos &= MixerFracMask;
 709                 intPos += srcOffset;
 710
 711                 /* If more samples need to be loaded, copy the back of the
 712                  * resampleBuffer to the front to reuse it. prevSamples isn't
 713                  * reliable since it's only updated for the end of the mix.
 714                  */
 715                 std::copy(resampleBuffer-MaxResamplerEdge+srcOffset,
 716                     resampleBuffer+MaxResamplerEdge+srcOffset, resampleBuffer-MaxResamplerEdge);
 717             }
 718         }
 719     }
 720     for(auto &samples : MixingSamples.subspan(realChannels))
 721         std::fill_n(samples, samplesToLoad, 0.0f);
 722
 723     if(mDecoder)
 724         mDecoder->decode(MixingSamples, samplesToMix, (vstate==Playing) ? samplesToMix : 0);
 725
 726     if(mFlags.test(VoiceIsAmbisonic))
 727     {
 728         auto voiceSamples = MixingSamples.begin();
 729         for(auto &chandata : mChans)
 730         {
 731             chandata.mAmbiSplitter.processScale({*voiceSamples, samplesToMix},
 732                 chandata.mAmbiHFScale, chandata.mAmbiLFScale);
 733             ++voiceSamples;
 734         }
 735     }
 736
 737     const uint Counter{mFlags.test(VoiceIsFading) ? minu(samplesToMix, 64u) : 0u};
 738     if(!Counter)
 739     {
 740         /* No fading, just overwrite the old/current params. */
 741         for(auto &chandata : mChans)
 742         {
 743             {
 744                 DirectParams &parms = chandata.mDryParams;
 745                 if(!mFlags.test(VoiceHasHrtf))
 746                     parms.Gains.Current = parms.Gains.Target;
 747                 else
 748                     parms.Hrtf.Old = parms.Hrtf.Target;
 749             }
 750             for(uint send{0};send < NumSends;++send)
 751             {
 752                 if(mSend[send].Buffer.empty())
 753                     continue;
 754
 755                 SendParams &parms = chandata.mWetParams[send];
 756                 parms.Gains.Current = parms.Gains.Target;
 757             }
 758         }
 759     }
 760
 761     auto voiceSamples = MixingSamples.begin();
 762     for(auto &chandata : mChans)
 763     {
 764         /* Now filter and mix to the appropriate outputs. */
 765         const al::span<float,BufferLineSize> FilterBuf{Device->FilteredData};
 766         {
 767             DirectParams &parms = chandata.mDryParams;
 768             const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
 769                 {*voiceSamples, samplesToMix}, mDirect.FilterType)};
 770
 771             if(mFlags.test(VoiceHasHrtf))
 772             {
 773                 const float TargetGain{parms.Hrtf.Target.Gain * (vstate == Playing)};
 774                 DoHrtfMix(samples, samplesToMix, parms, TargetGain, Counter, OutPos,
 775                     (vstate == Playing), Device);
 776             }
 777             else
 778             {
 779                 const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
 780                     : SilentTarget.data()};
 781                 if(mFlags.test(VoiceHasNfc))
 782                     DoNfcMix({samples, samplesToMix}, mDirect.Buffer.data(), parms,
 783                         TargetGains, Counter, OutPos, Device);
 784                 else
 785                     MixSamples({samples, samplesToMix}, mDirect.Buffer,
 786                         parms.Gains.Current.data(), TargetGains, Counter, OutPos);
 787             }
 788         }
 789
 790         for(uint send{0};send < NumSends;++send)
 791         {
 792             if(mSend[send].Buffer.empty())
 793                 continue;
 794
 795             SendParams &parms = chandata.mWetParams[send];
 796             const float *samples{DoFilters(parms.LowPass, parms.HighPass, FilterBuf.data(),
 797                 {*voiceSamples, samplesToMix}, mSend[send].FilterType)};
 798
 799             const float *TargetGains{(vstate == Playing) ? parms.Gains.Target.data()
 800                 : SilentTarget.data()};
 801             MixSamples({samples, samplesToMix}, mSend[send].Buffer,
 802                 parms.Gains.Current.data(), TargetGains, Counter, OutPos);
 803         }
 804
 805         ++voiceSamples;
 806     }
 807
 808     mFlags.set(VoiceIsFading);
 809
 810     /* Don't update positions and buffers if we were stopping. */
 811     if(vstate == Stopping) [[unlikely]]
 812     {
 813         mPlayState.store(Stopped, std::memory_order_release);
 814         return;
 815     }
 816
 817     /* Update positions */
 818     DataPosFrac += increment*samplesToMix;
 819     const uint SrcSamplesDone{DataPosFrac>>MixerFracBits};
 820     DataPosInt  += SrcSamplesDone;
 821     DataPosFrac &= MixerFracMask;
 822
 823     /* Update voice positions and buffers as needed. */
 824     uint buffers_done{0u};
 825     if(BufferListItem && DataPosInt >= 0) [[likely]]
 826     {
 827         if(mFlags.test(VoiceIsStatic))
 828         {
 829             if(BufferLoopItem)
 830             {
 831                 /* Handle looping static source */
 832                 const uint LoopStart{BufferListItem->mLoopStart};
 833                 const uint LoopEnd{BufferListItem->mLoopEnd};
 834                 uint DataPosUInt{static_cast<uint>(DataPosInt)};
 835                 if(DataPosUInt >= LoopEnd)
 836                 {
 837                     assert(LoopEnd > LoopStart);
 838                     DataPosUInt = ((DataPosUInt-LoopStart)%(LoopEnd-LoopStart)) + LoopStart;
 839                     DataPosInt = static_cast<int>(DataPosUInt);
 840                 }
 841             }
 842             else
 843             {
 844                 /* Handle non-looping static source */
 845                 if(static_cast<uint>(DataPosInt) >= BufferListItem->mSampleLen)
 846                     BufferListItem = nullptr;
 847             }
 848         }
 849         else if(mFlags.test(VoiceIsCallback))
 850         {
 851             /* Handle callback buffer source */
 852             if(SrcSamplesDone < mNumCallbackSamples)
 853             {
 854                 const size_t byteOffset{SrcSamplesDone*mFrameSize};
 855                 const size_t byteEnd{mNumCallbackSamples*mFrameSize};
 856                 al::byte *data{BufferListItem->mSamples};
 857                 std::copy(data+byteOffset, data+byteEnd, data);
 858                 mNumCallbackSamples -= SrcSamplesDone;
 859             }
 860             else
 861             {
 862                 BufferListItem = nullptr;
 863                 mNumCallbackSamples = 0;
 864             }
 865         }
 866         else
 867         {
 868             /* Handle streaming source */
 869             do {
 870                 if(BufferListItem->mSampleLen > static_cast<uint>(DataPosInt))
 871                     break;
 872
 873                 DataPosInt -= BufferListItem->mSampleLen;
 874
 875                 ++buffers_done;
 876                 BufferListItem = BufferListItem->mNext.load(std::memory_order_relaxed);
 877                 if(!BufferListItem) BufferListItem = BufferLoopItem;
 878             } while(BufferListItem);
 879         }
 880     }
 881
 882     /* Capture the source ID in case it gets reset for stopping. */
 883     const uint SourceID{mSourceID.load(std::memory_order_relaxed)};
 884
 885     /* Update voice info */
 886     mPosition.store(DataPosInt, std::memory_order_relaxed);
 887     mPositionFrac.store(DataPosFrac, std::memory_order_relaxed);
 888     mCurrentBuffer.store(BufferListItem, std::memory_order_relaxed);
 889     if(!BufferListItem)
 890     {
 891         mLoopBuffer.store(nullptr, std::memory_order_relaxed);
 892         mSourceID.store(0u, std::memory_order_relaxed);
 893     }
 894     std::atomic_thread_fence(std::memory_order_release);
 895
 896     /* Send any events now, after the position/buffer info was updated. */
 897     const auto enabledevt = Context->mEnabledEvts.load(std::memory_order_acquire);
 898     if(buffers_done > 0 && enabledevt.test(AsyncEvent::BufferCompleted))
 899     {
 900         RingBuffer *ring{Context->mAsyncEvents.get()};
 901         auto evt_vec = ring->getWriteVector();
 902         if(evt_vec.first.len > 0)
 903         {
 904             AsyncEvent *evt{al::construct_at(reinterpret_cast<AsyncEvent*>(evt_vec.first.buf),
 905                 AsyncEvent::BufferCompleted)};
 906             evt->u.bufcomp.id = SourceID;
 907             evt->u.bufcomp.count = buffers_done;
 908             ring->writeAdvance(1);
 909         }
 910     }
 911
 912     if(!BufferListItem)
 913     {
 914         /* If the voice just ended, set it to Stopping so the next render
 915          * ensures any residual noise fades to 0 amplitude.
 916          */
 917         mPlayState.store(Stopping, std::memory_order_release);
 918         if(enabledevt.test(AsyncEvent::SourceStateChange))
 919             SendSourceStoppedEvent(Context, SourceID);
 920     }
 921 }
 922
 923 void Voice::prepare(DeviceBase *device)
 924 {
 925     /* Even if storing really high order ambisonics, we only mix channels for
 926      * orders up to the device order. The rest are simply dropped.
 927      */
 928     uint num_channels{(mFmtChannels == FmtUHJ2 || mFmtChannels == FmtSuperStereo) ? 3 :
 929         ChannelsFromFmt(mFmtChannels, minu(mAmbiOrder, device->mAmbiOrder))};
 930     if(num_channels > device->mSampleData.size()) [[unlikely]]
 931     {
 932         ERR("Unexpected channel count: %u (limit: %zu, %d:%d)\n", num_channels,
 933             device->mSampleData.size(), mFmtChannels, mAmbiOrder);
 934         num_channels = static_cast<uint>(device->mSampleData.size());
 935     }
 936     if(mChans.capacity() > 2 && num_channels < mChans.capacity())
 937     {
 938         decltype(mChans){}.swap(mChans);
 939         decltype(mPrevSamples){}.swap(mPrevSamples);
 940     }
 941     mChans.reserve(maxu(2, num_channels));
 942     mChans.resize(num_channels);
 943     mPrevSamples.reserve(maxu(2, num_channels));
 944     mPrevSamples.resize(num_channels);
 945
 946     mDecoder = nullptr;
 947     mDecoderPadding = 0;
 948     if(mFmtChannels == FmtSuperStereo)
 949     {
 950         switch(UhjDecodeQuality)
 951         {
 952         case UhjQualityType::IIR:
 953             mDecoder = std::make_unique<UhjStereoDecoderIIR>();
 954             mDecoderPadding = UhjStereoDecoderIIR::sInputPadding;
 955             break;
 956         case UhjQualityType::FIR256:
 957             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength256>>();
 958             mDecoderPadding = UhjStereoDecoder<UhjLength256>::sInputPadding;
 959             break;
 960         case UhjQualityType::FIR512:
 961             mDecoder = std::make_unique<UhjStereoDecoder<UhjLength512>>();
 962             mDecoderPadding = UhjStereoDecoder<UhjLength512>::sInputPadding;
 963             break;
 964         }
 965     }
 966     else if(IsUHJ(mFmtChannels))
 967     {
 968         switch(UhjDecodeQuality)
 969         {
 970         case UhjQualityType::IIR:
 971             mDecoder = std::make_unique<UhjDecoderIIR>();
 972             mDecoderPadding = UhjDecoderIIR::sInputPadding;
 973             break;
 974         case UhjQualityType::FIR256:
 975             mDecoder = std::make_unique<UhjDecoder<UhjLength256>>();
 976             mDecoderPadding = UhjDecoder<UhjLength256>::sInputPadding;
 977             break;
 978         case UhjQualityType::FIR512:
 979             mDecoder = std::make_unique<UhjDecoder<UhjLength512>>();
 980             mDecoderPadding = UhjDecoder<UhjLength512>::sInputPadding;
 981             break;
 982         }
 983     }
 984
 985     /* Clear the stepping value explicitly so the mixer knows not to mix this
 986      * until the update gets applied.
 987      */
 988     mStep = 0;
 989
 990     /* Make sure the sample history is cleared. */
 991     std::fill(mPrevSamples.begin(), mPrevSamples.end(), HistoryLine{});
 992
 993     if(mFmtChannels == FmtUHJ2 && !device->mUhjEncoder)
 994     {
 995         /* 2-channel UHJ needs different shelf filters. However, we can't just
 996          * use different shelf filters after mixing it, given any old speaker
 997          * setup the user has. To make this work, we apply the expected shelf
 998          * filters for decoding UHJ2 to quad (only needs LF scaling), and act
 999          * as if those 4 quad channels are encoded right back into B-Format.
1000          *
1001          * This isn't perfect, but without an entirely separate and limited
1002          * UHJ2 path, it's better than nothing.
1003          *
1004          * Note this isn't needed with UHJ output (UHJ2->B-Format->UHJ2 is
1005          * identity, so don't mess with it).
1006          */
1007         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1008         for(auto &chandata : mChans)
1009         {
1010             chandata.mAmbiHFScale = 1.0f;
1011             chandata.mAmbiLFScale = 1.0f;
1012             chandata.mAmbiSplitter = splitter;
1013             chandata.mDryParams = DirectParams{};
1014             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1015             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1016         }
1017         mChans[0].mAmbiLFScale = DecoderBase::sWLFScale;
1018         mChans[1].mAmbiLFScale = DecoderBase::sXYLFScale;
1019         mChans[2].mAmbiLFScale = DecoderBase::sXYLFScale;
1020         mFlags.set(VoiceIsAmbisonic);
1021     }
1022     /* Don't need to set the VoiceIsAmbisonic flag if the device is not higher
1023      * order than the voice. No HF scaling is necessary to mix it.
1024      */
1025     else if(mAmbiOrder && device->mAmbiOrder > mAmbiOrder)
1026     {
1027         const uint8_t *OrderFromChan{Is2DAmbisonic(mFmtChannels) ?
1028             AmbiIndex::OrderFrom2DChannel().data() : AmbiIndex::OrderFromChannel().data()};
1029         const auto scales = AmbiScale::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder,
1030             device->m2DMixing);
1031
1032         const BandSplitter splitter{device->mXOverFreq / static_cast<float>(device->Frequency)};
1033         for(auto &chandata : mChans)
1034         {
1035             chandata.mAmbiHFScale = scales[*(OrderFromChan++)];
1036             chandata.mAmbiLFScale = 1.0f;
1037             chandata.mAmbiSplitter = splitter;
1038             chandata.mDryParams = DirectParams{};
1039             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1040             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1041         }
1042         mFlags.set(VoiceIsAmbisonic);
1043     }
1044     else
1045     {
1046         for(auto &chandata : mChans)
1047         {
1048             chandata.mDryParams = DirectParams{};
1049             chandata.mDryParams.NFCtrlFilter = device->mNFCtrlFilter;
1050             std::fill_n(chandata.mWetParams.begin(), device->NumAuxSends, SendParams{});
1051         }
1052         mFlags.reset(VoiceIsAmbisonic);
1053     }
1054 }