alc/effects/convolution.cpp

   1
   2 #include "config.h"
   3
   4 #include "AL/al.h"
   5 #include "AL/alc.h"
   6
   7 #include "al/auxeffectslot.h"
   8 #include "alcmain.h"
   9 #include "alcomplex.h"
  10 #include "alcontext.h"
  11 #include "almalloc.h"
  12 #include "alspan.h"
  13 #include "ambidefs.h"
  14 #include "bformatdec.h"
  15 #include "buffer_storage.h"
  16 #include "effects/base.h"
  17 #include "filters/splitter.h"
  18 #include "fmt_traits.h"
  19 #include "logging.h"
  20 #include "polyphase_resampler.h"
  21
  22
  23 namespace {
  24
  25 /* Convolution reverb is implemented using a segmented overlap-add method. The
  26  * impulse response is broken up into multiple segments of 512 samples, and
  27  * each segment has an FFT applied with a 1024-sample buffer (the latter half
  28  * left silent) to get its frequency-domain response. The resulting response
  29  * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
  30  *
  31  * Input samples are similarly broken up into 512-sample segments, with an FFT
  32  * applied to each new incoming segment to get its 513 bins. A history of FFT'd
  33  * input segments is maintained, equal to the length of the impulse response.
  34  *
  35  * To apply the reverberation, each impulse response segment is convolved with
  36  * its paired input segment (using complex multiplies, far cheaper than FIRs),
  37  * accumulating into a 1024-bin FFT buffer. The input history is then shifted
  38  * to align with later impulse response segments for next time.
  39  *
  40  * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
  41  * sample time-domain response for output, which is split in two halves. The
  42  * first half is the 512-sample output, and the second half is a 512-sample
  43  * (really, 511) delayed extension, which gets added to the output next time.
  44  * Convolving two time-domain responses of lengths N and M results in a time-
  45  * domain signal of length N+M-1, and this holds true regardless of the
  46  * convolution being applied in the frequency domain, so these "overflow"
  47  * samples need to be accounted for.
  48  *
  49  * Limitations:
  50  * There is currently a 512-sample delay on the output, as a result of needing
  51  * to collect that many input samples to do an FFT with. This can be fixed by
  52  * excluding the first impulse response segment from being FFT'd, and applying
  53  * it directly in the time domain. This will have higher CPU consumption, but
  54  * it won't have to wait before generating output.
  55  */
  56
  57
  58 void LoadSamples(double *RESTRICT dst, const al::byte *src, const size_t srcstep, FmtType srctype,
  59     const size_t samples) noexcept
  60 {
  61 #define HANDLE_FMT(T)  case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
  62     switch(srctype)
  63     {
  64     HANDLE_FMT(FmtUByte);
  65     HANDLE_FMT(FmtShort);
  66     HANDLE_FMT(FmtFloat);
  67     HANDLE_FMT(FmtDouble);
  68     HANDLE_FMT(FmtMulaw);
  69     HANDLE_FMT(FmtAlaw);
  70     }
  71 #undef HANDLE_FMT
  72 }
  73
  74
  75 auto GetAmbiScales(AmbiScaling scaletype) noexcept -> const std::array<float,MAX_AMBI_CHANNELS>&
  76 {
  77     if(scaletype == AmbiScaling::FuMa) return AmbiScale::FromFuMa;
  78     if(scaletype == AmbiScaling::SN3D) return AmbiScale::FromSN3D;
  79     return AmbiScale::FromN3D;
  80 }
  81
  82 auto GetAmbiLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI_CHANNELS>&
  83 {
  84     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa;
  85     return AmbiIndex::FromACN;
  86 }
  87
  88 auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI2D_CHANNELS>&
  89 {
  90     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D;
  91     return AmbiIndex::From2D;
  92 }
  93
  94
  95 using complex_d = std::complex<double>;
  96
  97 constexpr size_t ConvolveUpdateSize{1024};
  98 constexpr size_t ConvolveUpdateSamples{ConvolveUpdateSize / 2};
  99
 100 struct ConvolutionFilter final : public EffectBufferBase {
 101     FmtChannels mChannels{};
 102     AmbiLayout mAmbiLayout{};
 103     AmbiScaling mAmbiScaling{};
 104     ALuint mAmbiOrder{};
 105
 106     size_t mFifoPos{0};
 107     al::vector<std::array<double,ConvolveUpdateSamples*2>,16> mOutput;
 108     alignas(16) std::array<complex_d,ConvolveUpdateSize> mFftBuffer{};
 109
 110     size_t mCurrentSegment{0};
 111     size_t mNumConvolveSegs{0};
 112
 113     struct ChannelData {
 114         alignas(16) FloatBufferLine mBuffer{};
 115         float mHfScale{};
 116         BandSplitter mFilter{};
 117         float Current[MAX_OUTPUT_CHANNELS]{};
 118         float Target[MAX_OUTPUT_CHANNELS]{};
 119     };
 120     using ChannelDataArray = al::FlexArray<ChannelData>;
 121     std::unique_ptr<ChannelDataArray> mChans;
 122     std::unique_ptr<complex_d[]> mComplexData;
 123
 124     ConvolutionFilter(size_t numChannels) : mChans{ChannelDataArray::Create(numChannels)}
 125     { }
 126
 127     bool init(const ALCdevice *device, const BufferStorage &buffer);
 128
 129     void NormalMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
 130     void UpsampleMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
 131     void (ConvolutionFilter::*mMix)(const al::span<FloatBufferLine>,const size_t)
 132     {&ConvolutionFilter::NormalMix};
 133
 134     void update(al::span<FloatBufferLine> &outTarget, const ALCcontext *context,
 135         const ALeffectslot *slot, const EffectProps *props, const EffectTarget target);
 136     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
 137         const al::span<FloatBufferLine> samplesOut);
 138
 139     DEF_NEWDEL(ConvolutionFilter)
 140 };
 141
 142 bool ConvolutionFilter::init(const ALCdevice *device, const BufferStorage &buffer)
 143 {
 144     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 145
 146     /* FIXME: Support anything. */
 147     if(buffer.mChannels != FmtMono && buffer.mChannels != FmtStereo
 148         && buffer.mChannels != FmtBFormat2D && buffer.mChannels != FmtBFormat3D)
 149         return false;
 150     if((buffer.mChannels == FmtBFormat2D || buffer.mChannels == FmtBFormat3D)
 151         && buffer.mAmbiOrder > 1)
 152         return false;
 153
 154     /* The impulse response needs to have the same sample rate as the input and
 155      * output. The bsinc24 resampler is decent, but there is high-frequency
 156      * attenation that some people may be able to pick up on. Since this is
 157      * very infrequent called, go ahead and use the polyphase resampler.
 158      */
 159     PPhaseResampler resampler;
 160     if(device->Frequency != buffer.mSampleRate)
 161         resampler.init(buffer.mSampleRate, device->Frequency);
 162     const auto resampledCount = static_cast<ALuint>(
 163         (uint64_t{buffer.mSampleLen}*device->Frequency + (buffer.mSampleRate-1)) /
 164         buffer.mSampleRate);
 165
 166     auto bytesPerSample = BytesFromFmt(buffer.mType);
 167     auto realChannels = ChannelsFromFmt(buffer.mChannels, buffer.mAmbiOrder);
 168     auto numChannels = mChans->size();
 169
 170     const BandSplitter splitter{400.0f / static_cast<float>(device->Frequency)};
 171     for(auto &e : *mChans)
 172         e.mFilter = splitter;
 173
 174     mOutput.resize(numChannels, {});
 175
 176     /* Calculate the number of segments needed to hold the impulse response and
 177      * the input history (rounded up), and allocate them.
 178      */
 179     mNumConvolveSegs = (resampledCount+(ConvolveUpdateSamples-1)) / ConvolveUpdateSamples;
 180
 181     const size_t complex_length{mNumConvolveSegs * m * (numChannels+1)};
 182     mComplexData = std::make_unique<complex_d[]>(complex_length);
 183     std::fill_n(mComplexData.get(), complex_length, complex_d{});
 184
 185     mChannels = buffer.mChannels;
 186     mAmbiLayout = buffer.mAmbiLayout;
 187     mAmbiScaling = buffer.mAmbiScaling;
 188     mAmbiOrder = buffer.mAmbiOrder;
 189
 190     auto fftbuffer = std::make_unique<std::array<complex_d,ConvolveUpdateSize>>();
 191     auto srcsamples = std::make_unique<double[]>(maxz(buffer.mSampleLen, resampledCount));
 192     complex_d *filteriter = mComplexData.get() + mNumConvolveSegs*m;
 193     for(size_t c{0};c < numChannels;++c)
 194     {
 195         /* Load the samples from the buffer, and resample to match the device. */
 196         LoadSamples(srcsamples.get(), buffer.mData.data() + bytesPerSample*c, realChannels,
 197             buffer.mType, buffer.mSampleLen);
 198         if(device->Frequency != buffer.mSampleRate)
 199             resampler.process(buffer.mSampleLen, srcsamples.get(), resampledCount,
 200                 srcsamples.get());
 201
 202         size_t done{0};
 203         for(size_t s{0};s < mNumConvolveSegs;++s)
 204         {
 205             const size_t todo{minz(resampledCount-done, ConvolveUpdateSamples)};
 206
 207             auto iter = std::copy_n(&srcsamples[done], todo, fftbuffer->begin());
 208             done += todo;
 209             std::fill(iter, fftbuffer->end(), complex_d{});
 210
 211             complex_fft(*fftbuffer, -1.0);
 212             filteriter = std::copy_n(fftbuffer->cbegin(), m, filteriter);
 213         }
 214     }
 215     return true;
 216 }
 217
 218 void ConvolutionFilter::NormalMix(const al::span<FloatBufferLine> samplesOut,
 219     const size_t samplesToDo)
 220 {
 221     for(auto &chan : *mChans)
 222         MixSamples({chan.mBuffer.data(), samplesToDo}, samplesOut, chan.Current, chan.Target,
 223             samplesToDo, 0);
 224 }
 225
 226 void ConvolutionFilter::UpsampleMix(const al::span<FloatBufferLine> samplesOut,
 227     const size_t samplesToDo)
 228 {
 229     for(auto &chan : *mChans)
 230     {
 231         const al::span<float> src{chan.mBuffer.data(), samplesToDo};
 232         chan.mFilter.processHfScale(src, chan.mHfScale);
 233         MixSamples(src, samplesOut, chan.Current, chan.Target, samplesToDo, 0);
 234     }
 235 }
 236
 237 void ConvolutionFilter::update(al::span<FloatBufferLine> &outTarget, const ALCcontext *context,
 238     const ALeffectslot *slot, const EffectProps* /*props*/, const EffectTarget target)
 239 {
 240     ALCdevice *device{context->mDevice.get()};
 241     mMix = &ConvolutionFilter::NormalMix;
 242
 243     /* The iFFT'd response is scaled up by the number of bins, so apply the
 244      * inverse to the output mixing gain.
 245      */
 246     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 247     const float gain{slot->Params.Gain * (1.0f/m)};
 248     auto &chans = *mChans;
 249     if(mChannels == FmtBFormat3D || mChannels == FmtBFormat2D)
 250     {
 251         if(device->mAmbiOrder > mAmbiOrder)
 252         {
 253             mMix = &ConvolutionFilter::UpsampleMix;
 254             const auto scales = BFormatDec::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder);
 255             chans[0].mHfScale = scales[0];
 256             for(size_t i{1};i < chans.size();++i)
 257                 chans[i].mHfScale = scales[1];
 258         }
 259         outTarget = target.Main->Buffer;
 260
 261         const auto &scales = GetAmbiScales(mAmbiScaling);
 262         const uint8_t *index_map{(mChannels == FmtBFormat2D) ?
 263             GetAmbi2DLayout(mAmbiLayout).data() :
 264             GetAmbiLayout(mAmbiLayout).data()};
 265
 266         std::array<float,MAX_AMBI_CHANNELS> coeffs{};
 267         for(size_t c{0u};c < chans.size();++c)
 268         {
 269             const size_t acn{index_map[c]};
 270             coeffs[acn] = scales[acn];
 271             ComputePanGains(target.Main, coeffs.data(), gain, chans[c].Target);
 272             coeffs[acn] = 0.0f;
 273         }
 274     }
 275     else if(mChannels == FmtStereo)
 276     {
 277         /* TODO: Add a "direct channels" setting for this effect? */
 278         const ALuint lidx{!target.RealOut ? INVALID_CHANNEL_INDEX :
 279             GetChannelIdxByName(*target.RealOut, FrontLeft)};
 280         const ALuint ridx{!target.RealOut ? INVALID_CHANNEL_INDEX :
 281             GetChannelIdxByName(*target.RealOut, FrontRight)};
 282         if(lidx != INVALID_CHANNEL_INDEX && ridx != INVALID_CHANNEL_INDEX)
 283         {
 284             outTarget = target.RealOut->Buffer;
 285             chans[0].Target[lidx] = gain;
 286             chans[1].Target[ridx] = gain;
 287         }
 288         else
 289         {
 290             const auto lcoeffs = CalcDirectionCoeffs({-1.0f, 0.0f, 0.0f}, 0.0f);
 291             const auto rcoeffs = CalcDirectionCoeffs({ 1.0f, 0.0f, 0.0f}, 0.0f);
 292
 293             outTarget = target.Main->Buffer;
 294             ComputePanGains(target.Main, lcoeffs.data(), gain, chans[0].Target);
 295             ComputePanGains(target.Main, rcoeffs.data(), gain, chans[1].Target);
 296         }
 297     }
 298     else if(mChannels == FmtMono)
 299     {
 300         const auto coeffs = CalcDirectionCoeffs({0.0f, 0.0f, -1.0f}, 0.0f);
 301
 302         outTarget = target.Main->Buffer;
 303         ComputePanGains(target.Main, coeffs.data(), gain, chans[0].Target);
 304     }
 305 }
 306
 307 void ConvolutionFilter::process(const size_t samplesToDo,
 308     const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
 309 {
 310     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 311     size_t curseg{mCurrentSegment};
 312     auto &chans = *mChans;
 313
 314     for(size_t base{0u};base < samplesToDo;)
 315     {
 316         const size_t todo{minz(ConvolveUpdateSamples-mFifoPos, samplesToDo-base)};
 317
 318         /* Retrieve the output samples from the FIFO and fill in the new input
 319          * samples.
 320          */
 321         for(size_t c{0};c < chans.size();++c)
 322         {
 323             auto fifo_iter = mOutput[c].begin() + mFifoPos;
 324             std::transform(fifo_iter, fifo_iter+todo, chans[c].mBuffer.begin()+base,
 325                 [](double d) noexcept -> float { return static_cast<float>(d); });
 326         }
 327
 328         std::copy_n(samplesIn[0].begin()+base, todo, mFftBuffer.begin()+mFifoPos);
 329         mFifoPos += todo;
 330         base += todo;
 331
 332         /* Check whether FIFO buffer is filled with new samples. */
 333         if(mFifoPos < ConvolveUpdateSamples) break;
 334         mFifoPos = 0;
 335
 336         /* Calculate the frequency domain response and add the relevant
 337          * frequency bins to the input history.
 338          */
 339         complex_fft(mFftBuffer, -1.0);
 340
 341         std::copy_n(mFftBuffer.begin(), m, &mComplexData[curseg*m]);
 342         mFftBuffer.fill(complex_d{});
 343
 344         const complex_d *RESTRICT filter{mComplexData.get() + mNumConvolveSegs*m};
 345         for(size_t c{0};c < chans.size();++c)
 346         {
 347             /* Convolve each input segment with its IR filter counterpart
 348              * (aligned in time).
 349              */
 350             const complex_d *RESTRICT input{&mComplexData[curseg*m]};
 351             for(size_t s{curseg};s < mNumConvolveSegs;++s)
 352             {
 353                 for(size_t i{0};i < m;++i,++input,++filter)
 354                     mFftBuffer[i] += *input * *filter;
 355             }
 356             input = mComplexData.get();
 357             for(size_t s{0};s < curseg;++s)
 358             {
 359                 for(size_t i{0};i < m;++i,++input,++filter)
 360                     mFftBuffer[i] += *input * *filter;
 361             }
 362
 363             /* Apply iFFT to get the 1024 (really 1023) samples for output. The
 364              * 512 output samples are combined with the last output's 511
 365              * second-half samples (and this output's second half is
 366              * subsequently saved for next time).
 367              */
 368             complex_fft(mFftBuffer, 1.0);
 369
 370             for(size_t i{0};i < ConvolveUpdateSamples;++i)
 371                 mOutput[c][i] = mFftBuffer[i].real() + mOutput[c][ConvolveUpdateSamples+i];
 372             for(size_t i{0};i < ConvolveUpdateSamples;++i)
 373                 mOutput[c][ConvolveUpdateSamples+i] = mFftBuffer[ConvolveUpdateSamples+i].real();
 374             mFftBuffer.fill(complex_d{});
 375         }
 376
 377         /* Shift the input history. */
 378         curseg = curseg ? (curseg-1) : (mNumConvolveSegs-1);
 379     }
 380     mCurrentSegment = curseg;
 381
 382     /* Finally, mix to the output. */
 383     (this->*mMix)(samplesOut, samplesToDo);
 384 }
 385
 386
 387 struct ConvolutionState final : public EffectState {
 388     ConvolutionFilter *mFilter{};
 389
 390     ConvolutionState() = default;
 391     ~ConvolutionState() override = default;
 392
 393     void deviceUpdate(const ALCdevice *device) override;
 394     EffectBufferBase *createBuffer(const ALCdevice *device, const BufferStorage &buffer) override;
 395     void update(const ALCcontext *context, const ALeffectslot *slot, const EffectProps *props, const EffectTarget target) override;
 396     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut) override;
 397
 398     DEF_NEWDEL(ConvolutionState)
 399 };
 400
 401 void ConvolutionState::deviceUpdate(const ALCdevice* /*device*/)
 402 {
 403 }
 404
 405 EffectBufferBase *ConvolutionState::createBuffer(const ALCdevice *device,
 406     const BufferStorage &buffer)
 407 {
 408     /* An empty buffer doesn't need a convolution filter. */
 409     if(buffer.mSampleLen < 1) return nullptr;
 410
 411     auto numChannels = ChannelsFromFmt(buffer.mChannels,
 412         minu(buffer.mAmbiOrder, device->mAmbiOrder));
 413
 414     al::intrusive_ptr<ConvolutionFilter> filter{new ConvolutionFilter{numChannels}};
 415     if LIKELY(filter->init(device, buffer))
 416         return filter.release();
 417     return nullptr;
 418 }
 419
 420
 421 void ConvolutionState::update(const ALCcontext *context, const ALeffectslot *slot,
 422     const EffectProps *props, const EffectTarget target)
 423 {
 424     mFilter = static_cast<ConvolutionFilter*>(slot->Params.mEffectBuffer);
 425     if(!mFilter) return;
 426
 427     mFilter->update(mOutTarget, context, slot, props, target);
 428 }
 429
 430 void ConvolutionState::process(const size_t samplesToDo,
 431     const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
 432 {
 433     if(mFilter)
 434         mFilter->process(samplesToDo, samplesIn, samplesOut);
 435 }
 436
 437
 438 void ConvolutionEffect_setParami(EffectProps* /*props*/, ALenum param, int /*val*/)
 439 {
 440     switch(param)
 441     {
 442     default:
 443         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
 444             param};
 445     }
 446 }
 447 void ConvolutionEffect_setParamiv(EffectProps *props, ALenum param, const int *vals)
 448 {
 449     switch(param)
 450     {
 451     default:
 452         ConvolutionEffect_setParami(props, param, vals[0]);
 453     }
 454 }
 455 void ConvolutionEffect_setParamf(EffectProps* /*props*/, ALenum param, float /*val*/)
 456 {
 457     switch(param)
 458     {
 459     default:
 460         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
 461             param};
 462     }
 463 }
 464 void ConvolutionEffect_setParamfv(EffectProps *props, ALenum param, const float *vals)
 465 {
 466     switch(param)
 467     {
 468     default:
 469         ConvolutionEffect_setParamf(props, param, vals[0]);
 470     }
 471 }
 472
 473 void ConvolutionEffect_getParami(const EffectProps* /*props*/, ALenum param, int* /*val*/)
 474 {
 475     switch(param)
 476     {
 477     default:
 478         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
 479             param};
 480     }
 481 }
 482 void ConvolutionEffect_getParamiv(const EffectProps *props, ALenum param, int *vals)
 483 {
 484     switch(param)
 485     {
 486     default:
 487         ConvolutionEffect_getParami(props, param, vals);
 488     }
 489 }
 490 void ConvolutionEffect_getParamf(const EffectProps* /*props*/, ALenum param, float* /*val*/)
 491 {
 492     switch(param)
 493     {
 494     default:
 495         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
 496             param};
 497     }
 498 }
 499 void ConvolutionEffect_getParamfv(const EffectProps *props, ALenum param, float *vals)
 500 {
 501     switch(param)
 502     {
 503     default:
 504         ConvolutionEffect_getParamf(props, param, vals);
 505     }
 506 }
 507
 508 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect);
 509
 510
 511 struct ConvolutionStateFactory final : public EffectStateFactory {
 512     EffectState *create() override;
 513     EffectProps getDefaultProps() const noexcept override;
 514     const EffectVtable *getEffectVtable() const noexcept override;
 515 };
 516
 517 /* Creates EffectState objects of the appropriate type. */
 518 EffectState *ConvolutionStateFactory::create()
 519 { return new ConvolutionState{}; }
 520
 521 /* Returns an ALeffectProps initialized with this effect type's default
 522  * property values.
 523  */
 524 EffectProps ConvolutionStateFactory::getDefaultProps() const noexcept
 525 {
 526     EffectProps props{};
 527     return props;
 528 }
 529
 530 /* Returns a pointer to this effect type's global set/get vtable. */
 531 const EffectVtable *ConvolutionStateFactory::getEffectVtable() const noexcept
 532 { return &ConvolutionEffect_vtable; }
 533
 534 } // namespace
 535
 536 EffectStateFactory *ConvolutionStateFactory_getFactory()
 537 {
 538     static ConvolutionStateFactory ConvolutionFactory{};
 539     return &ConvolutionFactory;
 540 }