alc/effects/convolution.cpp

   1
   2 #include "config.h"
   3
   4 #include "AL/al.h"
   5 #include "AL/alc.h"
   6
   7 #include "al/auxeffectslot.h"
   8 #include "alcmain.h"
   9 #include "alcomplex.h"
  10 #include "alcontext.h"
  11 #include "almalloc.h"
  12 #include "alspan.h"
  13 #include "ambidefs.h"
  14 #include "bformatdec.h"
  15 #include "buffer_storage.h"
  16 #include "effects/base.h"
  17 #include "filters/splitter.h"
  18 #include "fmt_traits.h"
  19 #include "logging.h"
  20 #include "polyphase_resampler.h"
  21
  22
  23 namespace {
  24
  25 /* Convolution reverb is implemented using a segmented overlap-add method. The
  26  * impulse response is broken up into multiple segments of 512 samples, and
  27  * each segment has an FFT applied with a 1024-sample buffer (the latter half
  28  * left silent) to get its frequency-domain response. The resulting response
  29  * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
  30  *
  31  * Input samples are similarly broken up into 512-sample segments, with an FFT
  32  * applied to each new incoming segment to get its 513 bins. A history of FFT'd
  33  * input segments is maintained, equal to the length of the impulse response.
  34  *
  35  * To apply the reverberation, each impulse response segment is convolved with
  36  * its paired input segment (using complex multiplies, far cheaper than FIRs),
  37  * accumulating into a 1024-bin FFT buffer. The input history is then shifted
  38  * to align with later impulse response segments for next time.
  39  *
  40  * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
  41  * sample time-domain response for output, which is split in two halves. The
  42  * first half is the 512-sample output, and the second half is a 512-sample
  43  * (really, 511) delayed extension, which gets added to the output next time.
  44  * Convolving two time-domain responses of lengths N and M results in a time-
  45  * domain signal of length N+M-1, and this holds true regardless of the
  46  * convolution being applied in the frequency domain, so these "overflow"
  47  * samples need to be accounted for.
  48  *
  49  * Limitations:
  50  * There is currently a 512-sample delay on the output, as a result of needing
  51  * to collect that many input samples to do an FFT with. This can be fixed by
  52  * excluding the first impulse response segment from being FFT'd, and applying
  53  * it directly in the time domain. This will have higher CPU consumption, but
  54  * it won't have to wait before generating output.
  55  */
  56
  57
  58 void LoadSamples(double *RESTRICT dst, const al::byte *src, const size_t srcstep, FmtType srctype,
  59     const size_t samples) noexcept
  60 {
  61 #define HANDLE_FMT(T)  case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
  62     switch(srctype)
  63     {
  64     HANDLE_FMT(FmtUByte);
  65     HANDLE_FMT(FmtShort);
  66     HANDLE_FMT(FmtFloat);
  67     HANDLE_FMT(FmtDouble);
  68     HANDLE_FMT(FmtMulaw);
  69     HANDLE_FMT(FmtAlaw);
  70     }
  71 #undef HANDLE_FMT
  72 }
  73
  74
  75 auto GetAmbiScales(AmbiScaling scaletype) noexcept -> const std::array<float,MAX_AMBI_CHANNELS>&
  76 {
  77     if(scaletype == AmbiScaling::FuMa) return AmbiScale::FromFuMa;
  78     if(scaletype == AmbiScaling::SN3D) return AmbiScale::FromSN3D;
  79     return AmbiScale::FromN3D;
  80 }
  81
  82 auto GetAmbiLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI_CHANNELS>&
  83 {
  84     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa;
  85     return AmbiIndex::FromACN;
  86 }
  87
  88 auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI2D_CHANNELS>&
  89 {
  90     if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D;
  91     return AmbiIndex::From2D;
  92 }
  93
  94
  95 using complex_d = std::complex<double>;
  96
  97 constexpr size_t ConvolveUpdateSize{1024};
  98 constexpr size_t ConvolveUpdateSamples{ConvolveUpdateSize / 2};
  99
 100 #define MAX_FILTER_CHANNELS 4
 101
 102
 103 struct ConvolutionFilter final : public EffectBufferBase {
 104     size_t mCurrentSegment{0};
 105     size_t mNumConvolveSegs{0};
 106     complex_d *mInputHistory{};
 107     complex_d *mConvolveFilter[MAX_FILTER_CHANNELS]{};
 108
 109     FmtChannels mChannels{};
 110     AmbiLayout mAmbiLayout{};
 111     AmbiScaling mAmbiScaling{};
 112     ALuint mAmbiOrder{};
 113
 114     std::unique_ptr<complex_d[]> mComplexData;
 115
 116     DEF_NEWDEL(ConvolutionFilter)
 117 };
 118
 119 struct ConvolutionState final : public EffectState {
 120     ConvolutionFilter *mFilter{};
 121
 122     size_t mFifoPos{0};
 123     alignas(16) std::array<double,ConvolveUpdateSamples*2> mOutput[MAX_FILTER_CHANNELS]{};
 124     alignas(16) std::array<complex_d,ConvolveUpdateSize> mFftBuffer{};
 125
 126     ALuint mNumChannels;
 127     alignas(16) FloatBufferLine mTempBuffer[MAX_FILTER_CHANNELS]{};
 128
 129     struct {
 130         float mHfScale{};
 131         BandSplitter mFilter{};
 132         float Current[MAX_OUTPUT_CHANNELS]{};
 133         float Target[MAX_OUTPUT_CHANNELS]{};
 134     } mOutChans[MAX_FILTER_CHANNELS];
 135
 136     ConvolutionState() = default;
 137     ~ConvolutionState() override = default;
 138
 139     void NormalMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
 140     void UpsampleMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
 141     void (ConvolutionState::*mMix)(const al::span<FloatBufferLine>,const size_t)
 142     {&ConvolutionState::NormalMix};
 143
 144     void deviceUpdate(const ALCdevice *device) override;
 145     EffectBufferBase *createBuffer(const ALCdevice *device, const BufferStorage &buffer) override;
 146     void update(const ALCcontext *context, const ALeffectslot *slot, const EffectProps *props, const EffectTarget target) override;
 147     void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut) override;
 148
 149     DEF_NEWDEL(ConvolutionState)
 150 };
 151
 152 void ConvolutionState::NormalMix(const al::span<FloatBufferLine> samplesOut,
 153     const size_t samplesToDo)
 154 {
 155     for(size_t c{0};c < mNumChannels;++c)
 156         MixSamples({mTempBuffer[c].data(), samplesToDo}, samplesOut, mOutChans[c].Current,
 157             mOutChans[c].Target, samplesToDo, 0);
 158 }
 159
 160 void ConvolutionState::UpsampleMix(const al::span<FloatBufferLine> samplesOut,
 161     const size_t samplesToDo)
 162 {
 163     for(size_t c{0};c < mNumChannels;++c)
 164     {
 165         const al::span<float> src{mTempBuffer[c].data(), samplesToDo};
 166         mOutChans[c].mFilter.processHfScale(src, mOutChans[c].mHfScale);
 167         MixSamples(src, samplesOut, mOutChans[c].Current, mOutChans[c].Target, samplesToDo, 0);
 168     }
 169 }
 170
 171 void ConvolutionState::deviceUpdate(const ALCdevice *device)
 172 {
 173     mFifoPos = 0;
 174     for(auto &buffer : mOutput)
 175         buffer.fill(0.0f);
 176     mFftBuffer.fill(complex_d{});
 177
 178     for(auto &buffer : mTempBuffer)
 179         buffer.fill(0.0);
 180
 181     const BandSplitter splitter{400.0f / static_cast<float>(device->Frequency)};
 182     for(auto &e : mOutChans)
 183     {
 184         e.mHfScale = 1.0f;
 185         e.mFilter = splitter;
 186         std::fill(std::begin(e.Current), std::end(e.Current), 0.0f);
 187         std::fill(std::begin(e.Target), std::end(e.Target), 0.0f);
 188     }
 189 }
 190
 191 EffectBufferBase *ConvolutionState::createBuffer(const ALCdevice *device,
 192     const BufferStorage &buffer)
 193 {
 194     /* An empty buffer doesn't need a convolution filter. */
 195     if(buffer.mSampleLen < 1) return nullptr;
 196
 197     /* FIXME: Support anything. */
 198     if(buffer.mChannels != FmtMono && buffer.mChannels != FmtStereo
 199         && buffer.mChannels != FmtBFormat2D && buffer.mChannels != FmtBFormat3D)
 200         return nullptr;
 201     if((buffer.mChannels == FmtBFormat2D || buffer.mChannels == FmtBFormat3D)
 202         && buffer.mAmbiOrder > 1)
 203         return nullptr;
 204
 205     /* The impulse response needs to have the same sample rate as the input and
 206      * output. The bsinc24 resampler is decent, but there is high-frequency
 207      * attenation that some people may be able to pick up on. Since this is
 208      * very infrequent called, go ahead and use the polyphase resampler.
 209      */
 210     PPhaseResampler resampler;
 211     if(device->Frequency != buffer.mSampleRate)
 212         resampler.init(buffer.mSampleRate, device->Frequency);
 213     const auto resampledCount = static_cast<ALuint>(
 214         (uint64_t{buffer.mSampleLen}*device->Frequency + (buffer.mSampleRate-1)) /
 215         buffer.mSampleRate);
 216
 217     al::intrusive_ptr<ConvolutionFilter> filter{new ConvolutionFilter{}};
 218
 219     auto bytesPerSample = BytesFromFmt(buffer.mType);
 220     auto numChannels = ChannelsFromFmt(buffer.mChannels, buffer.mAmbiOrder);
 221     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 222
 223     /* Calculate the number of segments needed to hold the impulse response and
 224      * the input history (rounded up), and allocate them.
 225      */
 226     filter->mNumConvolveSegs = (buffer.mSampleLen+(ConvolveUpdateSamples-1)) /
 227         ConvolveUpdateSamples;
 228
 229     const size_t complex_length{filter->mNumConvolveSegs * m * (numChannels+1)};
 230     filter->mComplexData = std::make_unique<complex_d[]>(complex_length);
 231     std::fill_n(filter->mComplexData.get(), complex_length, complex_d{});
 232
 233     filter->mInputHistory = filter->mComplexData.get();
 234     filter->mConvolveFilter[0] = filter->mInputHistory + filter->mNumConvolveSegs*m;
 235     for(size_t c{1};c < numChannels;++c)
 236         filter->mConvolveFilter[c] = filter->mConvolveFilter[c-1] + filter->mNumConvolveSegs*m;
 237
 238     filter->mChannels = buffer.mChannels;
 239     filter->mAmbiLayout = buffer.mAmbiLayout;
 240     filter->mAmbiScaling = buffer.mAmbiScaling;
 241     filter->mAmbiOrder = buffer.mAmbiOrder;
 242
 243     auto fftbuffer = std::make_unique<std::array<complex_d,ConvolveUpdateSize>>();
 244     auto srcsamples = std::make_unique<double[]>(maxz(buffer.mSampleLen, resampledCount));
 245     for(size_t c{0};c < numChannels;++c)
 246     {
 247         /* Load the samples from the buffer, and resample to match the device. */
 248         LoadSamples(srcsamples.get(), buffer.mData.data() + bytesPerSample*c, numChannels,
 249             buffer.mType, buffer.mSampleLen);
 250         if(device->Frequency != buffer.mSampleRate)
 251             resampler.process(buffer.mSampleLen, srcsamples.get(), resampledCount,
 252                 srcsamples.get());
 253
 254         size_t done{0};
 255         complex_d *filteriter = filter->mConvolveFilter[c];
 256         for(size_t s{0};s < filter->mNumConvolveSegs;++s)
 257         {
 258             const size_t todo{minz(resampledCount-done, ConvolveUpdateSamples)};
 259
 260             auto iter = std::copy_n(&srcsamples[done], todo, fftbuffer->begin());
 261             done += todo;
 262             std::fill(iter, fftbuffer->end(), complex_d{});
 263
 264             complex_fft(*fftbuffer, -1.0);
 265             filteriter = std::copy_n(fftbuffer->cbegin(), m, filteriter);
 266         }
 267     }
 268
 269     return filter.release();
 270 }
 271
 272 void ConvolutionState::update(const ALCcontext *context, const ALeffectslot *slot,
 273     const EffectProps* /*props*/, const EffectTarget target)
 274 {
 275     mFilter = static_cast<ConvolutionFilter*>(slot->Params.mEffectBuffer);
 276     if(!mFilter) return;
 277
 278     ALCdevice *device{context->mDevice.get()};
 279     const ALuint min_order{minu(mFilter->mAmbiOrder, device->mAmbiOrder)};
 280     mNumChannels = mFilter ? ChannelsFromFmt(mFilter->mChannels, min_order) : 0u;
 281     mMix = &ConvolutionState::NormalMix;
 282
 283     /* The iFFT'd response is scaled up by the number of bins, so apply the
 284      * inverse to the output mixing gain.
 285      */
 286     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 287     const float gain{slot->Params.Gain * (1.0f/m)};
 288     if(mFilter->mChannels == FmtBFormat3D || mFilter->mChannels == FmtBFormat2D)
 289     {
 290         if(device->mAmbiOrder > mFilter->mAmbiOrder)
 291         {
 292             mMix = &ConvolutionState::UpsampleMix;
 293             const auto scales = BFormatDec::GetHFOrderScales(mFilter->mAmbiOrder,
 294                 device->mAmbiOrder);
 295             mOutChans[0].mHfScale = scales[0];
 296             for(size_t i{1};i < mNumChannels;++i)
 297                 mOutChans[i].mHfScale = scales[1];
 298         }
 299         mOutTarget = target.Main->Buffer;
 300
 301         const auto &scales = GetAmbiScales(mFilter->mAmbiScaling);
 302         const uint8_t *index_map{(mFilter->mChannels == FmtBFormat2D) ?
 303             GetAmbi2DLayout(mFilter->mAmbiLayout).data() :
 304             GetAmbiLayout(mFilter->mAmbiLayout).data()};
 305
 306         std::array<float,MAX_AMBI_CHANNELS> coeffs{};
 307         for(size_t c{0u};c < mNumChannels;++c)
 308         {
 309             const size_t acn{index_map[c]};
 310             coeffs[acn] = scales[acn];
 311             ComputePanGains(target.Main, coeffs.data(), gain, mOutChans[c].Target);
 312             coeffs[acn] = 0.0f;
 313         }
 314     }
 315     else if(mFilter->mChannels == FmtStereo)
 316     {
 317         /* TODO: Add a "direct channels" setting for this effect? */
 318         const ALuint lidx{!target.RealOut ? INVALID_CHANNEL_INDEX :
 319             GetChannelIdxByName(*target.RealOut, FrontLeft)};
 320         const ALuint ridx{!target.RealOut ? INVALID_CHANNEL_INDEX :
 321             GetChannelIdxByName(*target.RealOut, FrontRight)};
 322         if(lidx != INVALID_CHANNEL_INDEX && ridx != INVALID_CHANNEL_INDEX)
 323         {
 324             mOutTarget = target.RealOut->Buffer;
 325             mOutChans[0].Target[lidx] = gain;
 326             mOutChans[1].Target[ridx] = gain;
 327         }
 328         else
 329         {
 330             const auto lcoeffs = CalcDirectionCoeffs({-1.0f, 0.0f, 0.0f}, 0.0f);
 331             const auto rcoeffs = CalcDirectionCoeffs({ 1.0f, 0.0f, 0.0f}, 0.0f);
 332
 333             mOutTarget = target.Main->Buffer;
 334             ComputePanGains(target.Main, lcoeffs.data(), gain, mOutChans[0].Target);
 335             ComputePanGains(target.Main, rcoeffs.data(), gain, mOutChans[1].Target);
 336         }
 337     }
 338     else if(mFilter->mChannels == FmtMono)
 339     {
 340         const auto coeffs = CalcDirectionCoeffs({0.0f, 0.0f, -1.0f}, 0.0f);
 341
 342         mOutTarget = target.Main->Buffer;
 343         ComputePanGains(target.Main, coeffs.data(), gain, mOutChans[0].Target);
 344     }
 345 }
 346
 347 void ConvolutionState::process(const size_t samplesToDo,
 348     const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
 349 {
 350     /* No filter, no response. */
 351     if(!mFilter) return;
 352
 353     constexpr size_t m{ConvolveUpdateSize/2 + 1};
 354     size_t curseg{mFilter->mCurrentSegment};
 355
 356     for(size_t base{0u};base < samplesToDo;)
 357     {
 358         const size_t todo{minz(ConvolveUpdateSamples-mFifoPos, samplesToDo-base)};
 359
 360         /* Retrieve the output samples from the FIFO and fill in the new input
 361          * samples.
 362          */
 363         for(size_t c{0};c < mNumChannels;++c)
 364         {
 365             auto fifo_iter = mOutput[c].begin() + mFifoPos;
 366             std::transform(fifo_iter, fifo_iter+todo, mTempBuffer[c].begin()+base,
 367                 [](double d) noexcept -> float { return static_cast<float>(d); });
 368         }
 369
 370         std::copy_n(samplesIn[0].begin()+base, todo, mFftBuffer.begin()+mFifoPos);
 371         mFifoPos += todo;
 372         base += todo;
 373
 374         /* Check whether FIFO buffer is filled with new samples. */
 375         if(mFifoPos < ConvolveUpdateSamples) break;
 376         mFifoPos = 0;
 377
 378         /* Calculate the frequency domain response and add the relevant
 379          * frequency bins to the input history.
 380          */
 381         complex_fft(mFftBuffer, -1.0);
 382
 383         std::copy_n(mFftBuffer.begin(), m, &mFilter->mInputHistory[curseg*m]);
 384         mFftBuffer.fill(complex_d{});
 385
 386         for(size_t c{0};c < mNumChannels;++c)
 387         {
 388             /* Convolve each input segment with its IR filter counterpart
 389              * (aligned in time).
 390              */
 391             const complex_d *RESTRICT filter{mFilter->mConvolveFilter[c]};
 392             const complex_d *RESTRICT input{&mFilter->mInputHistory[curseg*m]};
 393             for(size_t s{curseg};s < mFilter->mNumConvolveSegs;++s)
 394             {
 395                 for(size_t i{0};i < m;++i,++input,++filter)
 396                     mFftBuffer[i] += *input * *filter;
 397             }
 398             input = mFilter->mInputHistory;
 399             for(size_t s{0};s < curseg;++s)
 400             {
 401                 for(size_t i{0};i < m;++i,++input,++filter)
 402                     mFftBuffer[i] += *input * *filter;
 403             }
 404
 405             /* Apply iFFT to get the 1024 (really 1023) samples for output. The
 406              * 512 output samples are combined with the last output's 511
 407              * second-half samples (and this output's second half is
 408              * subsequently saved for next time).
 409              */
 410             complex_fft(mFftBuffer, 1.0);
 411
 412             for(size_t i{0};i < ConvolveUpdateSamples;++i)
 413                 mOutput[c][i] = mFftBuffer[i].real() + mOutput[c][ConvolveUpdateSamples+i];
 414             for(size_t i{0};i < ConvolveUpdateSamples;++i)
 415                 mOutput[c][ConvolveUpdateSamples+i] = mFftBuffer[ConvolveUpdateSamples+i].real();
 416             mFftBuffer.fill(complex_d{});
 417         }
 418
 419         /* Shift the input history. */
 420         curseg = curseg ? (curseg-1) : (mFilter->mNumConvolveSegs-1);
 421     }
 422     mFilter->mCurrentSegment = curseg;
 423
 424     /* Finally, mix to the output. */
 425     (this->*mMix)(samplesOut, samplesToDo);
 426 }
 427
 428
 429 void ConvolutionEffect_setParami(EffectProps* /*props*/, ALenum param, int /*val*/)
 430 {
 431     switch(param)
 432     {
 433     default:
 434         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
 435             param};
 436     }
 437 }
 438 void ConvolutionEffect_setParamiv(EffectProps *props, ALenum param, const int *vals)
 439 {
 440     switch(param)
 441     {
 442     default:
 443         ConvolutionEffect_setParami(props, param, vals[0]);
 444     }
 445 }
 446 void ConvolutionEffect_setParamf(EffectProps* /*props*/, ALenum param, float /*val*/)
 447 {
 448     switch(param)
 449     {
 450     default:
 451         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
 452             param};
 453     }
 454 }
 455 void ConvolutionEffect_setParamfv(EffectProps *props, ALenum param, const float *vals)
 456 {
 457     switch(param)
 458     {
 459     default:
 460         ConvolutionEffect_setParamf(props, param, vals[0]);
 461     }
 462 }
 463
 464 void ConvolutionEffect_getParami(const EffectProps* /*props*/, ALenum param, int* /*val*/)
 465 {
 466     switch(param)
 467     {
 468     default:
 469         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
 470             param};
 471     }
 472 }
 473 void ConvolutionEffect_getParamiv(const EffectProps *props, ALenum param, int *vals)
 474 {
 475     switch(param)
 476     {
 477     default:
 478         ConvolutionEffect_getParami(props, param, vals);
 479     }
 480 }
 481 void ConvolutionEffect_getParamf(const EffectProps* /*props*/, ALenum param, float* /*val*/)
 482 {
 483     switch(param)
 484     {
 485     default:
 486         throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
 487             param};
 488     }
 489 }
 490 void ConvolutionEffect_getParamfv(const EffectProps *props, ALenum param, float *vals)
 491 {
 492     switch(param)
 493     {
 494     default:
 495         ConvolutionEffect_getParamf(props, param, vals);
 496     }
 497 }
 498
 499 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect);
 500
 501
 502 struct ConvolutionStateFactory final : public EffectStateFactory {
 503     EffectState *create() override;
 504     EffectProps getDefaultProps() const noexcept override;
 505     const EffectVtable *getEffectVtable() const noexcept override;
 506 };
 507
 508 /* Creates EffectState objects of the appropriate type. */
 509 EffectState *ConvolutionStateFactory::create()
 510 { return new ConvolutionState{}; }
 511
 512 /* Returns an ALeffectProps initialized with this effect type's default
 513  * property values.
 514  */
 515 EffectProps ConvolutionStateFactory::getDefaultProps() const noexcept
 516 {
 517     EffectProps props{};
 518     return props;
 519 }
 520
 521 /* Returns a pointer to this effect type's global set/get vtable. */
 522 const EffectVtable *ConvolutionStateFactory::getEffectVtable() const noexcept
 523 { return &ConvolutionEffect_vtable; }
 524
 525 } // namespace
 526
 527 EffectStateFactory *ConvolutionStateFactory_getFactory()
 528 {
 529     static ConvolutionStateFactory ConvolutionFactory{};
 530     return &ConvolutionFactory;
 531 }