Avoid class templates for the POPCNT64/CTZ64 macros
[openal-soft.git] / alc / effects / convolution.cpp
blob1ce3bae2828abeb87f7be01b5617123d8800ec76
2 #include "config.h"
4 #include "AL/al.h"
5 #include "AL/alc.h"
7 #include "al/auxeffectslot.h"
8 #include "alcmain.h"
9 #include "alcomplex.h"
10 #include "alcontext.h"
11 #include "almalloc.h"
12 #include "alspan.h"
13 #include "ambidefs.h"
14 #include "bformatdec.h"
15 #include "buffer_storage.h"
16 #include "effects/base.h"
17 #include "filters/splitter.h"
18 #include "fmt_traits.h"
19 #include "logging.h"
20 #include "polyphase_resampler.h"
23 namespace {
25 /* Convolution reverb is implemented using a segmented overlap-add method. The
26 * impulse response is broken up into multiple segments of 512 samples, and
27 * each segment has an FFT applied with a 1024-sample buffer (the latter half
28 * left silent) to get its frequency-domain response. The resulting response
29 * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
31 * Input samples are similarly broken up into 512-sample segments, with an FFT
32 * applied to each new incoming segment to get its 513 bins. A history of FFT'd
33 * input segments is maintained, equal to the length of the impulse response.
35 * To apply the reverberation, each impulse response segment is convolved with
36 * its paired input segment (using complex multiplies, far cheaper than FIRs),
37 * accumulating into a 1024-bin FFT buffer. The input history is then shifted
38 * to align with later impulse response segments for next time.
40 * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
41 * sample time-domain response for output, which is split in two halves. The
42 * first half is the 512-sample output, and the second half is a 512-sample
43 * (really, 511) delayed extension, which gets added to the output next time.
44 * Convolving two time-domain responses of lengths N and M results in a time-
45 * domain signal of length N+M-1, and this holds true regardless of the
46 * convolution being applied in the frequency domain, so these "overflow"
47 * samples need to be accounted for.
49 * Limitations:
50 * There is currently a 512-sample delay on the output, as a result of needing
51 * to collect that many input samples to do an FFT with. This can be fixed by
52 * excluding the first impulse response segment from being FFT'd, and applying
53 * it directly in the time domain. This will have higher CPU consumption, but
54 * it won't have to wait before generating output.
58 void LoadSamples(double *RESTRICT dst, const al::byte *src, const size_t srcstep, FmtType srctype,
59 const size_t samples) noexcept
61 #define HANDLE_FMT(T) case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
62 switch(srctype)
64 HANDLE_FMT(FmtUByte);
65 HANDLE_FMT(FmtShort);
66 HANDLE_FMT(FmtFloat);
67 HANDLE_FMT(FmtDouble);
68 HANDLE_FMT(FmtMulaw);
69 HANDLE_FMT(FmtAlaw);
71 #undef HANDLE_FMT
75 auto GetAmbiScales(AmbiScaling scaletype) noexcept -> const std::array<float,MAX_AMBI_CHANNELS>&
77 if(scaletype == AmbiScaling::FuMa) return AmbiScale::FromFuMa;
78 if(scaletype == AmbiScaling::SN3D) return AmbiScale::FromSN3D;
79 return AmbiScale::FromN3D;
82 auto GetAmbiLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI_CHANNELS>&
84 if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa;
85 return AmbiIndex::FromACN;
88 auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI2D_CHANNELS>&
90 if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D;
91 return AmbiIndex::From2D;
95 using complex_d = std::complex<double>;
97 constexpr size_t ConvolveUpdateSize{1024};
98 constexpr size_t ConvolveUpdateSamples{ConvolveUpdateSize / 2};
100 struct ConvolutionFilter final : public EffectBufferBase {
101 FmtChannels mChannels{};
102 AmbiLayout mAmbiLayout{};
103 AmbiScaling mAmbiScaling{};
104 ALuint mAmbiOrder{};
106 size_t mFifoPos{0};
107 al::vector<std::array<double,ConvolveUpdateSamples*2>,16> mOutput;
108 alignas(16) std::array<complex_d,ConvolveUpdateSize> mFftBuffer{};
110 size_t mCurrentSegment{0};
111 size_t mNumConvolveSegs{0};
113 struct ChannelData {
114 alignas(16) FloatBufferLine mBuffer{};
115 float mHfScale{};
116 BandSplitter mFilter{};
117 float Current[MAX_OUTPUT_CHANNELS]{};
118 float Target[MAX_OUTPUT_CHANNELS]{};
120 using ChannelDataArray = al::FlexArray<ChannelData>;
121 std::unique_ptr<ChannelDataArray> mChans;
122 std::unique_ptr<complex_d[]> mComplexData;
124 ConvolutionFilter(size_t numChannels) : mChans{ChannelDataArray::Create(numChannels)}
127 bool init(const ALCdevice *device, const BufferStorage &buffer);
129 void NormalMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
130 void UpsampleMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
131 void (ConvolutionFilter::*mMix)(const al::span<FloatBufferLine>,const size_t)
132 {&ConvolutionFilter::NormalMix};
134 void update(al::span<FloatBufferLine> &outTarget, const ALCcontext *context,
135 const ALeffectslot *slot, const EffectProps *props, const EffectTarget target);
136 void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn,
137 const al::span<FloatBufferLine> samplesOut);
139 DEF_NEWDEL(ConvolutionFilter)
142 bool ConvolutionFilter::init(const ALCdevice *device, const BufferStorage &buffer)
144 constexpr size_t m{ConvolveUpdateSize/2 + 1};
146 /* FIXME: Support anything. */
147 if(buffer.mChannels != FmtMono && buffer.mChannels != FmtStereo
148 && buffer.mChannels != FmtBFormat2D && buffer.mChannels != FmtBFormat3D)
149 return false;
150 if((buffer.mChannels == FmtBFormat2D || buffer.mChannels == FmtBFormat3D)
151 && buffer.mAmbiOrder > 1)
152 return false;
154 /* The impulse response needs to have the same sample rate as the input and
155 * output. The bsinc24 resampler is decent, but there is high-frequency
156 * attenation that some people may be able to pick up on. Since this is
157 * very infrequent called, go ahead and use the polyphase resampler.
159 PPhaseResampler resampler;
160 if(device->Frequency != buffer.mSampleRate)
161 resampler.init(buffer.mSampleRate, device->Frequency);
162 const auto resampledCount = static_cast<ALuint>(
163 (uint64_t{buffer.mSampleLen}*device->Frequency + (buffer.mSampleRate-1)) /
164 buffer.mSampleRate);
166 auto bytesPerSample = BytesFromFmt(buffer.mType);
167 auto realChannels = ChannelsFromFmt(buffer.mChannels, buffer.mAmbiOrder);
168 auto numChannels = mChans->size();
170 const BandSplitter splitter{400.0f / static_cast<float>(device->Frequency)};
171 for(auto &e : *mChans)
172 e.mFilter = splitter;
174 mOutput.resize(numChannels, {});
176 /* Calculate the number of segments needed to hold the impulse response and
177 * the input history (rounded up), and allocate them.
179 mNumConvolveSegs = (resampledCount+(ConvolveUpdateSamples-1)) / ConvolveUpdateSamples;
181 const size_t complex_length{mNumConvolveSegs * m * (numChannels+1)};
182 mComplexData = std::make_unique<complex_d[]>(complex_length);
183 std::fill_n(mComplexData.get(), complex_length, complex_d{});
185 mChannels = buffer.mChannels;
186 mAmbiLayout = buffer.mAmbiLayout;
187 mAmbiScaling = buffer.mAmbiScaling;
188 mAmbiOrder = buffer.mAmbiOrder;
190 auto fftbuffer = std::make_unique<std::array<complex_d,ConvolveUpdateSize>>();
191 auto srcsamples = std::make_unique<double[]>(maxz(buffer.mSampleLen, resampledCount));
192 complex_d *filteriter = mComplexData.get() + mNumConvolveSegs*m;
193 for(size_t c{0};c < numChannels;++c)
195 /* Load the samples from the buffer, and resample to match the device. */
196 LoadSamples(srcsamples.get(), buffer.mData.data() + bytesPerSample*c, realChannels,
197 buffer.mType, buffer.mSampleLen);
198 if(device->Frequency != buffer.mSampleRate)
199 resampler.process(buffer.mSampleLen, srcsamples.get(), resampledCount,
200 srcsamples.get());
202 size_t done{0};
203 for(size_t s{0};s < mNumConvolveSegs;++s)
205 const size_t todo{minz(resampledCount-done, ConvolveUpdateSamples)};
207 auto iter = std::copy_n(&srcsamples[done], todo, fftbuffer->begin());
208 done += todo;
209 std::fill(iter, fftbuffer->end(), complex_d{});
211 complex_fft(*fftbuffer, -1.0);
212 filteriter = std::copy_n(fftbuffer->cbegin(), m, filteriter);
215 return true;
218 void ConvolutionFilter::NormalMix(const al::span<FloatBufferLine> samplesOut,
219 const size_t samplesToDo)
221 for(auto &chan : *mChans)
222 MixSamples({chan.mBuffer.data(), samplesToDo}, samplesOut, chan.Current, chan.Target,
223 samplesToDo, 0);
226 void ConvolutionFilter::UpsampleMix(const al::span<FloatBufferLine> samplesOut,
227 const size_t samplesToDo)
229 for(auto &chan : *mChans)
231 const al::span<float> src{chan.mBuffer.data(), samplesToDo};
232 chan.mFilter.processHfScale(src, chan.mHfScale);
233 MixSamples(src, samplesOut, chan.Current, chan.Target, samplesToDo, 0);
237 void ConvolutionFilter::update(al::span<FloatBufferLine> &outTarget, const ALCcontext *context,
238 const ALeffectslot *slot, const EffectProps* /*props*/, const EffectTarget target)
240 ALCdevice *device{context->mDevice.get()};
241 mMix = &ConvolutionFilter::NormalMix;
243 /* The iFFT'd response is scaled up by the number of bins, so apply the
244 * inverse to the output mixing gain.
246 constexpr size_t m{ConvolveUpdateSize/2 + 1};
247 const float gain{slot->Params.Gain * (1.0f/m)};
248 auto &chans = *mChans;
249 if(mChannels == FmtBFormat3D || mChannels == FmtBFormat2D)
251 if(device->mAmbiOrder > mAmbiOrder)
253 mMix = &ConvolutionFilter::UpsampleMix;
254 const auto scales = BFormatDec::GetHFOrderScales(mAmbiOrder, device->mAmbiOrder);
255 chans[0].mHfScale = scales[0];
256 for(size_t i{1};i < chans.size();++i)
257 chans[i].mHfScale = scales[1];
259 outTarget = target.Main->Buffer;
261 const auto &scales = GetAmbiScales(mAmbiScaling);
262 const uint8_t *index_map{(mChannels == FmtBFormat2D) ?
263 GetAmbi2DLayout(mAmbiLayout).data() :
264 GetAmbiLayout(mAmbiLayout).data()};
266 std::array<float,MAX_AMBI_CHANNELS> coeffs{};
267 for(size_t c{0u};c < chans.size();++c)
269 const size_t acn{index_map[c]};
270 coeffs[acn] = scales[acn];
271 ComputePanGains(target.Main, coeffs.data(), gain, chans[c].Target);
272 coeffs[acn] = 0.0f;
275 else if(mChannels == FmtStereo)
277 /* TODO: Add a "direct channels" setting for this effect? */
278 const ALuint lidx{!target.RealOut ? INVALID_CHANNEL_INDEX :
279 GetChannelIdxByName(*target.RealOut, FrontLeft)};
280 const ALuint ridx{!target.RealOut ? INVALID_CHANNEL_INDEX :
281 GetChannelIdxByName(*target.RealOut, FrontRight)};
282 if(lidx != INVALID_CHANNEL_INDEX && ridx != INVALID_CHANNEL_INDEX)
284 outTarget = target.RealOut->Buffer;
285 chans[0].Target[lidx] = gain;
286 chans[1].Target[ridx] = gain;
288 else
290 const auto lcoeffs = CalcDirectionCoeffs({-1.0f, 0.0f, 0.0f}, 0.0f);
291 const auto rcoeffs = CalcDirectionCoeffs({ 1.0f, 0.0f, 0.0f}, 0.0f);
293 outTarget = target.Main->Buffer;
294 ComputePanGains(target.Main, lcoeffs.data(), gain, chans[0].Target);
295 ComputePanGains(target.Main, rcoeffs.data(), gain, chans[1].Target);
298 else if(mChannels == FmtMono)
300 const auto coeffs = CalcDirectionCoeffs({0.0f, 0.0f, -1.0f}, 0.0f);
302 outTarget = target.Main->Buffer;
303 ComputePanGains(target.Main, coeffs.data(), gain, chans[0].Target);
307 void ConvolutionFilter::process(const size_t samplesToDo,
308 const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
310 constexpr size_t m{ConvolveUpdateSize/2 + 1};
311 size_t curseg{mCurrentSegment};
312 auto &chans = *mChans;
314 for(size_t base{0u};base < samplesToDo;)
316 const size_t todo{minz(ConvolveUpdateSamples-mFifoPos, samplesToDo-base)};
318 /* Retrieve the output samples from the FIFO and fill in the new input
319 * samples.
321 for(size_t c{0};c < chans.size();++c)
323 auto fifo_iter = mOutput[c].begin() + mFifoPos;
324 std::transform(fifo_iter, fifo_iter+todo, chans[c].mBuffer.begin()+base,
325 [](double d) noexcept -> float { return static_cast<float>(d); });
328 std::copy_n(samplesIn[0].begin()+base, todo, mFftBuffer.begin()+mFifoPos);
329 mFifoPos += todo;
330 base += todo;
332 /* Check whether FIFO buffer is filled with new samples. */
333 if(mFifoPos < ConvolveUpdateSamples) break;
334 mFifoPos = 0;
336 /* Calculate the frequency domain response and add the relevant
337 * frequency bins to the input history.
339 complex_fft(mFftBuffer, -1.0);
341 std::copy_n(mFftBuffer.begin(), m, &mComplexData[curseg*m]);
342 mFftBuffer.fill(complex_d{});
344 const complex_d *RESTRICT filter{mComplexData.get() + mNumConvolveSegs*m};
345 for(size_t c{0};c < chans.size();++c)
347 /* Convolve each input segment with its IR filter counterpart
348 * (aligned in time).
350 const complex_d *RESTRICT input{&mComplexData[curseg*m]};
351 for(size_t s{curseg};s < mNumConvolveSegs;++s)
353 for(size_t i{0};i < m;++i,++input,++filter)
354 mFftBuffer[i] += *input * *filter;
356 input = mComplexData.get();
357 for(size_t s{0};s < curseg;++s)
359 for(size_t i{0};i < m;++i,++input,++filter)
360 mFftBuffer[i] += *input * *filter;
363 /* Apply iFFT to get the 1024 (really 1023) samples for output. The
364 * 512 output samples are combined with the last output's 511
365 * second-half samples (and this output's second half is
366 * subsequently saved for next time).
368 complex_fft(mFftBuffer, 1.0);
370 for(size_t i{0};i < ConvolveUpdateSamples;++i)
371 mOutput[c][i] = mFftBuffer[i].real() + mOutput[c][ConvolveUpdateSamples+i];
372 for(size_t i{0};i < ConvolveUpdateSamples;++i)
373 mOutput[c][ConvolveUpdateSamples+i] = mFftBuffer[ConvolveUpdateSamples+i].real();
374 mFftBuffer.fill(complex_d{});
377 /* Shift the input history. */
378 curseg = curseg ? (curseg-1) : (mNumConvolveSegs-1);
380 mCurrentSegment = curseg;
382 /* Finally, mix to the output. */
383 (this->*mMix)(samplesOut, samplesToDo);
387 struct ConvolutionState final : public EffectState {
388 ConvolutionFilter *mFilter{};
390 ConvolutionState() = default;
391 ~ConvolutionState() override = default;
393 void deviceUpdate(const ALCdevice *device) override;
394 EffectBufferBase *createBuffer(const ALCdevice *device, const BufferStorage &buffer) override;
395 void update(const ALCcontext *context, const ALeffectslot *slot, const EffectProps *props, const EffectTarget target) override;
396 void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut) override;
398 DEF_NEWDEL(ConvolutionState)
401 void ConvolutionState::deviceUpdate(const ALCdevice* /*device*/)
405 EffectBufferBase *ConvolutionState::createBuffer(const ALCdevice *device,
406 const BufferStorage &buffer)
408 /* An empty buffer doesn't need a convolution filter. */
409 if(buffer.mSampleLen < 1) return nullptr;
411 auto numChannels = ChannelsFromFmt(buffer.mChannels,
412 minu(buffer.mAmbiOrder, device->mAmbiOrder));
414 al::intrusive_ptr<ConvolutionFilter> filter{new ConvolutionFilter{numChannels}};
415 if LIKELY(filter->init(device, buffer))
416 return filter.release();
417 return nullptr;
421 void ConvolutionState::update(const ALCcontext *context, const ALeffectslot *slot,
422 const EffectProps *props, const EffectTarget target)
424 mFilter = static_cast<ConvolutionFilter*>(slot->Params.mEffectBuffer);
425 if(!mFilter) return;
427 mFilter->update(mOutTarget, context, slot, props, target);
430 void ConvolutionState::process(const size_t samplesToDo,
431 const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
433 if(mFilter)
434 mFilter->process(samplesToDo, samplesIn, samplesOut);
438 void ConvolutionEffect_setParami(EffectProps* /*props*/, ALenum param, int /*val*/)
440 switch(param)
442 default:
443 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
444 param};
447 void ConvolutionEffect_setParamiv(EffectProps *props, ALenum param, const int *vals)
449 switch(param)
451 default:
452 ConvolutionEffect_setParami(props, param, vals[0]);
455 void ConvolutionEffect_setParamf(EffectProps* /*props*/, ALenum param, float /*val*/)
457 switch(param)
459 default:
460 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
461 param};
464 void ConvolutionEffect_setParamfv(EffectProps *props, ALenum param, const float *vals)
466 switch(param)
468 default:
469 ConvolutionEffect_setParamf(props, param, vals[0]);
473 void ConvolutionEffect_getParami(const EffectProps* /*props*/, ALenum param, int* /*val*/)
475 switch(param)
477 default:
478 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
479 param};
482 void ConvolutionEffect_getParamiv(const EffectProps *props, ALenum param, int *vals)
484 switch(param)
486 default:
487 ConvolutionEffect_getParami(props, param, vals);
490 void ConvolutionEffect_getParamf(const EffectProps* /*props*/, ALenum param, float* /*val*/)
492 switch(param)
494 default:
495 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
496 param};
499 void ConvolutionEffect_getParamfv(const EffectProps *props, ALenum param, float *vals)
501 switch(param)
503 default:
504 ConvolutionEffect_getParamf(props, param, vals);
508 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect);
511 struct ConvolutionStateFactory final : public EffectStateFactory {
512 EffectState *create() override;
513 EffectProps getDefaultProps() const noexcept override;
514 const EffectVtable *getEffectVtable() const noexcept override;
517 /* Creates EffectState objects of the appropriate type. */
518 EffectState *ConvolutionStateFactory::create()
519 { return new ConvolutionState{}; }
521 /* Returns an ALeffectProps initialized with this effect type's default
522 * property values.
524 EffectProps ConvolutionStateFactory::getDefaultProps() const noexcept
526 EffectProps props{};
527 return props;
530 /* Returns a pointer to this effect type's global set/get vtable. */
531 const EffectVtable *ConvolutionStateFactory::getEffectVtable() const noexcept
532 { return &ConvolutionEffect_vtable; }
534 } // namespace
536 EffectStateFactory *ConvolutionStateFactory_getFactory()
538 static ConvolutionStateFactory ConvolutionFactory{};
539 return &ConvolutionFactory;