Limit convolution processing to the output ambisonic order
[openal-soft.git] / alc / effects / convolution.cpp
blob68becf49187972b8315bd052a744d97b6d56cd8f
2 #include "config.h"
4 #include "AL/al.h"
5 #include "AL/alc.h"
7 #include "al/auxeffectslot.h"
8 #include "alcmain.h"
9 #include "alcomplex.h"
10 #include "alcontext.h"
11 #include "almalloc.h"
12 #include "alspan.h"
13 #include "ambidefs.h"
14 #include "bformatdec.h"
15 #include "buffer_storage.h"
16 #include "effects/base.h"
17 #include "filters/splitter.h"
18 #include "fmt_traits.h"
19 #include "logging.h"
20 #include "polyphase_resampler.h"
23 namespace {
25 /* Convolution reverb is implemented using a segmented overlap-add method. The
26 * impulse response is broken up into multiple segments of 512 samples, and
27 * each segment has an FFT applied with a 1024-sample buffer (the latter half
28 * left silent) to get its frequency-domain response. The resulting response
29 * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
31 * Input samples are similarly broken up into 512-sample segments, with an FFT
32 * applied to each new incoming segment to get its 513 bins. A history of FFT'd
33 * input segments is maintained, equal to the length of the impulse response.
35 * To apply the reverberation, each impulse response segment is convolved with
36 * its paired input segment (using complex multiplies, far cheaper than FIRs),
37 * accumulating into a 1024-bin FFT buffer. The input history is then shifted
38 * to align with later impulse response segments for next time.
40 * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
41 * sample time-domain response for output, which is split in two halves. The
42 * first half is the 512-sample output, and the second half is a 512-sample
43 * (really, 511) delayed extension, which gets added to the output next time.
44 * Convolving two time-domain responses of lengths N and M results in a time-
45 * domain signal of length N+M-1, and this holds true regardless of the
46 * convolution being applied in the frequency domain, so these "overflow"
47 * samples need to be accounted for.
49 * Limitations:
50 * There is currently a 512-sample delay on the output, as a result of needing
51 * to collect that many input samples to do an FFT with. This can be fixed by
52 * excluding the first impulse response segment from being FFT'd, and applying
53 * it directly in the time domain. This will have higher CPU consumption, but
54 * it won't have to wait before generating output.
58 void LoadSamples(double *RESTRICT dst, const al::byte *src, const size_t srcstep, FmtType srctype,
59 const size_t samples) noexcept
61 #define HANDLE_FMT(T) case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
62 switch(srctype)
64 HANDLE_FMT(FmtUByte);
65 HANDLE_FMT(FmtShort);
66 HANDLE_FMT(FmtFloat);
67 HANDLE_FMT(FmtDouble);
68 HANDLE_FMT(FmtMulaw);
69 HANDLE_FMT(FmtAlaw);
71 #undef HANDLE_FMT
75 auto GetAmbiScales(AmbiScaling scaletype) noexcept -> const std::array<float,MAX_AMBI_CHANNELS>&
77 if(scaletype == AmbiScaling::FuMa) return AmbiScale::FromFuMa;
78 if(scaletype == AmbiScaling::SN3D) return AmbiScale::FromSN3D;
79 return AmbiScale::FromN3D;
82 auto GetAmbiLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI_CHANNELS>&
84 if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa;
85 return AmbiIndex::FromACN;
88 auto GetAmbi2DLayout(AmbiLayout layouttype) noexcept -> const std::array<uint8_t,MAX_AMBI2D_CHANNELS>&
90 if(layouttype == AmbiLayout::FuMa) return AmbiIndex::FromFuMa2D;
91 return AmbiIndex::From2D;
95 using complex_d = std::complex<double>;
97 constexpr size_t ConvolveUpdateSize{1024};
98 constexpr size_t ConvolveUpdateSamples{ConvolveUpdateSize / 2};
100 #define MAX_FILTER_CHANNELS 4
103 struct ConvolutionFilter final : public EffectBufferBase {
104 size_t mCurrentSegment{0};
105 size_t mNumConvolveSegs{0};
106 complex_d *mInputHistory{};
107 complex_d *mConvolveFilter[MAX_FILTER_CHANNELS]{};
109 FmtChannels mChannels{};
110 AmbiLayout mAmbiLayout{};
111 AmbiScaling mAmbiScaling{};
112 ALuint mAmbiOrder{};
114 std::unique_ptr<complex_d[]> mComplexData;
116 DEF_NEWDEL(ConvolutionFilter)
119 struct ConvolutionState final : public EffectState {
120 ConvolutionFilter *mFilter{};
122 size_t mFifoPos{0};
123 alignas(16) std::array<double,ConvolveUpdateSamples*2> mOutput[MAX_FILTER_CHANNELS]{};
124 alignas(16) std::array<complex_d,ConvolveUpdateSize> mFftBuffer{};
126 ALuint mNumChannels;
127 alignas(16) FloatBufferLine mTempBuffer[MAX_FILTER_CHANNELS]{};
129 struct {
130 float mHfScale{};
131 BandSplitter mFilter{};
132 float Current[MAX_OUTPUT_CHANNELS]{};
133 float Target[MAX_OUTPUT_CHANNELS]{};
134 } mOutChans[MAX_FILTER_CHANNELS];
136 ConvolutionState() = default;
137 ~ConvolutionState() override = default;
139 void NormalMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
140 void UpsampleMix(const al::span<FloatBufferLine> samplesOut, const size_t samplesToDo);
141 void (ConvolutionState::*mMix)(const al::span<FloatBufferLine>,const size_t)
142 {&ConvolutionState::NormalMix};
144 void deviceUpdate(const ALCdevice *device) override;
145 EffectBufferBase *createBuffer(const ALCdevice *device, const BufferStorage &buffer) override;
146 void update(const ALCcontext *context, const ALeffectslot *slot, const EffectProps *props, const EffectTarget target) override;
147 void process(const size_t samplesToDo, const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut) override;
149 DEF_NEWDEL(ConvolutionState)
152 void ConvolutionState::NormalMix(const al::span<FloatBufferLine> samplesOut,
153 const size_t samplesToDo)
155 for(size_t c{0};c < mNumChannels;++c)
156 MixSamples({mTempBuffer[c].data(), samplesToDo}, samplesOut, mOutChans[c].Current,
157 mOutChans[c].Target, samplesToDo, 0);
160 void ConvolutionState::UpsampleMix(const al::span<FloatBufferLine> samplesOut,
161 const size_t samplesToDo)
163 for(size_t c{0};c < mNumChannels;++c)
165 const al::span<float> src{mTempBuffer[c].data(), samplesToDo};
166 mOutChans[c].mFilter.processHfScale(src, mOutChans[c].mHfScale);
167 MixSamples(src, samplesOut, mOutChans[c].Current, mOutChans[c].Target, samplesToDo, 0);
171 void ConvolutionState::deviceUpdate(const ALCdevice *device)
173 mFifoPos = 0;
174 for(auto &buffer : mOutput)
175 buffer.fill(0.0f);
176 mFftBuffer.fill(complex_d{});
178 for(auto &buffer : mTempBuffer)
179 buffer.fill(0.0);
181 const BandSplitter splitter{400.0f / static_cast<float>(device->Frequency)};
182 for(auto &e : mOutChans)
184 e.mHfScale = 1.0f;
185 e.mFilter = splitter;
186 std::fill(std::begin(e.Current), std::end(e.Current), 0.0f);
187 std::fill(std::begin(e.Target), std::end(e.Target), 0.0f);
191 EffectBufferBase *ConvolutionState::createBuffer(const ALCdevice *device,
192 const BufferStorage &buffer)
194 /* An empty buffer doesn't need a convolution filter. */
195 if(buffer.mSampleLen < 1) return nullptr;
197 /* FIXME: Support anything. */
198 if(buffer.mChannels != FmtMono && buffer.mChannels != FmtStereo
199 && buffer.mChannels != FmtBFormat2D && buffer.mChannels != FmtBFormat3D)
200 return nullptr;
201 if((buffer.mChannels == FmtBFormat2D || buffer.mChannels == FmtBFormat3D)
202 && buffer.mAmbiOrder > 1)
203 return nullptr;
205 /* The impulse response needs to have the same sample rate as the input and
206 * output. The bsinc24 resampler is decent, but there is high-frequency
207 * attenation that some people may be able to pick up on. Since this is
208 * very infrequent called, go ahead and use the polyphase resampler.
210 PPhaseResampler resampler;
211 if(device->Frequency != buffer.mSampleRate)
212 resampler.init(buffer.mSampleRate, device->Frequency);
213 const auto resampledCount = static_cast<ALuint>(
214 (uint64_t{buffer.mSampleLen}*device->Frequency + (buffer.mSampleRate-1)) /
215 buffer.mSampleRate);
217 al::intrusive_ptr<ConvolutionFilter> filter{new ConvolutionFilter{}};
219 auto bytesPerSample = BytesFromFmt(buffer.mType);
220 auto numChannels = ChannelsFromFmt(buffer.mChannels, buffer.mAmbiOrder);
221 constexpr size_t m{ConvolveUpdateSize/2 + 1};
223 /* Calculate the number of segments needed to hold the impulse response and
224 * the input history (rounded up), and allocate them.
226 filter->mNumConvolveSegs = (buffer.mSampleLen+(ConvolveUpdateSamples-1)) /
227 ConvolveUpdateSamples;
229 const size_t complex_length{filter->mNumConvolveSegs * m * (numChannels+1)};
230 filter->mComplexData = std::make_unique<complex_d[]>(complex_length);
231 std::fill_n(filter->mComplexData.get(), complex_length, complex_d{});
233 filter->mInputHistory = filter->mComplexData.get();
234 filter->mConvolveFilter[0] = filter->mInputHistory + filter->mNumConvolveSegs*m;
235 for(size_t c{1};c < numChannels;++c)
236 filter->mConvolveFilter[c] = filter->mConvolveFilter[c-1] + filter->mNumConvolveSegs*m;
238 filter->mChannels = buffer.mChannels;
239 filter->mAmbiLayout = buffer.mAmbiLayout;
240 filter->mAmbiScaling = buffer.mAmbiScaling;
241 filter->mAmbiOrder = buffer.mAmbiOrder;
243 auto fftbuffer = std::make_unique<std::array<complex_d,ConvolveUpdateSize>>();
244 auto srcsamples = std::make_unique<double[]>(maxz(buffer.mSampleLen, resampledCount));
245 for(size_t c{0};c < numChannels;++c)
247 /* Load the samples from the buffer, and resample to match the device. */
248 LoadSamples(srcsamples.get(), buffer.mData.data() + bytesPerSample*c, numChannels,
249 buffer.mType, buffer.mSampleLen);
250 if(device->Frequency != buffer.mSampleRate)
251 resampler.process(buffer.mSampleLen, srcsamples.get(), resampledCount,
252 srcsamples.get());
254 size_t done{0};
255 complex_d *filteriter = filter->mConvolveFilter[c];
256 for(size_t s{0};s < filter->mNumConvolveSegs;++s)
258 const size_t todo{minz(resampledCount-done, ConvolveUpdateSamples)};
260 auto iter = std::copy_n(&srcsamples[done], todo, fftbuffer->begin());
261 done += todo;
262 std::fill(iter, fftbuffer->end(), complex_d{});
264 complex_fft(*fftbuffer, -1.0);
265 filteriter = std::copy_n(fftbuffer->cbegin(), m, filteriter);
269 return filter.release();
272 void ConvolutionState::update(const ALCcontext *context, const ALeffectslot *slot,
273 const EffectProps* /*props*/, const EffectTarget target)
275 mFilter = static_cast<ConvolutionFilter*>(slot->Params.mEffectBuffer);
276 if(!mFilter) return;
278 ALCdevice *device{context->mDevice.get()};
279 const ALuint min_order{minu(mFilter->mAmbiOrder, device->mAmbiOrder)};
280 mNumChannels = mFilter ? ChannelsFromFmt(mFilter->mChannels, min_order) : 0u;
281 mMix = &ConvolutionState::NormalMix;
283 /* The iFFT'd response is scaled up by the number of bins, so apply the
284 * inverse to the output mixing gain.
286 constexpr size_t m{ConvolveUpdateSize/2 + 1};
287 const float gain{slot->Params.Gain * (1.0f/m)};
288 if(mFilter->mChannels == FmtBFormat3D || mFilter->mChannels == FmtBFormat2D)
290 if(device->mAmbiOrder > mFilter->mAmbiOrder)
292 mMix = &ConvolutionState::UpsampleMix;
293 const auto scales = BFormatDec::GetHFOrderScales(mFilter->mAmbiOrder,
294 device->mAmbiOrder);
295 mOutChans[0].mHfScale = scales[0];
296 for(size_t i{1};i < mNumChannels;++i)
297 mOutChans[i].mHfScale = scales[1];
299 mOutTarget = target.Main->Buffer;
301 const auto &scales = GetAmbiScales(mFilter->mAmbiScaling);
302 const uint8_t *index_map{(mFilter->mChannels == FmtBFormat2D) ?
303 GetAmbi2DLayout(mFilter->mAmbiLayout).data() :
304 GetAmbiLayout(mFilter->mAmbiLayout).data()};
306 std::array<float,MAX_AMBI_CHANNELS> coeffs{};
307 for(size_t c{0u};c < mNumChannels;++c)
309 const size_t acn{index_map[c]};
310 coeffs[acn] = scales[acn];
311 ComputePanGains(target.Main, coeffs.data(), gain, mOutChans[c].Target);
312 coeffs[acn] = 0.0f;
315 else if(mFilter->mChannels == FmtStereo)
317 /* TODO: Add a "direct channels" setting for this effect? */
318 const ALuint lidx{!target.RealOut ? INVALID_CHANNEL_INDEX :
319 GetChannelIdxByName(*target.RealOut, FrontLeft)};
320 const ALuint ridx{!target.RealOut ? INVALID_CHANNEL_INDEX :
321 GetChannelIdxByName(*target.RealOut, FrontRight)};
322 if(lidx != INVALID_CHANNEL_INDEX && ridx != INVALID_CHANNEL_INDEX)
324 mOutTarget = target.RealOut->Buffer;
325 mOutChans[0].Target[lidx] = gain;
326 mOutChans[1].Target[ridx] = gain;
328 else
330 const auto lcoeffs = CalcDirectionCoeffs({-1.0f, 0.0f, 0.0f}, 0.0f);
331 const auto rcoeffs = CalcDirectionCoeffs({ 1.0f, 0.0f, 0.0f}, 0.0f);
333 mOutTarget = target.Main->Buffer;
334 ComputePanGains(target.Main, lcoeffs.data(), gain, mOutChans[0].Target);
335 ComputePanGains(target.Main, rcoeffs.data(), gain, mOutChans[1].Target);
338 else if(mFilter->mChannels == FmtMono)
340 const auto coeffs = CalcDirectionCoeffs({0.0f, 0.0f, -1.0f}, 0.0f);
342 mOutTarget = target.Main->Buffer;
343 ComputePanGains(target.Main, coeffs.data(), gain, mOutChans[0].Target);
347 void ConvolutionState::process(const size_t samplesToDo,
348 const al::span<const FloatBufferLine> samplesIn, const al::span<FloatBufferLine> samplesOut)
350 /* No filter, no response. */
351 if(!mFilter) return;
353 constexpr size_t m{ConvolveUpdateSize/2 + 1};
354 size_t curseg{mFilter->mCurrentSegment};
356 for(size_t base{0u};base < samplesToDo;)
358 const size_t todo{minz(ConvolveUpdateSamples-mFifoPos, samplesToDo-base)};
360 /* Retrieve the output samples from the FIFO and fill in the new input
361 * samples.
363 for(size_t c{0};c < mNumChannels;++c)
365 auto fifo_iter = mOutput[c].begin() + mFifoPos;
366 std::transform(fifo_iter, fifo_iter+todo, mTempBuffer[c].begin()+base,
367 [](double d) noexcept -> float { return static_cast<float>(d); });
370 std::copy_n(samplesIn[0].begin()+base, todo, mFftBuffer.begin()+mFifoPos);
371 mFifoPos += todo;
372 base += todo;
374 /* Check whether FIFO buffer is filled with new samples. */
375 if(mFifoPos < ConvolveUpdateSamples) break;
376 mFifoPos = 0;
378 /* Calculate the frequency domain response and add the relevant
379 * frequency bins to the input history.
381 complex_fft(mFftBuffer, -1.0);
383 std::copy_n(mFftBuffer.begin(), m, &mFilter->mInputHistory[curseg*m]);
384 mFftBuffer.fill(complex_d{});
386 for(size_t c{0};c < mNumChannels;++c)
388 /* Convolve each input segment with its IR filter counterpart
389 * (aligned in time).
391 const complex_d *RESTRICT filter{mFilter->mConvolveFilter[c]};
392 const complex_d *RESTRICT input{&mFilter->mInputHistory[curseg*m]};
393 for(size_t s{curseg};s < mFilter->mNumConvolveSegs;++s)
395 for(size_t i{0};i < m;++i,++input,++filter)
396 mFftBuffer[i] += *input * *filter;
398 input = mFilter->mInputHistory;
399 for(size_t s{0};s < curseg;++s)
401 for(size_t i{0};i < m;++i,++input,++filter)
402 mFftBuffer[i] += *input * *filter;
405 /* Apply iFFT to get the 1024 (really 1023) samples for output. The
406 * 512 output samples are combined with the last output's 511
407 * second-half samples (and this output's second half is
408 * subsequently saved for next time).
410 complex_fft(mFftBuffer, 1.0);
412 for(size_t i{0};i < ConvolveUpdateSamples;++i)
413 mOutput[c][i] = mFftBuffer[i].real() + mOutput[c][ConvolveUpdateSamples+i];
414 for(size_t i{0};i < ConvolveUpdateSamples;++i)
415 mOutput[c][ConvolveUpdateSamples+i] = mFftBuffer[ConvolveUpdateSamples+i].real();
416 mFftBuffer.fill(complex_d{});
419 /* Shift the input history. */
420 curseg = curseg ? (curseg-1) : (mFilter->mNumConvolveSegs-1);
422 mFilter->mCurrentSegment = curseg;
424 /* Finally, mix to the output. */
425 (this->*mMix)(samplesOut, samplesToDo);
429 void ConvolutionEffect_setParami(EffectProps* /*props*/, ALenum param, int /*val*/)
431 switch(param)
433 default:
434 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
435 param};
438 void ConvolutionEffect_setParamiv(EffectProps *props, ALenum param, const int *vals)
440 switch(param)
442 default:
443 ConvolutionEffect_setParami(props, param, vals[0]);
446 void ConvolutionEffect_setParamf(EffectProps* /*props*/, ALenum param, float /*val*/)
448 switch(param)
450 default:
451 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
452 param};
455 void ConvolutionEffect_setParamfv(EffectProps *props, ALenum param, const float *vals)
457 switch(param)
459 default:
460 ConvolutionEffect_setParamf(props, param, vals[0]);
464 void ConvolutionEffect_getParami(const EffectProps* /*props*/, ALenum param, int* /*val*/)
466 switch(param)
468 default:
469 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect integer property 0x%04x",
470 param};
473 void ConvolutionEffect_getParamiv(const EffectProps *props, ALenum param, int *vals)
475 switch(param)
477 default:
478 ConvolutionEffect_getParami(props, param, vals);
481 void ConvolutionEffect_getParamf(const EffectProps* /*props*/, ALenum param, float* /*val*/)
483 switch(param)
485 default:
486 throw effect_exception{AL_INVALID_ENUM, "Invalid null effect float property 0x%04x",
487 param};
490 void ConvolutionEffect_getParamfv(const EffectProps *props, ALenum param, float *vals)
492 switch(param)
494 default:
495 ConvolutionEffect_getParamf(props, param, vals);
499 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect);
502 struct ConvolutionStateFactory final : public EffectStateFactory {
503 EffectState *create() override;
504 EffectProps getDefaultProps() const noexcept override;
505 const EffectVtable *getEffectVtable() const noexcept override;
508 /* Creates EffectState objects of the appropriate type. */
509 EffectState *ConvolutionStateFactory::create()
510 { return new ConvolutionState{}; }
512 /* Returns an ALeffectProps initialized with this effect type's default
513 * property values.
515 EffectProps ConvolutionStateFactory::getDefaultProps() const noexcept
517 EffectProps props{};
518 return props;
521 /* Returns a pointer to this effect type's global set/get vtable. */
522 const EffectVtable *ConvolutionStateFactory::getEffectVtable() const noexcept
523 { return &ConvolutionEffect_vtable; }
525 } // namespace
527 EffectStateFactory *ConvolutionStateFactory_getFactory()
529 static ConvolutionStateFactory ConvolutionFactory{};
530 return &ConvolutionFactory;