7 #include "al/auxeffectslot.h"
10 #include "alcontext.h"
14 #include "bformatdec.h"
15 #include "buffer_storage.h"
16 #include "effects/base.h"
17 #include "filters/splitter.h"
18 #include "fmt_traits.h"
20 #include "polyphase_resampler.h"
25 /* Convolution reverb is implemented using a segmented overlap-add method. The
26 * impulse response is broken up into multiple segments of 512 samples, and
27 * each segment has an FFT applied with a 1024-sample buffer (the latter half
28 * left silent) to get its frequency-domain response. The resulting response
29 * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
31 * Input samples are similarly broken up into 512-sample segments, with an FFT
32 * applied to each new incoming segment to get its 513 bins. A history of FFT'd
33 * input segments is maintained, equal to the length of the impulse response.
35 * To apply the reverberation, each impulse response segment is convolved with
36 * its paired input segment (using complex multiplies, far cheaper than FIRs),
37 * accumulating into a 1024-bin FFT buffer. The input history is then shifted
38 * to align with later impulse response segments for next time.
40 * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
41 * sample time-domain response for output, which is split in two halves. The
42 * first half is the 512-sample output, and the second half is a 512-sample
43 * (really, 511) delayed extension, which gets added to the output next time.
44 * Convolving two time-domain responses of lengths N and M results in a time-
45 * domain signal of length N+M-1, and this holds true regardless of the
46 * convolution being applied in the frequency domain, so these "overflow"
47 * samples need to be accounted for.
50 * There is currently a 512-sample delay on the output, as a result of needing
51 * to collect that many input samples to do an FFT with. This can be fixed by
52 * excluding the first impulse response segment from being FFT'd, and applying
53 * it directly in the time domain. This will have higher CPU consumption, but
54 * it won't have to wait before generating output.
58 void LoadSamples(double *RESTRICT dst
, const al::byte
*src
, const size_t srcstep
, FmtType srctype
,
59 const size_t samples
) noexcept
61 #define HANDLE_FMT(T) case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
67 HANDLE_FMT(FmtDouble
);
75 auto GetAmbiScales(AmbiScaling scaletype
) noexcept
-> const std::array
<float,MAX_AMBI_CHANNELS
>&
77 if(scaletype
== AmbiScaling::FuMa
) return AmbiScale::FromFuMa
;
78 if(scaletype
== AmbiScaling::SN3D
) return AmbiScale::FromSN3D
;
79 return AmbiScale::FromN3D
;
82 auto GetAmbiLayout(AmbiLayout layouttype
) noexcept
-> const std::array
<uint8_t,MAX_AMBI_CHANNELS
>&
84 if(layouttype
== AmbiLayout::FuMa
) return AmbiIndex::FromFuMa
;
85 return AmbiIndex::FromACN
;
88 auto GetAmbi2DLayout(AmbiLayout layouttype
) noexcept
-> const std::array
<uint8_t,MAX_AMBI2D_CHANNELS
>&
90 if(layouttype
== AmbiLayout::FuMa
) return AmbiIndex::FromFuMa2D
;
91 return AmbiIndex::From2D
;
95 using complex_d
= std::complex<double>;
97 constexpr size_t ConvolveUpdateSize
{1024};
98 constexpr size_t ConvolveUpdateSamples
{ConvolveUpdateSize
/ 2};
100 struct ConvolutionFilter final
: public EffectBufferBase
{
101 FmtChannels mChannels
{};
102 AmbiLayout mAmbiLayout
{};
103 AmbiScaling mAmbiScaling
{};
107 al::vector
<std::array
<double,ConvolveUpdateSamples
*2>,16> mOutput
;
108 alignas(16) std::array
<complex_d
,ConvolveUpdateSize
> mFftBuffer
{};
110 size_t mCurrentSegment
{0};
111 size_t mNumConvolveSegs
{0};
114 alignas(16) FloatBufferLine mBuffer
{};
116 BandSplitter mFilter
{};
117 float Current
[MAX_OUTPUT_CHANNELS
]{};
118 float Target
[MAX_OUTPUT_CHANNELS
]{};
120 using ChannelDataArray
= al::FlexArray
<ChannelData
>;
121 std::unique_ptr
<ChannelDataArray
> mChans
;
122 std::unique_ptr
<complex_d
[]> mComplexData
;
124 ConvolutionFilter(size_t numChannels
) : mChans
{ChannelDataArray::Create(numChannels
)}
127 bool init(const ALCdevice
*device
, const BufferStorage
&buffer
);
129 void NormalMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
130 void UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
131 void (ConvolutionFilter::*mMix
)(const al::span
<FloatBufferLine
>,const size_t)
132 {&ConvolutionFilter::NormalMix
};
134 void update(al::span
<FloatBufferLine
> &outTarget
, const ALCcontext
*context
,
135 const ALeffectslot
*slot
, const EffectProps
*props
, const EffectTarget target
);
136 void process(const size_t samplesToDo
, const al::span
<const FloatBufferLine
> samplesIn
,
137 const al::span
<FloatBufferLine
> samplesOut
);
139 DEF_NEWDEL(ConvolutionFilter
)
142 bool ConvolutionFilter::init(const ALCdevice
*device
, const BufferStorage
&buffer
)
144 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
146 /* FIXME: Support anything. */
147 if(buffer
.mChannels
!= FmtMono
&& buffer
.mChannels
!= FmtStereo
148 && buffer
.mChannels
!= FmtBFormat2D
&& buffer
.mChannels
!= FmtBFormat3D
)
150 if((buffer
.mChannels
== FmtBFormat2D
|| buffer
.mChannels
== FmtBFormat3D
)
151 && buffer
.mAmbiOrder
> 1)
154 /* The impulse response needs to have the same sample rate as the input and
155 * output. The bsinc24 resampler is decent, but there is high-frequency
156 * attenation that some people may be able to pick up on. Since this is
157 * very infrequent called, go ahead and use the polyphase resampler.
159 PPhaseResampler resampler
;
160 if(device
->Frequency
!= buffer
.mSampleRate
)
161 resampler
.init(buffer
.mSampleRate
, device
->Frequency
);
162 const auto resampledCount
= static_cast<ALuint
>(
163 (uint64_t{buffer
.mSampleLen
}*device
->Frequency
+ (buffer
.mSampleRate
-1)) /
166 auto bytesPerSample
= BytesFromFmt(buffer
.mType
);
167 auto realChannels
= ChannelsFromFmt(buffer
.mChannels
, buffer
.mAmbiOrder
);
168 auto numChannels
= mChans
->size();
170 const BandSplitter splitter
{400.0f
/ static_cast<float>(device
->Frequency
)};
171 for(auto &e
: *mChans
)
172 e
.mFilter
= splitter
;
174 mOutput
.resize(numChannels
, {});
176 /* Calculate the number of segments needed to hold the impulse response and
177 * the input history (rounded up), and allocate them.
179 mNumConvolveSegs
= (resampledCount
+(ConvolveUpdateSamples
-1)) / ConvolveUpdateSamples
;
181 const size_t complex_length
{mNumConvolveSegs
* m
* (numChannels
+1)};
182 mComplexData
= std::make_unique
<complex_d
[]>(complex_length
);
183 std::fill_n(mComplexData
.get(), complex_length
, complex_d
{});
185 mChannels
= buffer
.mChannels
;
186 mAmbiLayout
= buffer
.mAmbiLayout
;
187 mAmbiScaling
= buffer
.mAmbiScaling
;
188 mAmbiOrder
= buffer
.mAmbiOrder
;
190 auto fftbuffer
= std::make_unique
<std::array
<complex_d
,ConvolveUpdateSize
>>();
191 auto srcsamples
= std::make_unique
<double[]>(maxz(buffer
.mSampleLen
, resampledCount
));
192 complex_d
*filteriter
= mComplexData
.get() + mNumConvolveSegs
*m
;
193 for(size_t c
{0};c
< numChannels
;++c
)
195 /* Load the samples from the buffer, and resample to match the device. */
196 LoadSamples(srcsamples
.get(), buffer
.mData
.data() + bytesPerSample
*c
, realChannels
,
197 buffer
.mType
, buffer
.mSampleLen
);
198 if(device
->Frequency
!= buffer
.mSampleRate
)
199 resampler
.process(buffer
.mSampleLen
, srcsamples
.get(), resampledCount
,
203 for(size_t s
{0};s
< mNumConvolveSegs
;++s
)
205 const size_t todo
{minz(resampledCount
-done
, ConvolveUpdateSamples
)};
207 auto iter
= std::copy_n(&srcsamples
[done
], todo
, fftbuffer
->begin());
209 std::fill(iter
, fftbuffer
->end(), complex_d
{});
211 complex_fft(*fftbuffer
, -1.0);
212 filteriter
= std::copy_n(fftbuffer
->cbegin(), m
, filteriter
);
218 void ConvolutionFilter::NormalMix(const al::span
<FloatBufferLine
> samplesOut
,
219 const size_t samplesToDo
)
221 for(auto &chan
: *mChans
)
222 MixSamples({chan
.mBuffer
.data(), samplesToDo
}, samplesOut
, chan
.Current
, chan
.Target
,
226 void ConvolutionFilter::UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
,
227 const size_t samplesToDo
)
229 for(auto &chan
: *mChans
)
231 const al::span
<float> src
{chan
.mBuffer
.data(), samplesToDo
};
232 chan
.mFilter
.processHfScale(src
, chan
.mHfScale
);
233 MixSamples(src
, samplesOut
, chan
.Current
, chan
.Target
, samplesToDo
, 0);
237 void ConvolutionFilter::update(al::span
<FloatBufferLine
> &outTarget
, const ALCcontext
*context
,
238 const ALeffectslot
*slot
, const EffectProps
* /*props*/, const EffectTarget target
)
240 ALCdevice
*device
{context
->mDevice
.get()};
241 mMix
= &ConvolutionFilter::NormalMix
;
243 /* The iFFT'd response is scaled up by the number of bins, so apply the
244 * inverse to the output mixing gain.
246 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
247 const float gain
{slot
->Params
.Gain
* (1.0f
/m
)};
248 auto &chans
= *mChans
;
249 if(mChannels
== FmtBFormat3D
|| mChannels
== FmtBFormat2D
)
251 if(device
->mAmbiOrder
> mAmbiOrder
)
253 mMix
= &ConvolutionFilter::UpsampleMix
;
254 const auto scales
= BFormatDec::GetHFOrderScales(mAmbiOrder
, device
->mAmbiOrder
);
255 chans
[0].mHfScale
= scales
[0];
256 for(size_t i
{1};i
< chans
.size();++i
)
257 chans
[i
].mHfScale
= scales
[1];
259 outTarget
= target
.Main
->Buffer
;
261 const auto &scales
= GetAmbiScales(mAmbiScaling
);
262 const uint8_t *index_map
{(mChannels
== FmtBFormat2D
) ?
263 GetAmbi2DLayout(mAmbiLayout
).data() :
264 GetAmbiLayout(mAmbiLayout
).data()};
266 std::array
<float,MAX_AMBI_CHANNELS
> coeffs
{};
267 for(size_t c
{0u};c
< chans
.size();++c
)
269 const size_t acn
{index_map
[c
]};
270 coeffs
[acn
] = scales
[acn
];
271 ComputePanGains(target
.Main
, coeffs
.data(), gain
, chans
[c
].Target
);
275 else if(mChannels
== FmtStereo
)
277 /* TODO: Add a "direct channels" setting for this effect? */
278 const ALuint lidx
{!target
.RealOut
? INVALID_CHANNEL_INDEX
:
279 GetChannelIdxByName(*target
.RealOut
, FrontLeft
)};
280 const ALuint ridx
{!target
.RealOut
? INVALID_CHANNEL_INDEX
:
281 GetChannelIdxByName(*target
.RealOut
, FrontRight
)};
282 if(lidx
!= INVALID_CHANNEL_INDEX
&& ridx
!= INVALID_CHANNEL_INDEX
)
284 outTarget
= target
.RealOut
->Buffer
;
285 chans
[0].Target
[lidx
] = gain
;
286 chans
[1].Target
[ridx
] = gain
;
290 const auto lcoeffs
= CalcDirectionCoeffs({-1.0f
, 0.0f
, 0.0f
}, 0.0f
);
291 const auto rcoeffs
= CalcDirectionCoeffs({ 1.0f
, 0.0f
, 0.0f
}, 0.0f
);
293 outTarget
= target
.Main
->Buffer
;
294 ComputePanGains(target
.Main
, lcoeffs
.data(), gain
, chans
[0].Target
);
295 ComputePanGains(target
.Main
, rcoeffs
.data(), gain
, chans
[1].Target
);
298 else if(mChannels
== FmtMono
)
300 const auto coeffs
= CalcDirectionCoeffs({0.0f
, 0.0f
, -1.0f
}, 0.0f
);
302 outTarget
= target
.Main
->Buffer
;
303 ComputePanGains(target
.Main
, coeffs
.data(), gain
, chans
[0].Target
);
307 void ConvolutionFilter::process(const size_t samplesToDo
,
308 const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
)
310 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
311 size_t curseg
{mCurrentSegment
};
312 auto &chans
= *mChans
;
314 for(size_t base
{0u};base
< samplesToDo
;)
316 const size_t todo
{minz(ConvolveUpdateSamples
-mFifoPos
, samplesToDo
-base
)};
318 /* Retrieve the output samples from the FIFO and fill in the new input
321 for(size_t c
{0};c
< chans
.size();++c
)
323 auto fifo_iter
= mOutput
[c
].begin() + mFifoPos
;
324 std::transform(fifo_iter
, fifo_iter
+todo
, chans
[c
].mBuffer
.begin()+base
,
325 [](double d
) noexcept
-> float { return static_cast<float>(d
); });
328 std::copy_n(samplesIn
[0].begin()+base
, todo
, mFftBuffer
.begin()+mFifoPos
);
332 /* Check whether FIFO buffer is filled with new samples. */
333 if(mFifoPos
< ConvolveUpdateSamples
) break;
336 /* Calculate the frequency domain response and add the relevant
337 * frequency bins to the input history.
339 complex_fft(mFftBuffer
, -1.0);
341 std::copy_n(mFftBuffer
.begin(), m
, &mComplexData
[curseg
*m
]);
342 mFftBuffer
.fill(complex_d
{});
344 const complex_d
*RESTRICT filter
{mComplexData
.get() + mNumConvolveSegs
*m
};
345 for(size_t c
{0};c
< chans
.size();++c
)
347 /* Convolve each input segment with its IR filter counterpart
350 const complex_d
*RESTRICT input
{&mComplexData
[curseg
*m
]};
351 for(size_t s
{curseg
};s
< mNumConvolveSegs
;++s
)
353 for(size_t i
{0};i
< m
;++i
,++input
,++filter
)
354 mFftBuffer
[i
] += *input
* *filter
;
356 input
= mComplexData
.get();
357 for(size_t s
{0};s
< curseg
;++s
)
359 for(size_t i
{0};i
< m
;++i
,++input
,++filter
)
360 mFftBuffer
[i
] += *input
* *filter
;
363 /* Apply iFFT to get the 1024 (really 1023) samples for output. The
364 * 512 output samples are combined with the last output's 511
365 * second-half samples (and this output's second half is
366 * subsequently saved for next time).
368 complex_fft(mFftBuffer
, 1.0);
370 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
371 mOutput
[c
][i
] = mFftBuffer
[i
].real() + mOutput
[c
][ConvolveUpdateSamples
+i
];
372 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
373 mOutput
[c
][ConvolveUpdateSamples
+i
] = mFftBuffer
[ConvolveUpdateSamples
+i
].real();
374 mFftBuffer
.fill(complex_d
{});
377 /* Shift the input history. */
378 curseg
= curseg
? (curseg
-1) : (mNumConvolveSegs
-1);
380 mCurrentSegment
= curseg
;
382 /* Finally, mix to the output. */
383 (this->*mMix
)(samplesOut
, samplesToDo
);
387 struct ConvolutionState final
: public EffectState
{
388 ConvolutionFilter
*mFilter
{};
390 ConvolutionState() = default;
391 ~ConvolutionState() override
= default;
393 void deviceUpdate(const ALCdevice
*device
) override
;
394 EffectBufferBase
*createBuffer(const ALCdevice
*device
, const BufferStorage
&buffer
) override
;
395 void update(const ALCcontext
*context
, const ALeffectslot
*slot
, const EffectProps
*props
, const EffectTarget target
) override
;
396 void process(const size_t samplesToDo
, const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
) override
;
398 DEF_NEWDEL(ConvolutionState
)
401 void ConvolutionState::deviceUpdate(const ALCdevice
* /*device*/)
405 EffectBufferBase
*ConvolutionState::createBuffer(const ALCdevice
*device
,
406 const BufferStorage
&buffer
)
408 /* An empty buffer doesn't need a convolution filter. */
409 if(buffer
.mSampleLen
< 1) return nullptr;
411 auto numChannels
= ChannelsFromFmt(buffer
.mChannels
,
412 minu(buffer
.mAmbiOrder
, device
->mAmbiOrder
));
414 al::intrusive_ptr
<ConvolutionFilter
> filter
{new ConvolutionFilter
{numChannels
}};
415 if LIKELY(filter
->init(device
, buffer
))
416 return filter
.release();
421 void ConvolutionState::update(const ALCcontext
*context
, const ALeffectslot
*slot
,
422 const EffectProps
*props
, const EffectTarget target
)
424 mFilter
= static_cast<ConvolutionFilter
*>(slot
->Params
.mEffectBuffer
);
427 mFilter
->update(mOutTarget
, context
, slot
, props
, target
);
430 void ConvolutionState::process(const size_t samplesToDo
,
431 const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
)
434 mFilter
->process(samplesToDo
, samplesIn
, samplesOut
);
438 void ConvolutionEffect_setParami(EffectProps
* /*props*/, ALenum param
, int /*val*/)
443 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect integer property 0x%04x",
447 void ConvolutionEffect_setParamiv(EffectProps
*props
, ALenum param
, const int *vals
)
452 ConvolutionEffect_setParami(props
, param
, vals
[0]);
455 void ConvolutionEffect_setParamf(EffectProps
* /*props*/, ALenum param
, float /*val*/)
460 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect float property 0x%04x",
464 void ConvolutionEffect_setParamfv(EffectProps
*props
, ALenum param
, const float *vals
)
469 ConvolutionEffect_setParamf(props
, param
, vals
[0]);
473 void ConvolutionEffect_getParami(const EffectProps
* /*props*/, ALenum param
, int* /*val*/)
478 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect integer property 0x%04x",
482 void ConvolutionEffect_getParamiv(const EffectProps
*props
, ALenum param
, int *vals
)
487 ConvolutionEffect_getParami(props
, param
, vals
);
490 void ConvolutionEffect_getParamf(const EffectProps
* /*props*/, ALenum param
, float* /*val*/)
495 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect float property 0x%04x",
499 void ConvolutionEffect_getParamfv(const EffectProps
*props
, ALenum param
, float *vals
)
504 ConvolutionEffect_getParamf(props
, param
, vals
);
508 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect
);
511 struct ConvolutionStateFactory final
: public EffectStateFactory
{
512 EffectState
*create() override
;
513 EffectProps
getDefaultProps() const noexcept override
;
514 const EffectVtable
*getEffectVtable() const noexcept override
;
517 /* Creates EffectState objects of the appropriate type. */
518 EffectState
*ConvolutionStateFactory::create()
519 { return new ConvolutionState
{}; }
521 /* Returns an ALeffectProps initialized with this effect type's default
524 EffectProps
ConvolutionStateFactory::getDefaultProps() const noexcept
530 /* Returns a pointer to this effect type's global set/get vtable. */
531 const EffectVtable
*ConvolutionStateFactory::getEffectVtable() const noexcept
532 { return &ConvolutionEffect_vtable
; }
536 EffectStateFactory
*ConvolutionStateFactory_getFactory()
538 static ConvolutionStateFactory ConvolutionFactory
{};
539 return &ConvolutionFactory
;