7 #include "al/auxeffectslot.h"
10 #include "alcontext.h"
14 #include "bformatdec.h"
15 #include "buffer_storage.h"
16 #include "effects/base.h"
17 #include "filters/splitter.h"
18 #include "fmt_traits.h"
20 #include "polyphase_resampler.h"
25 /* Convolution reverb is implemented using a segmented overlap-add method. The
26 * impulse response is broken up into multiple segments of 512 samples, and
27 * each segment has an FFT applied with a 1024-sample buffer (the latter half
28 * left silent) to get its frequency-domain response. The resulting response
29 * has its positive/non-mirrored frequencies saved (513 bins) in each segment.
31 * Input samples are similarly broken up into 512-sample segments, with an FFT
32 * applied to each new incoming segment to get its 513 bins. A history of FFT'd
33 * input segments is maintained, equal to the length of the impulse response.
35 * To apply the reverberation, each impulse response segment is convolved with
36 * its paired input segment (using complex multiplies, far cheaper than FIRs),
37 * accumulating into a 1024-bin FFT buffer. The input history is then shifted
38 * to align with later impulse response segments for next time.
40 * An inverse FFT is then applied to the accumulated FFT buffer to get a 1024-
41 * sample time-domain response for output, which is split in two halves. The
42 * first half is the 512-sample output, and the second half is a 512-sample
43 * (really, 511) delayed extension, which gets added to the output next time.
44 * Convolving two time-domain responses of lengths N and M results in a time-
45 * domain signal of length N+M-1, and this holds true regardless of the
46 * convolution being applied in the frequency domain, so these "overflow"
47 * samples need to be accounted for.
50 * There is currently a 512-sample delay on the output, as a result of needing
51 * to collect that many input samples to do an FFT with. This can be fixed by
52 * excluding the first impulse response segment from being FFT'd, and applying
53 * it directly in the time domain. This will have higher CPU consumption, but
54 * it won't have to wait before generating output.
58 void LoadSamples(double *RESTRICT dst
, const al::byte
*src
, const size_t srcstep
, FmtType srctype
,
59 const size_t samples
) noexcept
61 #define HANDLE_FMT(T) case T: al::LoadSampleArray<T>(dst, src, srcstep, samples); break
67 HANDLE_FMT(FmtDouble
);
75 auto GetAmbiScales(AmbiScaling scaletype
) noexcept
-> const std::array
<float,MAX_AMBI_CHANNELS
>&
77 if(scaletype
== AmbiScaling::FuMa
) return AmbiScale::FromFuMa
;
78 if(scaletype
== AmbiScaling::SN3D
) return AmbiScale::FromSN3D
;
79 return AmbiScale::FromN3D
;
82 auto GetAmbiLayout(AmbiLayout layouttype
) noexcept
-> const std::array
<uint8_t,MAX_AMBI_CHANNELS
>&
84 if(layouttype
== AmbiLayout::FuMa
) return AmbiIndex::FromFuMa
;
85 return AmbiIndex::FromACN
;
88 auto GetAmbi2DLayout(AmbiLayout layouttype
) noexcept
-> const std::array
<uint8_t,MAX_AMBI2D_CHANNELS
>&
90 if(layouttype
== AmbiLayout::FuMa
) return AmbiIndex::FromFuMa2D
;
91 return AmbiIndex::From2D
;
95 using complex_d
= std::complex<double>;
97 constexpr size_t ConvolveUpdateSize
{1024};
98 constexpr size_t ConvolveUpdateSamples
{ConvolveUpdateSize
/ 2};
100 #define MAX_FILTER_CHANNELS 4
103 struct ConvolutionFilter final
: public EffectBufferBase
{
104 size_t mCurrentSegment
{0};
105 size_t mNumConvolveSegs
{0};
106 complex_d
*mInputHistory
{};
107 complex_d
*mConvolveFilter
[MAX_FILTER_CHANNELS
]{};
109 FmtChannels mChannels
{};
110 AmbiLayout mAmbiLayout
{};
111 AmbiScaling mAmbiScaling
{};
114 std::unique_ptr
<complex_d
[]> mComplexData
;
116 DEF_NEWDEL(ConvolutionFilter
)
119 struct ConvolutionState final
: public EffectState
{
120 ConvolutionFilter
*mFilter
{};
123 alignas(16) std::array
<double,ConvolveUpdateSamples
*2> mOutput
[MAX_FILTER_CHANNELS
]{};
124 alignas(16) std::array
<complex_d
,ConvolveUpdateSize
> mFftBuffer
{};
127 alignas(16) FloatBufferLine mTempBuffer
[MAX_FILTER_CHANNELS
]{};
131 BandSplitter mFilter
{};
132 float Current
[MAX_OUTPUT_CHANNELS
]{};
133 float Target
[MAX_OUTPUT_CHANNELS
]{};
134 } mOutChans
[MAX_FILTER_CHANNELS
];
136 ConvolutionState() = default;
137 ~ConvolutionState() override
= default;
139 void NormalMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
140 void UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
141 void (ConvolutionState::*mMix
)(const al::span
<FloatBufferLine
>,const size_t)
142 {&ConvolutionState::NormalMix
};
144 void deviceUpdate(const ALCdevice
*device
) override
;
145 EffectBufferBase
*createBuffer(const ALCdevice
*device
, const BufferStorage
&buffer
) override
;
146 void update(const ALCcontext
*context
, const ALeffectslot
*slot
, const EffectProps
*props
, const EffectTarget target
) override
;
147 void process(const size_t samplesToDo
, const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
) override
;
149 DEF_NEWDEL(ConvolutionState
)
152 void ConvolutionState::NormalMix(const al::span
<FloatBufferLine
> samplesOut
,
153 const size_t samplesToDo
)
155 for(size_t c
{0};c
< mNumChannels
;++c
)
156 MixSamples({mTempBuffer
[c
].data(), samplesToDo
}, samplesOut
, mOutChans
[c
].Current
,
157 mOutChans
[c
].Target
, samplesToDo
, 0);
160 void ConvolutionState::UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
,
161 const size_t samplesToDo
)
163 for(size_t c
{0};c
< mNumChannels
;++c
)
165 const al::span
<float> src
{mTempBuffer
[c
].data(), samplesToDo
};
166 mOutChans
[c
].mFilter
.processHfScale(src
, mOutChans
[c
].mHfScale
);
167 MixSamples(src
, samplesOut
, mOutChans
[c
].Current
, mOutChans
[c
].Target
, samplesToDo
, 0);
171 void ConvolutionState::deviceUpdate(const ALCdevice
*device
)
174 for(auto &buffer
: mOutput
)
176 mFftBuffer
.fill(complex_d
{});
178 for(auto &buffer
: mTempBuffer
)
181 const BandSplitter splitter
{400.0f
/ static_cast<float>(device
->Frequency
)};
182 for(auto &e
: mOutChans
)
185 e
.mFilter
= splitter
;
186 std::fill(std::begin(e
.Current
), std::end(e
.Current
), 0.0f
);
187 std::fill(std::begin(e
.Target
), std::end(e
.Target
), 0.0f
);
191 EffectBufferBase
*ConvolutionState::createBuffer(const ALCdevice
*device
,
192 const BufferStorage
&buffer
)
194 /* An empty buffer doesn't need a convolution filter. */
195 if(buffer
.mSampleLen
< 1) return nullptr;
197 /* FIXME: Support anything. */
198 if(buffer
.mChannels
!= FmtMono
&& buffer
.mChannels
!= FmtStereo
199 && buffer
.mChannels
!= FmtBFormat2D
&& buffer
.mChannels
!= FmtBFormat3D
)
201 if((buffer
.mChannels
== FmtBFormat2D
|| buffer
.mChannels
== FmtBFormat3D
)
202 && buffer
.mAmbiOrder
> 1)
205 /* The impulse response needs to have the same sample rate as the input and
206 * output. The bsinc24 resampler is decent, but there is high-frequency
207 * attenation that some people may be able to pick up on. Since this is
208 * very infrequent called, go ahead and use the polyphase resampler.
210 PPhaseResampler resampler
;
211 if(device
->Frequency
!= buffer
.mSampleRate
)
212 resampler
.init(buffer
.mSampleRate
, device
->Frequency
);
213 const auto resampledCount
= static_cast<ALuint
>(
214 (uint64_t{buffer
.mSampleLen
}*device
->Frequency
+ (buffer
.mSampleRate
-1)) /
217 al::intrusive_ptr
<ConvolutionFilter
> filter
{new ConvolutionFilter
{}};
219 auto bytesPerSample
= BytesFromFmt(buffer
.mType
);
220 auto numChannels
= ChannelsFromFmt(buffer
.mChannels
, buffer
.mAmbiOrder
);
221 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
223 /* Calculate the number of segments needed to hold the impulse response and
224 * the input history (rounded up), and allocate them.
226 filter
->mNumConvolveSegs
= (buffer
.mSampleLen
+(ConvolveUpdateSamples
-1)) /
227 ConvolveUpdateSamples
;
229 const size_t complex_length
{filter
->mNumConvolveSegs
* m
* (numChannels
+1)};
230 filter
->mComplexData
= std::make_unique
<complex_d
[]>(complex_length
);
231 std::fill_n(filter
->mComplexData
.get(), complex_length
, complex_d
{});
233 filter
->mInputHistory
= filter
->mComplexData
.get();
234 filter
->mConvolveFilter
[0] = filter
->mInputHistory
+ filter
->mNumConvolveSegs
*m
;
235 for(size_t c
{1};c
< numChannels
;++c
)
236 filter
->mConvolveFilter
[c
] = filter
->mConvolveFilter
[c
-1] + filter
->mNumConvolveSegs
*m
;
238 filter
->mChannels
= buffer
.mChannels
;
239 filter
->mAmbiLayout
= buffer
.mAmbiLayout
;
240 filter
->mAmbiScaling
= buffer
.mAmbiScaling
;
241 filter
->mAmbiOrder
= buffer
.mAmbiOrder
;
243 auto fftbuffer
= std::make_unique
<std::array
<complex_d
,ConvolveUpdateSize
>>();
244 auto srcsamples
= std::make_unique
<double[]>(maxz(buffer
.mSampleLen
, resampledCount
));
245 for(size_t c
{0};c
< numChannels
;++c
)
247 /* Load the samples from the buffer, and resample to match the device. */
248 LoadSamples(srcsamples
.get(), buffer
.mData
.data() + bytesPerSample
*c
, numChannels
,
249 buffer
.mType
, buffer
.mSampleLen
);
250 if(device
->Frequency
!= buffer
.mSampleRate
)
251 resampler
.process(buffer
.mSampleLen
, srcsamples
.get(), resampledCount
,
255 complex_d
*filteriter
= filter
->mConvolveFilter
[c
];
256 for(size_t s
{0};s
< filter
->mNumConvolveSegs
;++s
)
258 const size_t todo
{minz(resampledCount
-done
, ConvolveUpdateSamples
)};
260 auto iter
= std::copy_n(&srcsamples
[done
], todo
, fftbuffer
->begin());
262 std::fill(iter
, fftbuffer
->end(), complex_d
{});
264 complex_fft(*fftbuffer
, -1.0);
265 filteriter
= std::copy_n(fftbuffer
->cbegin(), m
, filteriter
);
269 return filter
.release();
272 void ConvolutionState::update(const ALCcontext
*context
, const ALeffectslot
*slot
,
273 const EffectProps
* /*props*/, const EffectTarget target
)
275 mFilter
= static_cast<ConvolutionFilter
*>(slot
->Params
.mEffectBuffer
);
278 ALCdevice
*device
{context
->mDevice
.get()};
279 const ALuint min_order
{minu(mFilter
->mAmbiOrder
, device
->mAmbiOrder
)};
280 mNumChannels
= mFilter
? ChannelsFromFmt(mFilter
->mChannels
, min_order
) : 0u;
281 mMix
= &ConvolutionState::NormalMix
;
283 /* The iFFT'd response is scaled up by the number of bins, so apply the
284 * inverse to the output mixing gain.
286 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
287 const float gain
{slot
->Params
.Gain
* (1.0f
/m
)};
288 if(mFilter
->mChannels
== FmtBFormat3D
|| mFilter
->mChannels
== FmtBFormat2D
)
290 if(device
->mAmbiOrder
> mFilter
->mAmbiOrder
)
292 mMix
= &ConvolutionState::UpsampleMix
;
293 const auto scales
= BFormatDec::GetHFOrderScales(mFilter
->mAmbiOrder
,
295 mOutChans
[0].mHfScale
= scales
[0];
296 for(size_t i
{1};i
< mNumChannels
;++i
)
297 mOutChans
[i
].mHfScale
= scales
[1];
299 mOutTarget
= target
.Main
->Buffer
;
301 const auto &scales
= GetAmbiScales(mFilter
->mAmbiScaling
);
302 const uint8_t *index_map
{(mFilter
->mChannels
== FmtBFormat2D
) ?
303 GetAmbi2DLayout(mFilter
->mAmbiLayout
).data() :
304 GetAmbiLayout(mFilter
->mAmbiLayout
).data()};
306 std::array
<float,MAX_AMBI_CHANNELS
> coeffs
{};
307 for(size_t c
{0u};c
< mNumChannels
;++c
)
309 const size_t acn
{index_map
[c
]};
310 coeffs
[acn
] = scales
[acn
];
311 ComputePanGains(target
.Main
, coeffs
.data(), gain
, mOutChans
[c
].Target
);
315 else if(mFilter
->mChannels
== FmtStereo
)
317 /* TODO: Add a "direct channels" setting for this effect? */
318 const ALuint lidx
{!target
.RealOut
? INVALID_CHANNEL_INDEX
:
319 GetChannelIdxByName(*target
.RealOut
, FrontLeft
)};
320 const ALuint ridx
{!target
.RealOut
? INVALID_CHANNEL_INDEX
:
321 GetChannelIdxByName(*target
.RealOut
, FrontRight
)};
322 if(lidx
!= INVALID_CHANNEL_INDEX
&& ridx
!= INVALID_CHANNEL_INDEX
)
324 mOutTarget
= target
.RealOut
->Buffer
;
325 mOutChans
[0].Target
[lidx
] = gain
;
326 mOutChans
[1].Target
[ridx
] = gain
;
330 const auto lcoeffs
= CalcDirectionCoeffs({-1.0f
, 0.0f
, 0.0f
}, 0.0f
);
331 const auto rcoeffs
= CalcDirectionCoeffs({ 1.0f
, 0.0f
, 0.0f
}, 0.0f
);
333 mOutTarget
= target
.Main
->Buffer
;
334 ComputePanGains(target
.Main
, lcoeffs
.data(), gain
, mOutChans
[0].Target
);
335 ComputePanGains(target
.Main
, rcoeffs
.data(), gain
, mOutChans
[1].Target
);
338 else if(mFilter
->mChannels
== FmtMono
)
340 const auto coeffs
= CalcDirectionCoeffs({0.0f
, 0.0f
, -1.0f
}, 0.0f
);
342 mOutTarget
= target
.Main
->Buffer
;
343 ComputePanGains(target
.Main
, coeffs
.data(), gain
, mOutChans
[0].Target
);
347 void ConvolutionState::process(const size_t samplesToDo
,
348 const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
)
350 /* No filter, no response. */
353 constexpr size_t m
{ConvolveUpdateSize
/2 + 1};
354 size_t curseg
{mFilter
->mCurrentSegment
};
356 for(size_t base
{0u};base
< samplesToDo
;)
358 const size_t todo
{minz(ConvolveUpdateSamples
-mFifoPos
, samplesToDo
-base
)};
360 /* Retrieve the output samples from the FIFO and fill in the new input
363 for(size_t c
{0};c
< mNumChannels
;++c
)
365 auto fifo_iter
= mOutput
[c
].begin() + mFifoPos
;
366 std::transform(fifo_iter
, fifo_iter
+todo
, mTempBuffer
[c
].begin()+base
,
367 [](double d
) noexcept
-> float { return static_cast<float>(d
); });
370 std::copy_n(samplesIn
[0].begin()+base
, todo
, mFftBuffer
.begin()+mFifoPos
);
374 /* Check whether FIFO buffer is filled with new samples. */
375 if(mFifoPos
< ConvolveUpdateSamples
) break;
378 /* Calculate the frequency domain response and add the relevant
379 * frequency bins to the input history.
381 complex_fft(mFftBuffer
, -1.0);
383 std::copy_n(mFftBuffer
.begin(), m
, &mFilter
->mInputHistory
[curseg
*m
]);
384 mFftBuffer
.fill(complex_d
{});
386 for(size_t c
{0};c
< mNumChannels
;++c
)
388 /* Convolve each input segment with its IR filter counterpart
391 const complex_d
*RESTRICT filter
{mFilter
->mConvolveFilter
[c
]};
392 const complex_d
*RESTRICT input
{&mFilter
->mInputHistory
[curseg
*m
]};
393 for(size_t s
{curseg
};s
< mFilter
->mNumConvolveSegs
;++s
)
395 for(size_t i
{0};i
< m
;++i
,++input
,++filter
)
396 mFftBuffer
[i
] += *input
* *filter
;
398 input
= mFilter
->mInputHistory
;
399 for(size_t s
{0};s
< curseg
;++s
)
401 for(size_t i
{0};i
< m
;++i
,++input
,++filter
)
402 mFftBuffer
[i
] += *input
* *filter
;
405 /* Apply iFFT to get the 1024 (really 1023) samples for output. The
406 * 512 output samples are combined with the last output's 511
407 * second-half samples (and this output's second half is
408 * subsequently saved for next time).
410 complex_fft(mFftBuffer
, 1.0);
412 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
413 mOutput
[c
][i
] = mFftBuffer
[i
].real() + mOutput
[c
][ConvolveUpdateSamples
+i
];
414 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
415 mOutput
[c
][ConvolveUpdateSamples
+i
] = mFftBuffer
[ConvolveUpdateSamples
+i
].real();
416 mFftBuffer
.fill(complex_d
{});
419 /* Shift the input history. */
420 curseg
= curseg
? (curseg
-1) : (mFilter
->mNumConvolveSegs
-1);
422 mFilter
->mCurrentSegment
= curseg
;
424 /* Finally, mix to the output. */
425 (this->*mMix
)(samplesOut
, samplesToDo
);
429 void ConvolutionEffect_setParami(EffectProps
* /*props*/, ALenum param
, int /*val*/)
434 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect integer property 0x%04x",
438 void ConvolutionEffect_setParamiv(EffectProps
*props
, ALenum param
, const int *vals
)
443 ConvolutionEffect_setParami(props
, param
, vals
[0]);
446 void ConvolutionEffect_setParamf(EffectProps
* /*props*/, ALenum param
, float /*val*/)
451 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect float property 0x%04x",
455 void ConvolutionEffect_setParamfv(EffectProps
*props
, ALenum param
, const float *vals
)
460 ConvolutionEffect_setParamf(props
, param
, vals
[0]);
464 void ConvolutionEffect_getParami(const EffectProps
* /*props*/, ALenum param
, int* /*val*/)
469 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect integer property 0x%04x",
473 void ConvolutionEffect_getParamiv(const EffectProps
*props
, ALenum param
, int *vals
)
478 ConvolutionEffect_getParami(props
, param
, vals
);
481 void ConvolutionEffect_getParamf(const EffectProps
* /*props*/, ALenum param
, float* /*val*/)
486 throw effect_exception
{AL_INVALID_ENUM
, "Invalid null effect float property 0x%04x",
490 void ConvolutionEffect_getParamfv(const EffectProps
*props
, ALenum param
, float *vals
)
495 ConvolutionEffect_getParamf(props
, param
, vals
);
499 DEFINE_ALEFFECT_VTABLE(ConvolutionEffect
);
502 struct ConvolutionStateFactory final
: public EffectStateFactory
{
503 EffectState
*create() override
;
504 EffectProps
getDefaultProps() const noexcept override
;
505 const EffectVtable
*getEffectVtable() const noexcept override
;
508 /* Creates EffectState objects of the appropriate type. */
509 EffectState
*ConvolutionStateFactory::create()
510 { return new ConvolutionState
{}; }
512 /* Returns an ALeffectProps initialized with this effect type's default
515 EffectProps
ConvolutionStateFactory::getDefaultProps() const noexcept
521 /* Returns a pointer to this effect type's global set/get vtable. */
522 const EffectVtable
*ConvolutionStateFactory::getEffectVtable() const noexcept
523 { return &ConvolutionEffect_vtable
; }
527 EffectStateFactory
*ConvolutionStateFactory_getFactory()
529 static ConvolutionStateFactory ConvolutionFactory
{};
530 return &ConvolutionFactory
;