16 #ifdef HAVE_SSE_INTRINSICS
17 #include <xmmintrin.h>
18 #elif defined(HAVE_NEON)
22 #include "alcomplex.h"
24 #include "alnumbers.h"
25 #include "alnumeric.h"
28 #include "core/ambidefs.h"
29 #include "core/bufferline.h"
30 #include "core/buffer_storage.h"
31 #include "core/context.h"
32 #include "core/devformat.h"
33 #include "core/device.h"
34 #include "core/effects/base.h"
35 #include "core/effectslot.h"
36 #include "core/filters/splitter.h"
37 #include "core/fmt_traits.h"
38 #include "core/mixer.h"
39 #include "core/uhjfilter.h"
40 #include "intrusive_ptr.h"
41 #include "opthelpers.h"
43 #include "polyphase_resampler.h"
50 /* Convolution is implemented using a segmented overlap-add method. The impulse
51 * response is split into multiple segments of 128 samples, and each segment
52 * has an FFT applied with a 256-sample buffer (the latter half left silent) to
53 * get its frequency-domain response. The resulting response has its positive/
54 * non-mirrored frequencies saved (129 bins) in each segment. Note that since
55 * the 0- and half-frequency bins are real for a real signal, their imaginary
56 * components are always 0 and can be dropped, allowing their real components
57 * to be combined so only 128 complex values are stored for the 129 bins.
59 * Input samples are similarly broken up into 128-sample segments, with a 256-
60 * sample FFT applied to each new incoming segment to get its 129 bins. A
61 * history of FFT'd input segments is maintained, equal to the number of
62 * impulse response segments.
64 * To apply the convolution, each impulse response segment is convolved with
65 * its paired input segment (using complex multiplies, far cheaper than FIRs),
66 * accumulating into a 129-bin FFT buffer. The input history is then shifted to
67 * align with later impulse response segments for the next input segment.
69 * An inverse FFT is then applied to the accumulated FFT buffer to get a 256-
70 * sample time-domain response for output, which is split in two halves. The
71 * first half is the 128-sample output, and the second half is a 128-sample
72 * (really, 127) delayed extension, which gets added to the output next time.
73 * Convolving two time-domain responses of length N results in a time-domain
74 * signal of length N*2 - 1, and this holds true regardless of the convolution
75 * being applied in the frequency domain, so these "overflow" samples need to be accounted for.
78 * To avoid a delay with gathering enough input samples for the FFT, the first
79 * segment is applied directly in the time-domain as the samples come in. Once
80 * enough have been retrieved, the FFT is applied on the input and it's paired
81 * with the remaining (FFT'd) filter segments for processing.
// Converts one channel of interleaved samples of format SrcType to float.
// dst receives dst.size() converted samples. src is viewed as
// dst.size()*srcstep elements of the trait's sample type, and element
// [channel] of each srcstep-wide frame is converted. Asserts that
// channel < srcstep.
// NOTE(review): the braces and the lambda's return statement are not visible
// in this listing.
85 template<FmtType SrcType
>
86 inline void LoadSampleArray(const al::span
<float> dst
, const std::byte
*src
,
87 const std::size_t channel
, const std::size_t srcstep
) noexcept
89 using TypeTraits
= al::FmtTypeTraits
<SrcType
>;
90 using SampleType
= typename
TypeTraits::Type
;
91 const auto converter
= TypeTraits
{};
92 assert(channel
< srcstep
);
// Reinterpret the raw bytes as whole frames of SampleType.
94 const auto srcspan
= al::span
{reinterpret_cast<const SampleType
*>(src
), dst
.size()*srcstep
};
95 auto ssrc
= srcspan
.cbegin();
96 std::generate(dst
.begin(), dst
.end(), [converter
,channel
,srcstep
,&ssrc
]
// Convert the requested channel of the current frame, then step one frame.
98 const auto ret
= converter(ssrc
[channel
]);
99 ssrc
+= ptrdiff_t(srcstep
);
// Runtime dispatch from srctype to the matching LoadSampleArray<T>
// instantiation (one HANDLE_FMT entry per format). The std::fill below writes
// silence into dst.
// NOTE(review): the switch header and the case labels leading to the
// std::fill are not visible in this listing; per the FIXME they are
// presumably the ADPCM formats.
104 void LoadSamples(const al::span
<float> dst
, const std::byte
*src
, const size_t channel
,
105 const size_t srcstep
, const FmtType srctype
) noexcept
107 #define HANDLE_FMT(T) case T: LoadSampleArray<T>(dst, src, channel, srcstep); break
110 HANDLE_FMT(FmtUByte
);
111 HANDLE_FMT(FmtShort
);
113 HANDLE_FMT(FmtFloat
);
114 HANDLE_FMT(FmtDouble
);
115 HANDLE_FMT(FmtMulaw
);
117 /* FIXME: Handle ADPCM decoding here. */
120 std::fill(dst
.begin(), dst
.end(), 0.0f
);
// Returns a span over the AmbiScale table matching scaletype: FromFuMa,
// FromSN3D or FromUHJ for those scalings. N3D breaks out of the case list and
// reaches the final return of FromN3D.
127 constexpr auto GetAmbiScales(AmbiScaling scaletype
) noexcept
131 case AmbiScaling::FuMa
: return al::span
{AmbiScale::FromFuMa
};
132 case AmbiScaling::SN3D
: return al::span
{AmbiScale::FromSN3D
};
133 case AmbiScaling::UHJ
: return al::span
{AmbiScale::FromUHJ
};
134 case AmbiScaling::N3D
: break;
136 return al::span
{AmbiScale::FromN3D
};
// Returns a span over the channel index table for layouttype:
// AmbiIndex::FromFuMa for the FuMa layout, AmbiIndex::FromACN otherwise.
139 constexpr auto GetAmbiLayout(AmbiLayout layouttype
) noexcept
141 if(layouttype
== AmbiLayout::FuMa
) return al::span
{AmbiIndex::FromFuMa
};
142 return al::span
{AmbiIndex::FromACN
};
// 2D counterpart of GetAmbiLayout: AmbiIndex::FromFuMa2D for the FuMa layout,
// AmbiIndex::FromACN2D otherwise.
145 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype
) noexcept
147 if(layouttype
== AmbiLayout::FuMa
) return al::span
{AmbiIndex::FromFuMa2D
};
148 return al::span
{AmbiIndex::FromACN2D
};
// Sine and cosine of 30, 45 and 110 degrees, used by the speaker position
// tables in ConvolutionState::update.
152 constexpr float sin30
{0.5f
};
153 constexpr float cos30
{0.866025403785f
};
154 constexpr float sin45
{al::numbers::sqrt2_v
<float>*0.5f
};
155 constexpr float cos45
{al::numbers::sqrt2_v
<float>*0.5f
};
156 constexpr float sin110
{ 0.939692620786f
};
157 constexpr float cos110
{-0.342020143326f
};
// NOTE(review): this looks like a member of a struct whose header is not
// visible in this listing. ChanPosMap is built elsewhere from a channel and a
// three-float array and read through .pos, so it presumably belongs there.
161 std::array
<float,3> pos
;
165 using complex_f
= std::complex<float>;
// ConvolveUpdateSize is the FFT length handed to PFFFTSetup;
// ConvolveUpdateSamples (half of it) is the number of samples per segment.
167 constexpr size_t ConvolveUpdateSize
{256};
168 constexpr size_t ConvolveUpdateSamples
{ConvolveUpdateSize
/ 2};
// Time-domain FIR: each generated dst sample is a dot product of
// ConvolveUpdateSamples filter taps with a window of input starting at src.
// Three variants are listed: SSE, NEON, and a plain scalar loop.
// NOTE(review): the statement that advances src between output samples, the
// scalar accumulator's declaration/return, and part of the preprocessor
// structure are not visible in this listing.
171 void apply_fir(al::span
<float> dst
, const al::span
<const float> input
, const al::span
<const float,ConvolveUpdateSamples
> filter
)
173 auto src
= input
.begin();
174 #ifdef HAVE_SSE_INTRINSICS
175 std::generate(dst
.begin(), dst
.end(), [&src
,filter
]
177 __m128 r4
{_mm_setzero_ps()};
178 for(size_t j
{0};j
< ConvolveUpdateSamples
;j
+=4)
// The filter is read with an aligned load, the input window with an
// unaligned one.
180 const __m128 coeffs
{_mm_load_ps(&filter
[j
])};
181 const __m128 s
{_mm_loadu_ps(&src
[j
])};
183 r4
= _mm_add_ps(r4
, _mm_mul_ps(s
, coeffs
));
// Horizontal sum: add the lane-reversed vector, then fold the high pair onto
// the low pair so lane 0 holds the total.
187 r4
= _mm_add_ps(r4
, _mm_shuffle_ps(r4
, r4
, _MM_SHUFFLE(0, 1, 2, 3)));
188 r4
= _mm_add_ps(r4
, _mm_movehl_ps(r4
, r4
));
189 return _mm_cvtss_f32(r4
);
192 #elif defined(HAVE_NEON)
194 std::generate(dst
.begin(), dst
.end(), [&src
,filter
]
196 float32x4_t r4
{vdupq_n_f32(0.0f
)};
197 for(size_t j
{0};j
< ConvolveUpdateSamples
;j
+=4)
198 r4
= vmlaq_f32(r4
, vld1q_f32(&src
[j
]), vld1q_f32(&filter
[j
]));
// Horizontal sum: swap within each 64-bit half and add, then add the low and
// high halves and take lane 0.
201 r4
= vaddq_f32(r4
, vrev64q_f32(r4
));
202 return vget_lane_f32(vadd_f32(vget_low_f32(r4
), vget_high_f32(r4
)), 0);
// Scalar variant.
207 std::generate(dst
.begin(), dst
.end(), [&src
,filter
]
210 for(size_t j
{0};j
< ConvolveUpdateSamples
;++j
)
211 ret
+= src
[j
] * filter
[j
];
// Effect state for the convolution effect. It holds the impulse response
// (time-domain first segment plus frequency-domain remaining segments), the
// input history, per-channel output/mixing data, and the active mix routine.
// NOTE(review): several declarations used by the member functions (mFft,
// mFifoPos, mAmbiOrder, the ChannelData struct header) are not visible in
// this listing.
219 struct ConvolutionState final
: public EffectState
{
220 FmtChannels mChannels
{};
221 AmbiLayout mAmbiLayout
{};
222 AmbiScaling mAmbiScaling
{};
// Two segments of input: process() appends new samples to the second half
// and moves it to the first half once a segment is complete.
226 alignas(16) std::array
<float,ConvolveUpdateSamples
*2> mInput
{};
// Per-channel first response segment, stored reversed by deviceUpdate for use
// with apply_fir.
227 al::vector
<std::array
<float,ConvolveUpdateSamples
>,16> mFilter
;
// Per-channel inverse-FFT output: the first half is combined with the FIR
// output, the second half is carried over to the next segment.
228 al::vector
<std::array
<float,ConvolveUpdateSamples
*2>,16> mOutput
;
// Accumulation buffer for the frequency-domain products, and the scratch
// buffer passed to the PFFFT transforms.
231 alignas(16) std::array
<float,ConvolveUpdateSize
> mFftBuffer
{};
232 alignas(16) std::array
<float,ConvolveUpdateSize
> mFftWorkBuffer
{};
234 size_t mCurrentSegment
{0};
235 size_t mNumConvolveSegs
{0};
// NOTE(review): the following members (mBuffer through Target) are accessed
// through mChans elements elsewhere (chan.mBuffer, chan.mFilter,
// chan.Current, ...), so they presumably belong to the ChannelData struct
// whose header is not visible here.
238 alignas(16) FloatBufferLine mBuffer
{};
239 float mHfScale
{}, mLfScale
{};
240 BandSplitter mFilter
{};
241 std::array
<float,MaxOutputChannels
> Current
{};
242 std::array
<float,MaxOutputChannels
> Target
{};
244 std::vector
<ChannelData
> mChans
;
// Frequency-domain storage: the input history segments first, followed by
// each channel's filter segments (see deviceUpdate and process).
245 al::vector
<float,16> mComplexData
;
248 ConvolutionState() = default;
249 ~ConvolutionState() override
= default;
251 void NormalMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
252 void UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
// Mix routine invoked at the end of process(); update() selects NormalMix or
// UpsampleMix.
253 void (ConvolutionState::*mMix
)(const al::span
<FloatBufferLine
>,const size_t)
254 {&ConvolutionState::NormalMix
};
256 void deviceUpdate(const DeviceBase
*device
, const BufferStorage
*buffer
) override
;
257 void update(const ContextBase
*context
, const EffectSlot
*slot
, const EffectProps
*props
,
258 const EffectTarget target
) override
;
259 void process(const size_t samplesToDo
, const al::span
<const FloatBufferLine
> samplesIn
,
260 const al::span
<FloatBufferLine
> samplesOut
) override
;
// Mixes the first samplesToDo samples of every channel's mBuffer into
// samplesOut with MixSamples, using that channel's Current and Target gains.
263 void ConvolutionState::NormalMix(const al::span
<FloatBufferLine
> samplesOut
,
264 const size_t samplesToDo
)
266 for(auto &chan
: mChans
)
267 MixSamples(al::span
{chan
.mBuffer
}.first(samplesToDo
), samplesOut
, chan
.Current
,
268 chan
.Target
, samplesToDo
, 0);
// Like NormalMix, but each channel's samples are first passed through its
// BandSplitter::processScale with the channel's mHfScale and mLfScale before
// being mixed.
271 void ConvolutionState::UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
,
272 const size_t samplesToDo
)
274 for(auto &chan
: mChans
)
276 const auto src
= al::span
{chan
.mBuffer
}.first(samplesToDo
);
277 chan
.mFilter
.processScale(src
, chan
.mHfScale
, chan
.mLfScale
);
278 MixSamples(src
, samplesOut
, chan
.Current
, chan
.Target
, samplesToDo
, 0);
// Rebuilds the convolution state for the given device and impulse response
// buffer. Previous filters, outputs and channel data are discarded. For a
// non-empty buffer the response is loaded as float, resampled to the device
// rate when the rates differ, and split into segments: the first is kept
// reversed in the time domain (mFilter), the rest are FFT'd and stored in
// mComplexData after the input history area.
// NOTE(review): several guards (the UHJ decode condition, the resample
// if/else) and some statements are not visible in this listing.
283 void ConvolutionState::deviceUpdate(const DeviceBase
*device
, const BufferStorage
*buffer
)
285 using UhjDecoderType
= UhjDecoder
<512>;
286 static constexpr auto DecoderPadding
= UhjDecoderType::sInputPadding
;
// Upper bound on the ambisonic order taken from the buffer (see the std::min
// at 312).
288 static constexpr uint MaxConvolveAmbiOrder
{1u};
291 mFft
= PFFFTSetup
{ConvolveUpdateSize
, PFFFT_REAL
};
// Swapping with an empty temporary drops the old contents.
295 decltype(mFilter
){}.swap(mFilter
);
296 decltype(mOutput
){}.swap(mOutput
);
297 mFftBuffer
.fill(0.0f
);
298 mFftWorkBuffer
.fill(0.0f
);
301 mNumConvolveSegs
= 0;
303 decltype(mChans
){}.swap(mChans
);
304 decltype(mComplexData
){}.swap(mComplexData
);
306 /* An empty buffer doesn't need a convolution filter. */
307 if(!buffer
|| buffer
->mSampleLen
< 1) return;
// UHJ buffers force the FuMa layout and UHJ scaling; other formats use the
// buffer's own settings.
309 mChannels
= buffer
->mChannels
;
310 mAmbiLayout
= IsUHJ(mChannels
) ? AmbiLayout::FuMa
: buffer
->mAmbiLayout
;
311 mAmbiScaling
= IsUHJ(mChannels
) ? AmbiScaling::UHJ
: buffer
->mAmbiScaling
;
312 mAmbiOrder
= std::min(buffer
->mAmbiOrder
, MaxConvolveAmbiOrder
);
// realChannels is the channel count stored in the buffer; numChannels is the
// count processed here (FmtUHJ2 is given 3).
314 const auto realChannels
= buffer
->channelsFromFmt();
315 const auto numChannels
= (mChannels
== FmtUHJ2
) ? 3u : ChannelsFromFmt(mChannels
, mAmbiOrder
);
317 mChans
.resize(numChannels
);
319 /* The impulse response needs to have the same sample rate as the input and
320 * output. The bsinc24 resampler is decent, but there is high-frequency
321 * attenuation that some people may be able to pick up on. Since this is
322 * called very infrequently, go ahead and use the polyphase resampler.
324 PPhaseResampler resampler
;
325 if(device
->Frequency
!= buffer
->mSampleRate
)
326 resampler
.init(buffer
->mSampleRate
, device
->Frequency
);
// Response length at the device rate, rounded up, computed in 64 bits.
327 const auto resampledCount
= static_cast<uint
>(
328 (uint64_t{buffer
->mSampleLen
}*device
->Frequency
+(buffer
->mSampleRate
-1)) /
329 buffer
->mSampleRate
);
// Every channel starts from the same band splitter, built from the device
// crossover frequency divided by the device sample rate.
331 const BandSplitter splitter
{device
->mXOverFreq
/ static_cast<float>(device
->Frequency
)};
332 for(auto &e
: mChans
)
333 e
.mFilter
= splitter
;
335 mFilter
.resize(numChannels
, {});
336 mOutput
.resize(numChannels
, {});
338 /* Calculate the number of segments needed to hold the impulse response and
339 * the input history (rounded up), and allocate them. Exclude one segment
340 * which gets applied as a time-domain FIR filter. Make sure at least one
341 * segment is allocated to simplify handling.
343 mNumConvolveSegs
= (resampledCount
+(ConvolveUpdateSamples
-1)) / ConvolveUpdateSamples
;
344 mNumConvolveSegs
= std::max(mNumConvolveSegs
, 2_uz
) - 1_uz
;
// One set of segments for the input history plus one set per channel.
346 const size_t complex_length
{mNumConvolveSegs
* ConvolveUpdateSize
* (numChannels
+1)};
347 mComplexData
.resize(complex_length
, 0.0f
);
349 /* Load the samples from the buffer. */
// Each channel gets its own line of srclinelength floats (sample length plus
// decoder padding, rounded up to a multiple of 16).
350 const size_t srclinelength
{RoundUp(buffer
->mSampleLen
+DecoderPadding
, 16)};
351 auto srcsamples
= std::vector
<float>(srclinelength
* numChannels
);
352 std::fill(srcsamples
.begin(), srcsamples
.end(), 0.0f
);
353 for(size_t c
{0};c
< numChannels
&& c
< realChannels
;++c
)
354 LoadSamples(al::span
{srcsamples
}.subspan(srclinelength
*c
, buffer
->mSampleLen
),
355 buffer
->mData
.data(), c
, realChannels
, buffer
->mType
);
// NOTE(review): the condition guarding this UHJ decode is not visible in
// this listing.
359 auto decoder
= std::make_unique
<UhjDecoderType
>();
360 std::array
<float*,4> samples
{};
361 for(size_t c
{0};c
< numChannels
;++c
)
362 samples
[c
] = al::to_address(srcsamples
.begin() + ptrdiff_t(srclinelength
*c
));
363 decoder
->decode({samples
.data(), numChannels
}, buffer
->mSampleLen
, buffer
->mSampleLen
);
// ressamples has room for the original samples, plus resampledCount more
// when the resampler is active.
366 auto ressamples
= std::vector
<double>(buffer
->mSampleLen
+ (resampler
? resampledCount
: 0));
367 auto ffttmp
= al::vector
<float,16>(ConvolveUpdateSize
);
368 auto fftbuffer
= std::vector
<std::complex<double>>(ConvolveUpdateSize
);
// Filter segments are written after the input history area of mComplexData.
370 auto filteriter
= mComplexData
.begin() + ptrdiff_t(mNumConvolveSegs
*ConvolveUpdateSize
);
371 for(size_t c
{0};c
< numChannels
;++c
)
373 auto bufsamples
= al::span
{srcsamples
}.subspan(srclinelength
*c
, buffer
->mSampleLen
);
374 /* Resample to match the device. */
// Resample path: stage the input after the first resampledCount slots, then
// resample into the front of ressamples.
// NOTE(review): the if/else choosing between this and the plain copy at 382
// is not visible in this listing.
377 auto restmp
= al::span
{ressamples
}.subspan(resampledCount
, buffer
->mSampleLen
);
378 std::copy(bufsamples
.cbegin(), bufsamples
.cend(), restmp
.begin());
379 resampler
.process(restmp
, al::span
{ressamples
}.first(resampledCount
));
382 std::copy(bufsamples
.cbegin(), bufsamples
.cend(), ressamples
.begin());
384 /* Store the first segment's samples in reverse in the time-domain, to
385 * apply as a FIR filter.
387 const size_t first_size
{std::min(size_t{resampledCount
}, ConvolveUpdateSamples
)};
388 auto sampleseg
= al::span
{ressamples
.cbegin(), first_size
};
389 std::transform(sampleseg
.cbegin(), sampleseg
.cend(), mFilter
[c
].rbegin(),
390 [](const double d
) noexcept
-> float { return static_cast<float>(d
); });
// Remaining segments: zero-pad each to the FFT size, transform, and store.
392 size_t done
{first_size
};
393 for(size_t s
{0};s
< mNumConvolveSegs
;++s
)
395 const size_t todo
{std::min(resampledCount
-done
, ConvolveUpdateSamples
)};
396 sampleseg
= al::span
{ressamples
}.subspan(done
, todo
);
398 /* Apply a double-precision forward FFT for more precise frequency
401 auto iter
= std::copy(sampleseg
.cbegin(), sampleseg
.cend(), fftbuffer
.begin());
403 std::fill(iter
, fftbuffer
.end(), std::complex<double>{});
404 forward_fft(al::span
{fftbuffer
});
406 /* Convert to, and pack in, a float buffer for PFFFT. Note that the
407 * first bin stores the real component of the half-frequency bin in
408 * the imaginary component. Also scale the FFT by its length so the
409 * iFFT'd output will be normalized.
411 static constexpr float fftscale
{1.0f
/ float{ConvolveUpdateSize
}};
412 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
414 ffttmp
[i
*2 ] = static_cast<float>(fftbuffer
[i
].real()) * fftscale
;
415 ffttmp
[i
*2 + 1] = static_cast<float>((i
== 0) ?
416 fftbuffer
[ConvolveUpdateSamples
].real() : fftbuffer
[i
].imag()) * fftscale
;
418 /* Reorder backward to make it suitable for pffft_zconvolve and the
419 * subsequent pffft_transform(..., PFFFT_BACKWARD).
421 mFft
.zreorder(ffttmp
.data(), al::to_address(filteriter
), PFFFT_BACKWARD
);
422 filteriter
+= ConvolveUpdateSize
;
// Recomputes each channel's Target gains from the slot gain, the effect
// properties and the output target, and selects the mix routine (NormalMix or
// UpsampleMix). Ambisonic buffers are rotated by the OrientAt/OrientUp
// vectors; other formats are panned from fixed speaker positions.
// NOTE(review): several lines (table terminators, LFE entries, some guards
// and parts of the ScaleAzimuthFront lambda) are not visible in this listing.
428 void ConvolutionState::update(const ContextBase
*context
, const EffectSlot
*slot
,
429 const EffectProps
*props_
, const EffectTarget target
)
431 /* TODO: LFE is not mixed to output. This will require each buffer channel
432 * to have its own output target since the main mixing buffer won't have an
433 * LFE channel (due to being B-Format).
// Speaker position tables, one per channel format. Each entry pairs a channel
// with a three-component direction; FrontCenter is {0, 0, -1}.
435 static constexpr std::array MonoMap
{
436 ChanPosMap
{FrontCenter
, std::array
{0.0f
, 0.0f
, -1.0f
}}
438 static constexpr std::array StereoMap
{
439 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
440 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
442 static constexpr std::array RearMap
{
443 ChanPosMap
{BackLeft
, std::array
{-sin30
, 0.0f
, cos30
}},
444 ChanPosMap
{BackRight
, std::array
{ sin30
, 0.0f
, cos30
}},
446 static constexpr std::array QuadMap
{
447 ChanPosMap
{FrontLeft
, std::array
{-sin45
, 0.0f
, -cos45
}},
448 ChanPosMap
{FrontRight
, std::array
{ sin45
, 0.0f
, -cos45
}},
449 ChanPosMap
{BackLeft
, std::array
{-sin45
, 0.0f
, cos45
}},
450 ChanPosMap
{BackRight
, std::array
{ sin45
, 0.0f
, cos45
}},
452 static constexpr std::array X51Map
{
453 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
454 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
455 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
457 ChanPosMap
{SideLeft
, std::array
{-sin110
, 0.0f
, -cos110
}},
458 ChanPosMap
{SideRight
, std::array
{ sin110
, 0.0f
, -cos110
}},
460 static constexpr std::array X61Map
{
461 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
462 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
463 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
465 ChanPosMap
{BackCenter
, std::array
{ 0.0f
, 0.0f
, 1.0f
} },
466 ChanPosMap
{SideLeft
, std::array
{-1.0f
, 0.0f
, 0.0f
} },
467 ChanPosMap
{SideRight
, std::array
{ 1.0f
, 0.0f
, 0.0f
} },
469 static constexpr std::array X71Map
{
470 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
471 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
472 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
474 ChanPosMap
{BackLeft
, std::array
{-sin30
, 0.0f
, cos30
}},
475 ChanPosMap
{BackRight
, std::array
{ sin30
, 0.0f
, cos30
}},
476 ChanPosMap
{SideLeft
, std::array
{ -1.0f
, 0.0f
, 0.0f
}},
477 ChanPosMap
{SideRight
, std::array
{ 1.0f
, 0.0f
, 0.0f
}},
// NOTE(review): the body of this check is not visible; presumably it returns
// early when no filter segments are loaded.
480 if(mNumConvolveSegs
< 1) UNLIKELY
483 auto &props
= std::get
<ConvolutionProps
>(*props_
);
484 mMix
= &ConvolutionState::NormalMix
;
// Clear every channel's target gains before they are recomputed.
486 for(auto &chan
: mChans
)
487 std::fill(chan
.Target
.begin(), chan
.Target
.end(), 0.0f
);
488 const float gain
{slot
->Gain
};
489 if(IsAmbisonic(mChannels
))
491 DeviceBase
*device
{context
->mDevice
};
// 2-channel UHJ without a device UHJ encoder: use UpsampleMix with unit HF
// scale and the decoder's W and XY low-frequency scales.
492 if(mChannels
== FmtUHJ2
&& !device
->mUhjEncoder
)
494 mMix
= &ConvolutionState::UpsampleMix
;
495 mChans
[0].mHfScale
= 1.0f
;
496 mChans
[0].mLfScale
= DecoderBase::sWLFScale
;
497 mChans
[1].mHfScale
= 1.0f
;
498 mChans
[1].mLfScale
= DecoderBase::sXYLFScale
;
499 mChans
[2].mHfScale
= 1.0f
;
500 mChans
[2].mLfScale
= DecoderBase::sXYLFScale
;
// Device ambisonic order exceeds the buffer's: use UpsampleMix with HF scales
// from GetHFOrderScales (scales[0] for channel 0, scales[1] for the rest).
502 else if(device
->mAmbiOrder
> mAmbiOrder
)
504 mMix
= &ConvolutionState::UpsampleMix
;
505 const auto scales
= AmbiScale::GetHFOrderScales(mAmbiOrder
, device
->mAmbiOrder
,
507 mChans
[0].mHfScale
= scales
[0];
508 mChans
[0].mLfScale
= 1.0f
;
509 for(size_t i
{1};i
< mChans
.size();++i
)
511 mChans
[i
].mHfScale
= scales
[1];
512 mChans
[i
].mLfScale
= 1.0f
;
515 mOutTarget
= target
.Main
->Buffer
;
// Orientation vectors from the effect properties.
// NOTE(review): any normalization of N, V and U is not visible in this
// listing.
517 alu::Vector N
{props
.OrientAt
[0], props
.OrientAt
[1], props
.OrientAt
[2], 0.0f
};
519 alu::Vector V
{props
.OrientUp
[0], props
.OrientUp
[1], props
.OrientUp
[2], 0.0f
};
521 /* Build and normalize right-vector */
522 alu::Vector U
{N
.cross_product(V
)};
// Matrix built from U, V and N; row 0 passes the first channel through
// unchanged.
525 const std::array mixmatrix
{
526 std::array
{1.0f
, 0.0f
, 0.0f
, 0.0f
},
527 std::array
{0.0f
, U
[0], -U
[1], U
[2]},
528 std::array
{0.0f
, -V
[0], V
[1], -V
[2]},
529 std::array
{0.0f
, -N
[0], N
[1], -N
[2]},
532 const auto scales
= GetAmbiScales(mAmbiScaling
);
533 const auto index_map
= Is2DAmbisonic(mChannels
) ?
534 al::span
{GetAmbi2DLayout(mAmbiLayout
)}.subspan(0) :
535 al::span
{GetAmbiLayout(mAmbiLayout
)}.subspan(0);
// For each buffer channel: look up its index through the layout table, scale
// the matching matrix row, and compute the panning gains.
537 std::array
<float,MaxAmbiChannels
> coeffs
{};
538 for(size_t c
{0u};c
< mChans
.size();++c
)
540 const size_t acn
{index_map
[c
]};
541 const float scale
{scales
[acn
]};
543 std::transform(mixmatrix
[acn
].cbegin(), mixmatrix
[acn
].cend(), coeffs
.begin(),
544 [scale
](const float in
) noexcept
-> float { return in
* scale
; });
546 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[c
].Target
);
// Non-ambisonic formats: choose the speaker table for the channel format.
551 DeviceBase
*device
{context
->mDevice
};
552 al::span
<const ChanPosMap
> chanmap
{};
555 case FmtMono
: chanmap
= MonoMap
; break;
556 case FmtMonoDup
: chanmap
= MonoMap
; break;
558 case FmtStereo
: chanmap
= StereoMap
; break;
559 case FmtRear
: chanmap
= RearMap
; break;
560 case FmtQuad
: chanmap
= QuadMap
; break;
561 case FmtX51
: chanmap
= X51Map
; break;
562 case FmtX61
: chanmap
= X61Map
; break;
563 case FmtX71
: chanmap
= X71Map
; break;
572 mOutTarget
= target
.Main
->Buffer
;
573 if(device
->mRenderMode
== RenderMode::Pairwise
)
575 /* Scales the azimuth of the given vector by 3 if it's in front.
576 * Effectively scales +/-30 degrees to +/-90 degrees, leaving > +90
579 auto ScaleAzimuthFront
= [](std::array
<float,3> pos
) -> std::array
<float,3>
583 /* Normalize the length of the x,z components for a 2D
584 * vector of the azimuth angle. Negate Z since {0,0,-1} is
587 const float len2d
{std::sqrt(pos
[0]*pos
[0] + pos
[2]*pos
[2])};
588 float x
{pos
[0] / len2d
};
589 float z
{-pos
[2] / len2d
};
591 /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
594 /* Triple the angle represented by x,z. */
595 x
= x
*3.0f
- x
*x
*x
*4.0f
;
596 z
= z
*z
*z
*4.0f
- z
*3.0f
;
598 /* Scale the vector back to fit in 3D. */
604 /* If azimuth >= 30 degrees, clamp to 90 degrees. */
605 pos
[0] = std::copysign(len2d
, pos
[0]);
// Pairwise rendering: pan each non-LFE channel from its azimuth-scaled
// position.
612 for(size_t i
{0};i
< chanmap
.size();++i
)
614 if(chanmap
[i
].channel
== LFE
) continue;
615 const auto coeffs
= CalcDirectionCoeffs(ScaleAzimuthFront(chanmap
[i
].pos
), 0.0f
);
616 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[i
].Target
);
// Other render modes: pan each non-LFE channel from its table position.
619 else for(size_t i
{0};i
< chanmap
.size();++i
)
621 if(chanmap
[i
].channel
== LFE
) continue;
622 const auto coeffs
= CalcDirectionCoeffs(chanmap
[i
].pos
, 0.0f
);
623 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[i
].Target
);
// Convolves samplesToDo samples from samplesIn[0] with the loaded response
// and mixes the result into samplesOut through mMix. New input goes through
// the time-domain FIR (first response segment) and is combined with the
// pending inverse-FFT output. Each time a full segment of input has been
// gathered, it is FFT'd into the history and a new block of frequency-domain
// output is produced for every channel.
// NOTE(review): several lines (the early-out body, the mFifoPos update, some
// call arguments) are not visible in this listing.
628 void ConvolutionState::process(const size_t samplesToDo
,
629 const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
)
631 if(mNumConvolveSegs
< 1) UNLIKELY
634 size_t curseg
{mCurrentSegment
};
636 for(size_t base
{0u};base
< samplesToDo
;)
// Take at most what is needed to fill the current segment.
638 const size_t todo
{std::min(ConvolveUpdateSamples
-mFifoPos
, samplesToDo
-base
)};
// New input is appended in the second half of mInput.
640 std::copy_n(samplesIn
[0].begin() + ptrdiff_t(base
), todo
,
641 mInput
.begin()+ptrdiff_t(ConvolveUpdateSamples
+mFifoPos
));
643 /* Apply the FIR for the newly retrieved input samples, and combine it
644 * with the inverse FFT'd output samples.
646 for(size_t c
{0};c
< mChans
.size();++c
)
648 auto outspan
= al::span
{mChans
[c
].mBuffer
}.subspan(base
, todo
);
649 apply_fir(outspan
, al::span
{mInput
}.subspan(1+mFifoPos
), mFilter
[c
]);
651 auto fifospan
= al::span
{mOutput
[c
]}.subspan(mFifoPos
, todo
);
652 std::transform(fifospan
.cbegin(), fifospan
.cend(), outspan
.cbegin(), outspan
.begin(),
659 /* Check whether the input buffer is filled with new samples. */
660 if(mFifoPos
< ConvolveUpdateSamples
) break;
663 /* Move the newest input to the front for the next iteration's history. */
664 std::copy(mInput
.cbegin()+ConvolveUpdateSamples
, mInput
.cend(), mInput
.begin());
665 std::fill(mInput
.begin()+ConvolveUpdateSamples
, mInput
.end(), 0.0f
);
667 /* Calculate the frequency-domain response and add the relevant
668 * frequency bins to the FFT history.
670 mFft
.transform(mInput
.data(), &mComplexData
[curseg
*ConvolveUpdateSize
],
671 mFftWorkBuffer
.data(), PFFFT_FORWARD
);
// Filter segments start after the input history area of mComplexData; the
// iterator keeps advancing across channels.
673 auto filter
= mComplexData
.cbegin() + ptrdiff_t(mNumConvolveSegs
*ConvolveUpdateSize
);
674 for(size_t c
{0};c
< mChans
.size();++c
)
676 /* Convolve each input segment with its IR filter counterpart
679 mFftBuffer
.fill(0.0f
);
// The history is circular: first the segments from curseg to the end...
680 auto input
= mComplexData
.cbegin() + ptrdiff_t(curseg
*ConvolveUpdateSize
);
681 for(size_t s
{curseg
};s
< mNumConvolveSegs
;++s
)
683 mFft
.zconvolve_accumulate(al::to_address(input
), al::to_address(filter
),
685 input
+= ConvolveUpdateSize
;
686 filter
+= ConvolveUpdateSize
;
// ...then the segments from the start up to curseg.
688 input
= mComplexData
.cbegin();
689 for(size_t s
{0};s
< curseg
;++s
)
691 mFft
.zconvolve_accumulate(al::to_address(input
), al::to_address(filter
),
693 input
+= ConvolveUpdateSize
;
694 filter
+= ConvolveUpdateSize
;
697 /* Apply iFFT to get the 256 (really 255) samples for output. The
698 * 128 output samples are combined with the last output's 127
699 * second-half samples (and this output's second half is
700 * subsequently saved for next time).
702 mFft
.transform(mFftBuffer
.data(), mFftBuffer
.data(), mFftWorkBuffer
.data(),
705 /* The filter was attenuated, so the response is already scaled. */
706 std::transform(mFftBuffer
.cbegin(), mFftBuffer
.cbegin()+ConvolveUpdateSamples
,
707 mOutput
[c
].cbegin()+ConvolveUpdateSamples
, mOutput
[c
].begin(), std::plus
{});
708 std::copy(mFftBuffer
.cbegin()+ConvolveUpdateSamples
, mFftBuffer
.cend(),
709 mOutput
[c
].begin()+ConvolveUpdateSamples
);
712 /* Shift the input history. */
// Step curseg backward, wrapping from 0 to the last segment.
713 curseg
= curseg
? (curseg
-1) : (mNumConvolveSegs
-1);
715 mCurrentSegment
= curseg
;
717 /* Finally, mix to the output. */
718 (this->*mMix
)(samplesOut
, samplesToDo
);
// Factory whose create() returns a newly allocated ConvolutionState wrapped
// in an intrusive_ptr to EffectState.
722 struct ConvolutionStateFactory final
: public EffectStateFactory
{
723 al::intrusive_ptr
<EffectState
> create() override
724 { return al::intrusive_ptr
<EffectState
>{new ConvolutionState
{}}; }
// Returns a pointer to the function-local static ConvolutionStateFactory
// instance.
729 EffectStateFactory
*ConvolutionStateFactory_getFactory()
731 static ConvolutionStateFactory ConvolutionFactory
{};
732 return &ConvolutionFactory
;