16 #ifdef HAVE_SSE_INTRINSICS
17 #include <xmmintrin.h>
18 #elif defined(HAVE_NEON)
22 #include "alcomplex.h"
24 #include "alnumbers.h"
25 #include "alnumeric.h"
28 #include "core/ambidefs.h"
29 #include "core/bufferline.h"
30 #include "core/buffer_storage.h"
31 #include "core/context.h"
32 #include "core/devformat.h"
33 #include "core/device.h"
34 #include "core/effects/base.h"
35 #include "core/effectslot.h"
36 #include "core/filters/splitter.h"
37 #include "core/fmt_traits.h"
38 #include "core/mixer.h"
39 #include "core/uhjfilter.h"
40 #include "intrusive_ptr.h"
41 #include "opthelpers.h"
43 #include "polyphase_resampler.h"
50 /* Convolution is implemented using a segmented overlap-add method. The impulse
51 * response is split into multiple segments of 128 samples, and each segment
52 * has an FFT applied with a 256-sample buffer (the latter half left silent) to
53 * get its frequency-domain response. The resulting response has its positive/
54 * non-mirrored frequencies saved (129 bins) in each segment. Note that since
55 * the 0- and half-frequency bins are real for a real signal, their imaginary
56 * components are always 0 and can be dropped, allowing their real components
57 * to be combined so only 128 complex values are stored for the 129 bins.
59 * Input samples are similarly broken up into 128-sample segments, with a 256-
60 * sample FFT applied to each new incoming segment to get its 129 bins. A
61 * history of FFT'd input segments is maintained, equal to the number of
62 * impulse response segments.
64 * To apply the convolution, each impulse response segment is convolved with
65 * its paired input segment (using complex multiplies, far cheaper than FIRs),
66 * accumulating into a 129-bin FFT buffer. The input history is then shifted to
67 * align with later impulse response segments for the next input segment.
69 * An inverse FFT is then applied to the accumulated FFT buffer to get a 256-
70 * sample time-domain response for output, which is split in two halves. The
71 * first half is the 128-sample output, and the second half is a 128-sample
72 * (really, 127) delayed extension, which gets added to the output next time.
73 * Convolving two time-domain responses of length N results in a time-domain
74 * signal of length N*2 - 1, and this holds true regardless of the convolution
75 * being applied in the frequency domain, so these "overflow" samples need to
78 * To avoid a delay with gathering enough input samples for the FFT, the first
79 * segment is applied directly in the time-domain as the samples come in. Once
80 * enough have been retrieved, the FFT is applied on the input and it's paired
81 * with the remaining (FFT'd) filter segments for processing.
85 template<FmtType SrcType
>
86 inline void LoadSampleArray(const al::span
<float> dst
, const std::byte
*src
,
87 const std::size_t channel
, const std::size_t srcstep
) noexcept
89 using TypeTraits
= al::FmtTypeTraits
<SrcType
>;
90 using SampleType
= typename
TypeTraits::Type
;
91 const auto converter
= TypeTraits
{};
92 assert(channel
< srcstep
);
94 const auto srcspan
= al::span
{reinterpret_cast<const SampleType
*>(src
), dst
.size()*srcstep
};
95 auto ssrc
= srcspan
.cbegin();
96 std::generate(dst
.begin(), dst
.end(), [converter
,channel
,srcstep
,&ssrc
]
98 const auto ret
= converter(ssrc
[channel
]);
104 void LoadSamples(const al::span
<float> dst
, const std::byte
*src
, const size_t channel
,
105 const size_t srcstep
, const FmtType srctype
) noexcept
107 #define HANDLE_FMT(T) case T: LoadSampleArray<T>(dst, src, channel, srcstep); break
110 HANDLE_FMT(FmtUByte
);
111 HANDLE_FMT(FmtShort
);
113 HANDLE_FMT(FmtFloat
);
114 HANDLE_FMT(FmtDouble
);
115 HANDLE_FMT(FmtMulaw
);
117 /* FIXME: Handle ADPCM decoding here. */
120 std::fill(dst
.begin(), dst
.end(), 0.0f
);
127 constexpr auto GetAmbiScales(AmbiScaling scaletype
) noexcept
131 case AmbiScaling::FuMa
: return al::span
{AmbiScale::FromFuMa
};
132 case AmbiScaling::SN3D
: return al::span
{AmbiScale::FromSN3D
};
133 case AmbiScaling::UHJ
: return al::span
{AmbiScale::FromUHJ
};
134 case AmbiScaling::N3D
: break;
136 return al::span
{AmbiScale::FromN3D
};
139 constexpr auto GetAmbiLayout(AmbiLayout layouttype
) noexcept
141 if(layouttype
== AmbiLayout::FuMa
) return al::span
{AmbiIndex::FromFuMa
};
142 return al::span
{AmbiIndex::FromACN
};
145 constexpr auto GetAmbi2DLayout(AmbiLayout layouttype
) noexcept
147 if(layouttype
== AmbiLayout::FuMa
) return al::span
{AmbiIndex::FromFuMa2D
};
148 return al::span
{AmbiIndex::FromACN2D
};
152 constexpr float sin30
{0.5f
};
153 constexpr float cos30
{0.866025403785f
};
154 constexpr float sin45
{al::numbers::sqrt2_v
<float>*0.5f
};
155 constexpr float cos45
{al::numbers::sqrt2_v
<float>*0.5f
};
156 constexpr float sin110
{ 0.939692620786f
};
157 constexpr float cos110
{-0.342020143326f
};
161 std::array
<float,3> pos
;
165 using complex_f
= std::complex<float>;
167 constexpr size_t ConvolveUpdateSize
{256};
168 constexpr size_t ConvolveUpdateSamples
{ConvolveUpdateSize
/ 2};
171 void apply_fir(al::span
<float> dst
, const float *RESTRICT src
, const float *RESTRICT filter
)
173 #ifdef HAVE_SSE_INTRINSICS
174 for(float &output
: dst
)
176 __m128 r4
{_mm_setzero_ps()};
177 for(size_t j
{0};j
< ConvolveUpdateSamples
;j
+=4)
179 const __m128 coeffs
{_mm_load_ps(&filter
[j
])};
180 const __m128 s
{_mm_loadu_ps(&src
[j
])};
182 r4
= _mm_add_ps(r4
, _mm_mul_ps(s
, coeffs
));
184 r4
= _mm_add_ps(r4
, _mm_shuffle_ps(r4
, r4
, _MM_SHUFFLE(0, 1, 2, 3)));
185 r4
= _mm_add_ps(r4
, _mm_movehl_ps(r4
, r4
));
186 output
= _mm_cvtss_f32(r4
);
191 #elif defined(HAVE_NEON)
193 for(float &output
: dst
)
195 float32x4_t r4
{vdupq_n_f32(0.0f
)};
196 for(size_t j
{0};j
< ConvolveUpdateSamples
;j
+=4)
197 r4
= vmlaq_f32(r4
, vld1q_f32(&src
[j
]), vld1q_f32(&filter
[j
]));
198 r4
= vaddq_f32(r4
, vrev64q_f32(r4
));
199 output
= vget_lane_f32(vadd_f32(vget_low_f32(r4
), vget_high_f32(r4
)), 0);
206 for(float &output
: dst
)
209 for(size_t j
{0};j
< ConvolveUpdateSamples
;++j
)
210 ret
+= src
[j
] * filter
[j
];
218 struct ConvolutionState final
: public EffectState
{
219 FmtChannels mChannels
{};
220 AmbiLayout mAmbiLayout
{};
221 AmbiScaling mAmbiScaling
{};
225 alignas(16) std::array
<float,ConvolveUpdateSamples
*2> mInput
{};
226 al::vector
<std::array
<float,ConvolveUpdateSamples
>,16> mFilter
;
227 al::vector
<std::array
<float,ConvolveUpdateSamples
*2>,16> mOutput
;
230 alignas(16) std::array
<float,ConvolveUpdateSize
> mFftBuffer
{};
231 alignas(16) std::array
<float,ConvolveUpdateSize
> mFftWorkBuffer
{};
233 size_t mCurrentSegment
{0};
234 size_t mNumConvolveSegs
{0};
237 alignas(16) FloatBufferLine mBuffer
{};
238 float mHfScale
{}, mLfScale
{};
239 BandSplitter mFilter
{};
240 std::array
<float,MaxOutputChannels
> Current
{};
241 std::array
<float,MaxOutputChannels
> Target
{};
243 std::vector
<ChannelData
> mChans
;
244 al::vector
<float,16> mComplexData
;
247 ConvolutionState() = default;
248 ~ConvolutionState() override
= default;
250 void NormalMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
251 void UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
, const size_t samplesToDo
);
252 void (ConvolutionState::*mMix
)(const al::span
<FloatBufferLine
>,const size_t)
253 {&ConvolutionState::NormalMix
};
255 void deviceUpdate(const DeviceBase
*device
, const BufferStorage
*buffer
) override
;
256 void update(const ContextBase
*context
, const EffectSlot
*slot
, const EffectProps
*props
,
257 const EffectTarget target
) override
;
258 void process(const size_t samplesToDo
, const al::span
<const FloatBufferLine
> samplesIn
,
259 const al::span
<FloatBufferLine
> samplesOut
) override
;
262 void ConvolutionState::NormalMix(const al::span
<FloatBufferLine
> samplesOut
,
263 const size_t samplesToDo
)
265 for(auto &chan
: mChans
)
266 MixSamples({chan
.mBuffer
.data(), samplesToDo
}, samplesOut
, chan
.Current
.data(),
267 chan
.Target
.data(), samplesToDo
, 0);
270 void ConvolutionState::UpsampleMix(const al::span
<FloatBufferLine
> samplesOut
,
271 const size_t samplesToDo
)
273 for(auto &chan
: mChans
)
275 const al::span
<float> src
{chan
.mBuffer
.data(), samplesToDo
};
276 chan
.mFilter
.processScale(src
, chan
.mHfScale
, chan
.mLfScale
);
277 MixSamples(src
, samplesOut
, chan
.Current
.data(), chan
.Target
.data(), samplesToDo
, 0);
282 void ConvolutionState::deviceUpdate(const DeviceBase
*device
, const BufferStorage
*buffer
)
284 using UhjDecoderType
= UhjDecoder
<512>;
285 static constexpr auto DecoderPadding
= UhjDecoderType::sInputPadding
;
287 static constexpr uint MaxConvolveAmbiOrder
{1u};
290 mFft
= PFFFTSetup
{ConvolveUpdateSize
, PFFFT_REAL
};
294 decltype(mFilter
){}.swap(mFilter
);
295 decltype(mOutput
){}.swap(mOutput
);
296 mFftBuffer
.fill(0.0f
);
297 mFftWorkBuffer
.fill(0.0f
);
300 mNumConvolveSegs
= 0;
302 decltype(mChans
){}.swap(mChans
);
303 decltype(mComplexData
){}.swap(mComplexData
);
305 /* An empty buffer doesn't need a convolution filter. */
306 if(!buffer
|| buffer
->mSampleLen
< 1) return;
308 mChannels
= buffer
->mChannels
;
309 mAmbiLayout
= IsUHJ(mChannels
) ? AmbiLayout::FuMa
: buffer
->mAmbiLayout
;
310 mAmbiScaling
= IsUHJ(mChannels
) ? AmbiScaling::UHJ
: buffer
->mAmbiScaling
;
311 mAmbiOrder
= std::min(buffer
->mAmbiOrder
, MaxConvolveAmbiOrder
);
313 const auto realChannels
= buffer
->channelsFromFmt();
314 const auto numChannels
= (mChannels
== FmtUHJ2
) ? 3u : ChannelsFromFmt(mChannels
, mAmbiOrder
);
316 mChans
.resize(numChannels
);
318 /* The impulse response needs to have the same sample rate as the input and
319 * output. The bsinc24 resampler is decent, but there is high-frequency
320 * attenuation that some people may be able to pick up on. Since this is
321 * called very infrequently, go ahead and use the polyphase resampler.
323 PPhaseResampler resampler
;
324 if(device
->Frequency
!= buffer
->mSampleRate
)
325 resampler
.init(buffer
->mSampleRate
, device
->Frequency
);
326 const auto resampledCount
= static_cast<uint
>(
327 (uint64_t{buffer
->mSampleLen
}*device
->Frequency
+(buffer
->mSampleRate
-1)) /
328 buffer
->mSampleRate
);
330 const BandSplitter splitter
{device
->mXOverFreq
/ static_cast<float>(device
->Frequency
)};
331 for(auto &e
: mChans
)
332 e
.mFilter
= splitter
;
334 mFilter
.resize(numChannels
, {});
335 mOutput
.resize(numChannels
, {});
337 /* Calculate the number of segments needed to hold the impulse response and
338 * the input history (rounded up), and allocate them. Exclude one segment
339 * which gets applied as a time-domain FIR filter. Make sure at least one
340 * segment is allocated to simplify handling.
342 mNumConvolveSegs
= (resampledCount
+(ConvolveUpdateSamples
-1)) / ConvolveUpdateSamples
;
343 mNumConvolveSegs
= std::max(mNumConvolveSegs
, 2_uz
) - 1_uz
;
345 const size_t complex_length
{mNumConvolveSegs
* ConvolveUpdateSize
* (numChannels
+1)};
346 mComplexData
.resize(complex_length
, 0.0f
);
348 /* Load the samples from the buffer. */
349 const size_t srclinelength
{RoundUp(buffer
->mSampleLen
+DecoderPadding
, 16)};
350 auto srcsamples
= std::vector
<float>(srclinelength
* numChannels
);
351 std::fill(srcsamples
.begin(), srcsamples
.end(), 0.0f
);
352 for(size_t c
{0};c
< numChannels
&& c
< realChannels
;++c
)
353 LoadSamples(al::span
{srcsamples
}.subspan(srclinelength
*c
, buffer
->mSampleLen
),
354 buffer
->mData
.data(), c
, realChannels
, buffer
->mType
);
358 auto decoder
= std::make_unique
<UhjDecoderType
>();
359 std::array
<float*,4> samples
{};
360 for(size_t c
{0};c
< numChannels
;++c
)
361 samples
[c
] = srcsamples
.data() + srclinelength
*c
;
362 decoder
->decode({samples
.data(), numChannels
}, buffer
->mSampleLen
, buffer
->mSampleLen
);
365 auto ressamples
= std::vector
<double>(buffer
->mSampleLen
+ (resampler
? resampledCount
: 0));
366 auto ffttmp
= al::vector
<float,16>(ConvolveUpdateSize
);
367 auto fftbuffer
= std::vector
<std::complex<double>>(ConvolveUpdateSize
);
369 float *filteriter
= mComplexData
.data() + mNumConvolveSegs
*ConvolveUpdateSize
;
370 for(size_t c
{0};c
< numChannels
;++c
)
372 /* Resample to match the device. */
375 std::copy_n(srcsamples
.data() + srclinelength
*c
, buffer
->mSampleLen
,
376 ressamples
.data() + resampledCount
);
377 resampler
.process(al::span
{ressamples
}.subspan(resampledCount
, buffer
->mSampleLen
),
378 al::span
{ressamples
}.first(resampledCount
));
381 std::copy_n(srcsamples
.data() + srclinelength
*c
, buffer
->mSampleLen
,
384 /* Store the first segment's samples in reverse in the time-domain, to
385 * apply as a FIR filter.
387 const size_t first_size
{std::min(size_t{resampledCount
}, ConvolveUpdateSamples
)};
388 std::transform(ressamples
.data(), ressamples
.data()+first_size
, mFilter
[c
].rbegin(),
389 [](const double d
) noexcept
-> float { return static_cast<float>(d
); });
391 size_t done
{first_size
};
392 for(size_t s
{0};s
< mNumConvolveSegs
;++s
)
394 const size_t todo
{std::min(resampledCount
-done
, ConvolveUpdateSamples
)};
396 /* Apply a double-precision forward FFT for more precise frequency
399 auto iter
= std::copy_n(&ressamples
[done
], todo
, fftbuffer
.begin());
401 std::fill(iter
, fftbuffer
.end(), std::complex<double>{});
402 forward_fft(al::span
{fftbuffer
});
404 /* Convert to, and pack in, a float buffer for PFFFT. Note that the
405 * first bin stores the real component of the half-frequency bin in
406 * the imaginary component. Also scale the FFT by its length so the
407 * iFFT'd output will be normalized.
409 static constexpr float fftscale
{1.0f
/ float{ConvolveUpdateSize
}};
410 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
412 ffttmp
[i
*2 ] = static_cast<float>(fftbuffer
[i
].real()) * fftscale
;
413 ffttmp
[i
*2 + 1] = static_cast<float>((i
== 0) ?
414 fftbuffer
[ConvolveUpdateSamples
].real() : fftbuffer
[i
].imag()) * fftscale
;
416 /* Reorder backward to make it suitable for pffft_zconvolve and the
417 * subsequent pffft_transform(..., PFFFT_BACKWARD).
419 mFft
.zreorder(ffttmp
.data(), al::to_address(filteriter
), PFFFT_BACKWARD
);
420 filteriter
+= ConvolveUpdateSize
;
426 void ConvolutionState::update(const ContextBase
*context
, const EffectSlot
*slot
,
427 const EffectProps
*props_
, const EffectTarget target
)
429 /* TODO: LFE is not mixed to output. This will require each buffer channel
430 * to have its own output target since the main mixing buffer won't have an
431 * LFE channel (due to being B-Format).
433 static constexpr std::array MonoMap
{
434 ChanPosMap
{FrontCenter
, std::array
{0.0f
, 0.0f
, -1.0f
}}
436 static constexpr std::array StereoMap
{
437 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
438 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
440 static constexpr std::array RearMap
{
441 ChanPosMap
{BackLeft
, std::array
{-sin30
, 0.0f
, cos30
}},
442 ChanPosMap
{BackRight
, std::array
{ sin30
, 0.0f
, cos30
}},
444 static constexpr std::array QuadMap
{
445 ChanPosMap
{FrontLeft
, std::array
{-sin45
, 0.0f
, -cos45
}},
446 ChanPosMap
{FrontRight
, std::array
{ sin45
, 0.0f
, -cos45
}},
447 ChanPosMap
{BackLeft
, std::array
{-sin45
, 0.0f
, cos45
}},
448 ChanPosMap
{BackRight
, std::array
{ sin45
, 0.0f
, cos45
}},
450 static constexpr std::array X51Map
{
451 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
452 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
453 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
455 ChanPosMap
{SideLeft
, std::array
{-sin110
, 0.0f
, -cos110
}},
456 ChanPosMap
{SideRight
, std::array
{ sin110
, 0.0f
, -cos110
}},
458 static constexpr std::array X61Map
{
459 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
460 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
461 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
463 ChanPosMap
{BackCenter
, std::array
{ 0.0f
, 0.0f
, 1.0f
} },
464 ChanPosMap
{SideLeft
, std::array
{-1.0f
, 0.0f
, 0.0f
} },
465 ChanPosMap
{SideRight
, std::array
{ 1.0f
, 0.0f
, 0.0f
} },
467 static constexpr std::array X71Map
{
468 ChanPosMap
{FrontLeft
, std::array
{-sin30
, 0.0f
, -cos30
}},
469 ChanPosMap
{FrontRight
, std::array
{ sin30
, 0.0f
, -cos30
}},
470 ChanPosMap
{FrontCenter
, std::array
{ 0.0f
, 0.0f
, -1.0f
}},
472 ChanPosMap
{BackLeft
, std::array
{-sin30
, 0.0f
, cos30
}},
473 ChanPosMap
{BackRight
, std::array
{ sin30
, 0.0f
, cos30
}},
474 ChanPosMap
{SideLeft
, std::array
{ -1.0f
, 0.0f
, 0.0f
}},
475 ChanPosMap
{SideRight
, std::array
{ 1.0f
, 0.0f
, 0.0f
}},
478 if(mNumConvolveSegs
< 1) UNLIKELY
481 auto &props
= std::get
<ConvolutionProps
>(*props_
);
482 mMix
= &ConvolutionState::NormalMix
;
484 for(auto &chan
: mChans
)
485 std::fill(chan
.Target
.begin(), chan
.Target
.end(), 0.0f
);
486 const float gain
{slot
->Gain
};
487 if(IsAmbisonic(mChannels
))
489 DeviceBase
*device
{context
->mDevice
};
490 if(mChannels
== FmtUHJ2
&& !device
->mUhjEncoder
)
492 mMix
= &ConvolutionState::UpsampleMix
;
493 mChans
[0].mHfScale
= 1.0f
;
494 mChans
[0].mLfScale
= DecoderBase::sWLFScale
;
495 mChans
[1].mHfScale
= 1.0f
;
496 mChans
[1].mLfScale
= DecoderBase::sXYLFScale
;
497 mChans
[2].mHfScale
= 1.0f
;
498 mChans
[2].mLfScale
= DecoderBase::sXYLFScale
;
500 else if(device
->mAmbiOrder
> mAmbiOrder
)
502 mMix
= &ConvolutionState::UpsampleMix
;
503 const auto scales
= AmbiScale::GetHFOrderScales(mAmbiOrder
, device
->mAmbiOrder
,
505 mChans
[0].mHfScale
= scales
[0];
506 mChans
[0].mLfScale
= 1.0f
;
507 for(size_t i
{1};i
< mChans
.size();++i
)
509 mChans
[i
].mHfScale
= scales
[1];
510 mChans
[i
].mLfScale
= 1.0f
;
513 mOutTarget
= target
.Main
->Buffer
;
515 alu::Vector N
{props
.OrientAt
[0], props
.OrientAt
[1], props
.OrientAt
[2], 0.0f
};
517 alu::Vector V
{props
.OrientUp
[0], props
.OrientUp
[1], props
.OrientUp
[2], 0.0f
};
519 /* Build and normalize right-vector */
520 alu::Vector U
{N
.cross_product(V
)};
523 const std::array mixmatrix
{
524 std::array
{1.0f
, 0.0f
, 0.0f
, 0.0f
},
525 std::array
{0.0f
, U
[0], -U
[1], U
[2]},
526 std::array
{0.0f
, -V
[0], V
[1], -V
[2]},
527 std::array
{0.0f
, -N
[0], N
[1], -N
[2]},
530 const auto scales
= GetAmbiScales(mAmbiScaling
);
531 const auto index_map
= Is2DAmbisonic(mChannels
) ?
532 al::span
{GetAmbi2DLayout(mAmbiLayout
)}.subspan(0) :
533 al::span
{GetAmbiLayout(mAmbiLayout
)}.subspan(0);
535 std::array
<float,MaxAmbiChannels
> coeffs
{};
536 for(size_t c
{0u};c
< mChans
.size();++c
)
538 const size_t acn
{index_map
[c
]};
539 const float scale
{scales
[acn
]};
541 std::transform(mixmatrix
[acn
].cbegin(), mixmatrix
[acn
].cend(), coeffs
.begin(),
542 [scale
](const float in
) noexcept
-> float { return in
* scale
; });
544 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[c
].Target
);
549 DeviceBase
*device
{context
->mDevice
};
550 al::span
<const ChanPosMap
> chanmap
{};
553 case FmtMono
: chanmap
= MonoMap
; break;
554 case FmtMonoDup
: chanmap
= MonoMap
; break;
556 case FmtStereo
: chanmap
= StereoMap
; break;
557 case FmtRear
: chanmap
= RearMap
; break;
558 case FmtQuad
: chanmap
= QuadMap
; break;
559 case FmtX51
: chanmap
= X51Map
; break;
560 case FmtX61
: chanmap
= X61Map
; break;
561 case FmtX71
: chanmap
= X71Map
; break;
570 mOutTarget
= target
.Main
->Buffer
;
571 if(device
->mRenderMode
== RenderMode::Pairwise
)
573 /* Scales the azimuth of the given vector by 3 if it's in front.
574 * Effectively scales +/-30 degrees to +/-90 degrees, leaving > +90
577 auto ScaleAzimuthFront
= [](std::array
<float,3> pos
) -> std::array
<float,3>
581 /* Normalize the length of the x,z components for a 2D
582 * vector of the azimuth angle. Negate Z since {0,0,-1} is
585 const float len2d
{std::sqrt(pos
[0]*pos
[0] + pos
[2]*pos
[2])};
586 float x
{pos
[0] / len2d
};
587 float z
{-pos
[2] / len2d
};
589 /* Z > cos(pi/6) = -30 < azimuth < 30 degrees. */
592 /* Triple the angle represented by x,z. */
593 x
= x
*3.0f
- x
*x
*x
*4.0f
;
594 z
= z
*z
*z
*4.0f
- z
*3.0f
;
596 /* Scale the vector back to fit in 3D. */
602 /* If azimuth >= 30 degrees, clamp to 90 degrees. */
603 pos
[0] = std::copysign(len2d
, pos
[0]);
610 for(size_t i
{0};i
< chanmap
.size();++i
)
612 if(chanmap
[i
].channel
== LFE
) continue;
613 const auto coeffs
= CalcDirectionCoeffs(ScaleAzimuthFront(chanmap
[i
].pos
), 0.0f
);
614 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[i
].Target
);
617 else for(size_t i
{0};i
< chanmap
.size();++i
)
619 if(chanmap
[i
].channel
== LFE
) continue;
620 const auto coeffs
= CalcDirectionCoeffs(chanmap
[i
].pos
, 0.0f
);
621 ComputePanGains(target
.Main
, coeffs
, gain
, mChans
[i
].Target
);
626 void ConvolutionState::process(const size_t samplesToDo
,
627 const al::span
<const FloatBufferLine
> samplesIn
, const al::span
<FloatBufferLine
> samplesOut
)
629 if(mNumConvolveSegs
< 1) UNLIKELY
632 size_t curseg
{mCurrentSegment
};
634 for(size_t base
{0u};base
< samplesToDo
;)
636 const size_t todo
{std::min(ConvolveUpdateSamples
-mFifoPos
, samplesToDo
-base
)};
638 std::copy_n(samplesIn
[0].begin() + base
, todo
,
639 mInput
.begin()+ConvolveUpdateSamples
+mFifoPos
);
641 /* Apply the FIR for the newly retrieved input samples, and combine it
642 * with the inverse FFT'd output samples.
644 for(size_t c
{0};c
< mChans
.size();++c
)
646 auto buf_iter
= mChans
[c
].mBuffer
.begin() + base
;
647 apply_fir({buf_iter
, todo
}, mInput
.data()+1 + mFifoPos
, mFilter
[c
].data());
649 auto fifo_iter
= mOutput
[c
].begin() + mFifoPos
;
650 std::transform(fifo_iter
, fifo_iter
+todo
, buf_iter
, buf_iter
, std::plus
<>{});
656 /* Check whether the input buffer is filled with new samples. */
657 if(mFifoPos
< ConvolveUpdateSamples
) break;
660 /* Move the newest input to the front for the next iteration's history. */
661 std::copy(mInput
.cbegin()+ConvolveUpdateSamples
, mInput
.cend(), mInput
.begin());
662 std::fill(mInput
.begin()+ConvolveUpdateSamples
, mInput
.end(), 0.0f
);
664 /* Calculate the frequency-domain response and add the relevant
665 * frequency bins to the FFT history.
667 mFft
.transform(mInput
.data(), mComplexData
.data() + curseg
*ConvolveUpdateSize
,
668 mFftWorkBuffer
.data(), PFFFT_FORWARD
);
670 const float *filter
{mComplexData
.data() + mNumConvolveSegs
*ConvolveUpdateSize
};
671 for(size_t c
{0};c
< mChans
.size();++c
)
673 /* Convolve each input segment with its IR filter counterpart
676 mFftBuffer
.fill(0.0f
);
677 const float *input
{&mComplexData
[curseg
*ConvolveUpdateSize
]};
678 for(size_t s
{curseg
};s
< mNumConvolveSegs
;++s
)
680 mFft
.zconvolve_accumulate(input
, filter
, mFftBuffer
.data());
681 input
+= ConvolveUpdateSize
;
682 filter
+= ConvolveUpdateSize
;
684 input
= mComplexData
.data();
685 for(size_t s
{0};s
< curseg
;++s
)
687 mFft
.zconvolve_accumulate(input
, filter
, mFftBuffer
.data());
688 input
+= ConvolveUpdateSize
;
689 filter
+= ConvolveUpdateSize
;
692 /* Apply iFFT to get the 256 (really 255) samples for output. The
693 * 128 output samples are combined with the last output's 127
694 * second-half samples (and this output's second half is
695 * subsequently saved for next time).
697 mFft
.transform(mFftBuffer
.data(), mFftBuffer
.data(), mFftWorkBuffer
.data(),
700 /* The filter was attenuated, so the response is already scaled. */
701 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
702 mOutput
[c
][i
] = mFftBuffer
[i
] + mOutput
[c
][ConvolveUpdateSamples
+i
];
703 for(size_t i
{0};i
< ConvolveUpdateSamples
;++i
)
704 mOutput
[c
][ConvolveUpdateSamples
+i
] = mFftBuffer
[ConvolveUpdateSamples
+i
];
707 /* Shift the input history. */
708 curseg
= curseg
? (curseg
-1) : (mNumConvolveSegs
-1);
710 mCurrentSegment
= curseg
;
712 /* Finally, mix to the output. */
713 (this->*mMix
)(samplesOut
, samplesToDo
);
717 struct ConvolutionStateFactory final
: public EffectStateFactory
{
718 al::intrusive_ptr
<EffectState
> create() override
719 { return al::intrusive_ptr
<EffectState
>{new ConvolutionState
{}}; }
724 EffectStateFactory
*ConvolutionStateFactory_getFactory()
726 static ConvolutionStateFactory ConvolutionFactory
{};
727 return &ConvolutionFactory
;