#include "config.h"

#include "uhjfilter.h"

#include <algorithm>
#include <cmath>
#include <complex>
#include <functional>
#include <vector>

#include "alcomplex.h"
#include "almalloc.h"
#include "alnumbers.h"
#include "core/bufferline.h"
#include "opthelpers.h"
#include "pffft.h"
#include "phase_shifter.h"
#include "vector.h"


namespace {

template<std::size_t A, typename T, std::size_t N>
constexpr auto assume_aligned_span(const al::span<T,N> s) noexcept -> al::span<T,N>
{ return al::span<T,N>{al::assume_aligned<A>(s.data()), s.size()}; }

/* Convolution is implemented using a segmented overlap-add method. The filter
 * response is broken up into multiple segments of 128 samples, and each
 * segment has an FFT applied with a 256-sample buffer (the latter half left
 * silent) to get its frequency-domain response.
 *
 * Input samples are similarly broken up into 128-sample segments, with a 256-
 * sample FFT applied to each new incoming segment to get its frequency-domain
 * response. A history of FFT'd input segments is maintained, equal to the
 * number of filter response segments.
 *
 * To apply the convolution, each filter response segment is convolved with its
 * paired input segment (using complex multiplies, far cheaper than time-domain
 * FIRs), accumulating into an FFT buffer. The input history is then shifted to
 * align with later filter response segments for the next input segment.
 *
 * An inverse FFT is then applied to the accumulated FFT buffer to get a 256-
 * sample time-domain response for output, which is split in two halves. The
 * first half is the 128-sample output, and the second half is a 128-sample
 * (really, 127) delayed extension, which gets added to the output next time.
 * Convolving two time-domain responses of length N results in a time-domain
 * signal of length N*2 - 1, and this holds true regardless of the convolution
 * being applied in the frequency domain, so these "overflow" samples need to
 * be accounted for.
 */
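/* As a rough illustration (assuming UhjLength256 and UhjLength512, which are
 * instantiated at the end of this file, name 256- and 512-tap filter lengths):
 * a 256-tap filter gives 256/128 = 2 segments and a 512-tap filter gives 4,
 * each handled with a 256-point real FFT. Each completed 128-sample input
 * segment then costs one forward FFT, sNumSegments complex multiply-
 * accumulate passes, and one inverse FFT.
 */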
template<size_t N>
struct SegmentedFilter {
    static constexpr size_t sFftLength{256};
    static constexpr size_t sSampleLength{sFftLength / 2};
    static constexpr size_t sNumSegments{N/sSampleLength};
    static_assert(N >= sFftLength);
    static_assert((N % sSampleLength) == 0);

    PFFFTSetup mFft;
    alignas(16) std::array<float,sFftLength*sNumSegments> mFilterData;

    SegmentedFilter() : mFft{sFftLength, PFFFT_REAL}
    {
        static constexpr size_t fft_size{N};

        /* To set up the filter, we first need to generate the desired
         * response (not reversed).
         */
        auto tmpBuffer = std::vector<double>(fft_size, 0.0);
        for(std::size_t i{0};i < fft_size/2;++i)
        {
            const int k{int{fft_size/2} - static_cast<int>(i*2 + 1)};

            const double w{2.0*al::numbers::pi * static_cast<double>(i*2 + 1)
                / double{fft_size}};
            const double window{0.3635819 - 0.4891775*std::cos(w) + 0.1365995*std::cos(2.0*w)
                - 0.0106411*std::cos(3.0*w)};

            const double pk{al::numbers::pi * static_cast<double>(k)};
            tmpBuffer[i*2 + 1] = window * (1.0-std::cos(pk)) / pk;
        }
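
        /* Only taps an odd distance k from the filter center (fft_size/2) are
         * written, following the ideal (1 - cos(pi*k))/(pi*k) = 2/(pi*k)
         * response of a wide-band +90 degree phase shifter, tapered by a
         * four-term cosine window (the coefficients appear to match the
         * Blackman-Nuttall window).
         */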

        /* The segments of the filter are converted back to the frequency
         * domain, each on their own (0 stuffed).
         */
        using complex_d = std::complex<double>;
        auto fftBuffer = std::vector<complex_d>(sFftLength);
        auto fftTmp = al::vector<float,16>(sFftLength);
        auto filter = mFilterData.begin();
        for(size_t s{0};s < sNumSegments;++s)
        {
            const auto tmpspan = al::span{tmpBuffer}.subspan(sSampleLength*s, sSampleLength);
            auto iter = std::copy_n(tmpspan.cbegin(), tmpspan.size(), fftBuffer.begin());
            std::fill(iter, fftBuffer.end(), complex_d{});
            forward_fft(fftBuffer);

            /* Convert to zdomain data for PFFFT, scaled by the FFT length so
             * the iFFT result will be normalized.
             */
            for(size_t i{0};i < sSampleLength;++i)
            {
                fftTmp[i*2 + 0] = static_cast<float>(fftBuffer[i].real()) / float{sFftLength};
                fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer[sSampleLength].real()
                    : fftBuffer[i].imag()) / float{sFftLength};
            }
            mFft.zreorder(fftTmp.data(), al::to_address(filter), PFFFT_BACKWARD);
            filter += sFftLength;
        }
    }
};

template<size_t N>
const SegmentedFilter<N> gSegmentedFilter;

template<size_t N>
const PhaseShifterT<N> PShifter;


/* Filter coefficients for the 'base' all-pass IIR, which applies a frequency-
 * dependent phase-shift of N degrees. The output of the filter requires a 1-
 * sample delay.
 */
constexpr std::array<float,4> Filter1Coeff{{
    0.479400865589f, 0.876218493539f, 0.976597589508f, 0.997499255936f
}};

/* Filter coefficients for the offset all-pass IIR, which applies a frequency-
 * dependent phase-shift of N+90 degrees.
 */
constexpr std::array<float,4> Filter2Coeff{{
    0.161758498368f, 0.733028932341f, 0.945349700329f, 0.990599156684f
}};

} // namespace
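
/* Each of the four cascaded all-pass sections used below updates two delay
 * elements per sample:
 *
 *     y[n] = c*x[n] + z0;  z0 = z1;  z1 = c*y[n] - x[n];
 *
 * which unrolls to y[n] = c*x[n] + c*y[n-2] - x[n-2], i.e. the second-order
 * all-pass H(z) = (c - z^-2) / (1 - c*z^-2). Chaining four such sections with
 * the coefficient sets above yields the phase responses described for
 * Filter1Coeff and Filter2Coeff.
 */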

void UhjAllPassFilter::processOne(const al::span<const float, 4> coeffs, float x)
{
    auto state = mState;
    for(size_t i{0};i < 4;++i)
    {
        const float y{x*coeffs[i] + state[i].z[0]};
        state[i].z[0] = state[i].z[1];
        state[i].z[1] = y*coeffs[i] - x;
        x = y;
    }
    mState = state;
}

void UhjAllPassFilter::process(const al::span<const float,4> coeffs,
    const al::span<const float> src, const bool updateState, const al::span<float> dst)
{
    auto state = mState;

    auto proc_sample = [&state,coeffs](float x) noexcept -> float
    {
        for(size_t i{0};i < 4;++i)
        {
            const float y{x*coeffs[i] + state[i].z[0]};
            state[i].z[0] = state[i].z[1];
            state[i].z[1] = y*coeffs[i] - x;
            x = y;
        }
        return x;
    };
    std::transform(src.begin(), src.end(), dst.begin(), proc_sample);
    if(updateState) LIKELY mState = state;
}


/* Encoding UHJ from B-Format is done as:
 *
 * S = 0.9396926*W + 0.1855740*X
 * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y
 *
 * Left = (S + D)/2.0
 * Right = (S - D)/2.0
 * T = j(-0.1432*W + 0.6512*X) - 0.7071068*Y
 * Q = 0.9772*Z
 *
 * where j is a wide-band +90 degree phase shift. 3-channel UHJ excludes Q,
 * while 2-channel excludes Q and T.
 *
 * The phase shift is done using a linear FIR filter derived from an FFT'd
 * impulse with the desired shift.
 */
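/* Note that with Left = (S + D)/2 and Right = (S - D)/2, the sum Left + Right
 * recovers S and the difference Left - Right recovers D, which is exactly what
 * the decoders further below start from.
 */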

template<size_t N>
void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
    const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
{
    static constexpr auto &Filter = gSegmentedFilter<N>;
    static_assert(sFftLength == Filter.sFftLength);
    static_assert(sSegmentSize == Filter.sSampleLength);
    static_assert(sNumSegments == Filter.sNumSegments);

    ASSUME(SamplesToDo > 0);
    ASSUME(SamplesToDo <= BufferLineSize);

    const auto winput = al::span{al::assume_aligned<16>(InSamples[0]), SamplesToDo};
    const auto xinput = al::span{al::assume_aligned<16>(InSamples[1]), SamplesToDo};
    const auto yinput = al::span{al::assume_aligned<16>(InSamples[2]), SamplesToDo};

    std::copy_n(winput.begin(), SamplesToDo, mW.begin()+sFilterDelay);
    std::copy_n(xinput.begin(), SamplesToDo, mX.begin()+sFilterDelay);
    std::copy_n(yinput.begin(), SamplesToDo, mY.begin()+sFilterDelay);

    /* S = 0.9396926*W + 0.1855740*X */
    std::transform(mW.begin(), mW.begin()+SamplesToDo, mX.begin(), mS.begin(),
        [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });

    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mD. */
    auto dstore = mD.begin();
    size_t curseg{mCurrentSegment};
    for(size_t base{0};base < SamplesToDo;)
    {
        const size_t todo{std::min(sSegmentSize-mFifoPos, SamplesToDo-base)};
        auto wseg = winput.subspan(base, todo);
        auto xseg = xinput.subspan(base, todo);
        auto wxio = al::span{mWXInOut}.subspan(mFifoPos, todo);

        /* Copy out the samples that were previously processed by the FFT. */
        dstore = std::copy_n(wxio.begin(), todo, dstore);

        /* Transform the non-delayed input and store in the front half of the
         * filter input.
         */
        std::transform(wseg.begin(), wseg.end(), xseg.begin(), wxio.begin(),
            [](const float w, const float x) noexcept -> float
            { return -0.3420201f*w + 0.5098604f*x; });

        mFifoPos += todo;
        base += todo;

        /* Check whether the input buffer is filled with new samples. */
        if(mFifoPos < sSegmentSize) break;
        mFifoPos = 0;

        /* Copy the new input to the next history segment, clearing the back
         * half of the segment, and convert to the frequency domain.
         */
        auto input = mWXHistory.begin() + curseg*sFftLength;
        std::copy_n(mWXInOut.begin(), sSegmentSize, input);
        std::fill_n(input+sSegmentSize, sSegmentSize, 0.0f);

        Filter.mFft.transform(al::to_address(input), al::to_address(input), mWorkData.data(),
            PFFFT_FORWARD);

        /* Convolve each input segment with its IR filter counterpart (aligned
         * in time, from newest to oldest).
         */
        mFftBuffer.fill(0.0f);
        auto filter = Filter.mFilterData.begin();
        for(size_t s{curseg};s < sNumSegments;++s)
        {
            Filter.mFft.zconvolve_accumulate(al::to_address(input), al::to_address(filter),
                mFftBuffer.data());
            input += sFftLength;
            filter += sFftLength;
        }
        input = mWXHistory.begin();
        for(size_t s{0};s < curseg;++s)
        {
            Filter.mFft.zconvolve_accumulate(al::to_address(input), al::to_address(filter),
                mFftBuffer.data());
            input += sFftLength;
            filter += sFftLength;
        }

        /* Convert back to samples, writing to the output and storing the extra
         * for next time.
         */
        Filter.mFft.transform(mFftBuffer.data(), mFftBuffer.data(), mWorkData.data(),
            PFFFT_BACKWARD);

        std::transform(mFftBuffer.begin(), mFftBuffer.begin()+sSegmentSize,
            mWXInOut.begin()+sSegmentSize, mWXInOut.begin(), std::plus{});
        std::copy_n(mFftBuffer.begin()+sSegmentSize, sSegmentSize, mWXInOut.begin()+sSegmentSize);

        /* Shift the input history. */
        curseg = curseg ? (curseg-1) : (sNumSegments-1);
    }
    mCurrentSegment = curseg;

    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
    std::transform(mD.begin(), mD.begin()+SamplesToDo, mY.begin(), mD.begin(),
        [](const float jwx, const float y) noexcept { return jwx + 0.6554516f*y; });

    /* Copy the future samples to the front for next time. */
    std::copy(mW.cbegin()+SamplesToDo, mW.cbegin()+SamplesToDo+sFilterDelay, mW.begin());
    std::copy(mX.cbegin()+SamplesToDo, mX.cbegin()+SamplesToDo+sFilterDelay, mX.begin());
    std::copy(mY.cbegin()+SamplesToDo, mY.cbegin()+SamplesToDo+sFilterDelay, mY.begin());

    /* Apply a delay to the existing output to align with the input delay. */
    auto delayBuffer = mDirectDelay.begin();
    for(float *buffer : {LeftOut, RightOut})
    {
        const auto distbuf = assume_aligned_span<16>(al::span{*delayBuffer});
        ++delayBuffer;

        const auto inout = al::span{al::assume_aligned<16>(buffer), SamplesToDo};
        if(SamplesToDo >= sFilterDelay)
        {
            auto delay_end = std::rotate(inout.begin(), inout.end() - sFilterDelay, inout.end());
            std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
        }
        else
        {
            auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
            std::rotate(distbuf.begin(), delay_start, distbuf.begin() + sFilterDelay);
        }
    }

    /* Combine the direct signal with the produced output. */

    /* Left = (S + D)/2.0 */
    const auto left = al::span{al::assume_aligned<16>(LeftOut), SamplesToDo};
    for(size_t i{0};i < SamplesToDo;++i)
        left[i] += (mS[i] + mD[i]) * 0.5f;

    /* Right = (S - D)/2.0 */
    const auto right = al::span{al::assume_aligned<16>(RightOut), SamplesToDo};
    for(size_t i{0};i < SamplesToDo;++i)
        right[i] += (mS[i] - mD[i]) * 0.5f;
}

/* This encoding implementation uses two sets of four chained IIR filters to
 * produce the desired relative phase shift. The first filter chain produces a
 * phase shift of varying degrees over a wide range of frequencies, while the
 * second filter chain produces a phase shift 90 degrees ahead of the first
 * over the same range. Further details are described here:
 *
 * https://web.archive.org/web/20060708031958/http://www.biochem.oulu.fi/~oniemita/dsp/hilbert/
 *
 * 2-channel UHJ output requires the use of three filter chains. The S channel
 * output uses a Filter1 chain on the W and X channel mix, while the D channel
 * output uses a Filter1 chain on the Y channel plus a Filter2 chain on the W
 * and X channel mix. This results in the W and X input mix on the D channel
 * output having the required +90 degree phase shift relative to the other
 * inputs.
 */
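/* Since a Filter1 chain's output requires a 1-sample delay (see Filter1Coeff
 * above), each Filter1 result below is written starting at index 1 of its
 * destination, with index 0 patched from the last sample of the previous call
 * (mDelayWX, mDelayY, mDirectDelay). The Filter2 output needs no such delay,
 * which preserves the relative +90 degree shift between the two chains.
 */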
void UhjEncoderIIR::encode(float *LeftOut, float *RightOut,
    const al::span<const float *const, 3> InSamples, const size_t SamplesToDo)
{
    ASSUME(SamplesToDo > 0);
    ASSUME(SamplesToDo <= BufferLineSize);

    const auto winput = al::span{al::assume_aligned<16>(InSamples[0]), SamplesToDo};
    const auto xinput = al::span{al::assume_aligned<16>(InSamples[1]), SamplesToDo};
    const auto yinput = al::span{al::assume_aligned<16>(InSamples[2]), SamplesToDo};

    /* S = 0.9396926*W + 0.1855740*X */
    std::transform(winput.begin(), winput.end(), xinput.begin(), mTemp.begin(),
        [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });
    mFilter1WX.process(Filter1Coeff, al::span{mTemp}.first(SamplesToDo), true,
        al::span{mS}.subspan(1));
    mS[0] = mDelayWX; mDelayWX = mS[SamplesToDo];

    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mWX. */
    std::transform(winput.begin(), winput.end(), xinput.begin(), mTemp.begin(),
        [](const float w, const float x) noexcept { return -0.3420201f*w + 0.5098604f*x; });
    mFilter2WX.process(Filter2Coeff, al::span{mTemp}.first(SamplesToDo), true, mWX);

    /* Apply filter1 to Y and store in mD. */
    mFilter1Y.process(Filter1Coeff, yinput, true, al::span{mD}.subspan(1));
    mD[0] = mDelayY; mDelayY = mD[SamplesToDo];

    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
    std::transform(mWX.begin(), mWX.begin()+SamplesToDo, mD.begin(), mD.begin(),
        [](const float jwx, const float y) noexcept { return jwx + 0.6554516f*y; });

    /* Apply the base filter to the existing output to align with the processed
     * signal.
     */
    const auto left = al::span{al::assume_aligned<16>(LeftOut), SamplesToDo};
    mFilter1Direct[0].process(Filter1Coeff, left, true, al::span{mTemp}.subspan(1));
    mTemp[0] = mDirectDelay[0]; mDirectDelay[0] = mTemp[SamplesToDo];

    /* Left = (S + D)/2.0 */
    for(size_t i{0};i < SamplesToDo;++i)
        left[i] = (mS[i] + mD[i])*0.5f + mTemp[i];

    const auto right = al::span{al::assume_aligned<16>(RightOut), SamplesToDo};
    mFilter1Direct[1].process(Filter1Coeff, right, true, al::span{mTemp}.subspan(1));
    mTemp[0] = mDirectDelay[1]; mDirectDelay[1] = mTemp[SamplesToDo];

    /* Right = (S - D)/2.0 */
    for(size_t i{0};i < SamplesToDo;++i)
        right[i] = (mS[i] - mD[i])*0.5f + mTemp[i];
}


/* Decoding UHJ is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
 * X = 0.418496*S - j(0.828331*D + 0.767820*T)
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 * Z = 1.023332*Q
 *
 * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
 * channel excludes Q and T.
 */
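/* For the FIR (phase-shifter) variant below, the caller provides
 * sInputPadding extra samples past samplesToDo on each input line. Those
 * "future" samples, together with the per-channel history buffers
 * (mDTHistory, mSHistory), feed the linear FIR phase shifter so its delay is
 * compensated in the output.
 */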
template<size_t N>
void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    constexpr auto &PShift = PShifter<N>;

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    {
        const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
        const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};
        const auto t = al::span{al::assume_aligned<16>(samples[2]), samplesToDo+sInputPadding};

        /* S = Left + Right */
        std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

        /* D = Left - Right */
        std::transform(left.begin(), left.end(), right.begin(), mD.begin(), std::minus{});

        /* T */
        std::copy(t.begin(), t.end(), mT.begin());
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
    auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
    std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sInputPadding, mT.cbegin(), tmpiter,
        [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
    PShift.process(xoutput, mTemp);

    /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.981532f*s + 0.197484f*jdt; });

    /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.418496f*s - jdt; });

    /* Precompute j*S and store in youtput. */
    tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
    std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
    PShift.process(youtput, mTemp);

    /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
    for(size_t i{0};i < samplesToDo;++i)
        youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i];

    if(samples.size() > 3)
    {
        const auto zoutput = al::span{al::assume_aligned<16>(samples[3]), samplesToDo};
        /* Z = 1.023332*Q */
        std::transform(zoutput.begin(), zoutput.end(), zoutput.begin(),
            [](const float q) noexcept { return 1.023332f*q; });
    }
}

void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    {
        const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
        const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

        /* S = Left + Right */
        std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

        /* D = Left - Right */
        std::transform(left.begin(), left.end(), right.begin(), mD.begin(), std::minus{});
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo+sInputPadding};

    /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
    std::transform(mD.cbegin(), mD.cbegin()+sInputPadding+samplesToDo, youtput.begin(),
        mTemp.begin(),
        [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
    if(mFirstRun) mFilter2DT.processOne(Filter2Coeff, mTemp[0]);
    mFilter2DT.process(Filter2Coeff, al::span{mTemp}.subspan(1,samplesToDo), updateState, xoutput);

    /* Apply filter1 to S and store in mTemp. */
    mFilter1S.process(Filter1Coeff, al::span{mS}.first(samplesToDo), updateState, mTemp);

    /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.981532f*s + 0.197484f*jdt; });
    /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.418496f*s - jdt; });

    /* Apply filter1 to (0.795968*D - 0.676392*T) and store in mTemp. */
    std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float t) noexcept { return 0.795968f*d - 0.676392f*t; });
    mFilter1DT.process(Filter1Coeff, youtput.first(samplesToDo), updateState, mTemp);

    /* Precompute j*S and store in youtput. */
    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
    mFilter2S.process(Filter2Coeff, al::span{mS}.subspan(1, samplesToDo), updateState, youtput);

    /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float dt, const float js) noexcept { return dt + 0.186633f*js; });

    if(samples.size() > 3)
    {
        const auto zoutput = al::span{al::assume_aligned<16>(samples[3]), samplesToDo};

        /* Apply filter1 to Q and store in mTemp. */
        mFilter1Q.process(Filter1Coeff, zoutput, updateState, mTemp);

        /* Z = 1.023332*Q */
        std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, zoutput.begin(),
            [](const float q) noexcept { return 1.023332f*q; });
    }

    mFirstRun = false;
}


/* Super Stereo processing is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.6098637*S + 0.6896511*j*w*D
 * X = 0.8624776*S - 0.7626955*j*w*D
 * Y = 1.6822415*w*D + 0.2156194*j*S
 *
 * where j is a +90 degree phase shift. w is a variable control for the
 * resulting stereo width, with the range 0 <= w <= 0.7.
 */
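/* When the width control changes between calls, the decoders below ramp the
 * applied factor linearly from the previous value to the new target across
 * the first samplesToDo samples (wcurrent + wstep*i), then apply the final
 * width to the sInputPadding look-ahead samples.
 */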
template<size_t N>
void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    constexpr auto &PShift = PShifter<N>;

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    {
        const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
        const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

        std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

        /* Pre-apply the width factor to the difference signal D. Smoothly
         * interpolate when it changes.
         */
        const float wtarget{mWidthControl};
        const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
        if(wtarget == wcurrent || !updateState)
        {
            std::transform(left.begin(), left.end(), right.begin(), mD.begin(),
                [wcurrent](const float l, const float r) noexcept { return (l-r) * wcurrent; });
            mCurrentWidth = wcurrent;
        }
        else
        {
            const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
            float fi{0.0f};

            const auto lfade = left.first(samplesToDo);
            auto dstore = std::transform(lfade.begin(), lfade.end(), right.begin(), mD.begin(),
                [wcurrent,wstep,&fi](const float l, const float r) noexcept
                {
                    const float ret{(l-r) * (wcurrent + wstep*fi)};
                    fi += 1.0f;
                    return ret;
                });

            const auto lend = left.subspan(samplesToDo);
            const auto rend = right.subspan(samplesToDo);
            std::transform(lend.begin(), lend.end(), rend.begin(), dstore,
                [wtarget](const float l, const float r) noexcept { return (l-r) * wtarget; });
            mCurrentWidth = wtarget;
        }
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Precompute j*D and store in xoutput. */
    auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
    std::copy_n(mD.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
    PShift.process(xoutput, mTemp);

    /* W = 0.6098637*S + 0.6896511*j*w*D */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jd) noexcept { return 0.6098637f*s + 0.6896511f*jd; });
    /* X = 0.8624776*S - 0.7626955*j*w*D */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jd) noexcept { return 0.8624776f*s - 0.7626955f*jd; });

    /* Precompute j*S and store in youtput. */
    tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
    std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
    PShift.process(youtput, mTemp);

    /* Y = 1.6822415*w*D + 0.2156194*j*S */
    std::transform(mD.begin(), mD.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float js) noexcept { return 1.6822415f*d + 0.2156194f*js; });
}

void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    {
        const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
        const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

        std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

        /* Pre-apply the width factor to the difference signal D. Smoothly
         * interpolate when it changes.
         */
        const float wtarget{mWidthControl};
        const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
        if(wtarget == wcurrent || !updateState)
        {
            std::transform(left.begin(), left.end(), right.begin(), mD.begin(),
                [wcurrent](const float l, const float r) noexcept
                { return (l-r) * wcurrent; });
            mCurrentWidth = wcurrent;
        }
        else
        {
            const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
            float fi{0.0f};

            const auto lfade = left.first(samplesToDo);
            auto dstore = std::transform(lfade.begin(), lfade.end(), right.begin(), mD.begin(),
                [wcurrent,wstep,&fi](const float l, const float r) noexcept
                {
                    const float ret{(l-r) * (wcurrent + wstep*fi)};
                    fi += 1.0f;
                    return ret;
                });

            const auto lend = left.subspan(samplesToDo);
            const auto rend = right.subspan(samplesToDo);
            std::transform(lend.begin(), lend.end(), rend.begin(), dstore,
                [wtarget](const float l, const float r) noexcept { return (l-r) * wtarget; });
            mCurrentWidth = wtarget;
        }
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Apply filter1 to S and store in mTemp. */
    mFilter1S.process(Filter1Coeff, al::span{mS}.first(samplesToDo), updateState, mTemp);

    /* Precompute j*D and store in xoutput. */
    if(mFirstRun) mFilter2D.processOne(Filter2Coeff, mD[0]);
    mFilter2D.process(Filter2Coeff, al::span{mD}.subspan(1, samplesToDo), updateState, xoutput);

    /* W = 0.6098637*S + 0.6896511*j*w*D */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jd) noexcept { return 0.6098637f*s + 0.6896511f*jd; });
    /* X = 0.8624776*S - 0.7626955*j*w*D */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jd) noexcept { return 0.8624776f*s - 0.7626955f*jd; });

    /* Precompute j*S and store in youtput. */
    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
    mFilter2S.process(Filter2Coeff, al::span{mS}.subspan(1, samplesToDo), updateState, youtput);

    /* Apply filter1 to D and store in mTemp. */
    mFilter1D.process(Filter1Coeff, al::span{mD}.first(samplesToDo), updateState, mTemp);

    /* Y = 1.6822415*w*D + 0.2156194*j*S */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float js) noexcept { return 1.6822415f*d + 0.2156194f*js; });

    mFirstRun = false;
}


template struct UhjEncoder<UhjLength256>;
template struct UhjDecoder<UhjLength256>;
template struct UhjStereoDecoder<UhjLength256>;

template struct UhjEncoder<UhjLength512>;
template struct UhjDecoder<UhjLength512>;
template struct UhjStereoDecoder<UhjLength512>;