#include "uhjfilter.h"

#include <algorithm>
#include <array>
#include <cmath>
#include <complex>
#include <cstddef>
#include <functional>
#include <vector>

#include "alcomplex.h"
#include "alnumbers.h"
#include "alspan.h"
#include "core/bufferline.h"
#include "opthelpers.h"
#include "pffft.h"
#include "phase_shifter.h"
#include "vector.h"
template<std::size_t A, typename T, std::size_t N>
constexpr auto assume_aligned_span(const al::span<T,N> s) noexcept -> al::span<T,N>
{ return al::span<T,N>{al::assume_aligned<A>(s.data()), s.size()}; }
/* Convolution is implemented using a segmented overlap-add method. The filter
 * response is broken up into multiple segments of 128 samples, and each
 * segment has an FFT applied with a 256-sample buffer (the latter half left
 * silent) to get its frequency-domain response.
 *
 * Input samples are similarly broken up into 128-sample segments, with a 256-
 * sample FFT applied to each new incoming segment to get its frequency-domain
 * response. A history of FFT'd input segments is maintained, equal to the
 * number of filter response segments.
 *
 * To apply the convolution, each filter response segment is convolved with its
 * paired input segment (using complex multiplies, far cheaper than time-domain
 * FIRs), accumulating into an FFT buffer. The input history is then shifted to
 * align with later filter response segments for the next input segment.
 *
 * An inverse FFT is then applied to the accumulated FFT buffer to get a 256-
 * sample time-domain response for output, which is split in two halves. The
 * first half is the 128-sample output, and the second half is a 128-sample
 * (really, 127) delayed extension, which gets added to the output next time.
 * Convolving two time-domain responses of length N results in a time-domain
 * signal of length N*2 - 1, and this holds true regardless of the convolution
 * being applied in the frequency domain, so these "overflow" samples need to
 * be accounted for.
 */
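/* As a rough, illustrative sketch (not part of the build), one block of this
 * overlap-add process could be written with plain std::complex spectra and
 * hypothetical fft()/ifft() helpers as:
 *
 *     std::array<std::complex<float>,256> accum{};
 *     for(size_t s{0};s < numSegments;++s)
 *     {
 *         // Complex multiply-accumulate of the s'th input history segment
 *         // with the s'th filter segment.
 *         for(size_t k{0};k < 256;++k)
 *             accum[k] += inputHistory[s][k] * filterSegments[s][k];
 *     }
 *     ifft(accum);
 *     // accum[0..127] is this block's output (plus the previous block's
 *     // tail), accum[128..255] is the tail added to the next block.
 *
 * The implementation below does the same thing with PFFFT's packed real-FFT
 * z-domain format instead of interleaved std::complex values.
 */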
template<size_t N>
struct SegmentedFilter {
    static constexpr size_t sFftLength{256};
    static constexpr size_t sSampleLength{sFftLength / 2};
    static constexpr size_t sNumSegments{N/sSampleLength};
    static_assert(N >= sFftLength);
    static_assert((N % sSampleLength) == 0);

    PFFFTSetup mFft;
    alignas(16) std::array<float,sFftLength*sNumSegments> mFilterData;

    SegmentedFilter() : mFft{sFftLength, PFFFT_REAL}
    {
        static constexpr size_t fft_size{N};

        /* To set up the filter, we first need to generate the desired
         * response (not reversed).
         */
        auto tmpBuffer = std::vector<double>(fft_size, 0.0);
        for(std::size_t i{0};i < fft_size/2;++i)
        {
            const int k{int{fft_size/2} - static_cast<int>(i*2 + 1)};

            const double w{2.0*al::numbers::pi * static_cast<double>(i*2 + 1)
                / double{fft_size}};
            const double window{0.3635819 - 0.4891775*std::cos(w) + 0.1365995*std::cos(2.0*w)
                - 0.0106411*std::cos(3.0*w)};

            const double pk{al::numbers::pi * static_cast<double>(k)};
            tmpBuffer[i*2 + 1] = window * (1.0-std::cos(pk)) / pk;
        }
        /* The segments of the filter are converted back to the frequency
         * domain, each on their own (0 stuffed).
         */
        using complex_d = std::complex<double>;
        auto fftBuffer = std::vector<complex_d>(sFftLength);
        auto fftTmp = al::vector<float,16>(sFftLength);
        auto filter = mFilterData.begin();
        for(size_t s{0};s < sNumSegments;++s)
        {
            const auto tmpspan = al::span{tmpBuffer}.subspan(sSampleLength*s, sSampleLength);
            auto iter = std::copy_n(tmpspan.cbegin(), tmpspan.size(), fftBuffer.begin());
            std::fill(iter, fftBuffer.end(), complex_d{});
            forward_fft(fftBuffer);

            /* Convert to zdomain data for PFFFT, scaled by the FFT length so
             * the iFFT result will be normalized.
             */
            for(size_t i{0};i < sSampleLength;++i)
            {
                fftTmp[i*2 + 0] = static_cast<float>(fftBuffer[i].real()) / float{sFftLength};
                fftTmp[i*2 + 1] = static_cast<float>((i == 0) ? fftBuffer[sSampleLength].real()
                    : fftBuffer[i].imag()) / float{sFftLength};
            }
            mFft.zreorder(fftTmp.data(), al::to_address(filter), PFFFT_BACKWARD);
            filter += sFftLength;
        }
    }
};
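/* The resulting mFilterData holds sNumSegments consecutive blocks of
 * sFftLength floats, each block being one 128-sample slice of the phase-shift
 * filter response in PFFFT's packed z-domain order, pre-scaled so the inverse
 * FFT applied after accumulation needs no further normalization.
 */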
template<size_t N>
const SegmentedFilter<N> gSegmentedFilter;

template<size_t N>
const PhaseShifterT<N> PShifter;


/* Filter coefficients for the 'base' all-pass IIR, which applies a frequency-
 * dependent phase-shift of N degrees. The output of the filter requires a 1-
 * sample delay.
 */
constexpr std::array<float,4> Filter1Coeff{{
    0.479400865589f, 0.876218493539f, 0.976597589508f, 0.997499255936f
}};
/* Filter coefficients for the offset all-pass IIR, which applies a frequency-
 * dependent phase-shift of N+90 degrees.
 */
constexpr std::array<float,4> Filter2Coeff{{
    0.161758498368f, 0.733028932341f, 0.945349700329f, 0.990599156684f
}};
void UhjAllPassFilter::processOne(const al::span<const float, 4> coeffs, float x)
{
    auto state = mState;
    for(size_t i{0};i < 4;++i)
    {
        const float y{x*coeffs[i] + state[i].z[0]};
        state[i].z[0] = state[i].z[1];
        state[i].z[1] = y*coeffs[i] - x;
        x = y;
    }
    mState = state;
}
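/* Each of the four sections above is a second-order all-pass. Writing a for
 * coeffs[i], the recurrence works out to
 *
 *     y[n] = a*x[n] + a*y[n-2] - x[n-2]
 *
 * i.e. H(z) = (a - z^-2)/(1 - a*z^-2), which has unit magnitude at every
 * frequency and only alters phase. Chaining the four sections produces the
 * wide-band phase responses described for Filter1Coeff and Filter2Coeff
 * above.
 */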
void UhjAllPassFilter::process(const al::span<const float,4> coeffs,
    const al::span<const float> src, const bool updateState, const al::span<float> dst)
{
    auto state = mState;

    auto proc_sample = [&state,coeffs](float x) noexcept -> float
    {
        for(size_t i{0};i < 4;++i)
        {
            const float y{x*coeffs[i] + state[i].z[0]};
            state[i].z[0] = state[i].z[1];
            state[i].z[1] = y*coeffs[i] - x;
            x = y;
        }
        return x;
    };
    std::transform(src.begin(), src.end(), dst.begin(), proc_sample);
    if(updateState) LIKELY mState = state;
}
/* Encoding UHJ from B-Format is done as:
 *
 * S = 0.9396926*W + 0.1855740*X
 * D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y
 *
 * Left = (S + D)/2.0
 * Right = (S - D)/2.0
 * T = j(-0.1432*W + 0.6512*X) - 0.7071068*Y
 * Q = 0.9772*Z
 *
 * where j is a wide-band +90 degree phase shift. 3-channel UHJ excludes Q,
 * while 2-channel excludes Q and T.
 *
 * The phase shift is done using a linear FIR filter derived from an FFT'd
 * impulse with the desired shift.
 */
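/* For illustration only: with an idealized wide-band +90 degree shift
 * (written here as a hypothetical shift90() applied to a whole buffer), one
 * block of 2-channel encoding would reduce to
 *
 *     S = 0.9396926f*W + 0.1855740f*X;
 *     D = shift90(-0.3420201f*W + 0.5098604f*X) + 0.6554516f*Y;
 *     Left  = (S + D) * 0.5f;
 *     Right = (S - D) * 0.5f;
 *
 * The encoder below realizes shift90() with the segmented FIR convolution
 * described above, which is why the W/X/Y inputs are delayed by sFilterDelay
 * before being mixed into the output.
 */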
template<size_t N>
void UhjEncoder<N>::encode(float *LeftOut, float *RightOut,
    const al::span<const float*const,3> InSamples, const size_t SamplesToDo)
{
    static constexpr auto &Filter = gSegmentedFilter<N>;
    static_assert(sFftLength == Filter.sFftLength);
    static_assert(sSegmentSize == Filter.sSampleLength);
    static_assert(sNumSegments == Filter.sNumSegments);

    ASSUME(SamplesToDo > 0);
    ASSUME(SamplesToDo <= BufferLineSize);

    const auto winput = al::span{al::assume_aligned<16>(InSamples[0]), SamplesToDo};
    const auto xinput = al::span{al::assume_aligned<16>(InSamples[1]), SamplesToDo};
    const auto yinput = al::span{al::assume_aligned<16>(InSamples[2]), SamplesToDo};

    std::copy_n(winput.begin(), SamplesToDo, mW.begin()+sFilterDelay);
    std::copy_n(xinput.begin(), SamplesToDo, mX.begin()+sFilterDelay);
    std::copy_n(yinput.begin(), SamplesToDo, mY.begin()+sFilterDelay);

    /* S = 0.9396926*W + 0.1855740*X */
    std::transform(mW.begin(), mW.begin()+SamplesToDo, mX.begin(), mS.begin(),
        [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });

    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mD. */
    auto dstore = mD.begin();
    size_t curseg{mCurrentSegment};
    for(size_t base{0};base < SamplesToDo;)
    {
        const size_t todo{std::min(sSegmentSize-mFifoPos, SamplesToDo-base)};
        auto wseg = winput.subspan(base, todo);
        auto xseg = xinput.subspan(base, todo);
        auto wxio = al::span{mWXInOut}.subspan(mFifoPos, todo);

        /* Copy out the samples that were previously processed by the FFT. */
        dstore = std::copy_n(wxio.begin(), todo, dstore);

        /* Transform the non-delayed input and store in the front half of the
         * filter input buffer.
         */
        std::transform(wseg.begin(), wseg.end(), xseg.begin(), wxio.begin(),
            [](const float w, const float x) noexcept -> float
            { return -0.3420201f*w + 0.5098604f*x; });

        mFifoPos += todo;
        base += todo;

        /* Check whether the input buffer is filled with new samples. */
        if(mFifoPos < sSegmentSize) break;
        mFifoPos = 0;

        /* Copy the new input to the next history segment, clearing the back
         * half of the segment, and convert to the frequency domain.
         */
        auto input = mWXHistory.begin() + curseg*sFftLength;
        std::copy_n(mWXInOut.begin(), sSegmentSize, input);
        std::fill_n(input+sSegmentSize, sSegmentSize, 0.0f);

        Filter.mFft.transform(al::to_address(input), al::to_address(input), mWorkData.data(),
            PFFFT_FORWARD);

        /* Convolve each input segment with its IR filter counterpart (aligned
         * in time, from newest to oldest).
         */
        mFftBuffer.fill(0.0f);
        auto filter = Filter.mFilterData.begin();
        for(size_t s{curseg};s < sNumSegments;++s)
        {
            Filter.mFft.zconvolve_accumulate(al::to_address(input), al::to_address(filter),
                mFftBuffer.data());
            input += sFftLength;
            filter += sFftLength;
        }
        input = mWXHistory.begin();
        for(size_t s{0};s < curseg;++s)
        {
            Filter.mFft.zconvolve_accumulate(al::to_address(input), al::to_address(filter),
                mFftBuffer.data());
            input += sFftLength;
            filter += sFftLength;
        }

        /* Convert back to samples, writing to the output and storing the extra
         * for next time.
         */
        Filter.mFft.transform(mFftBuffer.data(), mFftBuffer.data(), mWorkData.data(),
            PFFFT_BACKWARD);

        std::transform(mFftBuffer.begin(), mFftBuffer.begin()+sSegmentSize,
            mWXInOut.begin()+sSegmentSize, mWXInOut.begin(), std::plus{});
        std::copy_n(mFftBuffer.begin()+sSegmentSize, sSegmentSize, mWXInOut.begin()+sSegmentSize);

        /* Shift the input history. */
        curseg = curseg ? (curseg-1) : (sNumSegments-1);
    }
    mCurrentSegment = curseg;

    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
    std::transform(mD.begin(), mD.begin()+SamplesToDo, mY.begin(), mD.begin(),
        [](const float jwx, const float y) noexcept { return jwx + 0.6554516f*y; });

    /* Copy the future samples to the front for next time. */
    std::copy(mW.cbegin()+SamplesToDo, mW.cbegin()+SamplesToDo+sFilterDelay, mW.begin());
    std::copy(mX.cbegin()+SamplesToDo, mX.cbegin()+SamplesToDo+sFilterDelay, mX.begin());
    std::copy(mY.cbegin()+SamplesToDo, mY.cbegin()+SamplesToDo+sFilterDelay, mY.begin());

    /* Apply a delay to the existing output to align with the input delay. */
    auto delayBuffer = mDirectDelay.begin();
    for(float *buffer : {LeftOut, RightOut})
    {
        const auto distbuf = assume_aligned_span<16>(al::span{*delayBuffer});
        ++delayBuffer;

        const auto inout = al::span{al::assume_aligned<16>(buffer), SamplesToDo};
        if(SamplesToDo >= sFilterDelay)
        {
            auto delay_end = std::rotate(inout.begin(), inout.end() - sFilterDelay, inout.end());
            std::swap_ranges(inout.begin(), delay_end, distbuf.begin());
        }
        else
        {
            auto delay_start = std::swap_ranges(inout.begin(), inout.end(), distbuf.begin());
            std::rotate(distbuf.begin(), delay_start, distbuf.begin() + sFilterDelay);
        }
    }

    /* Combine the direct signal with the produced output. */

    /* Left = (S + D)/2.0 */
    const auto left = al::span{al::assume_aligned<16>(LeftOut), SamplesToDo};
    for(size_t i{0};i < SamplesToDo;++i)
        left[i] += (mS[i] + mD[i]) * 0.5f;

    /* Right = (S - D)/2.0 */
    const auto right = al::span{al::assume_aligned<16>(RightOut), SamplesToDo};
    for(size_t i{0};i < SamplesToDo;++i)
        right[i] += (mS[i] - mD[i]) * 0.5f;
}

/* This encoding implementation uses two sets of four chained IIR filters to
 * produce the desired relative phase shift. The first filter chain produces a
 * phase shift of varying degrees over a wide range of frequencies, while the
 * second filter chain produces a phase shift 90 degrees ahead of the first
 * over the same range. Further details are described here:
 *
 * https://web.archive.org/web/20060708031958/http://www.biochem.oulu.fi/~oniemita/dsp/hilbert/
 *
 * 2-channel UHJ output requires the use of three filter chains. The S channel
 * output uses a Filter1 chain on the W and X channel mix, while the D channel
 * output uses a Filter1 chain on the Y channel plus a Filter2 chain on the W
 * and X channel mix. This results in the W and X input mix on the D channel
 * output having the required +90 degree phase shift relative to the other
 * inputs.
 */
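/* Put differently: if allpass1() and allpass2() denote the Filter1Coeff and
 * Filter2Coeff chains, both preserve magnitude, and allpass2(x) comes out
 * roughly 90 degrees ahead of allpass1(x) at matching frequencies. Running
 * the W/X mix through Filter2 while Y and the direct signal go through
 * Filter1 therefore yields the same relative phase relationship the FIR
 * encoder gets from its shift filter. This is also why the Filter1 outputs
 * below are written with a 1-sample delay, carrying the last sample of the
 * previous block in mDelayWX/mDelayY.
 */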
void UhjEncoderIIR::encode(float *LeftOut, float *RightOut,
    const al::span<const float *const, 3> InSamples, const size_t SamplesToDo)
{
    ASSUME(SamplesToDo > 0);
    ASSUME(SamplesToDo <= BufferLineSize);

    const auto winput = al::span{al::assume_aligned<16>(InSamples[0]), SamplesToDo};
    const auto xinput = al::span{al::assume_aligned<16>(InSamples[1]), SamplesToDo};
    const auto yinput = al::span{al::assume_aligned<16>(InSamples[2]), SamplesToDo};

    /* S = 0.9396926*W + 0.1855740*X */
    std::transform(winput.begin(), winput.end(), xinput.begin(), mTemp.begin(),
        [](const float w, const float x) noexcept { return 0.9396926f*w + 0.1855740f*x; });
    mFilter1WX.process(Filter1Coeff, al::span{mTemp}.first(SamplesToDo), true,
        al::span{mS}.subspan(1));
    mS[0] = mDelayWX; mDelayWX = mS[SamplesToDo];

    /* Precompute j(-0.3420201*W + 0.5098604*X) and store in mWX. */
    std::transform(winput.begin(), winput.end(), xinput.begin(), mTemp.begin(),
        [](const float w, const float x) noexcept { return -0.3420201f*w + 0.5098604f*x; });
    mFilter2WX.process(Filter2Coeff, al::span{mTemp}.first(SamplesToDo), true, mWX);

    /* Apply filter1 to Y and store in mD. */
    mFilter1Y.process(Filter1Coeff, yinput, true, al::span{mD}.subspan(1));
    mD[0] = mDelayY; mDelayY = mD[SamplesToDo];

    /* D = j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y */
    std::transform(mWX.begin(), mWX.begin()+SamplesToDo, mD.begin(), mD.begin(),
        [](const float jwx, const float y) noexcept { return jwx + 0.6554516f*y; });

    /* Apply the base filter to the existing output to align with the processed
     * signal.
     */
    const auto left = al::span{al::assume_aligned<16>(LeftOut), SamplesToDo};
    mFilter1Direct[0].process(Filter1Coeff, left, true, al::span{mTemp}.subspan(1));
    mTemp[0] = mDirectDelay[0]; mDirectDelay[0] = mTemp[SamplesToDo];

    /* Left = (S + D)/2.0 */
    for(size_t i{0};i < SamplesToDo;++i)
        left[i] = (mS[i] + mD[i])*0.5f + mTemp[i];

    const auto right = al::span{al::assume_aligned<16>(RightOut), SamplesToDo};
    mFilter1Direct[1].process(Filter1Coeff, right, true, al::span{mTemp}.subspan(1));
    mTemp[0] = mDirectDelay[1]; mDirectDelay[1] = mTemp[SamplesToDo];

    /* Right = (S - D)/2.0 */
    for(size_t i{0};i < SamplesToDo;++i)
        right[i] = (mS[i] - mD[i])*0.5f + mTemp[i];
}

/* Decoding UHJ is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
 * X = 0.418496*S - j(0.828331*D + 0.767820*T)
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 * Z = 1.023332*Q
 *
 * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
 * channel excludes Q and T.
 */
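/* For illustration only, using the same hypothetical shift90() as in the
 * encoder notes: a 2-channel decode (T = 0, Q = 0) of one block would be
 *
 *     S = Left + Right;
 *     D = Left - Right;
 *     W = 0.981532f*S + 0.197484f*shift90(0.828331f*D);
 *     X = 0.418496f*S - shift90(0.828331f*D);
 *     Y = 0.795968f*D + 0.186633f*shift90(S);
 *
 * The decoders below realize shift90() with the FIR phase shifter (PShifter)
 * or the IIR all-pass chains, which is why the inputs carry sInputPadding
 * extra samples beyond samplesToDo.
 */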
template<size_t N>
void UhjDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    constexpr auto &PShift = PShifter<N>;

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
    const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};
    const auto t = al::span{al::assume_aligned<16>(samples[2]), samplesToDo+sInputPadding};

    /* S = Left + Right */
    std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

    /* D = Left - Right */
    std::transform(left.begin(), left.end(), right.begin(), mD.begin(), std::minus{});

    std::copy(t.begin(), t.end(), mT.begin());

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
    auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
    std::transform(mD.cbegin(), mD.cbegin()+samplesToDo+sInputPadding, mT.cbegin(), tmpiter,
        [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
    PShift.process(xoutput, mTemp);

    /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.981532f*s + 0.197484f*jdt; });

    /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.418496f*s - jdt; });

    /* Precompute j*S and store in youtput. */
    tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
    std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
    PShift.process(youtput, mTemp);

    /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
    for(size_t i{0};i < samplesToDo;++i)
        youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i];

    if(samples.size() > 3)
    {
        const auto zoutput = al::span{al::assume_aligned<16>(samples[3]), samplesToDo};
        /* Z = 1.023332*Q */
        std::transform(zoutput.begin(), zoutput.end(), zoutput.begin(),
            [](const float q) noexcept { return 1.023332f*q; });
    }
}

void UhjDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
    const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

    /* S = Left + Right */
    std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

    /* D = Left - Right */
    std::transform(left.begin(), left.end(), right.begin(), mD.begin(), std::minus{});

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo+sInputPadding};

    /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
    std::transform(mD.cbegin(), mD.cbegin()+sInputPadding+samplesToDo, youtput.begin(),
        mTemp.begin(),
        [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
    if(mFirstRun) mFilter2DT.processOne(Filter2Coeff, mTemp[0]);
    mFilter2DT.process(Filter2Coeff, al::span{mTemp}.subspan(1,samplesToDo), updateState, xoutput);

    /* Apply filter1 to S and store in mTemp. */
    mFilter1S.process(Filter1Coeff, al::span{mS}.first(samplesToDo), updateState, mTemp);

    /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.981532f*s + 0.197484f*jdt; });
    /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jdt) noexcept { return 0.418496f*s - jdt; });

    /* Apply filter1 to (0.795968*D - 0.676392*T) and store in mTemp. */
    std::transform(mD.cbegin(), mD.cbegin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float t) noexcept { return 0.795968f*d - 0.676392f*t; });
    mFilter1DT.process(Filter1Coeff, youtput.first(samplesToDo), updateState, mTemp);

    /* Precompute j*S and store in youtput. */
    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
    mFilter2S.process(Filter2Coeff, al::span{mS}.subspan(1, samplesToDo), updateState, youtput);

    /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float dt, const float js) noexcept { return dt + 0.186633f*js; });

    if(samples.size() > 3)
    {
        const auto zoutput = al::span{al::assume_aligned<16>(samples[3]), samplesToDo};

        /* Apply filter1 to Q and store in mTemp. */
        mFilter1Q.process(Filter1Coeff, zoutput, updateState, mTemp);

        /* Z = 1.023332*Q */
        std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, zoutput.begin(),
            [](const float q) noexcept { return 1.023332f*q; });
    }

    mFirstRun = false;
}

/* Super Stereo processing is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.6098637*S + 0.6896511*j*w*D
 * X = 0.8624776*S - 0.7626955*j*w*D
 * Y = 1.6822415*w*D + 0.2156194*j*S
 *
 * where j is a +90 degree phase shift. w is a variable control for the
 * resulting stereo width, with the range 0 <= w <= 0.7.
 */
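/* For example, substituting w = 0 into the above removes the D (Left-Right
 * difference) contribution entirely, leaving W = 0.6098637*S,
 * X = 0.8624776*S and Y = 0.2156194*j*S, while larger w values mix
 * progressively more of the difference signal into the decoded width.
 */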
template<size_t N>
void UhjStereoDecoder<N>::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    constexpr auto &PShift = PShifter<N>;

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
    const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

    /* S = Left + Right */
    std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

    /* Pre-apply the width factor to the difference signal D. Smoothly
     * interpolate when it changes.
     */
    const float wtarget{mWidthControl};
    const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
    if(wtarget == wcurrent || !updateState)
    {
        std::transform(left.begin(), left.end(), right.begin(), mD.begin(),
            [wcurrent](const float l, const float r) noexcept { return (l-r) * wcurrent; });
        mCurrentWidth = wcurrent;
    }
    else
    {
        const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
        float fi{0.0f};

        const auto lfade = left.first(samplesToDo);
        auto dstore = std::transform(lfade.begin(), lfade.end(), right.begin(), mD.begin(),
            [wcurrent,wstep,&fi](const float l, const float r) noexcept
            {
                const float ret{(l-r) * (wcurrent + wstep*fi)};
                fi += 1.0f;
                return ret;
            });

        const auto lend = left.subspan(samplesToDo);
        const auto rend = right.subspan(samplesToDo);
        std::transform(lend.begin(), lend.end(), rend.begin(), dstore,
            [wtarget](const float l, const float r) noexcept { return (l-r) * wtarget; });
        mCurrentWidth = wtarget;
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Precompute j*D and store in xoutput. */
    auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
    std::copy_n(mD.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mDTHistory.size(), mDTHistory.begin());
    PShift.process(xoutput, mTemp);

    /* W = 0.6098637*S + 0.6896511*j*w*D */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jd) noexcept { return 0.6098637f*s + 0.6896511f*jd; });
    /* X = 0.8624776*S - 0.7626955*j*w*D */
    std::transform(mS.begin(), mS.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jd) noexcept { return 0.8624776f*s - 0.7626955f*jd; });

    /* Precompute j*S and store in youtput. */
    tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
    std::copy_n(mS.cbegin(), samplesToDo+sInputPadding, tmpiter);
    if(updateState) LIKELY
        std::copy_n(mTemp.cbegin()+samplesToDo, mSHistory.size(), mSHistory.begin());
    PShift.process(youtput, mTemp);

    /* Y = 1.6822415*w*D + 0.2156194*j*S */
    std::transform(mD.begin(), mD.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float js) noexcept { return 1.6822415f*d + 0.2156194f*js; });
}

void UhjStereoDecoderIIR::decode(const al::span<float*> samples, const size_t samplesToDo,
    const bool updateState)
{
    static_assert(sInputPadding <= sMaxPadding, "Filter padding is too large");

    ASSUME(samplesToDo > 0);
    ASSUME(samplesToDo <= BufferLineSize);

    const auto left = al::span{al::assume_aligned<16>(samples[0]), samplesToDo+sInputPadding};
    const auto right = al::span{al::assume_aligned<16>(samples[1]), samplesToDo+sInputPadding};

    /* S = Left + Right */
    std::transform(left.begin(), left.end(), right.begin(), mS.begin(), std::plus{});

    /* Pre-apply the width factor to the difference signal D. Smoothly
     * interpolate when it changes.
     */
    const float wtarget{mWidthControl};
    const float wcurrent{(mCurrentWidth < 0.0f) ? wtarget : mCurrentWidth};
    if(wtarget == wcurrent || !updateState)
    {
        std::transform(left.begin(), left.end(), right.begin(), mD.begin(),
            [wcurrent](const float l, const float r) noexcept
            { return (l-r) * wcurrent; });
        mCurrentWidth = wcurrent;
    }
    else
    {
        const float wstep{(wtarget - wcurrent) / static_cast<float>(samplesToDo)};
        float fi{0.0f};

        const auto lfade = left.first(samplesToDo);
        auto dstore = std::transform(lfade.begin(), lfade.end(), right.begin(), mD.begin(),
            [wcurrent,wstep,&fi](const float l, const float r) noexcept
            {
                const float ret{(l-r) * (wcurrent + wstep*fi)};
                fi += 1.0f;
                return ret;
            });

        const auto lend = left.subspan(samplesToDo);
        const auto rend = right.subspan(samplesToDo);
        std::transform(lend.begin(), lend.end(), rend.begin(), dstore,
            [wtarget](const float l, const float r) noexcept { return (l-r) * wtarget; });
        mCurrentWidth = wtarget;
    }

    const auto woutput = al::span{al::assume_aligned<16>(samples[0]), samplesToDo};
    const auto xoutput = al::span{al::assume_aligned<16>(samples[1]), samplesToDo};
    const auto youtput = al::span{al::assume_aligned<16>(samples[2]), samplesToDo};

    /* Apply filter1 to S and store in mTemp. */
    mFilter1S.process(Filter1Coeff, al::span{mS}.first(samplesToDo), updateState, mTemp);

    /* Precompute j*D and store in xoutput. */
    if(mFirstRun) mFilter2D.processOne(Filter2Coeff, mD[0]);
    mFilter2D.process(Filter2Coeff, al::span{mD}.subspan(1, samplesToDo), updateState, xoutput);

    /* W = 0.6098637*S + 0.6896511*j*w*D */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), woutput.begin(),
        [](const float s, const float jd) noexcept { return 0.6098637f*s + 0.6896511f*jd; });
    /* X = 0.8624776*S - 0.7626955*j*w*D */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, xoutput.begin(), xoutput.begin(),
        [](const float s, const float jd) noexcept { return 0.8624776f*s - 0.7626955f*jd; });

    /* Precompute j*S and store in youtput. */
    if(mFirstRun) mFilter2S.processOne(Filter2Coeff, mS[0]);
    mFilter2S.process(Filter2Coeff, al::span{mS}.subspan(1, samplesToDo), updateState, youtput);

    /* Apply filter1 to D and store in mTemp. */
    mFilter1D.process(Filter1Coeff, al::span{mD}.first(samplesToDo), updateState, mTemp);

    /* Y = 1.6822415*w*D + 0.2156194*j*S */
    std::transform(mTemp.begin(), mTemp.begin()+samplesToDo, youtput.begin(), youtput.begin(),
        [](const float d, const float js) noexcept { return 1.6822415f*d + 0.2156194f*js; });

    mFirstRun = false;
}

template struct UhjEncoder<UhjLength256>;
template struct UhjDecoder<UhjLength256>;
template struct UhjStereoDecoder<UhjLength256>;

template struct UhjEncoder<UhjLength512>;
template struct UhjDecoder<UhjLength512>;
template struct UhjStereoDecoder<UhjLength512>;