Remove a left-over assignment
[openal-soft.git] / utils / uhjdecoder.cpp
blob7bedcb223c6bcf385ff7323d6780d6df80f23adf
1 /*
2 * 2-channel UHJ Decoder
4 * Copyright (c) Chris Robinson <chris.kcat@gmail.com>
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to deal
8 * in the Software without restriction, including without limitation the rights
9 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 * copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in
14 * all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 * THE SOFTWARE.
25 #include "config.h"
27 #include <algorithm>
28 #include <array>
29 #include <cassert>
30 #include <cerrno>
31 #include <complex>
32 #include <cstddef>
33 #include <cstdio>
34 #include <cstring>
35 #include <memory>
36 #include <string>
37 #include <string_view>
38 #include <system_error>
39 #include <utility>
40 #include <vector>
42 #include "albit.h"
43 #include "almalloc.h"
44 #include "alnumbers.h"
45 #include "alspan.h"
46 #include "alstring.h"
47 #include "vector.h"
48 #include "opthelpers.h"
49 #include "phase_shifter.h"
51 #include "sndfile.h"
53 #include "win_main_utf8.h"
56 namespace {
58 struct FileDeleter {
59 void operator()(gsl::owner<FILE*> file) { fclose(file); }
61 using FilePtr = std::unique_ptr<FILE,FileDeleter>;
63 struct SndFileDeleter {
64 void operator()(SNDFILE *sndfile) { sf_close(sndfile); }
66 using SndFilePtr = std::unique_ptr<SNDFILE,SndFileDeleter>;
/* Short names for the unsigned primitive types used by the file writers. */
using ubyte = unsigned char;
using ushort = unsigned short;
using uint = unsigned int;

/* Double-precision complex value. */
using complex_d = std::complex<double>;

/* The four raw bytes making up one 32-bit sample. */
using byte4 = std::array<std::byte,4>;
77 constexpr std::array<ubyte,16> SUBTYPE_BFORMAT_FLOAT{
78 0x03, 0x00, 0x00, 0x00, 0x21, 0x07, 0xd3, 0x11, 0x86, 0x44, 0xc8, 0xc1,
79 0xca, 0x00, 0x00, 0x00
/* Writes val to f as two bytes, least-significant byte first, regardless of
 * the host's byte order. The fwrite result is not inspected here; a failure
 * is left on the stream's error indicator.
 */
void fwrite16le(ushort val, FILE *f)
{
    std::array<unsigned char,2> bytes{};
    for(std::size_t i{0};i < bytes.size();++i)
        bytes[i] = static_cast<unsigned char>((val >> (i*8)) & 0xff);
    fwrite(bytes.data(), 1, bytes.size(), f);
}
/* Writes val to f as four bytes, least-significant byte first, regardless of
 * the host's byte order. The fwrite result is not inspected here; a failure
 * is left on the stream's error indicator.
 */
void fwrite32le(uint val, FILE *f)
{
    std::array<unsigned char,4> bytes{};
    for(std::size_t i{0};i < bytes.size();++i)
        bytes[i] = static_cast<unsigned char>((val >> (i*8)) & 0xff);
    fwrite(bytes.data(), 1, bytes.size(), f);
}
95 byte4 f32AsLEBytes(const float value)
97 auto ret = al::bit_cast<byte4>(value);
98 if constexpr(al::endian::native == al::endian::big)
100 std::swap(ret[0], ret[3]);
101 std::swap(ret[1], ret[2]);
103 return ret;
107 constexpr uint BufferLineSize{1024};
109 using FloatBufferLine = std::array<float,BufferLineSize>;
110 using FloatBufferSpan = al::span<float,BufferLineSize>;
113 struct UhjDecoder {
114 constexpr static std::size_t sFilterDelay{1024};
116 alignas(16) std::array<float,BufferLineSize+sFilterDelay> mS{};
117 alignas(16) std::array<float,BufferLineSize+sFilterDelay> mD{};
118 alignas(16) std::array<float,BufferLineSize+sFilterDelay> mT{};
119 alignas(16) std::array<float,BufferLineSize+sFilterDelay> mQ{};
121 /* History for the FIR filter. */
122 alignas(16) std::array<float,sFilterDelay-1> mDTHistory{};
123 alignas(16) std::array<float,sFilterDelay-1> mSHistory{};
125 alignas(16) std::array<float,BufferLineSize + sFilterDelay*2> mTemp{};
127 void decode(const al::span<const float> InSamples, const std::size_t InChannels,
128 const al::span<FloatBufferLine> OutSamples, const std::size_t SamplesToDo);
129 void decode2(const al::span<const float> InSamples, const al::span<FloatBufferLine> OutSamples,
130 const std::size_t SamplesToDo);
133 const PhaseShifterT<UhjDecoder::sFilterDelay*2> PShift{};
/* Decoding UHJ is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T)
 * X = 0.418496*S - j(0.828331*D + 0.767820*T)
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 * Z = 1.023332*Q
 *
 * where j is a +90 degree phase shift. 3-channel UHJ excludes Q, while 2-
 * channel excludes Q and T. The B-Format signal reconstructed from 2-channel
 * UHJ should not be run through a normal B-Format decoder, as it needs
 * different shelf filters.
 *
 * NOTE: Some sources specify
 *
 * S = (Left + Right)/2
 * D = (Left - Right)/2
 *
 * However, this is incorrect. It's halving Left and Right even though they
 * were already halved during encoding, causing S and D to be half what they
 * initially were at the encoding stage. This division is not present in
 * Gerzon's original paper for deriving Sigma (S) or Delta (D) from the L and R
 * signals. As proof, taking Y for example:
 *
 * Y = 0.795968*D - 0.676392*T + j(0.186633*S)
 *
 * * Plug in the encoding parameters, using ? as a placeholder for whether S
 *   and D should receive an extra 0.5 factor
 * Y = 0.795968*(j(-0.3420201*W + 0.5098604*X) + 0.6554516*Y)*? -
 *     0.676392*(j(-0.1432*W + 0.6512*X) - 0.7071068*Y) +
 *     0.186633*j(0.9396926*W + 0.1855740*X)*?
 *
 * * Move common factors in
 * Y = (j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X) + 0.6554516*0.795968*?*Y) -
 *     (j(-0.1432*0.676392*W + 0.6512*0.676392*X) - 0.7071068*0.676392*Y) +
 *     j(0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X)
 *
 * * Clean up extraneous groupings
 * Y = j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X) + 0.6554516*0.795968*?*Y -
 *     j(-0.1432*0.676392*W + 0.6512*0.676392*X) + 0.7071068*0.676392*Y +
 *     j(0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X)
 *
 * * Move phase shifts together and combine them
 * Y = j(-0.3420201*0.795968*?*W + 0.5098604*0.795968*?*X - -0.1432*0.676392*W -
 *       0.6512*0.676392*X + 0.9396926*0.186633*?*W + 0.1855740*0.186633*?*X) +
 *     0.6554516*0.795968*?*Y + 0.7071068*0.676392*Y
 *
 * * Reorder terms
 * Y = j(-0.3420201*0.795968*?*W + 0.1432*0.676392*W + 0.9396926*0.186633*?*W +
 *       0.5098604*0.795968*?*X + -0.6512*0.676392*X + 0.1855740*0.186633*?*X) +
 *     0.7071068*0.676392*Y + 0.6554516*0.795968*?*Y
 *
 * * Move common factors out
 * Y = j((-0.3420201*0.795968*? + 0.1432*0.676392 + 0.9396926*0.186633*?)*W +
 *       ( 0.5098604*0.795968*? + -0.6512*0.676392 + 0.1855740*0.186633*?)*X) +
 *     (0.7071068*0.676392 + 0.6554516*0.795968*?)*Y
 *
 * * Result w/ 0.5 factor:
 *   -0.3420201*0.795968*0.5 + 0.1432*0.676392 + 0.9396926*0.186633*0.5 = 0.04843*W
 *   0.5098604*0.795968*0.5 + -0.6512*0.676392 + 0.1855740*0.186633*0.5 = -0.22023*X
 *   0.7071068*0.676392 + 0.6554516*0.795968*0.5 = 0.73914*Y
 *   -> Y = j(0.04843*W + -0.22023*X) + 0.73914*Y
 *
 * * Result w/o 0.5 factor:
 *   -0.3420201*0.795968 + 0.1432*0.676392 + 0.9396926*0.186633 = 0.00000*W
 *   0.5098604*0.795968 + -0.6512*0.676392 + 0.1855740*0.186633 = 0.00000*X
 *   0.7071068*0.676392 + 0.6554516*0.795968 = 1.00000*Y
 *   -> Y = j(0.00000*W + 0.00000*X) + 1.00000*Y
 *
 * Not halving produces a result matching the original input.
 */
209 void UhjDecoder::decode(const al::span<const float> InSamples, const std::size_t InChannels,
210 const al::span<FloatBufferLine> OutSamples, const std::size_t SamplesToDo)
212 ASSUME(SamplesToDo > 0);
214 auto woutput = al::span{OutSamples[0]};
215 auto xoutput = al::span{OutSamples[1]};
216 auto youtput = al::span{OutSamples[2]};
218 /* Add a delay to the input channels, to align it with the all-passed
219 * signal.
222 /* S = Left + Right */
223 for(std::size_t i{0};i < SamplesToDo;++i)
224 mS[sFilterDelay+i] = InSamples[i*InChannels + 0] + InSamples[i*InChannels + 1];
226 /* D = Left - Right */
227 for(std::size_t i{0};i < SamplesToDo;++i)
228 mD[sFilterDelay+i] = InSamples[i*InChannels + 0] - InSamples[i*InChannels + 1];
230 if(InChannels > 2)
232 /* T */
233 for(std::size_t i{0};i < SamplesToDo;++i)
234 mT[sFilterDelay+i] = InSamples[i*InChannels + 2];
236 if(InChannels > 3)
238 /* Q */
239 for(std::size_t i{0};i < SamplesToDo;++i)
240 mQ[sFilterDelay+i] = InSamples[i*InChannels + 3];
243 /* Precompute j(0.828331*D + 0.767820*T) and store in xoutput. */
244 auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
245 std::transform(mD.cbegin(), mD.cbegin()+SamplesToDo+sFilterDelay, mT.cbegin(), tmpiter,
246 [](const float d, const float t) noexcept { return 0.828331f*d + 0.767820f*t; });
247 std::copy_n(mTemp.cbegin()+SamplesToDo, mDTHistory.size(), mDTHistory.begin());
248 PShift.process(xoutput.first(SamplesToDo), mTemp);
250 for(std::size_t i{0};i < SamplesToDo;++i)
252 /* W = 0.981532*S + 0.197484*j(0.828331*D + 0.767820*T) */
253 woutput[i] = 0.981532f*mS[i] + 0.197484f*xoutput[i];
254 /* X = 0.418496*S - j(0.828331*D + 0.767820*T) */
255 xoutput[i] = 0.418496f*mS[i] - xoutput[i];
258 /* Precompute j*S and store in youtput. */
259 tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
260 std::copy_n(mS.cbegin(), SamplesToDo+sFilterDelay, tmpiter);
261 std::copy_n(mTemp.cbegin()+SamplesToDo, mSHistory.size(), mSHistory.begin());
262 PShift.process(youtput.first(SamplesToDo), mTemp);
264 for(std::size_t i{0};i < SamplesToDo;++i)
266 /* Y = 0.795968*D - 0.676392*T + j(0.186633*S) */
267 youtput[i] = 0.795968f*mD[i] - 0.676392f*mT[i] + 0.186633f*youtput[i];
270 if(OutSamples.size() > 3)
272 auto zoutput = al::span{OutSamples[3]};
273 /* Z = 1.023332*Q */
274 for(std::size_t i{0};i < SamplesToDo;++i)
275 zoutput[i] = 1.023332f*mQ[i];
278 std::copy(mS.begin()+SamplesToDo, mS.begin()+SamplesToDo+sFilterDelay, mS.begin());
279 std::copy(mD.begin()+SamplesToDo, mD.begin()+SamplesToDo+sFilterDelay, mD.begin());
280 std::copy(mT.begin()+SamplesToDo, mT.begin()+SamplesToDo+sFilterDelay, mT.begin());
281 std::copy(mQ.begin()+SamplesToDo, mQ.begin()+SamplesToDo+sFilterDelay, mQ.begin());
/* This is an alternative equation for decoding 2-channel UHJ. Not sure what
 * the intended benefit is over the above equation as this slightly reduces the
 * amount of the original left response and has more of the phase-shifted
 * forward response on the left response.
 *
 * This decoding is done as:
 *
 * S = Left + Right
 * D = Left - Right
 *
 * W = 0.981530*S + j*0.163585*D
 * X = 0.418504*S - j*0.828347*D
 * Y = 0.762956*D + j*0.384230*S
 *
 * where j is a +90 degree phase shift.
 *
 * NOTE: As above, S and D should not be halved. The only consequence of
 * halving here is merely a -6dB reduction in output, but it's still incorrect.
 */
303 void UhjDecoder::decode2(const al::span<const float> InSamples,
304 const al::span<FloatBufferLine> OutSamples, const std::size_t SamplesToDo)
306 ASSUME(SamplesToDo > 0);
308 auto woutput = al::span{OutSamples[0]};
309 auto xoutput = al::span{OutSamples[1]};
310 auto youtput = al::span{OutSamples[2]};
312 /* S = Left + Right */
313 for(std::size_t i{0};i < SamplesToDo;++i)
314 mS[sFilterDelay+i] = InSamples[i*2 + 0] + InSamples[i*2 + 1];
316 /* D = Left - Right */
317 for(std::size_t i{0};i < SamplesToDo;++i)
318 mD[sFilterDelay+i] = InSamples[i*2 + 0] - InSamples[i*2 + 1];
320 /* Precompute j*D and store in xoutput. */
321 auto tmpiter = std::copy(mDTHistory.cbegin(), mDTHistory.cend(), mTemp.begin());
322 std::copy_n(mD.cbegin(), SamplesToDo+sFilterDelay, tmpiter);
323 std::copy_n(mTemp.cbegin()+SamplesToDo, mDTHistory.size(), mDTHistory.begin());
324 PShift.process(xoutput.first(SamplesToDo), mTemp);
326 for(std::size_t i{0};i < SamplesToDo;++i)
328 /* W = 0.981530*S + j*0.163585*D */
329 woutput[i] = 0.981530f*mS[i] + 0.163585f*xoutput[i];
330 /* X = 0.418504*S - j*0.828347*D */
331 xoutput[i] = 0.418504f*mS[i] - 0.828347f*xoutput[i];
334 /* Precompute j*S and store in youtput. */
335 tmpiter = std::copy(mSHistory.cbegin(), mSHistory.cend(), mTemp.begin());
336 std::copy_n(mS.cbegin(), SamplesToDo+sFilterDelay, tmpiter);
337 std::copy_n(mTemp.cbegin()+SamplesToDo, mSHistory.size(), mSHistory.begin());
338 PShift.process(youtput.first(SamplesToDo), mTemp);
340 for(std::size_t i{0};i < SamplesToDo;++i)
342 /* Y = 0.762956*D + j*0.384230*S */
343 youtput[i] = 0.762956f*mD[i] + 0.384230f*youtput[i];
346 std::copy(mS.begin()+SamplesToDo, mS.begin()+SamplesToDo+sFilterDelay, mS.begin());
347 std::copy(mD.begin()+SamplesToDo, mD.begin()+SamplesToDo+sFilterDelay, mD.begin());
351 int main(al::span<std::string_view> args)
353 if(args.size() < 2 || args[1] == "-h" || args[1] == "--help")
355 printf("Usage: %.*s <[options] filename.wav...>\n\n"
356 " Options:\n"
357 " --general Use the general equations for 2-channel UHJ (default).\n"
358 " --alternative Use the alternative equations for 2-channel UHJ.\n"
359 "\n"
360 "Note: When decoding 2-channel UHJ to an .amb file, the result should not use\n"
361 "the normal B-Format shelf filters! Only 3- and 4-channel UHJ can accurately\n"
362 "reconstruct the original B-Format signal.",
363 al::sizei(args[0]), args[0].data());
364 return 1;
367 std::size_t num_files{0}, num_decoded{0};
368 bool use_general{true};
369 for(size_t fidx{1};fidx < args.size();++fidx)
371 if(args[fidx] == "--general")
373 use_general = true;
374 continue;
376 if(args[fidx] == "--alternative")
378 use_general = false;
379 continue;
381 ++num_files;
382 SF_INFO ininfo{};
383 SndFilePtr infile{sf_open(std::string{args[fidx]}.c_str(), SFM_READ, &ininfo)};
384 if(!infile)
386 fprintf(stderr, "Failed to open %.*s\n", al::sizei(args[fidx]), args[fidx].data());
387 continue;
389 if(sf_command(infile.get(), SFC_WAVEX_GET_AMBISONIC, nullptr, 0) == SF_AMBISONIC_B_FORMAT)
391 fprintf(stderr, "%.*s is already B-Format\n", al::sizei(args[fidx]),
392 args[fidx].data());
393 continue;
395 uint outchans{};
396 if(ininfo.channels == 2)
397 outchans = 3;
398 else if(ininfo.channels == 3 || ininfo.channels == 4)
399 outchans = static_cast<uint>(ininfo.channels);
400 else
402 fprintf(stderr, "%.*s is not a 2-, 3-, or 4-channel file\n", al::sizei(args[fidx]),
403 args[fidx].data());
404 continue;
406 printf("Converting %.*s from %d-channel UHJ%s...\n", al::sizei(args[fidx]),
407 args[fidx].data(), ininfo.channels,
408 (ininfo.channels == 2) ? use_general ? " (general)" : " (alternative)" : "");
410 std::string outname{args[fidx]};
411 auto lastslash = outname.find_last_of('/');
412 if(lastslash != std::string::npos)
413 outname.erase(0, lastslash+1);
414 auto lastdot = outname.find_last_of('.');
415 if(lastdot != std::string::npos)
416 outname.resize(lastdot+1);
417 outname += "amb";
419 FilePtr outfile{fopen(outname.c_str(), "wb")};
420 if(!outfile)
422 fprintf(stderr, "Failed to create %s\n", outname.c_str());
423 continue;
426 fputs("RIFF", outfile.get());
427 fwrite32le(0xFFFFFFFF, outfile.get()); // 'RIFF' header len; filled in at close
429 fputs("WAVE", outfile.get());
431 fputs("fmt ", outfile.get());
432 fwrite32le(40, outfile.get()); // 'fmt ' header len; 40 bytes for EXTENSIBLE
434 // 16-bit val, format type id (extensible: 0xFFFE)
435 fwrite16le(0xFFFE, outfile.get());
436 // 16-bit val, channel count
437 fwrite16le(static_cast<ushort>(outchans), outfile.get());
438 // 32-bit val, frequency
439 fwrite32le(static_cast<uint>(ininfo.samplerate), outfile.get());
440 // 32-bit val, bytes per second
441 fwrite32le(static_cast<uint>(ininfo.samplerate)*outchans*uint{sizeof(float)}, outfile.get());
442 // 16-bit val, frame size
443 fwrite16le(static_cast<ushort>(sizeof(float)*outchans), outfile.get());
444 // 16-bit val, bits per sample
445 fwrite16le(static_cast<ushort>(sizeof(float)*8), outfile.get());
446 // 16-bit val, extra byte count
447 fwrite16le(22, outfile.get());
448 // 16-bit val, valid bits per sample
449 fwrite16le(static_cast<ushort>(sizeof(float)*8), outfile.get());
450 // 32-bit val, channel mask
451 fwrite32le(0, outfile.get());
452 // 16 byte GUID, sub-type format
453 fwrite(SUBTYPE_BFORMAT_FLOAT.data(), 1, SUBTYPE_BFORMAT_FLOAT.size(), outfile.get());
455 fputs("data", outfile.get());
456 fwrite32le(0xFFFFFFFF, outfile.get()); // 'data' header len; filled in at close
457 if(ferror(outfile.get()))
459 fprintf(stderr, "Error writing wave file header: %s (%d)\n",
460 std::generic_category().message(errno).c_str(), errno);
461 continue;
464 auto DataStart = ftell(outfile.get());
466 auto decoder = std::make_unique<UhjDecoder>();
467 auto inmem = std::vector<float>(size_t{BufferLineSize}*static_cast<uint>(ininfo.channels));
468 auto decmem = al::vector<std::array<float,BufferLineSize>, 16>(outchans);
469 auto outmem = std::vector<byte4>(size_t{BufferLineSize}*outchans);
471 /* A number of initial samples need to be skipped to cut the lead-in
472 * from the all-pass filter delay. The same number of samples need to
473 * be fed through the decoder after reaching the end of the input file
474 * to ensure none of the original input is lost.
476 std::size_t LeadIn{UhjDecoder::sFilterDelay};
477 sf_count_t LeadOut{UhjDecoder::sFilterDelay};
478 while(LeadOut > 0)
480 sf_count_t sgot{sf_readf_float(infile.get(), inmem.data(), BufferLineSize)};
481 sgot = std::max<sf_count_t>(sgot, 0);
482 if(sgot < BufferLineSize)
484 const sf_count_t remaining{std::min(BufferLineSize - sgot, LeadOut)};
485 std::fill_n(inmem.begin() + sgot*ininfo.channels, remaining*ininfo.channels, 0.0f);
486 sgot += remaining;
487 LeadOut -= remaining;
490 auto got = static_cast<std::size_t>(sgot);
491 if(ininfo.channels > 2 || use_general)
492 decoder->decode(inmem, static_cast<uint>(ininfo.channels), decmem, got);
493 else
494 decoder->decode2(inmem, decmem, got);
495 if(LeadIn >= got)
497 LeadIn -= got;
498 continue;
501 got -= LeadIn;
502 for(std::size_t i{0};i < got;++i)
504 /* Attenuate by -3dB for FuMa output levels. */
505 constexpr auto inv_sqrt2 = static_cast<float>(1.0/al::numbers::sqrt2);
506 for(std::size_t j{0};j < outchans;++j)
507 outmem[i*outchans + j] = f32AsLEBytes(decmem[j][LeadIn+i] * inv_sqrt2);
509 LeadIn = 0;
511 std::size_t wrote{fwrite(outmem.data(), sizeof(byte4)*outchans, got, outfile.get())};
512 if(wrote < got)
514 fprintf(stderr, "Error writing wave data: %s (%d)\n",
515 std::generic_category().message(errno).c_str(), errno);
516 break;
520 auto DataEnd = ftell(outfile.get());
521 if(DataEnd > DataStart)
523 long dataLen{DataEnd - DataStart};
524 if(fseek(outfile.get(), 4, SEEK_SET) == 0)
525 fwrite32le(static_cast<uint>(DataEnd-8), outfile.get()); // 'WAVE' header len
526 if(fseek(outfile.get(), DataStart-4, SEEK_SET) == 0)
527 fwrite32le(static_cast<uint>(dataLen), outfile.get()); // 'data' header len
529 fflush(outfile.get());
530 ++num_decoded;
532 if(num_decoded == 0)
533 fprintf(stderr, "Failed to decode any input files\n");
534 else if(num_decoded < num_files)
535 fprintf(stderr, "Decoded %zu of %zu files\n", num_decoded, num_files);
536 else
537 printf("Decoded %zu file%s\n", num_decoded, (num_decoded==1)?"":"s");
538 return 0;
541 } /* namespace */
543 int main(int argc, char **argv)
545 assert(argc >= 0);
546 auto args = std::vector<std::string_view>(static_cast<unsigned int>(argc));
547 std::copy_n(argv, args.size(), args.begin());
548 return main(al::span{args});