Merge pull request #438 from s4Ys369/revert-434-patch-1
[sm64pc.git] / tools / aifc_decode.c
blob102e62bf70927f3fbd5bcbe6b4153e4149c984e7
/**
 * Bruteforcing decoder for converting ADPCM-encoded AIFC into AIFF, in a way
 * that roundtrips with vadpcm_enc.
 */
5 #include <unistd.h>
6 #include <assert.h>
7 #include <math.h>
8 #include <string.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <stdarg.h>
// Short aliases for the integer and floating-point types used throughout
// this file. The number in each name is the width in bits that the rest of
// the code relies on (e.g. s16 values are read and written as 2-byte fields).
typedef signed char s8;
typedef short s16;
typedef int s32;
typedef unsigned char u8;
typedef unsigned short u16;
typedef unsigned int u32;
typedef unsigned long long u64;
typedef float f32;
// Byte-order helpers. The file data handled here is big-endian, so on a
// big-endian host every helper is a no-op and on a little-endian host every
// helper swaps bytes.
//
//   bswap16(x) / bswap32(x)  - expressions yielding the converted value
//   BSWAP16(x) / BSWAP32(x)  - statements converting the lvalue x in place
//   BSWAP16_MANY(x, n)       - statement converting the first n 16-bit
//                              elements of array x in place
//
// An undefined macro evaluates to 0 inside #if, so without this guard a
// compiler lacking __BYTE_ORDER__ would silently take the big-endian branch.
#if !defined(__BYTE_ORDER__) || !defined(__ORDER_BIG_ENDIAN__)
# error "__BYTE_ORDER__ / __ORDER_BIG_ENDIAN__ are required to select the byte order"
#endif

#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define bswap16(x) (x)
# define bswap32(x) (x)
# define BSWAP16(x) ((void) 0)
# define BSWAP32(x) ((void) 0)
# define BSWAP16_MANY(x, n) ((void) 0)
#else
# define bswap16(x) __builtin_bswap16(x)
# define bswap32(x) __builtin_bswap32(x)
// The statement forms are wrapped in do/while(0) so that they behave as a
// single statement (e.g. as the body of an unbraced if/else).
# define BSWAP16(x) do { (x) = __builtin_bswap16(x); } while (0)
# define BSWAP32(x) do { (x) = __builtin_bswap32(x); } while (0)
# define BSWAP16_MANY(x, n) \
    do { \
        for (int bswap_i_ = 0; bswap_i_ < (n); bswap_i_++) { \
            (x)[bswap_i_] = __builtin_bswap16((x)[bswap_i_]); \
        } \
    } while (0)
#endif
// Shorthands for the GCC/Clang attributes used in this file.
#define NORETURN __attribute__((noreturn)) // function never returns to its caller
#define UNUSED __attribute__((unused))     // suppress unused-entity warnings
39 typedef struct {
40 u32 ckID;
41 u32 ckSize;
42 } ChunkHeader;
44 typedef struct {
45 u32 ckID;
46 u32 ckSize;
47 u32 formType;
48 } Chunk;
50 typedef struct {
51 s16 numChannels;
52 u16 numFramesH;
53 u16 numFramesL;
54 s16 sampleSize;
55 s16 sampleRate[5]; // 80-bit float
56 u16 compressionTypeH;
57 u16 compressionTypeL;
58 } CommonChunk;
60 typedef struct {
61 s16 MarkerID;
62 u16 positionH;
63 u16 positionL;
64 } Marker;
66 typedef struct {
67 s16 playMode;
68 s16 beginLoop;
69 s16 endLoop;
70 } Loop;
72 typedef struct {
73 s8 baseNote;
74 s8 detune;
75 s8 lowNote;
76 s8 highNote;
77 s8 lowVelocity;
78 s8 highVelocity;
79 s16 gain;
80 Loop sustainLoop;
81 Loop releaseLoop;
82 } InstrumentChunk;
84 typedef struct {
85 s32 offset;
86 s32 blockSize;
87 } SoundDataChunk;
89 typedef struct {
90 s16 version;
91 s16 order;
92 s16 nEntries;
93 } CodeChunk;
95 typedef struct
97 u32 start;
98 u32 end;
99 u32 count;
100 s16 state[16];
101 } ALADPCMloop;
// Argument summary printed after the program name when too few arguments
// are given.
static const char usage[] = "input.aifc output.aiff";
// Program name (argv[0]) and input path, set by main() and used in error
// messages.
static const char *progname, *infilename;
// fread() wrapper that aborts via fail_parse() unless exactly `c` items were
// read. Wrapped in do/while(0) so it acts as a single statement; note that
// `c` is evaluated twice.
#define checked_fread(a, b, c, d) \
    do { \
        if (fread((a), (b), (c), (d)) != (size_t) (c)) { \
            fail_parse("error parsing file"); \
        } \
    } while (0)
109 NORETURN
110 void fail_parse(const char *fmt, ...)
112 char *formatted = NULL;
113 va_list ap;
114 va_start(ap, fmt);
115 int size = vsnprintf(NULL, 0, fmt, ap);
116 va_end(ap);
117 if (size >= 0) {
118 size++;
119 formatted = malloc(size);
120 if (formatted != NULL) {
121 va_start(ap, fmt);
122 size = vsnprintf(formatted, size, fmt, ap);
123 va_end(ap);
124 if (size < 0) {
125 free(formatted);
126 formatted = NULL;
131 if (formatted != NULL) {
132 fprintf(stderr, "%s: %s [%s]\n", progname, formatted, infilename);
133 free(formatted);
135 exit(1);
138 s32 myrand()
140 static u64 state = 1619236481962341ULL;
141 state *= 3123692312231ULL;
142 state++;
143 return state >> 33;
146 s16 qsample(s32 x, s32 scale)
148 // Compute x / 2^scale rounded to the nearest integer, breaking ties towards zero.
149 if (scale == 0) return x;
150 return (x + (1 << (scale - 1)) - (x > 0)) >> scale;
153 s16 clamp_to_s16(s32 x)
155 if (x < -0x8000) return -0x8000;
156 if (x > 0x7fff) return 0x7fff;
157 return (s16) x;
160 s32 toi4(s32 x)
162 if (x >= 8) return x - 16;
163 return x;
166 s32 readaifccodebook(FILE *fhandle, s32 ****table, s16 *order, s16 *npredictors)
168 checked_fread(order, sizeof(s16), 1, fhandle);
169 BSWAP16(*order);
170 checked_fread(npredictors, sizeof(s16), 1, fhandle);
171 BSWAP16(*npredictors);
172 *table = malloc(*npredictors * sizeof(s32 **));
173 for (s32 i = 0; i < *npredictors; i++) {
174 (*table)[i] = malloc(8 * sizeof(s32 *));
175 for (s32 j = 0; j < 8; j++) {
176 (*table)[i][j] = malloc((*order + 8) * sizeof(s32));
180 for (s32 i = 0; i < *npredictors; i++) {
181 s32 **table_entry = (*table)[i];
182 for (s32 j = 0; j < *order; j++) {
183 for (s32 k = 0; k < 8; k++) {
184 s16 ts;
185 checked_fread(&ts, sizeof(s16), 1, fhandle);
186 BSWAP16(ts);
187 table_entry[k][j] = ts;
191 for (s32 k = 1; k < 8; k++) {
192 table_entry[k][*order] = table_entry[k - 1][*order - 1];
195 table_entry[0][*order] = 1 << 11;
197 for (s32 k = 1; k < 8; k++) {
198 s32 j = 0;
199 for (; j < k; j++) {
200 table_entry[j][k + *order] = 0;
203 for (; j < 8; j++) {
204 table_entry[j][k + *order] = table_entry[j - k][*order];
208 return 0;
211 ALADPCMloop *readlooppoints(FILE *ifile, s16 *nloops)
213 checked_fread(nloops, sizeof(s16), 1, ifile);
214 BSWAP16(*nloops);
215 ALADPCMloop *al = malloc(*nloops * sizeof(ALADPCMloop));
216 for (s32 i = 0; i < *nloops; i++) {
217 checked_fread(&al[i], sizeof(ALADPCMloop), 1, ifile);
218 BSWAP32(al[i].start);
219 BSWAP32(al[i].end);
220 BSWAP32(al[i].count);
221 BSWAP16_MANY(al[i].state, 16);
223 return al;
226 s32 inner_product(s32 length, s32 *v1, s32 *v2)
228 s32 out = 0;
229 for (s32 i = 0; i < length; i++) {
230 out += v1[i] * v2[i];
233 // Compute "out / 2^11", rounded down.
234 s32 dout = out / (1 << 11);
235 s32 fiout = dout * (1 << 11);
236 return dout - (out - fiout < 0);
239 void my_decodeframe(u8 *frame, s32 *state, s32 order, s32 ***coefTable)
241 s32 ix[16];
243 u8 header = frame[0];
244 s32 scale = 1 << (header >> 4);
245 s32 optimalp = header & 0xf;
247 for (s32 i = 0; i < 16; i += 2) {
248 u8 c = frame[1 + i/2];
249 ix[i] = c >> 4;
250 ix[i + 1] = c & 0xf;
253 for (s32 i = 0; i < 16; i++) {
254 if (ix[i] >= 8) ix[i] -= 16;
255 ix[i] *= scale;
258 for (s32 j = 0; j < 2; j++) {
259 s32 in_vec[16];
260 if (j == 0) {
261 for (s32 i = 0; i < order; i++) {
262 in_vec[i] = state[16 - order + i];
264 } else {
265 for (s32 i = 0; i < order; i++) {
266 in_vec[i] = state[8 - order + i];
270 for (s32 i = 0; i < 8; i++) {
271 s32 ind = j * 8 + i;
272 in_vec[order + i] = ix[ind];
273 state[ind] = inner_product(order + i, coefTable[optimalp][i], in_vec) + ix[ind];
278 void my_encodeframe(u8 *out, s16 *inBuffer, s32 *state, s32 ***coefTable, s32 order, s32 npredictors)
280 s16 ix[16];
281 s32 prediction[16];
282 s32 inVector[16];
283 s32 saveState[16];
284 s32 optimalp = 0;
285 s32 scale;
286 s32 ie[16];
287 s32 e[16];
288 f32 min = 1e30;
290 for (s32 k = 0; k < npredictors; k++) {
291 for (s32 j = 0; j < 2; j++) {
292 for (s32 i = 0; i < order; i++) {
293 inVector[i] = (j == 0 ? state[16 - order + i] : inBuffer[8 - order + i]);
296 for (s32 i = 0; i < 8; i++) {
297 prediction[j * 8 + i] = inner_product(order + i, coefTable[k][i], inVector);
298 e[j * 8 + i] = inVector[i + order] = inBuffer[j * 8 + i] - prediction[j * 8 + i];
302 f32 se = 0.0f;
303 for (s32 j = 0; j < 16; j++) {
304 se += (f32) e[j] * (f32) e[j];
307 if (se < min) {
308 min = se;
309 optimalp = k;
313 for (s32 j = 0; j < 2; j++) {
314 for (s32 i = 0; i < order; i++) {
315 inVector[i] = (j == 0 ? state[16 - order + i] : inBuffer[8 - order + i]);
318 for (s32 i = 0; i < 8; i++) {
319 prediction[j * 8 + i] = inner_product(order + i, coefTable[optimalp][i], inVector);
320 e[j * 8 + i] = inVector[i + order] = inBuffer[j * 8 + i] - prediction[j * 8 + i];
324 for (s32 i = 0; i < 16; i++) {
325 ie[i] = clamp_to_s16(e[i]);
328 s32 max = 0;
329 for (s32 i = 0; i < 16; i++) {
330 if (abs(ie[i]) > abs(max)) {
331 max = ie[i];
335 for (scale = 0; scale <= 12; scale++) {
336 if (max <= 7 && max >= -8) break;
337 max /= 2;
340 for (s32 i = 0; i < 16; i++) {
341 saveState[i] = state[i];
344 for (s32 nIter = 0, again = 1; nIter < 2 && again; nIter++) {
345 again = 0;
346 if (nIter == 1) scale++;
347 if (scale > 12) {
348 scale = 12;
351 for (s32 j = 0; j < 2; j++) {
352 s32 base = j * 8;
353 for (s32 i = 0; i < order; i++) {
354 inVector[i] = (j == 0 ?
355 saveState[16 - order + i] : state[8 - order + i]);
358 for (s32 i = 0; i < 8; i++) {
359 prediction[base + i] = inner_product(order + i, coefTable[optimalp][i], inVector);
360 s32 se = inBuffer[base + i] - prediction[base + i];
361 ix[base + i] = qsample(se, scale);
362 s32 cV = clamp_to_s16(ix[base + i]) - ix[base + i];
363 if (cV > 1 || cV < -1) again = 1;
364 ix[base + i] += cV;
365 inVector[i + order] = ix[base + i] * (1 << scale);
366 state[base + i] = prediction[base + i] + inVector[i + order];
371 u8 header = (scale << 4) | (optimalp & 0xf);
372 out[0] = header;
373 for (s32 i = 0; i < 16; i += 2) {
374 u8 c = ((ix[i] & 0xf) << 4) | (ix[i + 1] & 0xf);
375 out[1 + i/2] = c;
379 void permute(s16 *out, s32 *in, s32 scale)
381 for (s32 i = 0; i < 16; i++) {
382 out[i] = clamp_to_s16(in[i] - scale / 2 + myrand() % (scale + 1));
386 void write_header(FILE *ofile, const char *id, s32 size)
388 fwrite(id, 4, 1, ofile);
389 BSWAP32(size);
390 fwrite(&size, sizeof(s32), 1, ofile);
393 int main(int argc, char **argv)
395 s16 order = -1;
396 s16 nloops = 0;
397 ALADPCMloop *aloops = NULL;
398 s16 npredictors = -1;
399 s32 ***coefTable = NULL;
400 s32 state[16];
401 s32 soundPointer = -1;
402 s32 currPos = 0;
403 s32 nSamples = 0;
404 Chunk FormChunk;
405 ChunkHeader Header;
406 CommonChunk CommChunk;
407 InstrumentChunk InstChunk;
408 SoundDataChunk SndDChunk;
409 FILE *ifile;
410 FILE *ofile;
411 progname = argv[0];
413 if (argc < 3) {
414 fprintf(stderr, "%s %s\n", progname, usage);
415 exit(1);
418 infilename = argv[1];
420 if ((ifile = fopen(infilename, "rb")) == NULL) {
421 fail_parse("AIFF-C file could not be opened");
422 exit(1);
425 if ((ofile = fopen(argv[2], "wb")) == NULL) {
426 fprintf(stderr, "%s: output file could not be opened [%s]\n", progname, argv[2]);
427 exit(1);
430 memset(&InstChunk, 0, sizeof(InstChunk));
432 checked_fread(&FormChunk, sizeof(FormChunk), 1, ifile);
433 BSWAP32(FormChunk.ckID);
434 BSWAP32(FormChunk.formType);
435 if ((FormChunk.ckID != 0x464f524d) || (FormChunk.formType != 0x41494643)) { // FORM, AIFC
436 fail_parse("not an AIFF-C file");
439 for (;;) {
440 s32 num = fread(&Header, sizeof(Header), 1, ifile);
441 u32 ts;
442 if (num <= 0) break;
443 BSWAP32(Header.ckID);
444 BSWAP32(Header.ckSize);
446 Header.ckSize++;
447 Header.ckSize &= ~1;
448 s32 offset = ftell(ifile);
450 switch (Header.ckID) {
451 case 0x434f4d4d: // COMM
452 checked_fread(&CommChunk, sizeof(CommChunk), 1, ifile);
453 BSWAP16(CommChunk.numChannels);
454 BSWAP16(CommChunk.numFramesH);
455 BSWAP16(CommChunk.numFramesL);
456 BSWAP16(CommChunk.sampleSize);
457 BSWAP16(CommChunk.compressionTypeH);
458 BSWAP16(CommChunk.compressionTypeL);
459 s32 cType = (CommChunk.compressionTypeH << 16) + CommChunk.compressionTypeL;
460 if (cType != 0x56415043) { // VAPC
461 fail_parse("file is of the wrong compression type");
463 if (CommChunk.numChannels != 1) {
464 fail_parse("file contains %d channels, only 1 channel supported", CommChunk.numChannels);
466 if (CommChunk.sampleSize != 16) {
467 fail_parse("file contains %d bit samples, only 16 bit samples supported", CommChunk.sampleSize);
470 nSamples = (CommChunk.numFramesH << 16) + CommChunk.numFramesL;
472 // Allow broken input lengths
473 if (nSamples % 16) {
474 nSamples--;
477 if (nSamples % 16 != 0) {
478 fail_parse("number of chunks must be a multiple of 16, found %d", nSamples);
480 break;
482 case 0x53534e44: // SSND
483 checked_fread(&SndDChunk, sizeof(SndDChunk), 1, ifile);
484 BSWAP32(SndDChunk.offset);
485 BSWAP32(SndDChunk.blockSize);
486 assert(SndDChunk.offset == 0);
487 assert(SndDChunk.blockSize == 0);
488 soundPointer = ftell(ifile);
489 break;
491 case 0x4150504c: // APPL
492 checked_fread(&ts, sizeof(u32), 1, ifile);
493 BSWAP32(ts);
494 if (ts == 0x73746f63) { // stoc
495 u8 len;
496 checked_fread(&len, 1, 1, ifile);
497 if (len == 11) {
498 char ChunkName[12];
499 s16 version;
500 checked_fread(ChunkName, 11, 1, ifile);
501 ChunkName[11] = '\0';
502 if (strcmp("VADPCMCODES", ChunkName) == 0) {
503 checked_fread(&version, sizeof(s16), 1, ifile);
504 BSWAP16(version);
505 if (version != 1) {
506 fail_parse("Unknown codebook chunk version");
508 readaifccodebook(ifile, &coefTable, &order, &npredictors);
510 else if (strcmp("VADPCMLOOPS", ChunkName) == 0) {
511 checked_fread(&version, sizeof(s16), 1, ifile);
512 BSWAP16(version);
513 if (version != 1) {
514 fail_parse("Unknown loop chunk version");
516 aloops = readlooppoints(ifile, &nloops);
517 if (nloops != 1) {
518 fail_parse("Only a single loop supported");
523 break;
526 fseek(ifile, offset + Header.ckSize, SEEK_SET);
529 if (coefTable == NULL) {
530 fail_parse("Codebook missing from bitstream");
533 for (s32 i = 0; i < order; i++) {
534 state[15 - i] = 0;
537 u32 outputBytes = nSamples * sizeof(s16);
538 u8 *outputBuf = malloc(outputBytes);
540 fseek(ifile, soundPointer, SEEK_SET);
541 while (currPos < nSamples) {
542 u8 input[9];
543 u8 encoded[9];
544 s32 lastState[16];
545 s32 decoded[16];
546 s16 guess[16];
547 s16 origGuess[16];
549 memcpy(lastState, state, sizeof(lastState));
550 checked_fread(input, 9, 1, ifile);
552 // Decode for real
553 my_decodeframe(input, state, order, coefTable);
554 memcpy(decoded, state, sizeof(lastState));
556 // Create a guess from that, by clamping to 16 bits
557 for (s32 i = 0; i < 16; i++) {
558 origGuess[i] = clamp_to_s16(state[i]);
561 // Encode the guess
562 memcpy(state, lastState, sizeof(lastState));
563 memcpy(guess, origGuess, sizeof(guess));
564 my_encodeframe(encoded, guess, state, coefTable, order, npredictors);
566 // If it doesn't match, randomly round numbers until it does.
567 if (memcmp(input, encoded, 9) != 0) {
568 s32 scale = 1 << (input[0] >> 4);
569 do {
570 permute(guess, decoded, scale);
571 memcpy(state, lastState, sizeof(lastState));
572 my_encodeframe(encoded, guess, state, coefTable, order, npredictors);
573 } while (memcmp(input, encoded, 9) != 0);
575 // Bring the matching closer to the original decode (not strictly
576 // necessary, but it will move us closer to the target on average).
577 for (s32 failures = 0; failures < 50; failures++) {
578 s32 ind = myrand() % 16;
579 s32 old = guess[ind];
580 if (old == origGuess[ind]) continue;
581 guess[ind] = origGuess[ind];
582 if (myrand() % 2) guess[ind] += (old - origGuess[ind]) / 2;
583 memcpy(state, lastState, sizeof(lastState));
584 my_encodeframe(encoded, guess, state, coefTable, order, npredictors);
585 if (memcmp(input, encoded, 9) == 0) {
586 failures = -1;
588 else {
589 guess[ind] = old;
594 memcpy(state, decoded, sizeof(lastState));
595 BSWAP16_MANY(guess, 16);
596 memcpy(outputBuf + currPos * 2, guess, sizeof(guess));
597 currPos += 16;
600 // Write an incomplete file header. We'll fill in the size later.
601 fwrite("FORM\0\0\0\0AIFF", 12, 1, ofile);
603 // Subtract 4 from the COMM size to skip the compression field.
604 write_header(ofile, "COMM", sizeof(CommonChunk) - 4);
605 CommChunk.numFramesH = nSamples >> 16;
606 CommChunk.numFramesL = nSamples & 0xffff;
607 BSWAP16(CommChunk.numChannels);
608 BSWAP16(CommChunk.numFramesH);
609 BSWAP16(CommChunk.numFramesL);
610 BSWAP16(CommChunk.sampleSize);
611 fwrite(&CommChunk, sizeof(CommonChunk) - 4, 1, ofile);
613 if (nloops > 0) {
614 s32 startPos = aloops[0].start, endPos = aloops[0].end;
615 const char *markerNames[2] = {"start", "end"};
616 Marker markers[2] = {
617 {1, startPos >> 16, startPos & 0xffff},
618 {2, endPos >> 16, endPos & 0xffff}
620 write_header(ofile, "MARK", 2 + 2 * sizeof(Marker) + 1 + 5 + 1 + 3);
621 s16 numMarkers = bswap16(2);
622 fwrite(&numMarkers, sizeof(s16), 1, ofile);
623 for (s32 i = 0; i < 2; i++) {
624 u8 len = (u8) strlen(markerNames[i]);
625 BSWAP16(markers[i].MarkerID);
626 BSWAP16(markers[i].positionH);
627 BSWAP16(markers[i].positionL);
628 fwrite(&markers[i], sizeof(Marker), 1, ofile);
629 fwrite(&len, 1, 1, ofile);
630 fwrite(markerNames[i], len, 1, ofile);
633 write_header(ofile, "INST", sizeof(InstrumentChunk));
634 InstChunk.sustainLoop.playMode = bswap16(1);
635 InstChunk.sustainLoop.beginLoop = bswap16(1);
636 InstChunk.sustainLoop.endLoop = bswap16(2);
637 InstChunk.releaseLoop.playMode = 0;
638 InstChunk.releaseLoop.beginLoop = 0;
639 InstChunk.releaseLoop.endLoop = 0;
640 fwrite(&InstChunk, sizeof(InstrumentChunk), 1, ofile);
643 // Save the coefficient table for use when encoding. Ideally this wouldn't
644 // be needed and "tabledesign -s 1" would generate the right table, but in
645 // practice it's difficult to adjust samples to make that happen.
646 write_header(ofile, "APPL", 4 + 12 + sizeof(CodeChunk) + npredictors * order * 8 * 2);
647 fwrite("stoc", 4, 1, ofile);
648 CodeChunk cChunk;
649 cChunk.version = bswap16(1);
650 cChunk.order = bswap16(order);
651 cChunk.nEntries = bswap16(npredictors);
652 fwrite("\x0bVADPCMCODES", 12, 1, ofile);
653 fwrite(&cChunk, sizeof(CodeChunk), 1, ofile);
654 for (s32 i = 0; i < npredictors; i++) {
655 for (s32 j = 0; j < order; j++) {
656 for (s32 k = 0; k < 8; k++) {
657 s16 ts = bswap16(coefTable[i][k][j]);
658 fwrite(&ts, sizeof(s16), 1, ofile);
663 write_header(ofile, "SSND", outputBytes + 8);
664 SndDChunk.offset = 0;
665 SndDChunk.blockSize = 0;
666 fwrite(&SndDChunk, sizeof(SoundDataChunk), 1, ofile);
667 fwrite(outputBuf, outputBytes, 1, ofile);
669 // Fix the size in the header
670 s32 fileSize = bswap32(ftell(ofile) - 8);
671 fseek(ofile, 4, SEEK_SET);
672 fwrite(&fileSize, 4, 1, ofile);
674 fclose(ifile);
675 fclose(ofile);
676 return 0;