3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
6 * The ifft algorithms in this file have been largely inspired by Dan
7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html
9 * This file is part of a52dec, a free ATSC A-52 stream decoder.
10 * See http://liba52.sourceforge.net/ for updates.
12 * a52dec is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
17 * a52dec is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "a52_internal.h"
31 typedef struct complex_s
{
36 static uint8_t fftorder
[] = {
37 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
38 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
39 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
40 252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
41 2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
42 10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
43 254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
44 6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
47 /* Root values for IFFT */
48 static sample_t roots16
[3];
49 static sample_t roots32
[7];
50 static sample_t roots64
[15];
51 static sample_t roots128
[31];
53 /* Twiddle factors for IMDCT */
54 static complex_t pre1
[128];
55 static complex_t post1
[64];
56 static complex_t pre2
[64];
57 static complex_t post2
[32];
59 static sample_t a52_imdct_window
[256];
61 static void (* ifft128
) (complex_t
* buf
);
62 static void (* ifft64
) (complex_t
* buf
);
64 static inline void ifft2 (complex_t
* buf
)
70 buf
[0].real
+= buf
[1].real
;
71 buf
[0].imag
+= buf
[1].imag
;
72 buf
[1].real
= r
- buf
[1].real
;
73 buf
[1].imag
= i
- buf
[1].imag
;
76 static inline void ifft4 (complex_t
* buf
)
78 sample_t tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
, tmp8
;
80 tmp1
= buf
[0].real
+ buf
[1].real
;
81 tmp2
= buf
[3].real
+ buf
[2].real
;
82 tmp3
= buf
[0].imag
+ buf
[1].imag
;
83 tmp4
= buf
[2].imag
+ buf
[3].imag
;
84 tmp5
= buf
[0].real
- buf
[1].real
;
85 tmp6
= buf
[0].imag
- buf
[1].imag
;
86 tmp7
= buf
[2].imag
- buf
[3].imag
;
87 tmp8
= buf
[3].real
- buf
[2].real
;
89 buf
[0].real
= tmp1
+ tmp2
;
90 buf
[0].imag
= tmp3
+ tmp4
;
91 buf
[2].real
= tmp1
- tmp2
;
92 buf
[2].imag
= tmp3
- tmp4
;
93 buf
[1].real
= tmp5
+ tmp7
;
94 buf
[1].imag
= tmp6
+ tmp8
;
95 buf
[3].real
= tmp5
- tmp7
;
96 buf
[3].imag
= tmp6
- tmp8
;
99 /* basic radix-2 ifft butterfly */
101 #define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \
102 t0 = MUL (W1, d1) + MUL (W0, d0); \
103 t1 = MUL (W0, d1) - MUL (W1, d0); \
106 /* radix-2 ifft butterfly with bias */
108 #define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \
109 t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \
110 t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \
113 /* the basic split-radix ifft butterfly */
115 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
116 BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \
117 BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \
118 tmp1 = tmp5 + tmp7; \
119 tmp2 = tmp6 + tmp8; \
120 tmp3 = tmp6 - tmp8; \
121 tmp4 = tmp7 - tmp5; \
122 a2.real = a0.real - tmp1; \
123 a2.imag = a0.imag - tmp2; \
124 a3.real = a1.real - tmp3; \
125 a3.imag = a1.imag - tmp4; \
132 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */
134 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
135 tmp1 = a2.real + a3.real; \
136 tmp2 = a2.imag + a3.imag; \
137 tmp3 = a2.imag - a3.imag; \
138 tmp4 = a3.real - a2.real; \
139 a2.real = a0.real - tmp1; \
140 a2.imag = a0.imag - tmp2; \
141 a3.real = a1.real - tmp3; \
142 a3.imag = a1.imag - tmp4; \
149 /* split-radix ifft butterfly, specialized for wr=wi */
151 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
152 tmp5 = MUL (a2.real + a2.imag, w); \
153 tmp6 = MUL (a2.imag - a2.real, w); \
154 tmp7 = MUL (a3.real - a3.imag, w); \
155 tmp8 = MUL (a3.imag + a3.real, w); \
156 tmp1 = tmp5 + tmp7; \
157 tmp2 = tmp6 + tmp8; \
158 tmp3 = tmp6 - tmp8; \
159 tmp4 = tmp7 - tmp5; \
160 a2.real = a0.real - tmp1; \
161 a2.imag = a0.imag - tmp2; \
162 a3.real = a1.real - tmp3; \
163 a3.imag = a1.imag - tmp4; \
170 static inline void ifft8 (complex_t
* buf
)
172 sample_t tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
, tmp8
;
177 BUTTERFLY_ZERO (buf
[0], buf
[2], buf
[4], buf
[6]);
178 BUTTERFLY_HALF (buf
[1], buf
[3], buf
[5], buf
[7], roots16
[1]);
181 static void ifft_pass (complex_t
* buf
, sample_t
* weight
, int n
)
186 sample_t tmp1
, tmp2
, tmp3
, tmp4
, tmp5
, tmp6
, tmp7
, tmp8
;
194 BUTTERFLY_ZERO (buf
[-1], buf1
[-1], buf2
[-1], buf3
[-1]);
199 BUTTERFLY (buf
[0], buf1
[0], buf2
[0], buf3
[0],
200 weight
[0], weight
[2*i
-n
]);
209 static void ifft16 (complex_t
* buf
)
214 ifft_pass (buf
, roots16
, 4);
217 static void ifft32 (complex_t
* buf
)
222 ifft_pass (buf
, roots32
, 8);
225 static void ifft64_c (complex_t
* buf
)
230 ifft_pass (buf
, roots64
, 16);
233 static void ifft128_c (complex_t
* buf
)
238 ifft_pass (buf
, roots64
, 16);
242 ifft_pass (buf
, roots128
, 32);
245 void a52_imdct_512 (sample_t
* data
, sample_t
* delay
, sample_t bias
)
248 sample_t t_r
, t_i
, a_r
, a_i
, b_r
, b_i
, w_1
, w_2
;
249 const sample_t
* window
= a52_imdct_window
;
252 for (i
= 0; i
< 128; i
++) {
256 BUTTERFLY_0 (buf
[i
].real
, buf
[i
].imag
, t_r
, t_i
, data
[k
], data
[255-k
]);
261 /* Post IFFT complex multiply plus IFFT complex conjugate*/
262 /* Window and convert to real valued signal */
263 for (i
= 0; i
< 64; i
++) {
264 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
267 BUTTERFLY_0 (a_r
, a_i
, t_i
, t_r
, buf
[i
].imag
, buf
[i
].real
);
268 BUTTERFLY_0 (b_r
, b_i
, t_r
, t_i
, buf
[127-i
].imag
, buf
[127-i
].real
);
271 w_2
= window
[255-2*i
];
272 BUTTERFLY_B (data
[255-2*i
], data
[2*i
], w_2
, w_1
, a_r
, delay
[2*i
]);
276 w_2
= window
[254-2*i
];
277 BUTTERFLY_B (data
[2*i
+1], data
[254-2*i
], w_1
, w_2
, b_r
, delay
[2*i
+1]);
282 void a52_imdct_256 (sample_t
* data
, sample_t
* delay
, sample_t bias
)
285 sample_t t_r
, t_i
, a_r
, a_i
, b_r
, b_i
, c_r
, c_i
, d_r
, d_i
, w_1
, w_2
;
286 const sample_t
* window
= a52_imdct_window
;
287 complex_t buf1
[64], buf2
[64];
289 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
290 for (i
= 0; i
< 64; i
++) {
294 BUTTERFLY_0 (buf1
[i
].real
, buf1
[i
].imag
, t_r
, t_i
, data
[k
], data
[254-k
]);
295 BUTTERFLY_0 (buf2
[i
].real
, buf2
[i
].imag
, t_r
, t_i
, data
[k
+1], data
[255-k
]);
301 /* Post IFFT complex multiply */
302 /* Window and convert to real valued signal */
303 for (i
= 0; i
< 32; i
++) {
304 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
307 BUTTERFLY_0 (a_r
, a_i
, t_i
, t_r
, buf1
[i
].imag
, buf1
[i
].real
);
308 BUTTERFLY_0 (b_r
, b_i
, t_r
, t_i
, buf1
[63-i
].imag
, buf1
[63-i
].real
);
309 BUTTERFLY_0 (c_r
, c_i
, t_i
, t_r
, buf2
[i
].imag
, buf2
[i
].real
);
310 BUTTERFLY_0 (d_r
, d_i
, t_r
, t_i
, buf2
[63-i
].imag
, buf2
[63-i
].real
);
313 w_2
= window
[255-2*i
];
314 BUTTERFLY_B (data
[255-2*i
], data
[2*i
], w_2
, w_1
, a_r
, delay
[2*i
]);
317 w_1
= window
[128+2*i
];
318 w_2
= window
[127-2*i
];
319 BUTTERFLY_B (data
[128+2*i
], data
[127-2*i
], w_1
, w_2
, a_i
, delay
[127-2*i
]);
320 delay
[127-2*i
] = c_r
;
323 w_2
= window
[254-2*i
];
324 BUTTERFLY_B (data
[254-2*i
], data
[2*i
+1], w_2
, w_1
, b_i
, delay
[2*i
+1]);
327 w_1
= window
[129+2*i
];
328 w_2
= window
[126-2*i
];
329 BUTTERFLY_B (data
[129+2*i
], data
[126-2*i
], w_1
, w_2
, b_r
, delay
[126-2*i
]);
330 delay
[126-2*i
] = d_i
;
334 static double besselI0 (double x
)
340 bessel
= bessel
* x
/ (i
* i
) + 1;
345 void a52_imdct_init (uint32_t mm_accel
)
349 double local_imdct_window
[256];
351 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */
353 for (i
= 0; i
< 256; i
++) {
354 sum
+= besselI0 (i
* (256 - i
) * (5 * M_PI
/ 256) * (5 * M_PI
/ 256));
355 local_imdct_window
[i
] = sum
;
358 for (i
= 0; i
< 256; i
++)
359 a52_imdct_window
[i
] = SAMPLE (sqrt (local_imdct_window
[i
] / sum
));
361 for (i
= 0; i
< 3; i
++)
362 roots16
[i
] = SAMPLE (cos ((M_PI
/ 8) * (i
+ 1)));
364 for (i
= 0; i
< 7; i
++)
365 roots32
[i
] = SAMPLE (cos ((M_PI
/ 16) * (i
+ 1)));
367 for (i
= 0; i
< 15; i
++)
368 roots64
[i
] = SAMPLE (cos ((M_PI
/ 32) * (i
+ 1)));
370 for (i
= 0; i
< 31; i
++)
371 roots128
[i
] = SAMPLE (cos ((M_PI
/ 64) * (i
+ 1)));
373 for (i
= 0; i
< 64; i
++) {
374 k
= fftorder
[i
] / 2 + 64;
375 pre1
[i
].real
= SAMPLE (cos ((M_PI
/ 256) * (k
- 0.25)));
376 pre1
[i
].imag
= SAMPLE (sin ((M_PI
/ 256) * (k
- 0.25)));
379 for (i
= 64; i
< 128; i
++) {
380 k
= fftorder
[i
] / 2 + 64;
381 pre1
[i
].real
= SAMPLE (-cos ((M_PI
/ 256) * (k
- 0.25)));
382 pre1
[i
].imag
= SAMPLE (-sin ((M_PI
/ 256) * (k
- 0.25)));
385 for (i
= 0; i
< 64; i
++) {
386 post1
[i
].real
= SAMPLE (cos ((M_PI
/ 256) * (i
+ 0.5)));
387 post1
[i
].imag
= SAMPLE (sin ((M_PI
/ 256) * (i
+ 0.5)));
390 for (i
= 0; i
< 64; i
++) {
392 pre2
[i
].real
= SAMPLE (cos ((M_PI
/ 128) * (k
- 0.25)));
393 pre2
[i
].imag
= SAMPLE (sin ((M_PI
/ 128) * (k
- 0.25)));
396 for (i
= 0; i
< 32; i
++) {
397 post2
[i
].real
= SAMPLE (cos ((M_PI
/ 128) * (i
+ 0.5)));
398 post2
[i
].imag
= SAMPLE (sin ((M_PI
/ 128) * (i
+ 0.5)));
402 if (mm_accel
& MM_ACCEL_DJBFFT
) {
403 ifft128
= (void (*) (complex_t
*)) fftc4_un128
;
404 ifft64
= (void (*) (complex_t
*)) fftc4_un64
;