2 ** FFT and FHT routines
3 ** Copyright 1988, 1993; Ron Mayer
6 ** Does a hartley transform of "n" points in the array "fz".
8 ** NOTE: This routine uses at least 2 patented algorithms, and may be
9 ** under the restrictions of a bunch of different organizations.
10 ** Although I wrote it completely myself; it is kind of a derivative
11 ** of a routine I once authored and released under the GPL, so it
12 ** may fall under the free software foundation's restrictions;
13 ** it was worked on as a Stanford Univ project, so they claim
14 ** some rights to it; it was further optimized at work here, so
15 ** I think this company claims parts of it. The patents are
16 ** held by R. Bracewell (the FHT algorithm) and O. Buneman (the
17 ** trig generator), both at Stanford Univ.
18 ** If it were up to me, I'd say go do whatever you want with it;
19 ** but it would be polite to give credit to the following people
20 ** if you use this anywhere:
21 ** Euler - probable inventor of the fourier transform.
22 ** Gauss - probable inventor of the FFT.
23 ** Hartley - probable inventor of the hartley transform.
24 ** Buneman - for a really cool trig generator
25 ** Mayer(me) - for authoring this particular version and
26 ** including all the optimizations in one package.
28 ** Ron Mayer; mayer@acuson.com
29 ** and added some optimization by
30 ** Mather - idea of using lookup table
31 ** Takehiro - some dirty hack for speed up
34 /* $Id: fft.c,v 1.1 2002/04/28 17:30:18 kramm Exp $ */
36 #include "config_static.h"
49 #define TRI_SIZE (5-1) /* 1024 = 4**5 */
51 static const FLOAT costab
[TRI_SIZE
*2] = {
52 9.238795325112867e-01, 3.826834323650898e-01,
53 9.951847266721969e-01, 9.801714032956060e-02,
54 9.996988186962042e-01, 2.454122852291229e-02,
55 9.999811752826011e-01, 6.135884649154475e-03
58 static void fht(FLOAT
*fz
, int n
)
60 const FLOAT
*tri
= costab
;
64 n
<<= 1; /* to get BLKSIZE, because of 3DNow! ASM routine */
69 int i
, k1
, k2
, k3
, kx
;
100 for (i
= 1; i
< kx
; i
++) {
107 FLOAT a
,b
,g0
,f0
,f1
,g1
,f2
,g2
,f3
,g3
;
108 b
= s2
*fi
[k1
] - c2
*gi
[k1
];
109 a
= c2
*fi
[k1
] + s2
*gi
[k1
];
114 b
= s2
*fi
[k3
] - c2
*gi
[k3
];
115 a
= c2
*fi
[k3
] + s2
*gi
[k3
];
136 c1
= c2
* tri
[0] - s1
* tri
[1];
137 s1
= c2
* tri
[1] + s1
* tri
[0];
143 static const unsigned char rv_tbl
[] = {
144 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
145 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
146 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
147 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
148 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
149 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
150 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
151 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
152 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
153 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
154 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
155 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
156 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
157 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
158 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
159 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe
162 #define ch01(index) (buffer[chn][index])
164 #define ml00(f) (window[i ] * f(i))
165 #define ml10(f) (window[i + 0x200] * f(i + 0x200))
166 #define ml20(f) (window[i + 0x100] * f(i + 0x100))
167 #define ml30(f) (window[i + 0x300] * f(i + 0x300))
169 #define ml01(f) (window[i + 0x001] * f(i + 0x001))
170 #define ml11(f) (window[i + 0x201] * f(i + 0x201))
171 #define ml21(f) (window[i + 0x101] * f(i + 0x101))
172 #define ml31(f) (window[i + 0x301] * f(i + 0x301))
174 #define ms00(f) (window_s[i ] * f(i + k))
175 #define ms10(f) (window_s[0x7f - i] * f(i + k + 0x80))
176 #define ms20(f) (window_s[i + 0x40] * f(i + k + 0x40))
177 #define ms30(f) (window_s[0x3f - i] * f(i + k + 0xc0))
179 #define ms01(f) (window_s[i + 0x01] * f(i + k + 0x01))
180 #define ms11(f) (window_s[0x7e - i] * f(i + k + 0x81))
181 #define ms21(f) (window_s[i + 0x41] * f(i + k + 0x41))
182 #define ms31(f) (window_s[0x3e - i] * f(i + k + 0xc1))
185 void fft_short(lame_internal_flags
* const gfc
,
186 FLOAT x_real
[3][BLKSIZE_s
], int chn
, const sample_t
*buffer
[2])
188 const FLOAT
* window_s
= (const FLOAT
*)&gfc
->window_s
[0];
193 for (b
= 0; b
< 3; b
++) {
194 FLOAT
*x
= &x_real
[b
][BLKSIZE_s
/ 2];
195 short k
= (576 / 3) * (b
+ 1);
196 j
= BLKSIZE_s
/ 8 - 1;
198 FLOAT f0
,f1
,f2
,f3
, w
;
202 f0
= ms00(ch01
); w
= ms10(ch01
); f1
= f0
- w
; f0
= f0
+ w
;
203 f2
= ms20(ch01
); w
= ms30(ch01
); f3
= f2
- w
; f2
= f2
+ w
;
211 f0
= ms01(ch01
); w
= ms11(ch01
); f1
= f0
- w
; f0
= f0
+ w
;
212 f2
= ms21(ch01
); w
= ms31(ch01
); f3
= f2
- w
; f2
= f2
+ w
;
214 x
[BLKSIZE_s
/ 2 + 0] = f0
+ f2
;
215 x
[BLKSIZE_s
/ 2 + 2] = f0
- f2
;
216 x
[BLKSIZE_s
/ 2 + 1] = f1
+ f3
;
217 x
[BLKSIZE_s
/ 2 + 3] = f1
- f3
;
220 gfc
->fft_fht(x
, BLKSIZE_s
/2);
221 /* BLKSIZE_s/2 because of 3DNow! ASM routine */
225 void fft_long(lame_internal_flags
* const gfc
,
226 FLOAT x
[BLKSIZE
], int chn
, const sample_t
*buffer
[2] )
228 const FLOAT
* window
= (const FLOAT
*)&gfc
->window
[0];
230 int jj
= BLKSIZE
/ 8 - 1;
234 FLOAT f0
,f1
,f2
,f3
, w
;
237 f0
= ml00(ch01
); w
= ml10(ch01
); f1
= f0
- w
; f0
= f0
+ w
;
238 f2
= ml20(ch01
); w
= ml30(ch01
); f3
= f2
- w
; f2
= f2
+ w
;
246 f0
= ml01(ch01
); w
= ml11(ch01
); f1
= f0
- w
; f0
= f0
+ w
;
247 f2
= ml21(ch01
); w
= ml31(ch01
); f3
= f2
- w
; f2
= f2
+ w
;
249 x
[BLKSIZE
/ 2 + 0] = f0
+ f2
;
250 x
[BLKSIZE
/ 2 + 2] = f0
- f2
;
251 x
[BLKSIZE
/ 2 + 1] = f1
+ f3
;
252 x
[BLKSIZE
/ 2 + 3] = f1
- f3
;
255 gfc
->fft_fht(x
, BLKSIZE
/2);
256 /* BLKSIZE/2 because of 3DNow! ASM routine */
260 void init_fft(lame_internal_flags
* const gfc
)
262 FLOAT
*window
= &gfc
->window
[0];
263 FLOAT
*window_s
= &gfc
->window_s
[0];
267 if (gfc
->nsPsy
.use
) {
268 for (i
= 0; i
< BLKSIZE
; i
++)
269 /* blackman window */
270 window
[i
] = 0.42-0.5*cos(2*PI
*i
/(BLKSIZE
-1))+0.08*cos(4*PI
*i
/(BLKSIZE
-1));
273 * calculate HANN window coefficients
275 for (i
= 0; i
< BLKSIZE
; i
++)
276 window
[i
] = 0.5 * (1.0 - cos(2.0 * PI
* (i
+ 0.5) / BLKSIZE
));
280 // The type of window used here will make no real difference, but
281 // in the interest of merging nspsytune stuff - switch to blackman window
282 for (i
= 0; i
< BLKSIZE
; i
++)
283 /* blackman window */
284 window
[i
] = 0.42-0.5*cos(2*PI
*(i
+.5)/BLKSIZE
)+
285 0.08*cos(4*PI
*(i
+.5)/BLKSIZE
);
287 for (i
= 0; i
< BLKSIZE_s
/2 ; i
++)
288 window_s
[i
] = 0.5 * (1.0 - cos(2.0 * PI
* (i
+ 0.5) / BLKSIZE_s
));
291 if (gfc
->CPU_features
.AMD_3DNow
) {
292 extern void fht_3DN(FLOAT
*fz
, int n
);
293 gfc
->fft_fht
= fht_3DN
;
297 if (gfc
->CPU_features
.SIMD
) {
298 extern void fht_SSE(FLOAT
*fz
, int n
);
299 gfc
->fft_fht
= fht_SSE
;
303 if (gfc
->CPU_features
.i387
) {
304 extern void fht_FPU(FLOAT
*fz
, int n
);
305 gfc
->fft_fht
= fht_FPU
;