3 Copyright (C) 2006 and later Cockos Incorporated
4 Copyright 1999 D. J. Bernstein
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the authors be held liable for any damages
8 arising from the use of this software.
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
24 This file implements the WDL FFT library. These routines are based on the
25 DJBFFT library, which are Copyright 1999 D. J. Bernstein, djb@pobox.com
27 The DJB FFT web page is: http://cr.yp.to/djbfft.html
32 // this is based on djbfft
38 #define FFT_MAXBITLEN 15
41 #define inline __inline
44 #define PI 3.1415926535897932384626433832795
46 static WDL_FFT_COMPLEX d16
[3];
47 static WDL_FFT_COMPLEX d32
[7];
48 static WDL_FFT_COMPLEX d64
[15];
49 static WDL_FFT_COMPLEX d128
[31];
50 static WDL_FFT_COMPLEX d256
[63];
51 static WDL_FFT_COMPLEX d512
[127];
52 static WDL_FFT_COMPLEX d1024
[127];
53 static WDL_FFT_COMPLEX d2048
[255];
54 static WDL_FFT_COMPLEX d4096
[511];
55 static WDL_FFT_COMPLEX d8192
[1023];
56 static WDL_FFT_COMPLEX d16384
[2047];
57 static WDL_FFT_COMPLEX d32768
[4095];
60 #define sqrthalf (d16[1].re)
62 #define VOL *(volatile WDL_FFT_REAL *)&
64 #define TRANSFORM(a0,a1,a2,a3,wre,wim) { \
104 #define TRANSFORMHALF(a0,a1,a2,a3) { \
140 #define TRANSFORMZERO(a0,a1,a2,a3) { \
167 #define UNTRANSFORM(a0,a1,a2,a3,wre,wim) { \
211 #define UNTRANSFORMHALF(a0,a1,a2,a3) { \
245 #define UNTRANSFORMZERO(a0,a1,a2,a3) { \
270 #define R(a0,a1,b0,b1,wre,wim) { \
289 #define RHALF(a0,a1,b0,b1) { \
305 #define RZERO(a0,a1,b0,b1) { \
316 #define V(a0,a1,b0,b1,wre,wim) { \
333 #define VHALF(a0,a1,b0,b1) { \
349 #define VZERO(a0,a1,b0,b1) { \
360 static void c2(register WDL_FFT_COMPLEX
*a
)
362 register WDL_FFT_REAL t1
;
365 a
[1].re
= a
[0].re
- t1
;
369 a
[1].im
= a
[0].im
- t1
;
373 static inline void c4(register WDL_FFT_COMPLEX
*a
)
375 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
404 static void c8(register WDL_FFT_COMPLEX
*a
)
406 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
464 a
[5].re
= a
[4].re
- t6
;
467 a
[5].im
= a
[4].im
- t1
;
475 a
[7].im
= a
[6].im
- t5
;
478 a
[7].re
= a
[6].re
- t2
;
487 static void c16(register WDL_FFT_COMPLEX
*a
)
489 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
491 TRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
492 TRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
493 TRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
494 TRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
501 /* a[0...8n-1], w[0...2n-2]; n >= 2 */
502 static void cpass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
504 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
505 register WDL_FFT_COMPLEX
*a1
;
506 register WDL_FFT_COMPLEX
*a2
;
507 register WDL_FFT_COMPLEX
*a3
;
514 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
515 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
518 TRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
519 TRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
529 static void c32(register WDL_FFT_COMPLEX
*a
)
537 static void c64(register WDL_FFT_COMPLEX
*a
)
545 static void c128(register WDL_FFT_COMPLEX
*a
)
553 static void c256(register WDL_FFT_COMPLEX
*a
)
561 static void c512(register WDL_FFT_COMPLEX
*a
)
569 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
570 static void cpassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
572 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
573 register WDL_FFT_COMPLEX
*a1
;
574 register WDL_FFT_COMPLEX
*a2
;
575 register WDL_FFT_COMPLEX
*a3
;
576 register unsigned int k
;
583 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
584 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
591 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
592 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
600 TRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
601 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
609 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
610 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
620 static void c1024(register WDL_FFT_COMPLEX
*a
)
622 cpassbig(a
,d1024
,128);
628 static void c2048(register WDL_FFT_COMPLEX
*a
)
630 cpassbig(a
,d2048
,256);
636 static void c4096(register WDL_FFT_COMPLEX
*a
)
638 cpassbig(a
,d4096
,512);
644 static void c8192(register WDL_FFT_COMPLEX
*a
)
646 cpassbig(a
,d8192
,1024);
652 static void c16384(register WDL_FFT_COMPLEX
*a
)
654 cpassbig(a
,d16384
,2048);
655 c4096(a
+ 8192 + 4096);
660 static void c32768(register WDL_FFT_COMPLEX
*a
)
662 cpassbig(a
,d32768
,4096);
663 c8192(a
+ 16384 + 8192);
669 static void mulr4(WDL_FFT_REAL
*a
,WDL_FFT_REAL
*b
)
671 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
689 void WDL_fft_complexmul(WDL_FFT_COMPLEX
*a
,WDL_FFT_COMPLEX
*b
,int n
)
691 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
692 if (n
<2 || (n
&1)) return;
695 t1
= a
[0].re
* b
[0].re
;
696 t2
= a
[0].im
* b
[0].im
;
697 t3
= a
[0].im
* b
[0].re
;
698 t4
= a
[0].re
* b
[0].im
;
699 t5
= a
[1].re
* b
[1].re
;
700 t6
= a
[1].im
* b
[1].im
;
701 t7
= a
[1].im
* b
[1].re
;
702 t8
= a
[1].re
* b
[1].im
;
716 void WDL_fft_complexmul2(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
718 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
719 if (n
<2 || (n
&1)) return;
722 t1
= a
[0].re
* b
[0].re
;
723 t2
= a
[0].im
* b
[0].im
;
724 t3
= a
[0].im
* b
[0].re
;
725 t4
= a
[0].re
* b
[0].im
;
726 t5
= a
[1].re
* b
[1].re
;
727 t6
= a
[1].im
* b
[1].im
;
728 t7
= a
[1].im
* b
[1].re
;
729 t8
= a
[1].re
* b
[1].im
;
743 void WDL_fft_complexmul3(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
745 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
746 if (n
<2 || (n
&1)) return;
749 t1
= a
[0].re
* b
[0].re
;
750 t2
= a
[0].im
* b
[0].im
;
751 t3
= a
[0].im
* b
[0].re
;
752 t4
= a
[0].re
* b
[0].im
;
753 t5
= a
[1].re
* b
[1].re
;
754 t6
= a
[1].im
* b
[1].im
;
755 t7
= a
[1].im
* b
[1].re
;
756 t8
= a
[1].re
* b
[1].im
;
772 static inline void u4(register WDL_FFT_COMPLEX
*a
)
774 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
804 static void u8(register WDL_FFT_COMPLEX
*a
)
806 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
811 a
[5].re
= a
[4].re
- t1
;
815 a
[7].re
= a
[6].re
- t3
;
831 a
[5].im
= a
[4].im
- t2
;
835 a
[7].im
= a
[6].im
- t4
;
890 static void u16(register WDL_FFT_COMPLEX
*a
)
892 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
898 UNTRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
899 UNTRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
900 UNTRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
901 UNTRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
904 /* a[0...8n-1], w[0...2n-2] */
905 static void upass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
907 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
908 register WDL_FFT_COMPLEX
*a1
;
909 register WDL_FFT_COMPLEX
*a2
;
910 register WDL_FFT_COMPLEX
*a3
;
917 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
918 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
921 UNTRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
922 UNTRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
932 static void u32(register WDL_FFT_COMPLEX
*a
)
940 static void u64(register WDL_FFT_COMPLEX
*a
)
948 static void u128(register WDL_FFT_COMPLEX
*a
)
956 static void u256(register WDL_FFT_COMPLEX
*a
)
964 static void u512(register WDL_FFT_COMPLEX
*a
)
973 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
974 static void upassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
976 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
977 register WDL_FFT_COMPLEX
*a1
;
978 register WDL_FFT_COMPLEX
*a2
;
979 register WDL_FFT_COMPLEX
*a3
;
980 register unsigned int k
;
987 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
988 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
995 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
996 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
1004 UNTRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
1005 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
1013 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
1014 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
1025 static void u1024(register WDL_FFT_COMPLEX
*a
)
1030 upassbig(a
,d1024
,128);
1033 static void u2048(register WDL_FFT_COMPLEX
*a
)
1038 upassbig(a
,d2048
,256);
1042 static void u4096(register WDL_FFT_COMPLEX
*a
)
1047 upassbig(a
,d4096
,512);
1050 static void u8192(register WDL_FFT_COMPLEX
*a
)
1055 upassbig(a
,d8192
,1024);
1058 static void u16384(register WDL_FFT_COMPLEX
*a
)
1062 u4096(a
+ 8192 + 4096);
1063 upassbig(a
,d16384
,2048);
1066 static void u32768(register WDL_FFT_COMPLEX
*a
)
1070 u8192(a
+ 16384 + 8192 );
1071 upassbig(a
,d32768
,4096);
1075 static void __fft_gen(WDL_FFT_COMPLEX
*buf
, int sz
, int isfull
)
1078 double div
=PI
*0.25/(sz
+1.0);
1080 if (isfull
) div
*=2.0;
1082 for (x
= 0; x
< sz
; x
++)
1084 buf
[x
].re
= (WDL_FFT_REAL
) cos((x
+1)*div
);
1085 buf
[x
].im
= (WDL_FFT_REAL
) sin((x
+1)*div
);
1089 #ifndef WDL_FFT_NO_PERMUTE
1091 static unsigned int fftfreq_c(unsigned int i
,unsigned int n
)
1095 if (n
<= 2) return i
;
1098 if (i
< m
) return fftfreq_c(i
,m
) << 1;
1102 if (i
< m
) return (fftfreq_c(i
,m
) << 2) + 1;
1104 return ((fftfreq_c(i
,m
) << 2) - 1) & (n
- 1);
1107 static int _idxperm
[2<<FFT_MAXBITLEN
];
1109 static void idx_perm_calc(int offs
, int n
)
1113 for (i
= 1; i
< n
; ++i
) {
1114 j
= fftfreq_c(i
, n
);
1115 _idxperm
[offs
+n
-j
] = i
;
1119 int WDL_fft_permute(int fftsize
, int idx
)
1121 return _idxperm
[fftsize
+idx
-2];
1123 int *WDL_fft_permute_tab(int fftsize
)
1125 return _idxperm
+ fftsize
- 2;
1133 static int ffttabinit
;
1139 #define fft_gen(x,y) __fft_gen(x,sizeof(x)/sizeof(x[0]),y)
1154 #ifndef WDL_FFT_NO_PERMUTE
1156 for (i
= 2; i
<= 32768; i
*= 2)
1158 idx_perm_calc(offs
, i
);
1166 void WDL_fft(WDL_FFT_COMPLEX
*buf
, int len
, int isInverse
)
1170 case 2: c2(buf
); break;
1171 #define TMP(x) case x: if (!isInverse) c##x(buf); else u##x(buf); break;
1192 /* n multiple of 4, n >= 8 */
1193 void WDL_fft_realmul(WDL_FFT_REAL
*a
,WDL_FFT_REAL
*b
,int n
)
1195 if (n
<8 || (n
&3)) return;
1197 WDL_fft_complexmul((WDL_FFT_COMPLEX
*)(a
+ 4),(WDL_FFT_COMPLEX
*)(b
+ 4),(n
- 4) / 2);
1203 //////////// begin WDL_FFT_REAL modes
1204 static void r2(register WDL_FFT_REAL
*a
)
1206 register WDL_FFT_REAL t1
, t2
;
1214 static void r4(register WDL_FFT_REAL
*a
)
1216 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t6
;
1230 static void r8(register WDL_FFT_REAL
*a
)
1232 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
1266 /* a[0...8n-1], w[0...2n-1]; n even, n >= 4 */
1267 static void rpass(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1269 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1270 register WDL_FFT_REAL
*b
;
1271 register unsigned int k
;
1276 RZERO(a
[0],a
[1],b
[0],b
[1]);
1277 R(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1278 R(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1279 R(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1282 R(a
[8],a
[9],b
[8],b
[9],w
[3].re
,w
[3].im
);
1283 R(a
[10],a
[11],b
[10],b
[11],w
[4].re
,w
[4].im
);
1284 R(a
[12],a
[13],b
[12],b
[13],w
[5].re
,w
[5].im
);
1285 R(a
[14],a
[15],b
[14],b
[15],w
[6].re
,w
[6].im
);
1286 if (!(k
-= 2)) break;
1293 static void r16(register WDL_FFT_REAL
*a
)
1295 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1297 RZERO(a
[0],a
[1],a
[8],a
[9]);
1298 R(a
[2],a
[3],a
[10],a
[11],d16
[0].re
,d16
[0].im
);
1299 R(a
[4],a
[5],a
[12],a
[13],d16
[1].re
,d16
[1].im
);
1300 R(a
[6],a
[7],a
[14],a
[15],d16
[2].re
,d16
[2].im
);
1302 c4((WDL_FFT_COMPLEX
*)(a
+ 8));
1305 static void r32(register WDL_FFT_REAL
*a
)
1309 c8((WDL_FFT_COMPLEX
*)(a
+ 16));
1312 static void r64(register WDL_FFT_REAL
*a
)
1316 c16((WDL_FFT_COMPLEX
*)(a
+ 32));
1319 static void r128(register WDL_FFT_REAL
*a
)
1323 c32((WDL_FFT_COMPLEX
*)(a
+ 64));
1326 static void r256(register WDL_FFT_REAL
*a
)
1330 c64((WDL_FFT_COMPLEX
*)(a
+ 128));
1333 static void r512(register WDL_FFT_REAL
*a
)
1337 c128((WDL_FFT_COMPLEX
*)(a
+ 256));
1341 /* a[0...8n-1], w[0...n-1]; n even, n >= 8 */
1342 static void rpassbig(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1344 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1345 register WDL_FFT_REAL
*b
;
1346 register unsigned int k
;
1350 RZERO(a
[0],a
[1],b
[0],b
[1]);
1351 R(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1355 R(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1356 R(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1362 RHALF(a
[4],a
[5],b
[4],b
[5]);
1363 R(a
[6],a
[7],b
[6],b
[7],w
[0].im
,w
[0].re
);
1367 R(a
[8],a
[9],b
[8],b
[9],w
[-1].im
,w
[-1].re
);
1368 R(a
[10],a
[11],b
[10],b
[11],w
[-2].im
,w
[-2].re
);
1376 static void r1024(register WDL_FFT_REAL
*a
)
1378 rpassbig(a
,d1024
,128);
1380 c256((WDL_FFT_COMPLEX
*)(a
+ 512));
1383 static void r2048(register WDL_FFT_REAL
*a
)
1385 rpassbig(a
,d2048
,256);
1387 c512((WDL_FFT_COMPLEX
*)(a
+ 1024));
1391 static void r4096(register WDL_FFT_REAL
*a
)
1393 rpassbig(a
,d4096
,512);
1395 c1024((WDL_FFT_COMPLEX
*)(a
+ 2048));
1398 static void r8192(register WDL_FFT_REAL
*a
)
1400 rpassbig(a
,d8192
,1024);
1402 c2048((WDL_FFT_COMPLEX
*)(a
+ 4096));
1405 static void r16384(register WDL_FFT_REAL
*a
)
1407 rpassbig(a
,d16384
,2048);
1409 c4096((WDL_FFT_COMPLEX
*)(a
+ 8192));
1412 static void r32768(register WDL_FFT_REAL
*a
)
1414 rpassbig(a
,d32768
,4096);
1416 c8192((WDL_FFT_COMPLEX
*)(a
+ 16384));
1421 static void v4(register WDL_FFT_REAL
*a
)
1423 register WDL_FFT_REAL t1
, t3
, t5
, t6
;
1437 static void v8(register WDL_FFT_REAL
*a
)
1439 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
1473 /* a[0...8n-1], w[0...2n-1]; n even, n >= 4 */
1474 static void vpass(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1476 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1477 register WDL_FFT_REAL
*b
;
1478 register unsigned int k
;
1483 VZERO(a
[0],a
[1],b
[0],b
[1]);
1484 V(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1485 V(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1486 V(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1489 V(a
[8],a
[9],b
[8],b
[9],w
[3].re
,w
[3].im
);
1490 V(a
[10],a
[11],b
[10],b
[11],w
[4].re
,w
[4].im
);
1491 V(a
[12],a
[13],b
[12],b
[13],w
[5].re
,w
[5].im
);
1492 V(a
[14],a
[15],b
[14],b
[15],w
[6].re
,w
[6].im
);
1493 if (!(k
-= 2)) break;
1500 static void v16(register WDL_FFT_REAL
*a
)
1502 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1504 u4((WDL_FFT_COMPLEX
*)(a
+ 8));
1506 VZERO(a
[0],a
[1],a
[8],a
[9]);
1507 V(a
[2],a
[3],a
[10],a
[11],d16
[0].re
,d16
[0].im
);
1508 V(a
[4],a
[5],a
[12],a
[13],d16
[1].re
,d16
[1].im
);
1509 V(a
[6],a
[7],a
[14],a
[15],d16
[2].re
,d16
[2].im
);
1512 static void v32(register WDL_FFT_REAL
*a
)
1514 u8((WDL_FFT_COMPLEX
*)(a
+ 16));
1519 static void v64(register WDL_FFT_REAL
*a
)
1521 u16((WDL_FFT_COMPLEX
*)(a
+ 32));
1526 static void v128(register WDL_FFT_REAL
*a
)
1528 u32((WDL_FFT_COMPLEX
*)(a
+ 64));
1533 static void v256(register WDL_FFT_REAL
*a
)
1535 u64((WDL_FFT_COMPLEX
*)(a
+ 128));
1540 static void v512(register WDL_FFT_REAL
*a
)
1542 u128((WDL_FFT_COMPLEX
*)(a
+ 256));
1549 /* a[0...8n-1], w[0...n-1]; n even, n >= 8 */
1550 static void vpassbig(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1552 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1553 register WDL_FFT_REAL
*b
;
1554 register unsigned int k
;
1558 VZERO(a
[0],a
[1],b
[0],b
[1]);
1559 V(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1563 V(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1564 V(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1570 VHALF(a
[4],a
[5],b
[4],b
[5]);
1571 V(a
[6],a
[7],b
[6],b
[7],w
[0].im
,w
[0].re
);
1575 V(a
[8],a
[9],b
[8],b
[9],w
[-1].im
,w
[-1].re
);
1576 V(a
[10],a
[11],b
[10],b
[11],w
[-2].im
,w
[-2].re
);
1584 static void v1024(register WDL_FFT_REAL
*a
)
1586 u256((WDL_FFT_COMPLEX
*)(a
+ 512));
1588 vpassbig(a
,d1024
,128);
1591 static void v2048(register WDL_FFT_REAL
*a
)
1593 u512((WDL_FFT_COMPLEX
*)(a
+ 1024));
1595 vpassbig(a
,d2048
,256);
1599 static void v4096(register WDL_FFT_REAL
*a
)
1601 u1024((WDL_FFT_COMPLEX
*)(a
+ 2048));
1603 vpassbig(a
,d4096
,512);
1606 static void v8192(register WDL_FFT_REAL
*a
)
1608 u2048((WDL_FFT_COMPLEX
*)(a
+ 4096));
1610 vpassbig(a
,d8192
,1024);
1613 static void v16384(register WDL_FFT_REAL
*a
)
1615 u4096((WDL_FFT_COMPLEX
*)(a
+ 8192));
1617 vpassbig(a
,d16384
,2048);
1620 static void v32768(register WDL_FFT_REAL
*a
)
1622 u8192((WDL_FFT_COMPLEX
*)(a
+ 16384));
1624 vpassbig(a
,d32768
,4096);
1628 void WDL_real_fft(WDL_FFT_REAL
*buf
, int len
, int isInverse
)
1632 case 2: r2(buf
); break;
1633 #define TMP(x) case x: if (!isInverse) r##x(buf); else v##x(buf); break;