3 Copyright (C) 2006 and later Cockos Incorporated
4 Copyright 1999 D. J. Bernstein
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the authors be held liable for any damages
8 arising from the use of this software.
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
24 This file implements the WDL FFT library. These routines are based on the
25 DJBFFT library, which are Copyright 1999 D. J. Bernstein, djb@pobox.com
27 The DJB FFT web page is: http://cr.yp.to/djbfft.html
32 // this is based on djbfft
38 #define FFT_MAXBITLEN 15
41 #define inline __inline
44 #define PI 3.1415926535897932384626433832795
46 static WDL_FFT_COMPLEX d16
[3];
47 static WDL_FFT_COMPLEX d32
[7];
48 static WDL_FFT_COMPLEX d64
[15];
49 static WDL_FFT_COMPLEX d128
[31];
50 static WDL_FFT_COMPLEX d256
[63];
51 static WDL_FFT_COMPLEX d512
[127];
52 static WDL_FFT_COMPLEX d1024
[127];
53 static WDL_FFT_COMPLEX d2048
[255];
54 static WDL_FFT_COMPLEX d4096
[511];
55 static WDL_FFT_COMPLEX d8192
[1023];
56 static WDL_FFT_COMPLEX d16384
[2047];
57 static WDL_FFT_COMPLEX d32768
[4095];
60 #define sqrthalf (d16[1].re)
62 #define VOL *(volatile WDL_FFT_REAL *)&
64 #define TRANSFORM(a0,a1,a2,a3,wre,wim) { \
104 #define TRANSFORMHALF(a0,a1,a2,a3) { \
140 #define TRANSFORMZERO(a0,a1,a2,a3) { \
167 #define UNTRANSFORM(a0,a1,a2,a3,wre,wim) { \
211 #define UNTRANSFORMHALF(a0,a1,a2,a3) { \
245 #define UNTRANSFORMZERO(a0,a1,a2,a3) { \
270 #define R(a0,a1,b0,b1,wre,wim) { \
289 #define RHALF(a0,a1,b0,b1) { \
305 #define RZERO(a0,a1,b0,b1) { \
316 #define V(a0,a1,b0,b1,wre,wim) { \
333 #define VHALF(a0,a1,b0,b1) { \
349 #define VZERO(a0,a1,b0,b1) { \
360 static void c2(register WDL_FFT_COMPLEX
*a
)
362 register WDL_FFT_REAL t1
;
365 a
[1].re
= a
[0].re
- t1
;
369 a
[1].im
= a
[0].im
- t1
;
373 static inline void c4(register WDL_FFT_COMPLEX
*a
)
375 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
404 static void c8(register WDL_FFT_COMPLEX
*a
)
406 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
464 a
[5].re
= a
[4].re
- t6
;
467 a
[5].im
= a
[4].im
- t1
;
475 a
[7].im
= a
[6].im
- t5
;
478 a
[7].re
= a
[6].re
- t2
;
487 static void c16(register WDL_FFT_COMPLEX
*a
)
489 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
491 TRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
492 TRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
493 TRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
494 TRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
501 /* a[0...8n-1], w[0...2n-2]; n >= 2 */
502 static void cpass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
504 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
505 register WDL_FFT_COMPLEX
*a1
;
506 register WDL_FFT_COMPLEX
*a2
;
507 register WDL_FFT_COMPLEX
*a3
;
514 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
515 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
518 TRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
519 TRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
529 static void c32(register WDL_FFT_COMPLEX
*a
)
537 static void c64(register WDL_FFT_COMPLEX
*a
)
545 static void c128(register WDL_FFT_COMPLEX
*a
)
553 static void c256(register WDL_FFT_COMPLEX
*a
)
561 static void c512(register WDL_FFT_COMPLEX
*a
)
569 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
570 static void cpassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
572 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
573 register WDL_FFT_COMPLEX
*a1
;
574 register WDL_FFT_COMPLEX
*a2
;
575 register WDL_FFT_COMPLEX
*a3
;
576 register unsigned int k
;
583 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
584 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
591 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
592 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
600 TRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
601 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
609 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
610 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
620 static void c1024(register WDL_FFT_COMPLEX
*a
)
622 cpassbig(a
,d1024
,128);
628 static void c2048(register WDL_FFT_COMPLEX
*a
)
630 cpassbig(a
,d2048
,256);
636 static void c4096(register WDL_FFT_COMPLEX
*a
)
638 cpassbig(a
,d4096
,512);
644 static void c8192(register WDL_FFT_COMPLEX
*a
)
646 cpassbig(a
,d8192
,1024);
652 static void c16384(register WDL_FFT_COMPLEX
*a
)
654 cpassbig(a
,d16384
,2048);
655 c4096(a
+ 8192 + 4096);
660 static void c32768(register WDL_FFT_COMPLEX
*a
)
662 cpassbig(a
,d32768
,4096);
663 c8192(a
+ 16384 + 8192);
669 static void mulr4(WDL_FFT_REAL
*a
,WDL_FFT_REAL
*b
)
671 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
689 void WDL_fft_complexmul(WDL_FFT_COMPLEX
*a
,WDL_FFT_COMPLEX
*b
,int n
)
691 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
692 if (n
<2 || (n
&1)) return;
695 t1
= a
[0].re
* b
[0].re
;
696 t2
= a
[0].im
* b
[0].im
;
697 t3
= a
[0].im
* b
[0].re
;
698 t4
= a
[0].re
* b
[0].im
;
699 t5
= a
[1].re
* b
[1].re
;
700 t6
= a
[1].im
* b
[1].im
;
701 t7
= a
[1].im
* b
[1].re
;
702 t8
= a
[1].re
* b
[1].im
;
716 void WDL_fft_complexmul2(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
718 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
719 if (n
<2 || (n
&1)) return;
722 t1
= a
[0].re
* b
[0].re
;
723 t2
= a
[0].im
* b
[0].im
;
724 t3
= a
[0].im
* b
[0].re
;
725 t4
= a
[0].re
* b
[0].im
;
726 t5
= a
[1].re
* b
[1].re
;
727 t6
= a
[1].im
* b
[1].im
;
728 t7
= a
[1].im
* b
[1].re
;
729 t8
= a
[1].re
* b
[1].im
;
743 void WDL_fft_complexmul3(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
745 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
746 if (n
<2 || (n
&1)) return;
749 t1
= a
[0].re
* b
[0].re
;
750 t2
= a
[0].im
* b
[0].im
;
751 t3
= a
[0].im
* b
[0].re
;
752 t4
= a
[0].re
* b
[0].im
;
753 t5
= a
[1].re
* b
[1].re
;
754 t6
= a
[1].im
* b
[1].im
;
755 t7
= a
[1].im
* b
[1].re
;
756 t8
= a
[1].re
* b
[1].im
;
772 static inline void u4(register WDL_FFT_COMPLEX
*a
)
774 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
804 static void u8(register WDL_FFT_COMPLEX
*a
)
806 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
811 a
[5].re
= a
[4].re
- t1
;
815 a
[7].re
= a
[6].re
- t3
;
831 a
[5].im
= a
[4].im
- t2
;
835 a
[7].im
= a
[6].im
- t4
;
890 static void u16(register WDL_FFT_COMPLEX
*a
)
892 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
898 UNTRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
899 UNTRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
900 UNTRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
901 UNTRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
904 /* a[0...8n-1], w[0...2n-2] */
905 static void upass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
907 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
908 register WDL_FFT_COMPLEX
*a1
;
909 register WDL_FFT_COMPLEX
*a2
;
910 register WDL_FFT_COMPLEX
*a3
;
917 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
918 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
921 UNTRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
922 UNTRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
932 static void u32(register WDL_FFT_COMPLEX
*a
)
940 static void u64(register WDL_FFT_COMPLEX
*a
)
948 static void u128(register WDL_FFT_COMPLEX
*a
)
956 static void u256(register WDL_FFT_COMPLEX
*a
)
964 static void u512(register WDL_FFT_COMPLEX
*a
)
973 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
974 static void upassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
976 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
977 register WDL_FFT_COMPLEX
*a1
;
978 register WDL_FFT_COMPLEX
*a2
;
979 register WDL_FFT_COMPLEX
*a3
;
980 register unsigned int k
;
987 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
988 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
995 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
996 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
1004 UNTRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
1005 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
1013 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
1014 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
1025 static void u1024(register WDL_FFT_COMPLEX
*a
)
1030 upassbig(a
,d1024
,128);
1033 static void u2048(register WDL_FFT_COMPLEX
*a
)
1038 upassbig(a
,d2048
,256);
1042 static void u4096(register WDL_FFT_COMPLEX
*a
)
1047 upassbig(a
,d4096
,512);
1050 static void u8192(register WDL_FFT_COMPLEX
*a
)
1055 upassbig(a
,d8192
,1024);
1058 static void u16384(register WDL_FFT_COMPLEX
*a
)
1062 u4096(a
+ 8192 + 4096);
1063 upassbig(a
,d16384
,2048);
1066 static void u32768(register WDL_FFT_COMPLEX
*a
)
1070 u8192(a
+ 16384 + 8192 );
1071 upassbig(a
,d32768
,4096);
1075 static void __fft_gen(WDL_FFT_COMPLEX
*buf
, int sz
, int isfull
)
1078 double div
=PI
*0.25/(sz
+1.0);
1080 if (isfull
) div
*=2.0;
1082 for (x
= 0; x
< sz
; x
++)
1084 buf
[x
].re
= (WDL_FFT_REAL
) cos((x
+1)*div
);
1085 buf
[x
].im
= (WDL_FFT_REAL
) sin((x
+1)*div
);
1089 #ifndef WDL_FFT_NO_PERMUTE
1091 static unsigned int fftfreq_c(unsigned int i
,unsigned int n
)
1095 if (n
<= 2) return i
;
1098 if (i
< m
) return fftfreq_c(i
,m
) << 1;
1102 if (i
< m
) return (fftfreq_c(i
,m
) << 2) + 1;
1104 return ((fftfreq_c(i
,m
) << 2) - 1) & (n
- 1);
1107 static int _idxperm
[2<<FFT_MAXBITLEN
];
1109 static void idx_perm_calc(int offs
, int n
)
1113 for (i
= 1; i
< n
; ++i
) {
1114 j
= fftfreq_c(i
, n
);
1115 _idxperm
[offs
+n
-j
] = i
;
1119 int WDL_fft_permute(int fftsize
, int idx
)
1121 return _idxperm
[fftsize
+idx
-16];
1128 static int ffttabinit
;
1134 #define fft_gen(x,y) __fft_gen(x,sizeof(x)/sizeof(x[0]),y)
1149 #ifndef WDL_FFT_NO_PERMUTE
1151 for (i
= 16; i
<= 32768; i
*= 2)
1153 idx_perm_calc(offs
, i
);
1161 void WDL_fft(WDL_FFT_COMPLEX
*buf
, int len
, int isInverse
)
1165 case 2: c2(buf
); break;
1166 #define TMP(x) case x: if (!isInverse) c##x(buf); else u##x(buf); break;
1187 /* n multiple of 4, n >= 8 */
1188 void WDL_fft_realmul(WDL_FFT_REAL
*a
,WDL_FFT_REAL
*b
,int n
)
1190 if (n
<8 || (n
&3)) return;
1192 WDL_fft_complexmul((WDL_FFT_COMPLEX
*)(a
+ 4),(WDL_FFT_COMPLEX
*)(b
+ 4),(n
- 4) / 2);
1198 //////////// begin WDL_FFT_REAL modes
1199 static void r2(register WDL_FFT_REAL
*a
)
1201 register WDL_FFT_REAL t1
, t2
;
1209 static void r4(register WDL_FFT_REAL
*a
)
1211 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t6
;
1225 static void r8(register WDL_FFT_REAL
*a
)
1227 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
1261 /* a[0...8n-1], w[0...2n-1]; n even, n >= 4 */
1262 static void rpass(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1264 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1265 register WDL_FFT_REAL
*b
;
1266 register unsigned int k
;
1271 RZERO(a
[0],a
[1],b
[0],b
[1]);
1272 R(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1273 R(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1274 R(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1277 R(a
[8],a
[9],b
[8],b
[9],w
[3].re
,w
[3].im
);
1278 R(a
[10],a
[11],b
[10],b
[11],w
[4].re
,w
[4].im
);
1279 R(a
[12],a
[13],b
[12],b
[13],w
[5].re
,w
[5].im
);
1280 R(a
[14],a
[15],b
[14],b
[15],w
[6].re
,w
[6].im
);
1281 if (!(k
-= 2)) break;
1288 static void r16(register WDL_FFT_REAL
*a
)
1290 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1292 RZERO(a
[0],a
[1],a
[8],a
[9]);
1293 R(a
[2],a
[3],a
[10],a
[11],d16
[0].re
,d16
[0].im
);
1294 R(a
[4],a
[5],a
[12],a
[13],d16
[1].re
,d16
[1].im
);
1295 R(a
[6],a
[7],a
[14],a
[15],d16
[2].re
,d16
[2].im
);
1297 c4((WDL_FFT_COMPLEX
*)(a
+ 8));
1300 static void r32(register WDL_FFT_REAL
*a
)
1304 c8((WDL_FFT_COMPLEX
*)(a
+ 16));
1307 static void r64(register WDL_FFT_REAL
*a
)
1311 c16((WDL_FFT_COMPLEX
*)(a
+ 32));
1314 static void r128(register WDL_FFT_REAL
*a
)
1318 c32((WDL_FFT_COMPLEX
*)(a
+ 64));
1321 static void r256(register WDL_FFT_REAL
*a
)
1325 c64((WDL_FFT_COMPLEX
*)(a
+ 128));
1328 static void r512(register WDL_FFT_REAL
*a
)
1332 c128((WDL_FFT_COMPLEX
*)(a
+ 256));
1336 /* a[0...8n-1], w[0...n-1]; n even, n >= 8 */
1337 static void rpassbig(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1339 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1340 register WDL_FFT_REAL
*b
;
1341 register unsigned int k
;
1345 RZERO(a
[0],a
[1],b
[0],b
[1]);
1346 R(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1350 R(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1351 R(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1357 RHALF(a
[4],a
[5],b
[4],b
[5]);
1358 R(a
[6],a
[7],b
[6],b
[7],w
[0].im
,w
[0].re
);
1362 R(a
[8],a
[9],b
[8],b
[9],w
[-1].im
,w
[-1].re
);
1363 R(a
[10],a
[11],b
[10],b
[11],w
[-2].im
,w
[-2].re
);
1371 static void r1024(register WDL_FFT_REAL
*a
)
1373 rpassbig(a
,d1024
,128);
1375 c256((WDL_FFT_COMPLEX
*)(a
+ 512));
1378 static void r2048(register WDL_FFT_REAL
*a
)
1380 rpassbig(a
,d2048
,256);
1382 c512((WDL_FFT_COMPLEX
*)(a
+ 1024));
1386 static void r4096(register WDL_FFT_REAL
*a
)
1388 rpassbig(a
,d4096
,512);
1390 c1024((WDL_FFT_COMPLEX
*)(a
+ 2048));
1393 static void r8192(register WDL_FFT_REAL
*a
)
1395 rpassbig(a
,d8192
,1024);
1397 c2048((WDL_FFT_COMPLEX
*)(a
+ 4096));
1400 static void r16384(register WDL_FFT_REAL
*a
)
1402 rpassbig(a
,d16384
,2048);
1404 c4096((WDL_FFT_COMPLEX
*)(a
+ 8192));
1407 static void r32768(register WDL_FFT_REAL
*a
)
1409 rpassbig(a
,d32768
,4096);
1411 c8192((WDL_FFT_COMPLEX
*)(a
+ 16384));
1416 static void v4(register WDL_FFT_REAL
*a
)
1418 register WDL_FFT_REAL t1
, t3
, t5
, t6
;
1432 static void v8(register WDL_FFT_REAL
*a
)
1434 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
1468 /* a[0...8n-1], w[0...2n-1]; n even, n >= 4 */
1469 static void vpass(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1471 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1472 register WDL_FFT_REAL
*b
;
1473 register unsigned int k
;
1478 VZERO(a
[0],a
[1],b
[0],b
[1]);
1479 V(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1480 V(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1481 V(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1484 V(a
[8],a
[9],b
[8],b
[9],w
[3].re
,w
[3].im
);
1485 V(a
[10],a
[11],b
[10],b
[11],w
[4].re
,w
[4].im
);
1486 V(a
[12],a
[13],b
[12],b
[13],w
[5].re
,w
[5].im
);
1487 V(a
[14],a
[15],b
[14],b
[15],w
[6].re
,w
[6].im
);
1488 if (!(k
-= 2)) break;
1495 static void v16(register WDL_FFT_REAL
*a
)
1497 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1499 u4((WDL_FFT_COMPLEX
*)(a
+ 8));
1501 VZERO(a
[0],a
[1],a
[8],a
[9]);
1502 V(a
[2],a
[3],a
[10],a
[11],d16
[0].re
,d16
[0].im
);
1503 V(a
[4],a
[5],a
[12],a
[13],d16
[1].re
,d16
[1].im
);
1504 V(a
[6],a
[7],a
[14],a
[15],d16
[2].re
,d16
[2].im
);
1507 static void v32(register WDL_FFT_REAL
*a
)
1509 u8((WDL_FFT_COMPLEX
*)(a
+ 16));
1514 static void v64(register WDL_FFT_REAL
*a
)
1516 u16((WDL_FFT_COMPLEX
*)(a
+ 32));
1521 static void v128(register WDL_FFT_REAL
*a
)
1523 u32((WDL_FFT_COMPLEX
*)(a
+ 64));
1528 static void v256(register WDL_FFT_REAL
*a
)
1530 u64((WDL_FFT_COMPLEX
*)(a
+ 128));
1535 static void v512(register WDL_FFT_REAL
*a
)
1537 u128((WDL_FFT_COMPLEX
*)(a
+ 256));
1544 /* a[0...8n-1], w[0...n-1]; n even, n >= 8 */
1545 static void vpassbig(register WDL_FFT_REAL
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
1547 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
;
1548 register WDL_FFT_REAL
*b
;
1549 register unsigned int k
;
1553 VZERO(a
[0],a
[1],b
[0],b
[1]);
1554 V(a
[2],a
[3],b
[2],b
[3],w
[0].re
,w
[0].im
);
1558 V(a
[4],a
[5],b
[4],b
[5],w
[1].re
,w
[1].im
);
1559 V(a
[6],a
[7],b
[6],b
[7],w
[2].re
,w
[2].im
);
1565 VHALF(a
[4],a
[5],b
[4],b
[5]);
1566 V(a
[6],a
[7],b
[6],b
[7],w
[0].im
,w
[0].re
);
1570 V(a
[8],a
[9],b
[8],b
[9],w
[-1].im
,w
[-1].re
);
1571 V(a
[10],a
[11],b
[10],b
[11],w
[-2].im
,w
[-2].re
);
1579 static void v1024(register WDL_FFT_REAL
*a
)
1581 u256((WDL_FFT_COMPLEX
*)(a
+ 512));
1583 vpassbig(a
,d1024
,128);
1586 static void v2048(register WDL_FFT_REAL
*a
)
1588 u512((WDL_FFT_COMPLEX
*)(a
+ 1024));
1590 vpassbig(a
,d2048
,256);
1594 static void v4096(register WDL_FFT_REAL
*a
)
1596 u1024((WDL_FFT_COMPLEX
*)(a
+ 2048));
1598 vpassbig(a
,d4096
,512);
1601 static void v8192(register WDL_FFT_REAL
*a
)
1603 u2048((WDL_FFT_COMPLEX
*)(a
+ 4096));
1605 vpassbig(a
,d8192
,1024);
1608 static void v16384(register WDL_FFT_REAL
*a
)
1610 u4096((WDL_FFT_COMPLEX
*)(a
+ 8192));
1612 vpassbig(a
,d16384
,2048);
1615 static void v32768(register WDL_FFT_REAL
*a
)
1617 u8192((WDL_FFT_COMPLEX
*)(a
+ 16384));
1619 vpassbig(a
,d32768
,4096);
1623 void WDL_real_fft(WDL_FFT_REAL
*buf
, int len
, int isInverse
)
1627 case 2: r2(buf
); break;
1628 #define TMP(x) case x: if (!isInverse) r##x(buf); else v##x(buf); break;