3 Copyright (C) 2006 and later Cockos Incorporated
4 Copyright 1999 D. J. Bernstein
6 This software is provided 'as-is', without any express or implied
7 warranty. In no event will the authors be held liable for any damages
8 arising from the use of this software.
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product documentation would be
17 appreciated but is not required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
24 This file implements the WDL FFT library. These routines are based on the
25 DJBFFT library, which are Copyright 1999 D. J. Bernstein, djb@pobox.com
27 The DJB FFT web page is: http://cr.yp.to/djbfft.html
32 // this is based on djbfft
38 #define FFT_MAXBITLEN 15
41 #define inline __inline
44 #define PI 3.1415926535897932384626433832795
46 static WDL_FFT_COMPLEX d16
[3];
47 static WDL_FFT_COMPLEX d32
[7];
48 static WDL_FFT_COMPLEX d64
[15];
49 static WDL_FFT_COMPLEX d128
[31];
50 static WDL_FFT_COMPLEX d256
[63];
51 static WDL_FFT_COMPLEX d512
[127];
52 static WDL_FFT_COMPLEX d1024
[127];
53 static WDL_FFT_COMPLEX d2048
[255];
54 static WDL_FFT_COMPLEX d4096
[511];
55 static WDL_FFT_COMPLEX d8192
[1023];
56 static WDL_FFT_COMPLEX d16384
[2047];
57 static WDL_FFT_COMPLEX d32768
[4095];
60 #define sqrthalf (d16[1].re)
62 #define VOL *(volatile WDL_FFT_REAL *)&
64 #define TRANSFORM(a0,a1,a2,a3,wre,wim) { \
104 #define TRANSFORMHALF(a0,a1,a2,a3) { \
140 #define TRANSFORMZERO(a0,a1,a2,a3) { \
167 #define UNTRANSFORM(a0,a1,a2,a3,wre,wim) { \
211 #define UNTRANSFORMHALF(a0,a1,a2,a3) { \
245 #define UNTRANSFORMZERO(a0,a1,a2,a3) { \
270 static void c2(register WDL_FFT_COMPLEX
*a
)
272 register WDL_FFT_REAL t1
;
275 a
[1].re
= a
[0].re
- t1
;
279 a
[1].im
= a
[0].im
- t1
;
283 static inline void c4(register WDL_FFT_COMPLEX
*a
)
285 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
314 static void c8(register WDL_FFT_COMPLEX
*a
)
316 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
374 a
[5].re
= a
[4].re
- t6
;
377 a
[5].im
= a
[4].im
- t1
;
385 a
[7].im
= a
[6].im
- t5
;
388 a
[7].re
= a
[6].re
- t2
;
397 static void c16(register WDL_FFT_COMPLEX
*a
)
399 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
401 TRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
402 TRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
403 TRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
404 TRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
411 /* a[0...8n-1], w[0...2n-2]; n >= 2 */
412 static void cpass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
414 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
415 register WDL_FFT_COMPLEX
*a1
;
416 register WDL_FFT_COMPLEX
*a2
;
417 register WDL_FFT_COMPLEX
*a3
;
424 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
425 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
428 TRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
429 TRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
439 static void c32(register WDL_FFT_COMPLEX
*a
)
447 static void c64(register WDL_FFT_COMPLEX
*a
)
455 static void c128(register WDL_FFT_COMPLEX
*a
)
463 static void c256(register WDL_FFT_COMPLEX
*a
)
471 static void c512(register WDL_FFT_COMPLEX
*a
)
479 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
480 static void cpassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
482 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
483 register WDL_FFT_COMPLEX
*a1
;
484 register WDL_FFT_COMPLEX
*a2
;
485 register WDL_FFT_COMPLEX
*a3
;
486 register unsigned int k
;
493 TRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
494 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
501 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
502 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
510 TRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
511 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
519 TRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
520 TRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
530 static void c1024(register WDL_FFT_COMPLEX
*a
)
532 cpassbig(a
,d1024
,128);
538 static void c2048(register WDL_FFT_COMPLEX
*a
)
540 cpassbig(a
,d2048
,256);
546 static void c4096(register WDL_FFT_COMPLEX
*a
)
548 cpassbig(a
,d4096
,512);
554 static void c8192(register WDL_FFT_COMPLEX
*a
)
556 cpassbig(a
,d8192
,1024);
562 static void c16384(register WDL_FFT_COMPLEX
*a
)
564 cpassbig(a
,d16384
,2048);
565 c4096(a
+ 8192 + 4096);
570 static void c32768(register WDL_FFT_COMPLEX
*a
)
572 cpassbig(a
,d32768
,4096);
573 c8192(a
+ 16384 + 8192);
580 void WDL_fft_complexmul(WDL_FFT_COMPLEX
*a
,WDL_FFT_COMPLEX
*b
,int n
)
582 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
583 if (n
<2 || (n
&1)) return;
586 t1
= a
[0].re
* b
[0].re
;
587 t2
= a
[0].im
* b
[0].im
;
588 t3
= a
[0].im
* b
[0].re
;
589 t4
= a
[0].re
* b
[0].im
;
590 t5
= a
[1].re
* b
[1].re
;
591 t6
= a
[1].im
* b
[1].im
;
592 t7
= a
[1].im
* b
[1].re
;
593 t8
= a
[1].re
* b
[1].im
;
607 void WDL_fft_complexmul2(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
609 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
610 if (n
<2 || (n
&1)) return;
613 t1
= a
[0].re
* b
[0].re
;
614 t2
= a
[0].im
* b
[0].im
;
615 t3
= a
[0].im
* b
[0].re
;
616 t4
= a
[0].re
* b
[0].im
;
617 t5
= a
[1].re
* b
[1].re
;
618 t6
= a
[1].im
* b
[1].im
;
619 t7
= a
[1].im
* b
[1].re
;
620 t8
= a
[1].re
* b
[1].im
;
634 void WDL_fft_complexmul3(WDL_FFT_COMPLEX
*c
, WDL_FFT_COMPLEX
*a
, WDL_FFT_COMPLEX
*b
, int n
)
636 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
637 if (n
<2 || (n
&1)) return;
640 t1
= a
[0].re
* b
[0].re
;
641 t2
= a
[0].im
* b
[0].im
;
642 t3
= a
[0].im
* b
[0].re
;
643 t4
= a
[0].re
* b
[0].im
;
644 t5
= a
[1].re
* b
[1].re
;
645 t6
= a
[1].im
* b
[1].im
;
646 t7
= a
[1].im
* b
[1].re
;
647 t8
= a
[1].re
* b
[1].im
;
663 static inline void u4(register WDL_FFT_COMPLEX
*a
)
665 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
695 static void u8(register WDL_FFT_COMPLEX
*a
)
697 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
702 a
[5].re
= a
[4].re
- t1
;
706 a
[7].re
= a
[6].re
- t3
;
722 a
[5].im
= a
[4].im
- t2
;
726 a
[7].im
= a
[6].im
- t4
;
781 static void u16(register WDL_FFT_COMPLEX
*a
)
783 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
789 UNTRANSFORMZERO(a
[0],a
[4],a
[8],a
[12]);
790 UNTRANSFORMHALF(a
[2],a
[6],a
[10],a
[14]);
791 UNTRANSFORM(a
[1],a
[5],a
[9],a
[13],d16
[0].re
,d16
[0].im
);
792 UNTRANSFORM(a
[3],a
[7],a
[11],a
[15],d16
[0].im
,d16
[0].re
);
795 /* a[0...8n-1], w[0...2n-2] */
796 static void upass(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
798 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
799 register WDL_FFT_COMPLEX
*a1
;
800 register WDL_FFT_COMPLEX
*a2
;
801 register WDL_FFT_COMPLEX
*a3
;
808 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
809 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
812 UNTRANSFORM(a
[2],a1
[2],a2
[2],a3
[2],w
[1].re
,w
[1].im
);
813 UNTRANSFORM(a
[3],a1
[3],a2
[3],a3
[3],w
[2].re
,w
[2].im
);
823 static void u32(register WDL_FFT_COMPLEX
*a
)
831 static void u64(register WDL_FFT_COMPLEX
*a
)
839 static void u128(register WDL_FFT_COMPLEX
*a
)
847 static void u256(register WDL_FFT_COMPLEX
*a
)
855 static void u512(register WDL_FFT_COMPLEX
*a
)
864 /* a[0...8n-1], w[0...n-2]; n even, n >= 4 */
865 static void upassbig(register WDL_FFT_COMPLEX
*a
,register const WDL_FFT_COMPLEX
*w
,register unsigned int n
)
867 register WDL_FFT_REAL t1
, t2
, t3
, t4
, t5
, t6
, t7
, t8
;
868 register WDL_FFT_COMPLEX
*a1
;
869 register WDL_FFT_COMPLEX
*a2
;
870 register WDL_FFT_COMPLEX
*a3
;
871 register unsigned int k
;
878 UNTRANSFORMZERO(a
[0],a1
[0],a2
[0],a3
[0]);
879 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].re
,w
[0].im
);
886 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[1].re
,w
[1].im
);
887 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[2].re
,w
[2].im
);
895 UNTRANSFORMHALF(a
[0],a1
[0],a2
[0],a3
[0]);
896 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[0].im
,w
[0].re
);
904 UNTRANSFORM(a
[0],a1
[0],a2
[0],a3
[0],w
[-1].im
,w
[-1].re
);
905 UNTRANSFORM(a
[1],a1
[1],a2
[1],a3
[1],w
[-2].im
,w
[-2].re
);
916 static void u1024(register WDL_FFT_COMPLEX
*a
)
921 upassbig(a
,d1024
,128);
924 static void u2048(register WDL_FFT_COMPLEX
*a
)
929 upassbig(a
,d2048
,256);
933 static void u4096(register WDL_FFT_COMPLEX
*a
)
938 upassbig(a
,d4096
,512);
941 static void u8192(register WDL_FFT_COMPLEX
*a
)
946 upassbig(a
,d8192
,1024);
949 static void u16384(register WDL_FFT_COMPLEX
*a
)
953 u4096(a
+ 8192 + 4096);
954 upassbig(a
,d16384
,2048);
957 static void u32768(register WDL_FFT_COMPLEX
*a
)
961 u8192(a
+ 16384 + 8192 );
962 upassbig(a
,d32768
,4096);
966 static void __fft_gen(WDL_FFT_COMPLEX
*buf
, const WDL_FFT_COMPLEX
*buf2
, int sz
, int isfull
)
969 double div
=PI
*0.25/(sz
+1);
971 if (isfull
) div
*=2.0;
973 for (x
= 0; x
< sz
; x
++)
975 if (!(x
& 1) || !buf2
)
977 buf
[x
].re
= (WDL_FFT_REAL
) cos((x
+1)*div
);
978 buf
[x
].im
= (WDL_FFT_REAL
) sin((x
+1)*div
);
982 buf
[x
].re
= buf2
[x
>> 1].re
;
983 buf
[x
].im
= buf2
[x
>> 1].im
;
988 #ifndef WDL_FFT_NO_PERMUTE
990 static unsigned int fftfreq_c(unsigned int i
,unsigned int n
)
994 if (n
<= 2) return i
;
997 if (i
< m
) return fftfreq_c(i
,m
) << 1;
1001 if (i
< m
) return (fftfreq_c(i
,m
) << 2) + 1;
1003 return ((fftfreq_c(i
,m
) << 2) - 1) & (n
- 1);
1006 static int _idxperm
[2<<FFT_MAXBITLEN
];
1008 static void idx_perm_calc(int offs
, int n
)
1012 for (i
= 1; i
< n
; ++i
) {
1013 j
= fftfreq_c(i
, n
);
1014 _idxperm
[offs
+n
-j
] = i
;
1018 int WDL_fft_permute(int fftsize
, int idx
)
1020 return _idxperm
[fftsize
+idx
-2];
1022 int *WDL_fft_permute_tab(int fftsize
)
1024 return _idxperm
+ fftsize
- 2;
1032 static int ffttabinit
;
1038 #define fft_gen(x,y,z) __fft_gen(x,y,sizeof(x)/sizeof(x[0]),z)
1042 fft_gen(d128
,d64
,1);
1043 fft_gen(d256
,d128
,1);
1044 fft_gen(d512
,d256
,1);
1045 fft_gen(d1024
,d512
,0);
1046 fft_gen(d2048
,d1024
,0);
1047 fft_gen(d4096
,d2048
,0);
1048 fft_gen(d8192
,d4096
,0);
1049 fft_gen(d16384
,d8192
,0);
1050 fft_gen(d32768
,d16384
,0);
1053 #ifndef WDL_FFT_NO_PERMUTE
1055 for (i
= 2; i
<= 32768; i
*= 2)
1057 idx_perm_calc(offs
, i
);
1065 void WDL_fft(WDL_FFT_COMPLEX
*buf
, int len
, int isInverse
)
1069 case 2: c2(buf
); break;
1070 #define TMP(x) case x: if (!isInverse) c##x(buf); else u##x(buf); break;
1089 static inline void r2(register WDL_FFT_REAL
*a
)
1091 register WDL_FFT_REAL t1
, t2
;
1099 static inline void v2(register WDL_FFT_REAL
*a
)
1101 register WDL_FFT_REAL t1
, t2
;
1109 static void two_for_one(WDL_FFT_REAL
* buf
, const WDL_FFT_COMPLEX
*d
, int len
, int isInverse
)
1111 const unsigned int half
= (unsigned)len
>> 1, quart
= half
>> 1, eighth
= quart
>> 1;
1112 const int *permute
= WDL_fft_permute_tab(half
);
1115 WDL_FFT_COMPLEX
*p
, *q
, tw
, sum
, diff
;
1116 WDL_FFT_REAL tw1
, tw2
;
1120 WDL_fft((WDL_FFT_COMPLEX
*)buf
, half
, isInverse
);
1128 /* Source: http://www.katjaas.nl/realFFT/realFFT2.html */
1130 for (i
= 1; i
< quart
; ++i
)
1132 p
= (WDL_FFT_COMPLEX
*)buf
+ permute
[i
];
1133 q
= (WDL_FFT_COMPLEX
*)buf
+ permute
[half
- i
];
1135 /* tw.re = cos(2*PI * i / len);
1136 tw.im = sin(2*PI * i / len); */
1144 else if (i
> eighth
)
1152 tw
.re
= tw
.im
= sqrthalf
;
1155 if (!isInverse
) tw
.re
= -tw
.re
;
1157 sum
.re
= p
->re
+ q
->re
;
1158 sum
.im
= p
->im
+ q
->im
;
1159 diff
.re
= p
->re
- q
->re
;
1160 diff
.im
= p
->im
- q
->im
;
1162 tw1
= tw
.re
* sum
.im
+ tw
.im
* diff
.re
;
1163 tw2
= tw
.im
* sum
.im
- tw
.re
* diff
.re
;
1165 p
->re
= sum
.re
- tw1
;
1166 p
->im
= diff
.im
- tw2
;
1167 q
->re
= sum
.re
+ tw1
;
1168 q
->im
= -(diff
.im
+ tw2
);
1171 p
= (WDL_FFT_COMPLEX
*)buf
+ permute
[i
];
1175 if (isInverse
) WDL_fft((WDL_FFT_COMPLEX
*)buf
, half
, isInverse
);
1178 void WDL_real_fft(WDL_FFT_REAL
* buf
, int len
, int isInverse
)
1182 case 2: if (!isInverse
) r2(buf
); else v2(buf
); break;
1183 case 4: case 8: two_for_one(buf
, 0, len
, isInverse
); break;
1184 #define TMP(x) case x: two_for_one(buf, d##x, len, isInverse); break;