scide: keyboard navigation - move among documents via Alt-Left/Right
[supercollider.git] / common / fftlib.c
blobcd2220288b2e4f22127199b783be0932bd56420d
1 /* FFT library */
2 /* Library of in-place fast fourier transforms */
3 /* Forward and inverse complex transforms */
4 /* and real forward transform */
5 /* Optimized for RISC processors with many registers */
6 /* Version 1.1 John Green NUWC New London CT January 96 */
7 /* Version 1.1 renamed as fftlib from fftbig */
8 /* Version 1.1 removed (float *)((char *) ptr) optimization */
9 /* Version 1.0 John Green NUWC New London CT December 95 */
10 /* (John Green) green_jt@vsdec.nl.nuwc.navy.mil */
11 /* green_jt@vsdec.nl.nuwc.navy.mil */
13 #include <math.h>
14 #include "fftlib.h"
16 #define MAXMROOT 9 /* 2^(MAXMROOT-1) = # of elements in BRcnt */
18 /* Bit reversed counter */
19 static const unsigned char BRcnt[256] = {
20 0, 128, 64, 192, 32, 160, 96, 224, 16, 144, 80, 208,
21 48, 176, 112, 240, 8, 136, 72, 200, 40, 168, 104, 232,
22 24, 152, 88, 216, 56, 184, 120, 248, 4, 132, 68, 196,
23 36, 164, 100, 228, 20, 148, 84, 212, 52, 180, 116, 244,
24 12, 140, 76, 204, 44, 172, 108, 236, 28, 156, 92, 220,
25 60, 188, 124, 252, 2, 130, 66, 194, 34, 162, 98, 226,
26 18, 146, 82, 210, 50, 178, 114, 242, 10, 138, 74, 202,
27 42, 170, 106, 234, 26, 154, 90, 218, 58, 186, 122, 250,
28 6, 134, 70, 198, 38, 166, 102, 230, 22, 150, 86, 214,
29 54, 182, 118, 246, 14, 142, 78, 206, 46, 174, 110, 238,
30 30, 158, 94, 222, 62, 190, 126, 254, 1, 129, 65, 193,
31 33, 161, 97, 225, 17, 145, 81, 209, 49, 177, 113, 241,
32 9, 137, 73, 201, 41, 169, 105, 233, 25, 153, 89, 217,
33 57, 185, 121, 249, 5, 133, 69, 197, 37, 165, 101, 229,
34 21, 149, 85, 213, 53, 181, 117, 245, 13, 141, 77, 205,
35 45, 173, 109, 237, 29, 157, 93, 221, 61, 189, 125, 253,
36 3, 131, 67, 195, 35, 163, 99, 227, 19, 147, 83, 211,
37 51, 179, 115, 243, 11, 139, 75, 203, 43, 171, 107, 235,
38 27, 155, 91, 219, 59, 187, 123, 251, 7, 135, 71, 199,
39 39, 167, 103, 231, 23, 151, 87, 215, 55, 183, 119, 247,
40 15, 143, 79, 207, 47, 175, 111, 239, 31, 159, 95, 223,
41 63, 191, 127, 255 };
43 /* Table of powers of 2 */
44 static const long Ntbl[21] = {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048,
45 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576};
47 long FFTInit(long *fftMptr, long fftN, float *Utbl){
48 /* Compute cosine table and check size for complex ffts */
49 /* INPUTS */
50 /* fftN = size of fft */
51 /* OUTPUTS */
52 /* *fftMptr = log2 of fft size */
53 /* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
54 /* RETURNS */
55 /* 1 if fftN is invalid, 0 otherwise */
56 long i1, ErrVal;
57 ErrVal = 0;
58 *fftMptr = (long)(log(fftN)/log(2.0) + 0.5);
59 if ((*fftMptr >= 3) & (*fftMptr <= 19) & (int)(pow(2,*fftMptr)+.5) == fftN)
60 for (i1 = 0; i1 <= fftN/4; i1++)
61 Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN );
62 else
63 ErrVal = 1;
64 return ErrVal;
67 long rFFTInit(long *fftMptr, long fftN, float *Utbl){
68 /* Compute cosine table and check size for a real input fft */
69 /* INPUTS */
70 /* fftN = size of fft */
71 /* OUTPUTS */
72 /* *fftMptr = log2 of fft size */
73 /* *Utbl = cosine table with fftN/4 + 1 entries (angles = 0 to pi/2 inclusive) */
74 /* RETURNS */
75 /* 1 if fftN is invalid, 0 otherwise */
76 long i1, ErrVal;
77 ErrVal = 0;
78 *fftMptr = (long)(log(fftN)/log(2.0) + 0.5);
79 if ((*fftMptr >= 4) & (*fftMptr <= 20) & (int)(pow(2,*fftMptr)+.5) == fftN)
81 for (i1 = 0; i1 <= fftN/4; i1++)
82 Utbl[i1] = cos( ( 3.1415926535897932384626433832795 * 2.0 * i1 ) / fftN );
83 else
84 ErrVal = 1;
85 return ErrVal;
88 void ffts(float *ioptr, long M, long Rows, float *Utbl){
89 /* Compute in-place complex fft on the rows of the input array */
90 /* INPUTS */
91 /* M = log2 of fft size */
92 /* *ioptr = input data array */
93 /* *Utbl = cosine table */
94 /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
95 /* OUTPUTS */
96 /* *ioptr = output data array */
98 long Flyinc;
99 long FlyOffsetA;
100 long FlyOffsetAIm;
101 long FlyOffsetB;
102 long FlyOffsetBIm;
103 long NsameU1;
104 long NsameU2;
105 long NsameU4;
106 long diffUcnt;
107 long LoopCnt;
109 float scale;
110 float fly0r;
111 float fly0i;
112 float fly1r;
113 float fly1i;
114 float fly2r;
115 float fly2i;
116 float fly3r;
117 float fly3i;
118 float fly4r;
119 float fly4i;
120 float fly5r;
121 float fly5i;
122 float fly6r;
123 float fly6i;
124 float fly7r;
125 float fly7i;
126 float U0r;
127 float U0i;
128 float U1r;
129 float U1i;
130 float U2r;
131 float U2i;
132 float U3r;
133 float U3i;
134 float t0r;
135 float t0i;
136 float t1r;
137 float t1i;
139 float *fly0P;
140 float *fly1P;
141 float *fly2P;
142 float *fly3P;
144 float *U0rP;
145 float *U0iP;
146 float *U1rP;
147 float *U1iP;
148 float *U2rP;
149 float *U2iP;
150 float *IOP;
151 long U3offset;
153 long stage;
154 long NdiffU;
155 long LoopN;
157 const long BRshift = MAXMROOT - (M>>1);
158 const long Nrems2 = Ntbl[M-(M>>1)+1];
159 const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2;
161 scale = 2.0;
162 for (;Rows>0;Rows--){
164 FlyOffsetA = Ntbl[M] * (2/2);
165 FlyOffsetAIm = FlyOffsetA + 1;
166 FlyOffsetB = FlyOffsetA + 2;
167 FlyOffsetBIm = FlyOffsetB + 1;
169 /* BitrevR2 ** bit reverse and first radix 2 stage ******/
171 for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
172 for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
173 LoopCnt = (Ntbl[(M>>1)-1]-1);
174 fly0P = ioptr+ Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
175 IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
176 fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
177 fly0r = *(fly0P);
178 fly0i = *(fly0P+1);
179 fly1r = *(fly0P+FlyOffsetA);
180 fly1i = *(fly0P+FlyOffsetAIm);
181 for (; LoopCnt > LoopN;){
182 fly2r = *(fly0P+2);
183 fly2i = *(fly0P+(2+1));
184 fly3r = *(fly0P+FlyOffsetB);
185 fly3i = *(fly0P+FlyOffsetBIm);
186 fly4r = *(fly1P);
187 fly4i = *(fly1P+1);
188 fly5r = *(fly1P+FlyOffsetA);
189 fly5i = *(fly1P+FlyOffsetAIm);
190 fly6r = *(fly1P+2);
191 fly6i = *(fly1P+(2+1));
192 fly7r = *(fly1P+FlyOffsetB);
193 fly7i = *(fly1P+FlyOffsetBIm);
195 t0r = fly0r + fly1r;
196 t0i = fly0i + fly1i;
197 fly1r = fly0r - fly1r;
198 fly1i = fly0i - fly1i;
199 t1r = fly2r + fly3r;
200 t1i = fly2i + fly3i;
201 fly3r = fly2r - fly3r;
202 fly3i = fly2i - fly3i;
203 fly0r = fly4r + fly5r;
204 fly0i = fly4i + fly5i;
205 fly5r = fly4r - fly5r;
206 fly5i = fly4i - fly5i;
207 fly2r = fly6r + fly7r;
208 fly2i = fly6i + fly7i;
209 fly7r = fly6r - fly7r;
210 fly7i = fly6i - fly7i;
212 *(fly1P) = t0r;
213 *(fly1P+1) = t0i;
214 *(fly1P+2) = fly1r;
215 *(fly1P+(2+1)) = fly1i;
216 *(fly1P+FlyOffsetA) = t1r;
217 *(fly1P+FlyOffsetAIm) = t1i;
218 *(fly1P+FlyOffsetB) = fly3r;
219 *(fly1P+FlyOffsetBIm) = fly3i;
220 *(fly0P) = fly0r;
221 *(fly0P+1) = fly0i;
222 *(fly0P+2) = fly5r;
223 *(fly0P+(2+1)) = fly5i;
224 *(fly0P+FlyOffsetA) = fly2r;
225 *(fly0P+FlyOffsetAIm) = fly2i;
226 *(fly0P+FlyOffsetB) = fly7r;
227 *(fly0P+FlyOffsetBIm) = fly7i;
229 fly0P -= Nrems2;
230 fly0r = *(fly0P);
231 fly0i = *(fly0P+1);
232 fly1r = *(fly0P+FlyOffsetA);
233 fly1i = *(fly0P+FlyOffsetAIm);
234 LoopCnt -= 1;
235 fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
237 fly2r = *(fly0P+2);
238 fly2i = *(fly0P+(2+1));
239 fly3r = *(fly0P+FlyOffsetB);
240 fly3i = *(fly0P+FlyOffsetBIm);
242 t0r = fly0r + fly1r;
243 t0i = fly0i + fly1i;
244 fly1r = fly0r - fly1r;
245 fly1i = fly0i - fly1i;
246 t1r = fly2r + fly3r;
247 t1i = fly2i + fly3i;
248 fly3r = fly2r - fly3r;
249 fly3i = fly2i - fly3i;
251 *(fly0P) = t0r;
252 *(fly0P+1) = t0i;
253 *(fly0P+2) = fly1r;
254 *(fly0P+(2+1)) = fly1i;
255 *(fly0P+FlyOffsetA) = t1r;
256 *(fly0P+FlyOffsetAIm) = t1i;
257 *(fly0P+FlyOffsetB) = fly3r;
258 *(fly0P+FlyOffsetBIm) = fly3i;
265 /**** FFTC **************/
267 NdiffU = 2;
268 Flyinc = (NdiffU);
270 NsameU4 = Ntbl[M]/4;
271 LoopN = Ntbl[M-3];
273 stage = ((M-1)/3);
274 if ( (M-1-(stage * 3)) != 0 ){
275 FlyOffsetA = Flyinc << 2;
276 FlyOffsetB = FlyOffsetA << 1;
277 FlyOffsetAIm = FlyOffsetA + 1;
278 FlyOffsetBIm = FlyOffsetB + 1;
279 if ( (M-1-(stage * 3)) == 1 ){
280 /* 1 radix 2 stage */
282 IOP = ioptr;
283 fly0P = IOP;
284 fly1P = (IOP+Flyinc);
285 fly2P = (fly1P+Flyinc);
286 fly3P = (fly2P+Flyinc);
288 /* Butterflys */
290 t0 - - t0
291 t1 - - t1
292 f2 - 1- f5
293 f1 - -i- f7
294 f4 - - f4
295 f0 - - f0
296 f6 - 1- f2
297 f3 - -i- f1
300 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
301 t0r = *(fly0P);
302 t0i = *(fly0P + 1);
303 t1r = *(fly1P);
304 t1i = *(fly1P + 1);
305 fly2r = *(fly2P);
306 fly2i = *(fly2P + 1);
307 fly1r = *(fly3P);
308 fly1i = *(fly3P + 1);
309 fly4r = *(fly0P + FlyOffsetA);
310 fly4i = *(fly0P + FlyOffsetAIm);
311 fly0r = *(fly1P + FlyOffsetA);
312 fly0i = *(fly1P + FlyOffsetAIm);
313 fly6r = *(fly2P + FlyOffsetA);
314 fly6i = *(fly2P + FlyOffsetAIm);
315 fly3r = *(fly3P + FlyOffsetA);
316 fly3i = *(fly3P + FlyOffsetAIm);
318 fly5r = t0r - fly2r;
319 fly5i = t0i - fly2i;
320 t0r = t0r + fly2r;
321 t0i = t0i + fly2i;
323 fly7r = t1r - fly1i;
324 fly7i = t1i + fly1r;
325 t1r = t1r + fly1i;
326 t1i = t1i - fly1r;
328 fly2r = fly4r - fly6r;
329 fly2i = fly4i - fly6i;
330 fly4r = fly4r + fly6r;
331 fly4i = fly4i + fly6i;
333 fly1r = fly0r - fly3i;
334 fly1i = fly0i + fly3r;
335 fly0r = fly0r + fly3i;
336 fly0i = fly0i - fly3r;
338 *(fly2P) = fly5r;
339 *(fly2P + 1) = fly5i;
340 *(fly0P) = t0r;
341 *(fly0P + 1) = t0i;
342 *(fly3P) = fly7r;
343 *(fly3P + 1) = fly7i;
344 *(fly1P) = t1r;
345 *(fly1P + 1) = t1i;
346 *(fly2P + FlyOffsetA) = fly2r;
347 *(fly2P + FlyOffsetAIm) = fly2i;
348 *(fly0P + FlyOffsetA) = fly4r;
349 *(fly0P + FlyOffsetAIm) = fly4i;
350 *(fly3P + FlyOffsetA) = fly1r;
351 *(fly3P + FlyOffsetAIm) = fly1i;
352 *(fly1P + FlyOffsetA) = fly0r;
353 *(fly1P + FlyOffsetAIm) = fly0i;
355 fly0P = (fly0P + FlyOffsetB);
356 fly1P = (fly1P + FlyOffsetB);
357 fly2P = (fly2P + FlyOffsetB);
358 fly3P = (fly3P + FlyOffsetB);
361 NsameU4 >>= 1;
362 LoopN >>= 1;
363 NdiffU <<= 1;
364 Flyinc = Flyinc << 1;
366 else{
367 /* 1 radix 4 stage */
368 IOP = ioptr;
370 U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
371 U3i = U3r;
372 fly0P = IOP;
373 fly1P = (IOP+Flyinc);
374 fly2P = (fly1P+Flyinc);
375 fly3P = (fly2P+Flyinc);
377 /* Butterflys */
379 t0 - - t0 - - t0
380 t1 - - t1 - - t1
381 f2 - 1- f5 - - f5
382 f1 - -i- f7 - - f7
383 f4 - - f4 - 1- f6
384 f0 - - f0 -U3 - f3
385 f6 - 1- f2 - -i- f4
386 f3 - -i- f1 -U3a- f2
389 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
390 t0r = *(fly0P);
391 t0i = *(fly0P + 1);
392 t1r = *(fly1P);
393 t1i = *(fly1P + 1);
394 fly2r = *(fly2P);
395 fly2i = *(fly2P + 1);
396 fly1r = *(fly3P);
397 fly1i = *(fly3P + 1);
398 fly4r = *(fly0P + FlyOffsetA);
399 fly4i = *(fly0P + FlyOffsetAIm);
400 fly0r = *(fly1P + FlyOffsetA);
401 fly0i = *(fly1P + FlyOffsetAIm);
402 fly6r = *(fly2P + FlyOffsetA);
403 fly6i = *(fly2P + FlyOffsetAIm);
404 fly3r = *(fly3P + FlyOffsetA);
405 fly3i = *(fly3P + FlyOffsetAIm);
407 fly5r = t0r - fly2r;
408 fly5i = t0i - fly2i;
409 t0r = t0r + fly2r;
410 t0i = t0i + fly2i;
412 fly7r = t1r - fly1i;
413 fly7i = t1i + fly1r;
414 t1r = t1r + fly1i;
415 t1i = t1i - fly1r;
417 fly2r = fly4r - fly6r;
418 fly2i = fly4i - fly6i;
419 fly4r = fly4r + fly6r;
420 fly4i = fly4i + fly6i;
422 fly1r = fly0r - fly3i;
423 fly1i = fly0i + fly3r;
424 fly0r = fly0r + fly3i;
425 fly0i = fly0i - fly3r;
428 fly6r = t0r - fly4r;
429 fly6i = t0i - fly4i;
430 t0r = t0r + fly4r;
431 t0i = t0i + fly4i;
433 fly3r = fly5r - fly2i;
434 fly3i = fly5i + fly2r;
435 fly5r = fly5r + fly2i;
436 fly5i = fly5i - fly2r;
438 fly4r = t1r - U3r * fly0r;
439 fly4r = fly4r - U3i * fly0i;
440 fly4i = t1i + U3i * fly0r;
441 fly4i = fly4i - U3r * fly0i;
442 t1r = scale * t1r - fly4r;
443 t1i = scale * t1i - fly4i;
445 fly2r = fly7r + U3i * fly1r;
446 fly2r = fly2r - U3r * fly1i;
447 fly2i = fly7i + U3r * fly1r;
448 fly2i = fly2i + U3i * fly1i;
449 fly7r = scale * fly7r - fly2r;
450 fly7i = scale * fly7i - fly2i;
452 *(fly0P + FlyOffsetA) = fly6r;
453 *(fly0P + FlyOffsetAIm) = fly6i;
454 *(fly0P) = t0r;
455 *(fly0P + 1) = t0i;
456 *(fly2P + FlyOffsetA) = fly3r;
457 *(fly2P + FlyOffsetAIm) = fly3i;
458 *(fly2P) = fly5r;
459 *(fly2P + 1) = fly5i;
460 *(fly1P + FlyOffsetA) = fly4r;
461 *(fly1P + FlyOffsetAIm) = fly4i;
462 *(fly1P) = t1r;
463 *(fly1P + 1) = t1i;
464 *(fly3P + FlyOffsetA) = fly2r;
465 *(fly3P + FlyOffsetAIm) = fly2i;
466 *(fly3P) = fly7r;
467 *(fly3P + 1) = fly7i;
469 fly0P = (fly0P + FlyOffsetB);
470 fly1P = (fly1P + FlyOffsetB);
471 fly2P = (fly2P + FlyOffsetB);
472 fly3P = (fly3P + FlyOffsetB);
476 NsameU4 >>= 2;
477 LoopN >>= 2;
478 NdiffU <<= 2;
479 Flyinc = Flyinc << 2;
483 NsameU2 = NsameU4 >> 1;
484 NsameU1 = NsameU2 >> 1;
485 Flyinc <<= 1;
486 FlyOffsetA = Flyinc << 2;
487 FlyOffsetB = FlyOffsetA << 1;
488 FlyOffsetAIm = FlyOffsetA + 1;
489 FlyOffsetBIm = FlyOffsetB + 1;
490 LoopN >>= 1;
491 /* ****** RADIX 8 Stages */
492 for (stage = stage<<1; stage > 0 ; stage--){
494 /* an fft stage is done in two parts to ease use of the single quadrant cos table */
496 /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
497 if(!(stage&1)){
498 U0rP = &Utbl[0];
499 U0iP = &Utbl[Ntbl[M-2]];
500 U1rP = U0rP;
501 U1iP = U0iP;
502 U2rP = U0rP;
503 U2iP = U0iP;
504 U3offset = (Ntbl[M]) / 8;
506 IOP = ioptr;
508 U0r = *U0rP,
509 U0i = *U0iP;
510 U1r = *U1rP,
511 U1i = *U1iP;
512 U2r = *U2rP,
513 U2i = *U2iP;
514 U3r = *( U2rP + U3offset);
515 U3i = *( U2iP - U3offset);
518 fly0P = IOP;
519 fly1P = (IOP+Flyinc);
520 fly2P = (fly1P+Flyinc);
521 fly3P = (fly2P+Flyinc);
523 for (diffUcnt = (NdiffU)>>1; diffUcnt != 0; diffUcnt--){
525 /* Butterflys */
527 f0 - - t0 - - t0 - - t0
528 f1 -U0 - t1 - - t1 - - t1
529 f2 - - f2 -U1 - f5 - - f3
530 f3 -U0 - f1 -U1a- f7 - - f7
531 f4 - - f4 - - f4 -U2 - f6
532 f5 -U0 - f0 - - f0 -U3 - f4
533 f6 - - f6 -U1 - f2 -U2a- f2
534 f7 -U0 - f3 -U1a- f1 -U3a- f5
537 fly0r = *(IOP);
538 fly0i = *(IOP+1);
539 fly1r = *(fly1P);
540 fly1i = *(fly1P+1);
542 for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
544 fly2r = *(fly2P);
545 fly2i = *(fly2P + 1);
546 fly3r = *(fly3P);
547 fly3i = *(fly3P + 1);
548 fly4r = *(fly0P + FlyOffsetA);
549 fly4i = *(fly0P + FlyOffsetAIm);
550 fly5r = *(fly1P + FlyOffsetA);
551 fly5i = *(fly1P + FlyOffsetAIm);
552 fly6r = *(fly2P + FlyOffsetA);
553 fly6i = *(fly2P + FlyOffsetAIm);
554 fly7r = *(fly3P + FlyOffsetA);
555 fly7i = *(fly3P + FlyOffsetAIm);
557 t1r = fly0r - U0r * fly1r;
558 t1r = t1r - U0i * fly1i;
559 t1i = fly0i + U0i * fly1r;
560 t1i = t1i - U0r * fly1i;
561 t0r = scale * fly0r - t1r;
562 t0i = scale * fly0i - t1i;
564 fly1r = fly2r - U0r * fly3r;
565 fly1r = fly1r - U0i * fly3i;
566 fly1i = fly2i + U0i * fly3r;
567 fly1i = fly1i - U0r * fly3i;
568 fly2r = scale * fly2r - fly1r;
569 fly2i = scale * fly2i - fly1i;
571 fly0r = fly4r - U0r * fly5r;
572 fly0r = fly0r - U0i * fly5i;
573 fly0i = fly4i + U0i * fly5r;
574 fly0i = fly0i - U0r * fly5i;
575 fly4r = scale * fly4r - fly0r;
576 fly4i = scale * fly4i - fly0i;
578 fly3r = fly6r - U0r * fly7r;
579 fly3r = fly3r - U0i * fly7i;
580 fly3i = fly6i + U0i * fly7r;
581 fly3i = fly3i - U0r * fly7i;
582 fly6r = scale * fly6r - fly3r;
583 fly6i = scale * fly6i - fly3i;
586 fly5r = t0r - U1r * fly2r;
587 fly5r = fly5r - U1i * fly2i;
588 fly5i = t0i + U1i * fly2r;
589 fly5i = fly5i - U1r * fly2i;
590 t0r = scale * t0r - fly5r;
591 t0i = scale * t0i - fly5i;
593 fly7r = t1r + U1i * fly1r;
594 fly7r = fly7r - U1r * fly1i;
595 fly7i = t1i + U1r * fly1r;
596 fly7i = fly7i + U1i * fly1i;
597 t1r = scale * t1r - fly7r;
598 t1i = scale * t1i - fly7i;
600 fly2r = fly4r - U1r * fly6r;
601 fly2r = fly2r - U1i * fly6i;
602 fly2i = fly4i + U1i * fly6r;
603 fly2i = fly2i - U1r * fly6i;
604 fly4r = scale * fly4r - fly2r;
605 fly4i = scale * fly4i - fly2i;
607 fly1r = fly0r + U1i * fly3r;
608 fly1r = fly1r - U1r * fly3i;
609 fly1i = fly0i + U1r * fly3r;
610 fly1i = fly1i + U1i * fly3i;
611 fly0r = scale * fly0r - fly1r;
612 fly0i = scale * fly0i - fly1i;
614 fly6r = t0r - U2r * fly4r;
615 fly6r = fly6r - U2i * fly4i;
616 fly6i = t0i + U2i * fly4r;
617 fly6i = fly6i - U2r * fly4i;
618 t0r = scale * t0r - fly6r;
619 t0i = scale * t0i - fly6i;
621 fly3r = fly5r - U2i * fly2r;
622 fly3r = fly3r + U2r * fly2i;
623 fly3i = fly5i - U2r * fly2r;
624 fly3i = fly3i - U2i * fly2i;
625 fly2r = scale * fly5r - fly3r;
626 fly2i = scale * fly5i - fly3i;
628 fly4r = t1r - U3r * fly0r;
629 fly4r = fly4r - U3i * fly0i;
630 fly4i = t1i + U3i * fly0r;
631 fly4i = fly4i - U3r * fly0i;
632 t1r = scale * t1r - fly4r;
633 t1i = scale * t1i - fly4i;
635 fly5r = fly7r + U3i * fly1r;
636 fly5r = fly5r - U3r * fly1i;
637 fly5i = fly7i + U3r * fly1r;
638 fly5i = fly5i + U3i * fly1i;
639 fly7r = scale * fly7r - fly5r;
640 fly7i = scale * fly7i - fly5i;
642 *(fly0P + FlyOffsetA) = fly6r;
643 *(fly0P + FlyOffsetAIm) = fly6i;
644 *(fly0P) = t0r;
645 *(fly0P + 1) = t0i;
646 *(fly2P) = fly3r;
647 *(fly2P + 1) = fly3i;
648 *(fly2P + FlyOffsetA) = fly2r;
649 *(fly2P + FlyOffsetAIm) = fly2i;
651 fly0r = *(fly0P + FlyOffsetB);
652 fly0i = *(fly0P + FlyOffsetBIm);
654 *(fly1P + FlyOffsetA) = fly4r;
655 *(fly1P + FlyOffsetAIm) = fly4i;
656 *(fly1P) = t1r;
657 *(fly1P + 1) = t1i;
659 fly1r = *(fly1P + FlyOffsetB);
660 fly1i = *(fly1P + FlyOffsetBIm);
662 *(fly3P + FlyOffsetA) = fly5r;
663 *(fly3P + FlyOffsetAIm) = fly5i;
664 *(fly3P) = fly7r;
665 *(fly3P + 1) = fly7i;
667 fly0P = (fly0P + FlyOffsetB);
668 fly1P = (fly1P + FlyOffsetB);
669 fly2P = (fly2P + FlyOffsetB);
670 fly3P = (fly3P + FlyOffsetB);
672 fly2r = *(fly2P);
673 fly2i = *(fly2P + 1);
674 fly3r = *(fly3P);
675 fly3i = *(fly3P + 1);
676 fly4r = *(fly0P + FlyOffsetA);
677 fly4i = *(fly0P + FlyOffsetAIm);
678 fly5r = *(fly1P + FlyOffsetA);
679 fly5i = *(fly1P + FlyOffsetAIm);
680 fly6r = *(fly2P + FlyOffsetA);
681 fly6i = *(fly2P + FlyOffsetAIm);
682 fly7r = *(fly3P + FlyOffsetA);
683 fly7i = *(fly3P + FlyOffsetAIm);
685 t1r = fly0r - U0r * fly1r;
686 t1r = t1r - U0i * fly1i;
687 t1i = fly0i + U0i * fly1r;
688 t1i = t1i - U0r * fly1i;
689 t0r = scale * fly0r - t1r;
690 t0i = scale * fly0i - t1i;
692 fly1r = fly2r - U0r * fly3r;
693 fly1r = fly1r - U0i * fly3i;
694 fly1i = fly2i + U0i * fly3r;
695 fly1i = fly1i - U0r * fly3i;
696 fly2r = scale * fly2r - fly1r;
697 fly2i = scale * fly2i - fly1i;
699 fly0r = fly4r - U0r * fly5r;
700 fly0r = fly0r - U0i * fly5i;
701 fly0i = fly4i + U0i * fly5r;
702 fly0i = fly0i - U0r * fly5i;
703 fly4r = scale * fly4r - fly0r;
704 fly4i = scale * fly4i - fly0i;
706 fly3r = fly6r - U0r * fly7r;
707 fly3r = fly3r - U0i * fly7i;
708 fly3i = fly6i + U0i * fly7r;
709 fly3i = fly3i - U0r * fly7i;
710 fly6r = scale * fly6r - fly3r;
711 fly6i = scale * fly6i - fly3i;
713 fly5r = t0r - U1r * fly2r;
714 fly5r = fly5r - U1i * fly2i;
715 fly5i = t0i + U1i * fly2r;
716 fly5i = fly5i - U1r * fly2i;
717 t0r = scale * t0r - fly5r;
718 t0i = scale * t0i - fly5i;
720 fly7r = t1r + U1i * fly1r;
721 fly7r = fly7r - U1r * fly1i;
722 fly7i = t1i + U1r * fly1r;
723 fly7i = fly7i + U1i * fly1i;
724 t1r = scale * t1r - fly7r;
725 t1i = scale * t1i - fly7i;
727 fly2r = fly4r - U1r * fly6r;
728 fly2r = fly2r - U1i * fly6i;
729 fly2i = fly4i + U1i * fly6r;
730 fly2i = fly2i - U1r * fly6i;
731 fly4r = scale * fly4r - fly2r;
732 fly4i = scale * fly4i - fly2i;
734 fly1r = fly0r + U1i * fly3r;
735 fly1r = fly1r - U1r * fly3i;
736 fly1i = fly0i + U1r * fly3r;
737 fly1i = fly1i + U1i * fly3i;
738 fly0r = scale * fly0r - fly1r;
739 fly0i = scale * fly0i - fly1i;
741 U0i = *(U0iP = (U0iP - NsameU4));
742 U0r = *(U0rP = (U0rP + NsameU4));
743 U1r = *(U1rP = (U1rP + NsameU2));
744 U1i = *(U1iP = (U1iP - NsameU2));
745 if(stage&1)
746 U0r = -U0r;
748 fly6r = t0r - U2r * fly4r;
749 fly6r = fly6r - U2i * fly4i;
750 fly6i = t0i + U2i * fly4r;
751 fly6i = fly6i - U2r * fly4i;
752 t0r = scale * t0r - fly6r;
753 t0i = scale * t0i - fly6i;
755 fly3r = fly5r - U2i * fly2r;
756 fly3r = fly3r + U2r * fly2i;
757 fly3i = fly5i - U2r * fly2r;
758 fly3i = fly3i - U2i * fly2i;
759 fly2r = scale * fly5r - fly3r;
760 fly2i = scale * fly5i - fly3i;
762 fly4r = t1r - U3r * fly0r;
763 fly4r = fly4r - U3i * fly0i;
764 fly4i = t1i + U3i * fly0r;
765 fly4i = fly4i - U3r * fly0i;
766 t1r = scale * t1r - fly4r;
767 t1i = scale * t1i - fly4i;
769 fly5r = fly7r + U3i * fly1r;
770 fly5r = fly5r - U3r * fly1i;
771 fly5i = fly7i + U3r * fly1r;
772 fly5i = fly5i + U3i * fly1i;
773 fly7r = scale * fly7r - fly5r;
774 fly7i = scale * fly7i - fly5i;
776 *(fly0P + FlyOffsetA) = fly6r;
777 *(fly0P + FlyOffsetAIm) = fly6i;
778 *(fly0P) = t0r;
779 *(fly0P + 1) = t0i;
781 U2r = *(U2rP = (U2rP + NsameU1));
782 U2i = *(U2iP = (U2iP - NsameU1));
784 *(fly2P) = fly3r;
785 *(fly2P + 1) = fly3i;
786 *(fly2P + FlyOffsetA) = fly2r;
787 *(fly2P + FlyOffsetAIm) = fly2i;
788 *(fly1P + FlyOffsetA) = fly4r;
789 *(fly1P + FlyOffsetAIm) = fly4i;
790 *(fly1P) = t1r;
791 *(fly1P + 1) = t1i;
793 U3r = *( U2rP + U3offset);
794 U3i = *( U2iP - U3offset);
796 *(fly3P + FlyOffsetA) = fly5r;
797 *(fly3P + FlyOffsetAIm) = fly5i;
798 *(fly3P) = fly7r;
799 *(fly3P + 1) = fly7i;
801 IOP = IOP + 2;
802 fly0P = IOP;
803 fly1P = (IOP+Flyinc);
804 fly2P = (fly1P+Flyinc);
805 fly3P = (fly2P+Flyinc);
807 NsameU4 = -NsameU4;
809 if(stage&1){
810 LoopN >>= 3;
811 NsameU1 >>= 3;
812 NsameU2 >>= 3;
813 NsameU4 >>= 3;
814 NdiffU <<= 3;
815 Flyinc <<= 3;
816 FlyOffsetA <<= 3;
817 FlyOffsetB <<= 3;
818 FlyOffsetAIm = FlyOffsetA + 1;
819 FlyOffsetBIm = FlyOffsetB + 1;
822 ioptr += 2*Ntbl[M];
826 void iffts(float *ioptr, long M, long Rows, float *Utbl){
827 /* Compute in-place inverse complex fft on the rows of the input array */
828 /* INPUTS */
829 /* M = log2 of fft size */
830 /* *ioptr = input data array */
831 /* *Utbl = cosine table */
832 /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
833 /* OUTPUTS */
834 /* *ioptr = output data array */
836 long Flyinc;
837 long FlyOffsetA;
838 long FlyOffsetAIm;
839 long FlyOffsetB;
840 long FlyOffsetBIm;
841 long NsameU1;
842 long NsameU2;
843 long NsameU4;
844 long diffUcnt;
845 long LoopCnt;
847 float scale;
848 float fly0r;
849 float fly0i;
850 float fly1r;
851 float fly1i;
852 float fly2r;
853 float fly2i;
854 float fly3r;
855 float fly3i;
856 float fly4r;
857 float fly4i;
858 float fly5r;
859 float fly5i;
860 float fly6r;
861 float fly6i;
862 float fly7r;
863 float fly7i;
864 float U0r;
865 float U0i;
866 float U1r;
867 float U1i;
868 float U2r;
869 float U2i;
870 float U3r;
871 float U3i;
872 float t0r;
873 float t0i;
874 float t1r;
875 float t1i;
877 float *fly0P;
878 float *fly1P;
879 float *fly2P;
880 float *fly3P;
881 float *U0rP;
882 float *U0iP;
883 float *U1rP;
884 float *U1iP;
885 float *U2rP;
886 float *U2iP;
887 float *IOP;
888 long U3offset;
890 long stage;
891 long NdiffU;
892 long LoopN;
894 const long BRshift = MAXMROOT - (M>>1);
895 const long Nrems2 = Ntbl[M-(M>>1)+1];
896 const long Nroot_1_ColInc = (Ntbl[M-1]-Ntbl[M-(M>>1)])*2;
898 for (;Rows>0;Rows--){
900 FlyOffsetA = Ntbl[M] * 2/2;
901 FlyOffsetAIm = FlyOffsetA + 1;
902 FlyOffsetB = FlyOffsetA + 2;
903 FlyOffsetBIm = FlyOffsetB + 1;
905 /* BitrevR2 ** bit reverse and first radix 2 stage ******/
907 scale = 1./Ntbl[M];
908 for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
909 for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
910 LoopCnt = (Ntbl[(M>>1)-1]-1);
911 fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
912 IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
913 fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
914 fly0r = *(fly0P);
915 fly0i = *(fly0P+1);
916 fly1r = *(fly0P+FlyOffsetA);
917 fly1i = *(fly0P+FlyOffsetAIm);
918 for (; LoopCnt > LoopN;){
919 fly2r = *(fly0P+2);
920 fly2i = *(fly0P+(2+1));
921 fly3r = *(fly0P+FlyOffsetB);
922 fly3i = *(fly0P+FlyOffsetBIm);
923 fly4r = *(fly1P);
924 fly4i = *(fly1P+1);
925 fly5r = *(fly1P+FlyOffsetA);
926 fly5i = *(fly1P+FlyOffsetAIm);
927 fly6r = *(fly1P+2);
928 fly6i = *(fly1P+(2+1));
929 fly7r = *(fly1P+FlyOffsetB);
930 fly7i = *(fly1P+FlyOffsetBIm);
932 t0r = fly0r + fly1r;
933 t0i = fly0i + fly1i;
934 fly1r = fly0r - fly1r;
935 fly1i = fly0i - fly1i;
936 t1r = fly2r + fly3r;
937 t1i = fly2i + fly3i;
938 fly3r = fly2r - fly3r;
939 fly3i = fly2i - fly3i;
940 fly0r = fly4r + fly5r;
941 fly0i = fly4i + fly5i;
942 fly5r = fly4r - fly5r;
943 fly5i = fly4i - fly5i;
944 fly2r = fly6r + fly7r;
945 fly2i = fly6i + fly7i;
946 fly7r = fly6r - fly7r;
947 fly7i = fly6i - fly7i;
949 *(fly1P) = scale*t0r;
950 *(fly1P+1) = scale*t0i;
951 *(fly1P+2) = scale*fly1r;
952 *(fly1P+(2+1)) = scale*fly1i;
953 *(fly1P+FlyOffsetA) = scale*t1r;
954 *(fly1P+FlyOffsetAIm) = scale*t1i;
955 *(fly1P+FlyOffsetB) = scale*fly3r;
956 *(fly1P+FlyOffsetBIm) = scale*fly3i;
957 *(fly0P) = scale*fly0r;
958 *(fly0P+1) = scale*fly0i;
959 *(fly0P+2) = scale*fly5r;
960 *(fly0P+(2+1)) = scale*fly5i;
961 *(fly0P+FlyOffsetA) = scale*fly2r;
962 *(fly0P+FlyOffsetAIm) = scale*fly2i;
963 *(fly0P+FlyOffsetB) = scale*fly7r;
964 *(fly0P+FlyOffsetBIm) = scale*fly7i;
966 fly0P -= Nrems2;
967 fly0r = *(fly0P);
968 fly0i = *(fly0P+1);
969 fly1r = *(fly0P+FlyOffsetA);
970 fly1i = *(fly0P+FlyOffsetAIm);
971 LoopCnt -= 1;
972 fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
974 fly2r = *(fly0P+2);
975 fly2i = *(fly0P+(2+1));
976 fly3r = *(fly0P+FlyOffsetB);
977 fly3i = *(fly0P+FlyOffsetBIm);
979 t0r = fly0r + fly1r;
980 t0i = fly0i + fly1i;
981 fly1r = fly0r - fly1r;
982 fly1i = fly0i - fly1i;
983 t1r = fly2r + fly3r;
984 t1i = fly2i + fly3i;
985 fly3r = fly2r - fly3r;
986 fly3i = fly2i - fly3i;
988 *(fly0P) = scale*t0r;
989 *(fly0P+1) = scale*t0i;
990 *(fly0P+2) = scale*fly1r;
991 *(fly0P+(2+1)) = scale*fly1i;
992 *(fly0P+FlyOffsetA) = scale*t1r;
993 *(fly0P+FlyOffsetAIm) = scale*t1i;
994 *(fly0P+FlyOffsetB) = scale*fly3r;
995 *(fly0P+FlyOffsetBIm) = scale*fly3i;
1000 /**** FFTC **************/
1002 scale = 2.0;
1004 NdiffU = 2;
1005 Flyinc = (NdiffU);
1007 NsameU4 = Ntbl[M]/4;
1008 LoopN = Ntbl[M-3];
1010 stage = ((M-1)/3);
1011 if ( (M-1-(stage * 3)) != 0 ){
1012 FlyOffsetA = Flyinc << 2;
1013 FlyOffsetB = FlyOffsetA << 1;
1014 FlyOffsetAIm = FlyOffsetA + 1;
1015 FlyOffsetBIm = FlyOffsetB + 1;
1016 if ( (M-1-(stage * 3)) == 1 ){
1017 /* 1 radix 2 stage */
1019 IOP = ioptr;
1020 fly0P = IOP;
1021 fly1P = (IOP+Flyinc);
1022 fly2P = (fly1P+Flyinc);
1023 fly3P = (fly2P+Flyinc);
1025 /* Butterflys */
1027 t0 - - t0
1028 t1 - - t1
1029 f2 - 1- f5
1030 f1 - -i- f7
1031 f4 - - f4
1032 f0 - - f0
1033 f6 - 1- f2
1034 f3 - -i- f1
1037 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
1038 t0r = *(fly0P);
1039 t0i = *(fly0P + 1);
1040 t1r = *(fly1P);
1041 t1i = *(fly1P + 1);
1042 fly2r = *(fly2P);
1043 fly2i = *(fly2P + 1);
1044 fly1r = *(fly3P);
1045 fly1i = *(fly3P + 1);
1046 fly4r = *(fly0P + FlyOffsetA);
1047 fly4i = *(fly0P + FlyOffsetAIm);
1048 fly0r = *(fly1P + FlyOffsetA);
1049 fly0i = *(fly1P + FlyOffsetAIm);
1050 fly6r = *(fly2P + FlyOffsetA);
1051 fly6i = *(fly2P + FlyOffsetAIm);
1052 fly3r = *(fly3P + FlyOffsetA);
1053 fly3i = *(fly3P + FlyOffsetAIm);
1055 fly5r = t0r - fly2r;
1056 fly5i = t0i - fly2i;
1057 t0r = t0r + fly2r;
1058 t0i = t0i + fly2i;
1060 fly7r = t1r + fly1i;
1061 fly7i = t1i - fly1r;
1062 t1r = t1r - fly1i;
1063 t1i = t1i + fly1r;
1065 fly2r = fly4r - fly6r;
1066 fly2i = fly4i - fly6i;
1067 fly4r = fly4r + fly6r;
1068 fly4i = fly4i + fly6i;
1070 fly1r = fly0r + fly3i;
1071 fly1i = fly0i - fly3r;
1072 fly0r = fly0r - fly3i;
1073 fly0i = fly0i + fly3r;
1075 *(fly2P) = fly5r;
1076 *(fly2P + 1) = fly5i;
1077 *(fly0P) = t0r;
1078 *(fly0P + 1) = t0i;
1079 *(fly3P) = fly7r;
1080 *(fly3P + 1) = fly7i;
1081 *(fly1P) = t1r;
1082 *(fly1P + 1) = t1i;
1083 *(fly2P + FlyOffsetA) = fly2r;
1084 *(fly2P + FlyOffsetAIm) = fly2i;
1085 *(fly0P + FlyOffsetA) = fly4r;
1086 *(fly0P + FlyOffsetAIm) = fly4i;
1087 *(fly3P + FlyOffsetA) = fly1r;
1088 *(fly3P + FlyOffsetAIm) = fly1i;
1089 *(fly1P + FlyOffsetA) = fly0r;
1090 *(fly1P + FlyOffsetAIm) = fly0i;
1092 fly0P = (fly0P + FlyOffsetB);
1093 fly1P = (fly1P + FlyOffsetB);
1094 fly2P = (fly2P + FlyOffsetB);
1095 fly3P = (fly3P + FlyOffsetB);
1098 NsameU4 >>= 1;
1099 LoopN >>= 1;
1100 NdiffU <<= 1;
1101 Flyinc = Flyinc << 1;
1103 else{
1104 /* 1 radix 4 stage */
1105 IOP = ioptr;
1107 U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
1108 U3i = U3r;
1109 fly0P = IOP;
1110 fly1P = (IOP+Flyinc);
1111 fly2P = (fly1P+Flyinc);
1112 fly3P = (fly2P+Flyinc);
1114 /* Butterflys */
1116 t0 - - t0 - - t0
1117 t1 - - t1 - - t1
1118 f2 - 1- f5 - - f5
1119 f1 - -i- f7 - - f7
1120 f4 - - f4 - 1- f6
1121 f0 - - f0 -U3 - f3
1122 f6 - 1- f2 - -i- f4
1123 f3 - -i- f1 -U3a- f2
1126 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
1127 t0r = *(fly0P);
1128 t0i = *(fly0P + 1);
1129 t1r = *(fly1P);
1130 t1i = *(fly1P + 1);
1131 fly2r = *(fly2P);
1132 fly2i = *(fly2P + 1);
1133 fly1r = *(fly3P);
1134 fly1i = *(fly3P + 1);
1135 fly4r = *(fly0P + FlyOffsetA);
1136 fly4i = *(fly0P + FlyOffsetAIm);
1137 fly0r = *(fly1P + FlyOffsetA);
1138 fly0i = *(fly1P + FlyOffsetAIm);
1139 fly6r = *(fly2P + FlyOffsetA);
1140 fly6i = *(fly2P + FlyOffsetAIm);
1141 fly3r = *(fly3P + FlyOffsetA);
1142 fly3i = *(fly3P + FlyOffsetAIm);
1144 fly5r = t0r - fly2r;
1145 fly5i = t0i - fly2i;
1146 t0r = t0r + fly2r;
1147 t0i = t0i + fly2i;
1149 fly7r = t1r + fly1i;
1150 fly7i = t1i - fly1r;
1151 t1r = t1r - fly1i;
1152 t1i = t1i + fly1r;
1154 fly2r = fly4r - fly6r;
1155 fly2i = fly4i - fly6i;
1156 fly4r = fly4r + fly6r;
1157 fly4i = fly4i + fly6i;
1159 fly1r = fly0r + fly3i;
1160 fly1i = fly0i - fly3r;
1161 fly0r = fly0r - fly3i;
1162 fly0i = fly0i + fly3r;
1164 fly6r = t0r - fly4r;
1165 fly6i = t0i - fly4i;
1166 t0r = t0r + fly4r;
1167 t0i = t0i + fly4i;
1169 fly3r = fly5r + fly2i;
1170 fly3i = fly5i - fly2r;
1171 fly5r = fly5r - fly2i;
1172 fly5i = fly5i + fly2r;
1174 fly4r = t1r - U3r * fly0r;
1175 fly4r = fly4r + U3i * fly0i;
1176 fly4i = t1i - U3i * fly0r;
1177 fly4i = fly4i - U3r * fly0i;
1178 t1r = scale * t1r - fly4r;
1179 t1i = scale * t1i - fly4i;
1181 fly2r = fly7r + U3i * fly1r;
1182 fly2r = fly2r + U3r * fly1i;
1183 fly2i = fly7i - U3r * fly1r;
1184 fly2i = fly2i + U3i * fly1i;
1185 fly7r = scale * fly7r - fly2r;
1186 fly7i = scale * fly7i - fly2i;
1188 *(fly0P + FlyOffsetA) = fly6r;
1189 *(fly0P + FlyOffsetAIm) = fly6i;
1190 *(fly0P) = t0r;
1191 *(fly0P + 1) = t0i;
1192 *(fly2P + FlyOffsetA) = fly3r;
1193 *(fly2P + FlyOffsetAIm) = fly3i;
1194 *(fly2P) = fly5r;
1195 *(fly2P + 1) = fly5i;
1196 *(fly1P + FlyOffsetA) = fly4r;
1197 *(fly1P + FlyOffsetAIm) = fly4i;
1198 *(fly1P) = t1r;
1199 *(fly1P + 1) = t1i;
1200 *(fly3P + FlyOffsetA) = fly2r;
1201 *(fly3P + FlyOffsetAIm) = fly2i;
1202 *(fly3P) = fly7r;
1203 *(fly3P + 1) = fly7i;
1205 fly0P = (fly0P + FlyOffsetB);
1206 fly1P = (fly1P + FlyOffsetB);
1207 fly2P = (fly2P + FlyOffsetB);
1208 fly3P = (fly3P + FlyOffsetB);
1212 NsameU4 >>= 2;
1213 LoopN >>= 2;
1214 NdiffU <<= 2;
1215 Flyinc = Flyinc << 2;
1219 NsameU2 = NsameU4 >> 1;
1220 NsameU1 = NsameU2 >> 1;
1221 Flyinc <<= 1;
1222 FlyOffsetA = Flyinc << 2;
1223 FlyOffsetB = FlyOffsetA << 1;
1224 FlyOffsetAIm = FlyOffsetA + 1;
1225 FlyOffsetBIm = FlyOffsetB + 1;
1226 LoopN >>= 1;
1228 /* ****** RADIX 8 Stages */
1229 for (stage = stage<<1; stage > 0 ; stage--){
1231 /* an fft stage is done in two parts to ease use of the single quadrant cos table */
1233 /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
1234 if(!(stage&1)){
1235 U0rP = &Utbl[0];
1236 U0iP = &Utbl[Ntbl[M-2]];
1237 U1rP = U0rP;
1238 U1iP = U0iP;
1239 U2rP = U0rP;
1240 U2iP = U0iP;
1241 U3offset = (Ntbl[M]) / 8;
1243 IOP = ioptr;
1245 U0r = *U0rP,
1246 U0i = *U0iP;
1247 U1r = *U1rP,
1248 U1i = *U1iP;
1249 U2r = *U2rP,
1250 U2i = *U2iP;
1251 U3r = *( U2rP + U3offset);
1252 U3i = *( U2iP - U3offset);
1255 fly0P = IOP;
1256 fly1P = (IOP+Flyinc);
1257 fly2P = (fly1P+Flyinc);
1258 fly3P = (fly2P+Flyinc);
1260 for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
1262 /* Butterflys */
1264 f0 - - t0 - - t0 - - t0
1265 f1 -U0 - t1 - - t1 - - t1
1266 f2 - - f2 -U1 - f5 - - f3
1267 f3 -U0 - f1 -U1a- f7 - - f7
1268 f4 - - f4 - - f4 -U2 - f6
1269 f5 -U0 - f0 - - f0 -U3 - f4
1270 f6 - - f6 -U1 - f2 -U2a- f2
1271 f7 -U0 - f3 -U1a- f1 -U3a- f5
1274 fly0r = *(IOP);
1275 fly0i = *(IOP+1);
1276 fly1r = *(fly1P);
1277 fly1i = *(fly1P+1);
1279 for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
1281 fly2r = *(fly2P);
1282 fly2i = *(fly2P + 1);
1283 fly3r = *(fly3P);
1284 fly3i = *(fly3P + 1);
1285 fly4r = *(fly0P + FlyOffsetA);
1286 fly4i = *(fly0P + FlyOffsetAIm);
1287 fly5r = *(fly1P + FlyOffsetA);
1288 fly5i = *(fly1P + FlyOffsetAIm);
1289 fly6r = *(fly2P + FlyOffsetA);
1290 fly6i = *(fly2P + FlyOffsetAIm);
1291 fly7r = *(fly3P + FlyOffsetA);
1292 fly7i = *(fly3P + FlyOffsetAIm);
1294 t1r = fly0r - U0r * fly1r;
1295 t1r = t1r + U0i * fly1i;
1296 t1i = fly0i - U0i * fly1r;
1297 t1i = t1i - U0r * fly1i;
1298 t0r = scale * fly0r - t1r;
1299 t0i = scale * fly0i - t1i;
1301 fly1r = fly2r - U0r * fly3r;
1302 fly1r = fly1r + U0i * fly3i;
1303 fly1i = fly2i - U0i * fly3r;
1304 fly1i = fly1i - U0r * fly3i;
1305 fly2r = scale * fly2r - fly1r;
1306 fly2i = scale * fly2i - fly1i;
1308 fly0r = fly4r - U0r * fly5r;
1309 fly0r = fly0r + U0i * fly5i;
1310 fly0i = fly4i - U0i * fly5r;
1311 fly0i = fly0i - U0r * fly5i;
1312 fly4r = scale * fly4r - fly0r;
1313 fly4i = scale * fly4i - fly0i;
1315 fly3r = fly6r - U0r * fly7r;
1316 fly3r = fly3r + U0i * fly7i;
1317 fly3i = fly6i - U0i * fly7r;
1318 fly3i = fly3i - U0r * fly7i;
1319 fly6r = scale * fly6r - fly3r;
1320 fly6i = scale * fly6i - fly3i;
1323 fly5r = t0r - U1r * fly2r;
1324 fly5r = fly5r + U1i * fly2i;
1325 fly5i = t0i - U1i * fly2r;
1326 fly5i = fly5i - U1r * fly2i;
1327 t0r = scale * t0r - fly5r;
1328 t0i = scale * t0i - fly5i;
1330 fly7r = t1r + U1i * fly1r;
1331 fly7r = fly7r + U1r * fly1i;
1332 fly7i = t1i - U1r * fly1r;
1333 fly7i = fly7i + U1i * fly1i;
1334 t1r = scale * t1r - fly7r;
1335 t1i = scale * t1i - fly7i;
1337 fly2r = fly4r - U1r * fly6r;
1338 fly2r = fly2r + U1i * fly6i;
1339 fly2i = fly4i - U1i * fly6r;
1340 fly2i = fly2i - U1r * fly6i;
1341 fly4r = scale * fly4r - fly2r;
1342 fly4i = scale * fly4i - fly2i;
1344 fly1r = fly0r + U1i * fly3r;
1345 fly1r = fly1r + U1r * fly3i;
1346 fly1i = fly0i - U1r * fly3r;
1347 fly1i = fly1i + U1i * fly3i;
1348 fly0r = scale * fly0r - fly1r;
1349 fly0i = scale * fly0i - fly1i;
1351 fly6r = t0r - U2r * fly4r;
1352 fly6r = fly6r + U2i * fly4i;
1353 fly6i = t0i - U2i * fly4r;
1354 fly6i = fly6i - U2r * fly4i;
1355 t0r = scale * t0r - fly6r;
1356 t0i = scale * t0i - fly6i;
1358 fly3r = fly5r - U2i * fly2r;
1359 fly3r = fly3r - U2r * fly2i;
1360 fly3i = fly5i + U2r * fly2r;
1361 fly3i = fly3i - U2i * fly2i;
1362 fly2r = scale * fly5r - fly3r;
1363 fly2i = scale * fly5i - fly3i;
1365 fly4r = t1r - U3r * fly0r;
1366 fly4r = fly4r + U3i * fly0i;
1367 fly4i = t1i - U3i * fly0r;
1368 fly4i = fly4i - U3r * fly0i;
1369 t1r = scale * t1r - fly4r;
1370 t1i = scale * t1i - fly4i;
1372 fly5r = fly7r + U3i * fly1r;
1373 fly5r = fly5r + U3r * fly1i;
1374 fly5i = fly7i - U3r * fly1r;
1375 fly5i = fly5i + U3i * fly1i;
1376 fly7r = scale * fly7r - fly5r;
1377 fly7i = scale * fly7i - fly5i;
1379 *(fly0P + FlyOffsetA) = fly6r;
1380 *(fly0P + FlyOffsetAIm) = fly6i;
1381 *(fly0P) = t0r;
1382 *(fly0P + 1) = t0i;
1383 *(fly2P) = fly3r;
1384 *(fly2P + 1) = fly3i;
1385 *(fly2P + FlyOffsetA) = fly2r;
1386 *(fly2P + FlyOffsetAIm) = fly2i;
1388 fly0r = *(fly0P + FlyOffsetB);
1389 fly0i = *(fly0P + FlyOffsetBIm);
1391 *(fly1P + FlyOffsetA) = fly4r;
1392 *(fly1P + FlyOffsetAIm) = fly4i;
1393 *(fly1P) = t1r;
1394 *(fly1P + 1) = t1i;
1396 fly1r = *(fly1P + FlyOffsetB);
1397 fly1i = *(fly1P + FlyOffsetBIm);
1399 *(fly3P + FlyOffsetA) = fly5r;
1400 *(fly3P + FlyOffsetAIm) = fly5i;
1401 *(fly3P) = fly7r;
1402 *(fly3P + 1) = fly7i;
1404 fly0P = (fly0P + FlyOffsetB);
1405 fly1P = (fly1P + FlyOffsetB);
1406 fly2P = (fly2P + FlyOffsetB);
1407 fly3P = (fly3P + FlyOffsetB);
1410 fly2r = *(fly2P);
1411 fly2i = *(fly2P + 1);
1412 fly3r = *(fly3P);
1413 fly3i = *(fly3P + 1);
1414 fly4r = *(fly0P + FlyOffsetA);
1415 fly4i = *(fly0P + FlyOffsetAIm);
1416 fly5r = *(fly1P + FlyOffsetA);
1417 fly5i = *(fly1P + FlyOffsetAIm);
1418 fly6r = *(fly2P + FlyOffsetA);
1419 fly6i = *(fly2P + FlyOffsetAIm);
1420 fly7r = *(fly3P + FlyOffsetA);
1421 fly7i = *(fly3P + FlyOffsetAIm);
1423 t1r = fly0r - U0r * fly1r;
1424 t1r = t1r + U0i * fly1i;
1425 t1i = fly0i - U0i * fly1r;
1426 t1i = t1i - U0r * fly1i;
1427 t0r = scale * fly0r - t1r;
1428 t0i = scale * fly0i - t1i;
1430 fly1r = fly2r - U0r * fly3r;
1431 fly1r = fly1r + U0i * fly3i;
1432 fly1i = fly2i - U0i * fly3r;
1433 fly1i = fly1i - U0r * fly3i;
1434 fly2r = scale * fly2r - fly1r;
1435 fly2i = scale * fly2i - fly1i;
1437 fly0r = fly4r - U0r * fly5r;
1438 fly0r = fly0r + U0i * fly5i;
1439 fly0i = fly4i - U0i * fly5r;
1440 fly0i = fly0i - U0r * fly5i;
1441 fly4r = scale * fly4r - fly0r;
1442 fly4i = scale * fly4i - fly0i;
1444 fly3r = fly6r - U0r * fly7r;
1445 fly3r = fly3r + U0i * fly7i;
1446 fly3i = fly6i - U0i * fly7r;
1447 fly3i = fly3i - U0r * fly7i;
1448 fly6r = scale * fly6r - fly3r;
1449 fly6i = scale * fly6i - fly3i;
1451 fly5r = t0r - U1r * fly2r;
1452 fly5r = fly5r + U1i * fly2i;
1453 fly5i = t0i - U1i * fly2r;
1454 fly5i = fly5i - U1r * fly2i;
1455 t0r = scale * t0r - fly5r;
1456 t0i = scale * t0i - fly5i;
1458 fly7r = t1r + U1i * fly1r;
1459 fly7r = fly7r + U1r * fly1i;
1460 fly7i = t1i - U1r * fly1r;
1461 fly7i = fly7i + U1i * fly1i;
1462 t1r = scale * t1r - fly7r;
1463 t1i = scale * t1i - fly7i;
1465 fly2r = fly4r - U1r * fly6r;
1466 fly2r = fly2r + U1i * fly6i;
1467 fly2i = fly4i - U1i * fly6r;
1468 fly2i = fly2i - U1r * fly6i;
1469 fly4r = scale * fly4r - fly2r;
1470 fly4i = scale * fly4i - fly2i;
1472 fly1r = fly0r + U1i * fly3r;
1473 fly1r = fly1r + U1r * fly3i;
1474 fly1i = fly0i - U1r * fly3r;
1475 fly1i = fly1i + U1i * fly3i;
1476 fly0r = scale * fly0r - fly1r;
1477 fly0i = scale * fly0i - fly1i;
1479 U0i = *(U0iP = (U0iP - NsameU4));
1480 U0r = *(U0rP = (U0rP + NsameU4));
1481 U1r = *(U1rP = (U1rP + NsameU2));
1482 U1i = *(U1iP = (U1iP - NsameU2));
1483 if(stage&1)
1484 U0r = - U0r;
1486 fly6r = t0r - U2r * fly4r;
1487 fly6r = fly6r + U2i * fly4i;
1488 fly6i = t0i - U2i * fly4r;
1489 fly6i = fly6i - U2r * fly4i;
1490 t0r = scale * t0r - fly6r;
1491 t0i = scale * t0i - fly6i;
1493 fly3r = fly5r - U2i * fly2r;
1494 fly3r = fly3r - U2r * fly2i;
1495 fly3i = fly5i + U2r * fly2r;
1496 fly3i = fly3i - U2i * fly2i;
1497 fly2r = scale * fly5r - fly3r;
1498 fly2i = scale * fly5i - fly3i;
1500 fly4r = t1r - U3r * fly0r;
1501 fly4r = fly4r + U3i * fly0i;
1502 fly4i = t1i - U3i * fly0r;
1503 fly4i = fly4i - U3r * fly0i;
1504 t1r = scale * t1r - fly4r;
1505 t1i = scale * t1i - fly4i;
1507 fly5r = fly7r + U3i * fly1r;
1508 fly5r = fly5r + U3r * fly1i;
1509 fly5i = fly7i - U3r * fly1r;
1510 fly5i = fly5i + U3i * fly1i;
1511 fly7r = scale * fly7r - fly5r;
1512 fly7i = scale * fly7i - fly5i;
1514 *(fly0P + FlyOffsetA) = fly6r;
1515 *(fly0P + FlyOffsetAIm) = fly6i;
1516 *(fly0P) = t0r;
1517 *(fly0P + 1) = t0i;
1519 U2r = *(U2rP = (U2rP + NsameU1));
1520 U2i = *(U2iP = (U2iP - NsameU1));
1522 *(fly2P) = fly3r;
1523 *(fly2P + 1) = fly3i;
1524 *(fly2P + FlyOffsetA) = fly2r;
1525 *(fly2P + FlyOffsetAIm) = fly2i;
1526 *(fly1P + FlyOffsetA) = fly4r;
1527 *(fly1P + FlyOffsetAIm) = fly4i;
1528 *(fly1P) = t1r;
1529 *(fly1P + 1) = t1i;
1531 U3r = *( U2rP + U3offset);
1532 U3i = *( U2iP - U3offset);
1534 *(fly3P + FlyOffsetA) = fly5r;
1535 *(fly3P + FlyOffsetAIm) = fly5i;
1536 *(fly3P) = fly7r;
1537 *(fly3P + 1) = fly7i;
1539 IOP = IOP + 2;
1540 fly0P = IOP;
1541 fly1P = (IOP+Flyinc);
1542 fly2P = (fly1P+Flyinc);
1543 fly3P = (fly2P+Flyinc);
1545 NsameU4 = -NsameU4;
1547 if(stage&1){
1548 LoopN >>= 3;
1549 NsameU1 >>= 3;
1550 NsameU2 >>= 3;
1551 NsameU4 >>= 3;
1552 NdiffU <<= 3;
1553 Flyinc = Flyinc << 3;
1554 FlyOffsetA <<= 3;
1555 FlyOffsetB <<= 3;
1556 FlyOffsetAIm = FlyOffsetA + 1;
1557 FlyOffsetBIm = FlyOffsetB + 1;
1560 ioptr += 2*Ntbl[M];
1564 /* rffts */
1565 /* compute multiple real input FFTs on 'Rows' consecutively stored arrays */
1566 /* ioptr = pointer to the data in and out */
1567 /* M = log2 of fft size */
1568 /* Rows = number of FFTs to compute */
1569 /* Utbl = Pointer to cosine table */
1571 void rffts(float *ioptr, long M, long Rows, float *Utbl){
1572 /* Compute in-place real fft on the rows of the input array */
1573 /* INPUTS */
1574 /* M = log2 of fft size */
1575 /* *ioptr = real input data array */
1576 /* *Utbl = cosine table */
1577 /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
1578 /* OUTPUTS */
1579 /* *ioptr = output data array in the following order */
1580 /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
1582 long Flyinc;
1583 long FlyOffsetA;
1584 long FlyOffsetAIm;
1585 long FlyOffsetB;
1586 long FlyOffsetBIm;
1587 long NsameU1;
1588 long NsameU2;
1589 long NsameU4;
1590 long diffUcnt;
1591 long LoopCnt;
1592 float scale;
1593 float fly0r;
1594 float fly0i;
1595 float fly1r;
1596 float fly1i;
1597 float fly2r;
1598 float fly2i;
1599 float fly3r;
1600 float fly3i;
1601 float fly4r;
1602 float fly4i;
1603 float fly5r;
1604 float fly5i;
1605 float fly6r;
1606 float fly6i;
1607 float fly7r;
1608 float fly7i;
1609 float U0r;
1610 float U0i;
1611 float U1r;
1612 float U1i;
1613 float U2r;
1614 float U2i;
1615 float U3r;
1616 float U3i;
1617 float t0r;
1618 float t0i;
1619 float t1r;
1620 float t1i;
1622 float *fly0P;
1623 float *fly1P;
1624 float *fly2P;
1625 float *fly3P;
1627 float *U0rP;
1628 float *U0iP;
1629 float *U1rP;
1630 float *U1iP;
1631 float *U2rP;
1632 float *U2iP;
1633 float *IOP;
1634 long U3offset;
1636 long stage;
1637 long NdiffU;
1638 long LoopN;
1640 const long BRshift = MAXMROOT - ((M-1)>>1); /* for RFFT */
1641 const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RFFT */
1642 const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RFFT */
1644 for (;Rows>0;Rows--){
1646 M=M-1; /* for RFFT */
1648 FlyOffsetA = Ntbl[M] * 2/2;
1649 FlyOffsetAIm = FlyOffsetA + 1;
1650 FlyOffsetB = FlyOffsetA + 2;
1651 FlyOffsetBIm = FlyOffsetB + 1;
1653 /* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/
1655 scale = 0.5;
1656 for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
1657 for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
1658 LoopCnt = (Ntbl[(M>>1)-1]-1);
1659 fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
1660 IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
1661 fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
1662 fly0r = *fly0P;
1663 fly0i = *(fly0P+1);
1664 fly1r = *(fly0P+FlyOffsetA);
1665 fly1i = *(fly0P+FlyOffsetAIm);
1666 for (; LoopCnt > LoopN;){
1667 fly2r = *(fly0P+2);
1668 fly2i = *(fly0P+(2+1));
1669 fly3r = *(fly0P+FlyOffsetB);
1670 fly3i = *(fly0P+FlyOffsetBIm);
1671 fly4r = *fly1P;
1672 fly4i = *(fly1P+1);
1673 fly5r = *(fly1P+FlyOffsetA);
1674 fly5i = *(fly1P+FlyOffsetAIm);
1675 fly6r = *(fly1P+2);
1676 fly6i = *(fly1P+(2+1));
1677 fly7r = *(fly1P+FlyOffsetB);
1678 fly7i = *(fly1P+FlyOffsetBIm);
1680 t0r = fly0r + fly1r;
1681 t0i = fly0i + fly1i;
1682 fly1r = fly0r - fly1r;
1683 fly1i = fly0i - fly1i;
1684 t1r = fly2r + fly3r;
1685 t1i = fly2i + fly3i;
1686 fly3r = fly2r - fly3r;
1687 fly3i = fly2i - fly3i;
1688 fly0r = fly4r + fly5r;
1689 fly0i = fly4i + fly5i;
1690 fly5r = fly4r - fly5r;
1691 fly5i = fly4i - fly5i;
1692 fly2r = fly6r + fly7r;
1693 fly2i = fly6i + fly7i;
1694 fly7r = fly6r - fly7r;
1695 fly7i = fly6i - fly7i;
1697 *fly1P = scale*t0r;
1698 *(fly1P+1) = scale*t0i;
1699 *(fly1P+2) = scale*fly1r;
1700 *(fly1P+(2+1)) = scale*fly1i;
1701 *(fly1P+FlyOffsetA) = scale*t1r;
1702 *(fly1P+FlyOffsetAIm) = scale*t1i;
1703 *(fly1P+FlyOffsetB) = scale*fly3r;
1704 *(fly1P+FlyOffsetBIm) = scale*fly3i;
1705 *fly0P = scale*fly0r;
1706 *(fly0P+1) = scale*fly0i;
1707 *(fly0P+2) = scale*fly5r;
1708 *(fly0P+(2+1)) = scale*fly5i;
1709 *(fly0P+FlyOffsetA) = scale*fly2r;
1710 *(fly0P+FlyOffsetAIm) = scale*fly2i;
1711 *(fly0P+FlyOffsetB) = scale*fly7r;
1712 *(fly0P+FlyOffsetBIm) = scale*fly7i;
1714 fly0P -= Nrems2;
1715 fly0r = *fly0P;
1716 fly0i = *(fly0P+1);
1717 fly1r = *(fly0P+FlyOffsetA);
1718 fly1i = *(fly0P+FlyOffsetAIm);
1719 LoopCnt -= 1;
1720 fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
1722 fly2r = *(fly0P+2);
1723 fly2i = *(fly0P+(2+1));
1724 fly3r = *(fly0P+FlyOffsetB);
1725 fly3i = *(fly0P+FlyOffsetBIm);
1727 t0r = fly0r + fly1r;
1728 t0i = fly0i + fly1i;
1729 fly1r = fly0r - fly1r;
1730 fly1i = fly0i - fly1i;
1731 t1r = fly2r + fly3r;
1732 t1i = fly2i + fly3i;
1733 fly3r = fly2r - fly3r;
1734 fly3i = fly2i - fly3i;
1736 *fly0P = scale*t0r;
1737 *(fly0P+1) = scale*t0i;
1738 *(fly0P+2) = scale*fly1r;
1739 *(fly0P+(2+1)) = scale*fly1i;
1740 *(fly0P+FlyOffsetA) = scale*t1r;
1741 *(fly0P+FlyOffsetAIm) = scale*t1i;
1742 *(fly0P+FlyOffsetB) = scale*fly3r;
1743 *(fly0P+FlyOffsetBIm) = scale*fly3i;
1750 /**** FFTC **************/
1752 scale = 2.0;
1754 NdiffU = 2;
1755 Flyinc = (NdiffU);
1757 NsameU4 = Ntbl[M+1]/4; /* for RFFT */
1758 LoopN = Ntbl[M-3];
1760 stage = ((M-1)/3);
1761 if ( (M-1-(stage * 3)) != 0 ){
1762 FlyOffsetA = Flyinc << 2;
1763 FlyOffsetB = FlyOffsetA << 1;
1764 FlyOffsetAIm = FlyOffsetA + 1;
1765 FlyOffsetBIm = FlyOffsetB + 1;
1766 if ( (M-1-(stage * 3)) == 1 ){
1767 /* 1 radix 2 stage */
1769 IOP = ioptr;
1770 fly0P = IOP;
1771 fly1P = (IOP+Flyinc);
1772 fly2P = (fly1P+Flyinc);
1773 fly3P = (fly2P+Flyinc);
1775 /* Butterflys */
1777 t0 - - t0
1778 t1 - - t1
1779 f2 - 1- f5
1780 f1 - -i- f7
1781 f4 - - f4
1782 f0 - - f0
1783 f6 - 1- f2
1784 f3 - -i- f1
1787 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
1788 t0r = *(fly0P);
1789 t0i = *(fly0P + 1);
1790 t1r = *(fly1P);
1791 t1i = *(fly1P + 1);
1792 fly2r = *(fly2P);
1793 fly2i = *(fly2P + 1);
1794 fly1r = *(fly3P);
1795 fly1i = *(fly3P + 1);
1796 fly4r = *(fly0P + FlyOffsetA);
1797 fly4i = *(fly0P + FlyOffsetAIm);
1798 fly0r = *(fly1P + FlyOffsetA);
1799 fly0i = *(fly1P + FlyOffsetAIm);
1800 fly6r = *(fly2P + FlyOffsetA);
1801 fly6i = *(fly2P + FlyOffsetAIm);
1802 fly3r = *(fly3P + FlyOffsetA);
1803 fly3i = *(fly3P + FlyOffsetAIm);
1805 fly5r = t0r - fly2r;
1806 fly5i = t0i - fly2i;
1807 t0r = t0r + fly2r;
1808 t0i = t0i + fly2i;
1810 fly7r = t1r - fly1i;
1811 fly7i = t1i + fly1r;
1812 t1r = t1r + fly1i;
1813 t1i = t1i - fly1r;
1815 fly2r = fly4r - fly6r;
1816 fly2i = fly4i - fly6i;
1817 fly4r = fly4r + fly6r;
1818 fly4i = fly4i + fly6i;
1820 fly1r = fly0r - fly3i;
1821 fly1i = fly0i + fly3r;
1822 fly0r = fly0r + fly3i;
1823 fly0i = fly0i - fly3r;
1825 *(fly2P) = fly5r;
1826 *(fly2P + 1) = fly5i;
1827 *(fly0P) = t0r;
1828 *(fly0P + 1) = t0i;
1829 *(fly3P) = fly7r;
1830 *(fly3P + 1) = fly7i;
1831 *(fly1P) = t1r;
1832 *(fly1P + 1) = t1i;
1833 *(fly2P + FlyOffsetA) = fly2r;
1834 *(fly2P + FlyOffsetAIm) = fly2i;
1835 *(fly0P + FlyOffsetA) = fly4r;
1836 *(fly0P + FlyOffsetAIm) = fly4i;
1837 *(fly3P + FlyOffsetA) = fly1r;
1838 *(fly3P + FlyOffsetAIm) = fly1i;
1839 *(fly1P + FlyOffsetA) = fly0r;
1840 *(fly1P + FlyOffsetAIm) = fly0i;
1842 fly0P = (fly0P + FlyOffsetB);
1843 fly1P = (fly1P + FlyOffsetB);
1844 fly2P = (fly2P + FlyOffsetB);
1845 fly3P = (fly3P + FlyOffsetB);
1848 NsameU4 >>= 1;
1849 LoopN >>= 1;
1850 NdiffU <<= 1;
1851 Flyinc = Flyinc << 1;
1853 else{
1854 /* 1 radix 4 stage */
1855 IOP = ioptr;
1857 U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
1858 U3i = U3r;
1859 fly0P = IOP;
1860 fly1P = (IOP+Flyinc);
1861 fly2P = (fly1P+Flyinc);
1862 fly3P = (fly2P+Flyinc);
1864 /* Butterflys */
1866 t0 - - t0 - - t0
1867 t1 - - t1 - - t1
1868 f2 - 1- f5 - - f5
1869 f1 - -i- f7 - - f7
1870 f4 - - f4 - 1- f6
1871 f0 - - f0 -U3 - f3
1872 f6 - 1- f2 - -i- f4
1873 f3 - -i- f1 -U3a- f2
1876 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
1877 t0r = *(fly0P);
1878 t0i = *(fly0P + 1);
1879 t1r = *(fly1P);
1880 t1i = *(fly1P + 1);
1881 fly2r = *(fly2P);
1882 fly2i = *(fly2P + 1);
1883 fly1r = *(fly3P);
1884 fly1i = *(fly3P + 1);
1885 fly4r = *(fly0P + FlyOffsetA);
1886 fly4i = *(fly0P + FlyOffsetAIm);
1887 fly0r = *(fly1P + FlyOffsetA);
1888 fly0i = *(fly1P + FlyOffsetAIm);
1889 fly6r = *(fly2P + FlyOffsetA);
1890 fly6i = *(fly2P + FlyOffsetAIm);
1891 fly3r = *(fly3P + FlyOffsetA);
1892 fly3i = *(fly3P + FlyOffsetAIm);
1894 fly5r = t0r - fly2r;
1895 fly5i = t0i - fly2i;
1896 t0r = t0r + fly2r;
1897 t0i = t0i + fly2i;
1899 fly7r = t1r - fly1i;
1900 fly7i = t1i + fly1r;
1901 t1r = t1r + fly1i;
1902 t1i = t1i - fly1r;
1904 fly2r = fly4r - fly6r;
1905 fly2i = fly4i - fly6i;
1906 fly4r = fly4r + fly6r;
1907 fly4i = fly4i + fly6i;
1909 fly1r = fly0r - fly3i;
1910 fly1i = fly0i + fly3r;
1911 fly0r = fly0r + fly3i;
1912 fly0i = fly0i - fly3r;
1914 fly6r = t0r - fly4r;
1915 fly6i = t0i - fly4i;
1916 t0r = t0r + fly4r;
1917 t0i = t0i + fly4i;
1919 fly3r = fly5r - fly2i;
1920 fly3i = fly5i + fly2r;
1921 fly5r = fly5r + fly2i;
1922 fly5i = fly5i - fly2r;
1924 fly4r = t1r - U3r * fly0r;
1925 fly4r = fly4r - U3i * fly0i;
1926 fly4i = t1i + U3i * fly0r;
1927 fly4i = fly4i - U3r * fly0i;
1928 t1r = scale * t1r - fly4r;
1929 t1i = scale * t1i - fly4i;
1931 fly2r = fly7r + U3i * fly1r;
1932 fly2r = fly2r - U3r * fly1i;
1933 fly2i = fly7i + U3r * fly1r;
1934 fly2i = fly2i + U3i * fly1i;
1935 fly7r = scale * fly7r - fly2r;
1936 fly7i = scale * fly7i - fly2i;
1938 *(fly0P + FlyOffsetA) = fly6r;
1939 *(fly0P + FlyOffsetAIm) = fly6i;
1940 *(fly0P) = t0r;
1941 *(fly0P + 1) = t0i;
1942 *(fly2P + FlyOffsetA) = fly3r;
1943 *(fly2P + FlyOffsetAIm) = fly3i;
1944 *(fly2P) = fly5r;
1945 *(fly2P + 1) = fly5i;
1946 *(fly1P + FlyOffsetA) = fly4r;
1947 *(fly1P + FlyOffsetAIm) = fly4i;
1948 *(fly1P) = t1r;
1949 *(fly1P + 1) = t1i;
1950 *(fly3P + FlyOffsetA) = fly2r;
1951 *(fly3P + FlyOffsetAIm) = fly2i;
1952 *(fly3P) = fly7r;
1953 *(fly3P + 1) = fly7i;
1955 fly0P = (fly0P + FlyOffsetB);
1956 fly1P = (fly1P + FlyOffsetB);
1957 fly2P = (fly2P + FlyOffsetB);
1958 fly3P = (fly3P + FlyOffsetB);
1962 NsameU4 >>= 2;
1963 LoopN >>= 2;
1964 NdiffU <<= 2;
1965 Flyinc = Flyinc << 2;
1969 NsameU2 = NsameU4 >> 1;
1970 NsameU1 = NsameU2 >> 1;
1971 Flyinc <<= 1;
1972 FlyOffsetA = Flyinc << 2;
1973 FlyOffsetB = FlyOffsetA << 1;
1974 FlyOffsetAIm = FlyOffsetA + 1;
1975 FlyOffsetBIm = FlyOffsetB + 1;
1976 LoopN >>= 1;
1978 /* ****** RADIX 8 Stages */
1979 for (stage = stage<<1; stage > 0 ; stage--){
1981 /* an fft stage is done in two parts to ease use of the single quadrant cos table */
1983 /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
1984 if(!(stage&1)){
1985 U0rP = &Utbl[0];
1986 U0iP = &Utbl[Ntbl[M-1]]; /* for RFFT */
1987 U1rP = U0rP;
1988 U1iP = U0iP;
1989 U2rP = U0rP;
1990 U2iP = U0iP;
1991 U3offset = (Ntbl[M+1]) / 8; /* for RFFT */
1993 IOP = ioptr;
1995 U0r = *U0rP,
1996 U0i = *U0iP;
1997 U1r = *U1rP,
1998 U1i = *U1iP;
1999 U2r = *U2rP,
2000 U2i = *U2iP;
2001 U3r = *( U2rP + U3offset);
2002 U3i = *( U2iP - U3offset);
2005 fly0P = IOP;
2006 fly1P = (IOP+Flyinc);
2007 fly2P = (fly1P+Flyinc);
2008 fly3P = (fly2P+Flyinc);
2010 for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
2012 /* Butterflys */
2014 f0 - - t0 - - t0 - - t0
2015 f1 -U0 - t1 - - t1 - - t1
2016 f2 - - f2 -U1 - f5 - - f3
2017 f3 -U0 - f1 -U1a- f7 - - f7
2018 f4 - - f4 - - f4 -U2 - f6
2019 f5 -U0 - f0 - - f0 -U3 - f4
2020 f6 - - f6 -U1 - f2 -U2a- f2
2021 f7 -U0 - f3 -U1a- f1 -U3a- f5
2024 fly0r = *(IOP);
2025 fly0i = *(IOP+1);
2026 fly1r = *(fly1P);
2027 fly1i = *(fly1P+1);
2029 for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
2031 fly2r = *(fly2P);
2032 fly2i = *(fly2P + 1);
2033 fly3r = *(fly3P);
2034 fly3i = *(fly3P + 1);
2035 fly4r = *(fly0P + FlyOffsetA);
2036 fly4i = *(fly0P + FlyOffsetAIm);
2037 fly5r = *(fly1P + FlyOffsetA);
2038 fly5i = *(fly1P + FlyOffsetAIm);
2039 fly6r = *(fly2P + FlyOffsetA);
2040 fly6i = *(fly2P + FlyOffsetAIm);
2041 fly7r = *(fly3P + FlyOffsetA);
2042 fly7i = *(fly3P + FlyOffsetAIm);
2044 t1r = fly0r - U0r * fly1r;
2045 t1r = t1r - U0i * fly1i;
2046 t1i = fly0i + U0i * fly1r;
2047 t1i = t1i - U0r * fly1i;
2048 t0r = scale * fly0r - t1r;
2049 t0i = scale * fly0i - t1i;
2051 fly1r = fly2r - U0r * fly3r;
2052 fly1r = fly1r - U0i * fly3i;
2053 fly1i = fly2i + U0i * fly3r;
2054 fly1i = fly1i - U0r * fly3i;
2055 fly2r = scale * fly2r - fly1r;
2056 fly2i = scale * fly2i - fly1i;
2058 fly0r = fly4r - U0r * fly5r;
2059 fly0r = fly0r - U0i * fly5i;
2060 fly0i = fly4i + U0i * fly5r;
2061 fly0i = fly0i - U0r * fly5i;
2062 fly4r = scale * fly4r - fly0r;
2063 fly4i = scale * fly4i - fly0i;
2065 fly3r = fly6r - U0r * fly7r;
2066 fly3r = fly3r - U0i * fly7i;
2067 fly3i = fly6i + U0i * fly7r;
2068 fly3i = fly3i - U0r * fly7i;
2069 fly6r = scale * fly6r - fly3r;
2070 fly6i = scale * fly6i - fly3i;
2073 fly5r = t0r - U1r * fly2r;
2074 fly5r = fly5r - U1i * fly2i;
2075 fly5i = t0i + U1i * fly2r;
2076 fly5i = fly5i - U1r * fly2i;
2077 t0r = scale * t0r - fly5r;
2078 t0i = scale * t0i - fly5i;
2080 fly7r = t1r + U1i * fly1r;
2081 fly7r = fly7r - U1r * fly1i;
2082 fly7i = t1i + U1r * fly1r;
2083 fly7i = fly7i + U1i * fly1i;
2084 t1r = scale * t1r - fly7r;
2085 t1i = scale * t1i - fly7i;
2087 fly2r = fly4r - U1r * fly6r;
2088 fly2r = fly2r - U1i * fly6i;
2089 fly2i = fly4i + U1i * fly6r;
2090 fly2i = fly2i - U1r * fly6i;
2091 fly4r = scale * fly4r - fly2r;
2092 fly4i = scale * fly4i - fly2i;
2094 fly1r = fly0r + U1i * fly3r;
2095 fly1r = fly1r - U1r * fly3i;
2096 fly1i = fly0i + U1r * fly3r;
2097 fly1i = fly1i + U1i * fly3i;
2098 fly0r = scale * fly0r - fly1r;
2099 fly0i = scale * fly0i - fly1i;
2101 fly6r = t0r - U2r * fly4r;
2102 fly6r = fly6r - U2i * fly4i;
2103 fly6i = t0i + U2i * fly4r;
2104 fly6i = fly6i - U2r * fly4i;
2105 t0r = scale * t0r - fly6r;
2106 t0i = scale * t0i - fly6i;
2108 fly3r = fly5r - U2i * fly2r;
2109 fly3r = fly3r + U2r * fly2i;
2110 fly3i = fly5i - U2r * fly2r;
2111 fly3i = fly3i - U2i * fly2i;
2112 fly2r = scale * fly5r - fly3r;
2113 fly2i = scale * fly5i - fly3i;
2115 fly4r = t1r - U3r * fly0r;
2116 fly4r = fly4r - U3i * fly0i;
2117 fly4i = t1i + U3i * fly0r;
2118 fly4i = fly4i - U3r * fly0i;
2119 t1r = scale * t1r - fly4r;
2120 t1i = scale * t1i - fly4i;
2122 fly5r = fly7r + U3i * fly1r;
2123 fly5r = fly5r - U3r * fly1i;
2124 fly5i = fly7i + U3r * fly1r;
2125 fly5i = fly5i + U3i * fly1i;
2126 fly7r = scale * fly7r - fly5r;
2127 fly7i = scale * fly7i - fly5i;
2129 *(fly0P + FlyOffsetA) = fly6r;
2130 *(fly0P + FlyOffsetAIm) = fly6i;
2131 *(fly0P) = t0r;
2132 *(fly0P + 1) = t0i;
2133 *(fly2P) = fly3r;
2134 *(fly2P + 1) = fly3i;
2135 *(fly2P + FlyOffsetA) = fly2r;
2136 *(fly2P + FlyOffsetAIm) = fly2i;
2138 fly0r = *(fly0P + FlyOffsetB);
2139 fly0i = *(fly0P + FlyOffsetBIm);
2141 *(fly1P + FlyOffsetA) = fly4r;
2142 *(fly1P + FlyOffsetAIm) = fly4i;
2143 *(fly1P) = t1r;
2144 *(fly1P + 1) = t1i;
2146 fly1r = *(fly1P + FlyOffsetB);
2147 fly1i = *(fly1P + FlyOffsetBIm);
2149 *(fly3P + FlyOffsetA) = fly5r;
2150 *(fly3P + FlyOffsetAIm) = fly5i;
2151 *(fly3P) = fly7r;
2152 *(fly3P + 1) = fly7i;
2154 fly0P = (fly0P + FlyOffsetB);
2155 fly1P = (fly1P + FlyOffsetB);
2156 fly2P = (fly2P + FlyOffsetB);
2157 fly3P = (fly3P + FlyOffsetB);
2160 fly2r = *(fly2P);
2161 fly2i = *(fly2P + 1);
2162 fly3r = *(fly3P);
2163 fly3i = *(fly3P + 1);
2164 fly4r = *(fly0P + FlyOffsetA);
2165 fly4i = *(fly0P + FlyOffsetAIm);
2166 fly5r = *(fly1P + FlyOffsetA);
2167 fly5i = *(fly1P + FlyOffsetAIm);
2168 fly6r = *(fly2P + FlyOffsetA);
2169 fly6i = *(fly2P + FlyOffsetAIm);
2170 fly7r = *(fly3P + FlyOffsetA);
2171 fly7i = *(fly3P + FlyOffsetAIm);
2173 t1r = fly0r - U0r * fly1r;
2174 t1r = t1r - U0i * fly1i;
2175 t1i = fly0i + U0i * fly1r;
2176 t1i = t1i - U0r * fly1i;
2177 t0r = scale * fly0r - t1r;
2178 t0i = scale * fly0i - t1i;
2180 fly1r = fly2r - U0r * fly3r;
2181 fly1r = fly1r - U0i * fly3i;
2182 fly1i = fly2i + U0i * fly3r;
2183 fly1i = fly1i - U0r * fly3i;
2184 fly2r = scale * fly2r - fly1r;
2185 fly2i = scale * fly2i - fly1i;
2187 fly0r = fly4r - U0r * fly5r;
2188 fly0r = fly0r - U0i * fly5i;
2189 fly0i = fly4i + U0i * fly5r;
2190 fly0i = fly0i - U0r * fly5i;
2191 fly4r = scale * fly4r - fly0r;
2192 fly4i = scale * fly4i - fly0i;
2194 fly3r = fly6r - U0r * fly7r;
2195 fly3r = fly3r - U0i * fly7i;
2196 fly3i = fly6i + U0i * fly7r;
2197 fly3i = fly3i - U0r * fly7i;
2198 fly6r = scale * fly6r - fly3r;
2199 fly6i = scale * fly6i - fly3i;
2202 fly5r = t0r - U1r * fly2r;
2203 fly5r = fly5r - U1i * fly2i;
2204 fly5i = t0i + U1i * fly2r;
2205 fly5i = fly5i - U1r * fly2i;
2206 t0r = scale * t0r - fly5r;
2207 t0i = scale * t0i - fly5i;
2209 fly7r = t1r + U1i * fly1r;
2210 fly7r = fly7r - U1r * fly1i;
2211 fly7i = t1i + U1r * fly1r;
2212 fly7i = fly7i + U1i * fly1i;
2213 t1r = scale * t1r - fly7r;
2214 t1i = scale * t1i - fly7i;
2216 fly2r = fly4r - U1r * fly6r;
2217 fly2r = fly2r - U1i * fly6i;
2218 fly2i = fly4i + U1i * fly6r;
2219 fly2i = fly2i - U1r * fly6i;
2220 fly4r = scale * fly4r - fly2r;
2221 fly4i = scale * fly4i - fly2i;
2223 fly1r = fly0r + U1i * fly3r;
2224 fly1r = fly1r - U1r * fly3i;
2225 fly1i = fly0i + U1r * fly3r;
2226 fly1i = fly1i + U1i * fly3i;
2227 fly0r = scale * fly0r - fly1r;
2228 fly0i = scale * fly0i - fly1i;
2230 U0i = *(U0iP = (U0iP - NsameU4));
2231 U0r = *(U0rP = (U0rP + NsameU4));
2232 U1r = *(U1rP = (U1rP + NsameU2));
2233 U1i = *(U1iP = (U1iP - NsameU2));
2234 if(stage&1)
2235 U0r = -U0r;
2237 fly6r = t0r - U2r * fly4r;
2238 fly6r = fly6r - U2i * fly4i;
2239 fly6i = t0i + U2i * fly4r;
2240 fly6i = fly6i - U2r * fly4i;
2241 t0r = scale * t0r - fly6r;
2242 t0i = scale * t0i - fly6i;
2244 fly3r = fly5r - U2i * fly2r;
2245 fly3r = fly3r + U2r * fly2i;
2246 fly3i = fly5i - U2r * fly2r;
2247 fly3i = fly3i - U2i * fly2i;
2248 fly2r = scale * fly5r - fly3r;
2249 fly2i = scale * fly5i - fly3i;
2251 fly4r = t1r - U3r * fly0r;
2252 fly4r = fly4r - U3i * fly0i;
2253 fly4i = t1i + U3i * fly0r;
2254 fly4i = fly4i - U3r * fly0i;
2255 t1r = scale * t1r - fly4r;
2256 t1i = scale * t1i - fly4i;
2258 fly5r = fly7r + U3i * fly1r;
2259 fly5r = fly5r - U3r * fly1i;
2260 fly5i = fly7i + U3r * fly1r;
2261 fly5i = fly5i + U3i * fly1i;
2262 fly7r = scale * fly7r - fly5r;
2263 fly7i = scale * fly7i - fly5i;
2265 *(fly0P + FlyOffsetA) = fly6r;
2266 *(fly0P + FlyOffsetAIm) = fly6i;
2267 *(fly0P) = t0r;
2268 *(fly0P + 1) = t0i;
2270 U2r = *(U2rP = (U2rP + NsameU1));
2271 U2i = *(U2iP = (U2iP - NsameU1));
2273 *(fly2P) = fly3r;
2274 *(fly2P + 1) = fly3i;
2275 *(fly2P + FlyOffsetA) = fly2r;
2276 *(fly2P + FlyOffsetAIm) = fly2i;
2277 *(fly1P + FlyOffsetA) = fly4r;
2278 *(fly1P + FlyOffsetAIm) = fly4i;
2279 *(fly1P) = t1r;
2280 *(fly1P + 1) = t1i;
2282 U3r = *( U2rP + U3offset);
2283 U3i = *( U2iP - U3offset);
2285 *(fly3P + FlyOffsetA) = fly5r;
2286 *(fly3P + FlyOffsetAIm) = fly5i;
2287 *(fly3P) = fly7r;
2288 *(fly3P + 1) = fly7i;
2290 IOP = IOP + 2;
2291 fly0P = IOP;
2292 fly1P = (IOP+Flyinc);
2293 fly2P = (fly1P+Flyinc);
2294 fly3P = (fly2P+Flyinc);
2296 NsameU4 = -NsameU4;
2298 if(stage&1){
2299 LoopN >>= 3;
2300 NsameU1 >>= 3;
2301 NsameU2 >>= 3;
2302 NsameU4 >>= 3;
2303 NdiffU <<= 3;
2304 Flyinc = Flyinc << 3;
2305 FlyOffsetA <<= 3;
2306 FlyOffsetB <<= 3;
2307 FlyOffsetAIm = FlyOffsetA + 1;
2308 FlyOffsetBIm = FlyOffsetB + 1;
2312 /* Finish RFFT */
2313 M=M+1;
2315 FlyOffsetA = Ntbl[M] * 2/4;
2316 FlyOffsetAIm = Ntbl[M] * 2/4 + 1;
2318 IOP = ioptr;
2320 fly0P = (IOP + Ntbl[M]*2/4);
2321 fly1P = (IOP + Ntbl[M]*2/8);
2323 U0rP = &Utbl[Ntbl[M-3]];
2325 U0r = *U0rP,
2327 fly0r = *(IOP);
2328 fly0i = *(IOP + 1);
2329 fly1r = *(fly0P);
2330 fly1i = *(fly0P + 1);
2331 fly2r = *(fly1P);
2332 fly2i = *(fly1P + 1);
2333 fly3r = *(fly1P + FlyOffsetA);
2334 fly3i = *(fly1P + FlyOffsetAIm);
2336 t0r = scale * fly0r + scale * fly0i; /* compute Re(x[0]) */
2337 t0i = scale * fly0r - scale * fly0i; /* compute Re(x[N/2]) */
2338 t1r = scale * fly1r;
2339 t1i = -scale * fly1i;
2341 fly0r = fly2r + fly3r;
2342 fly0i = fly2i - fly3i;
2343 fly1r = fly2i + fly3i;
2344 fly1i = fly3r - fly2r;
2346 fly2r = fly0r + U0r * fly1r;
2347 fly2r = fly2r + U0r * fly1i;
2348 fly2i = fly0i - U0r * fly1r;
2349 fly2i = fly2i + U0r * fly1i;
2350 fly3r = scale * fly0r - fly2r;
2351 fly3i = fly2i - scale * fly0i;
2353 *(IOP) = t0r;
2354 *(IOP + 1) = t0i;
2355 *(fly0P) = t1r;
2356 *(fly0P + 1) = t1i;
2357 *(fly1P) = fly2r;
2358 *(fly1P + 1) = fly2i;
2359 *(fly1P + FlyOffsetA) = fly3r;
2360 *(fly1P + FlyOffsetAIm) = fly3i;
2362 U0rP = &Utbl[1];
2363 U0iP = &Utbl[Ntbl[M-2]-1];
2365 U0r = *U0rP,
2366 U0i = *U0iP;
2368 fly0P = (IOP + 2);
2369 fly1P = (IOP + (Ntbl[M-2]-1)*2);
2371 /* Butterflys */
2373 f0 - t0 - - f2
2374 f1 - t1 -U0 - f3
2375 f2 - f0 - - t0
2376 f3 - f1 -U0a- t1
2379 for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){
2381 fly0r = *(fly0P);
2382 fly0i = *(fly0P + 1);
2383 fly1r = *(fly1P + FlyOffsetA);
2384 fly1i = *(fly1P + FlyOffsetAIm);
2385 fly2r = *(fly1P);
2386 fly2i = *(fly1P + 1);
2387 fly3r = *(fly0P + FlyOffsetA);
2388 fly3i = *(fly0P + FlyOffsetAIm);
2390 t0r = fly0r + fly1r;
2391 t0i = fly0i - fly1i;
2392 t1r = fly0i + fly1i;
2393 t1i = fly1r - fly0r;
2395 fly0r = fly2r + fly3r;
2396 fly0i = fly2i - fly3i;
2397 fly1r = fly2i + fly3i;
2398 fly1i = fly3r - fly2r;
2400 fly2r = t0r + U0r * t1r;
2401 fly2r = fly2r + U0i * t1i;
2402 fly2i = t0i - U0i * t1r;
2403 fly2i = fly2i + U0r * t1i;
2404 fly3r = scale * t0r - fly2r;
2405 fly3i = fly2i - scale * t0i;
2407 t0r = fly0r + U0i * fly1r;
2408 t0r = t0r + U0r * fly1i;
2409 t0i = fly0i - U0r * fly1r;
2410 t0i = t0i + U0i * fly1i;
2411 t1r = scale * fly0r - t0r;
2412 t1i = t0i - scale * fly0i;
2414 *(fly0P) = fly2r;
2415 *(fly0P + 1) = fly2i;
2416 *(fly1P + FlyOffsetA) = fly3r;
2417 *(fly1P + FlyOffsetAIm) = fly3i;
2419 U0r = *(U0rP = (U0rP + 1));
2420 U0i = *(U0iP = (U0iP - 1));
2422 *(fly1P) = t0r;
2423 *(fly1P + 1) = t0i;
2424 *(fly0P + FlyOffsetA) = t1r;
2425 *(fly0P + FlyOffsetAIm) = t1i;
2427 fly0P += 2;
2428 fly1P -= 2;
2431 ioptr += Ntbl[M];
2435 void riffts(float *ioptr, long M, long Rows, float *Utbl){
2436 /* Compute in-place real ifft on the rows of the input array */
2437 /* INPUTS */
2438 /* M = log2 of fft size */
2439 /* *ioptr = input data array in the following order */
2440 /* Re(x[0]), Re(x[N/2]), Re(x[1]), Im(x[1]), Re(x[2]), Im(x[2]), ... Re(x[N/2-1]), Im(x[N/2-1]). */
2441 /* *Utbl = cosine table */
2442 /* Rows = number of rows in ioptr array (use Rows of 1 if ioptr is a 1 dimensional array) */
2443 /* OUTPUTS */
2444 /* *ioptr = real output data array */
2446 long Flyinc;
2447 long FlyOffsetA;
2448 long FlyOffsetAIm;
2449 long FlyOffsetB;
2450 long FlyOffsetBIm;
2451 long NsameU1;
2452 long NsameU2;
2453 long NsameU4;
2454 long diffUcnt;
2455 long LoopCnt;
2456 float scale;
2457 float fly0r;
2458 float fly0i;
2459 float fly1r;
2460 float fly1i;
2461 float fly2r;
2462 float fly2i;
2463 float fly3r;
2464 float fly3i;
2465 float fly4r;
2466 float fly4i;
2467 float fly5r;
2468 float fly5i;
2469 float fly6r;
2470 float fly6i;
2471 float fly7r;
2472 float fly7i;
2473 float U0r;
2474 float U0i;
2475 float U1r;
2476 float U1i;
2477 float U2r;
2478 float U2i;
2479 float U3r;
2480 float U3i;
2481 float t0r;
2482 float t0i;
2483 float t1r;
2484 float t1i;
2486 float *fly0P;
2487 float *fly1P;
2488 float *fly2P;
2489 float *fly3P;
2491 float *U0rP;
2492 float *U0iP;
2493 float *U1rP;
2494 float *U1iP;
2495 float *U2rP;
2496 float *U2iP;
2497 float *IOP;
2498 long U3offset;
2500 long stage;
2501 long NdiffU;
2502 long LoopN;
2504 const long BRshift = MAXMROOT - ((M-1)>>1); /* for RIFFT */
2505 const long Nrems2 = Ntbl[(M-1)-((M-1)>>1)+1]; /* for RIFFT */
2506 const long Nroot_1_ColInc = (Ntbl[(M-1)-1]-Ntbl[(M-1)-((M-1)>>1)])*2; /* for RIFFT */
2508 for (;Rows>0;Rows--){
2510 /* Start RIFFT */
2512 FlyOffsetA = Ntbl[M] * 2/4;
2513 FlyOffsetAIm = Ntbl[M] * 2/4 + 1;
2515 IOP = ioptr;
2517 fly0P = (IOP + Ntbl[M]*2/4);
2518 fly1P = (IOP + Ntbl[M]*2/8);
2520 U0rP = &Utbl[Ntbl[M-3]];
2522 U0r = *U0rP,
2524 fly0r = *(IOP);
2525 fly0i = *(IOP + 1);
2526 fly1r = *(fly0P);
2527 fly1i = *(fly0P + 1);
2528 fly2r = *(fly1P);
2529 fly2i = *(fly1P + 1);
2530 fly3r = *(fly1P + FlyOffsetA);
2531 fly3i = *(fly1P + FlyOffsetAIm);
2533 t0r = fly0r + fly0i;
2534 t0i = fly0r - fly0i;
2535 t1r = fly1r + fly1r;
2536 t1i = -fly1i - fly1i;
2538 fly0r = fly2r + fly3r;
2539 fly0i = fly2i - fly3i;
2540 fly1r = fly2r - fly3r;
2541 fly1i = fly2i + fly3i;
2543 fly3r = fly1r * U0r;
2544 fly3r = fly3r - U0r * fly1i;
2545 fly3i = fly1r * U0r;
2546 fly3i = fly3i + U0r * fly1i;
2548 fly2r = fly0r - fly3i;
2549 fly2i = fly0i + fly3r;
2550 fly1r = fly0r + fly3i;
2551 fly1i = -fly0i + fly3r;
2553 *(IOP) = t0r;
2554 *(IOP + 1) = t0i;
2555 *(fly0P) = t1r;
2556 *(fly0P + 1) = t1i;
2557 *(fly1P) = fly2r;
2558 *(fly1P + 1) = fly2i;
2559 *(fly1P + FlyOffsetA) = fly1r;
2560 *(fly1P + FlyOffsetAIm) = fly1i;
2562 U0rP = &Utbl[1];
2563 U0iP = &Utbl[Ntbl[M-2]-1];
2565 U0r = *U0rP,
2566 U0i = *U0iP;
2568 fly0P = (IOP + 2);
2569 fly1P = (IOP + (Ntbl[M-2]-1)*2);
2571 /* Butterflys */
2573 f0 - t0 - f2
2574 f1 -t1 -U0- f3 - f1
2575 f2 - f0 - t0
2576 f3 -f1 -U0a t1 - f3
2579 for (diffUcnt = Ntbl[M-3]-1; diffUcnt > 0 ; diffUcnt--){
2581 fly0r = *(fly0P);
2582 fly0i = *(fly0P + 1);
2583 fly1r = *(fly1P + FlyOffsetA);
2584 fly1i = *(fly1P + FlyOffsetAIm);
2585 fly2r = *(fly1P);
2586 fly2i = *(fly1P + 1);
2587 fly3r = *(fly0P + FlyOffsetA);
2588 fly3i = *(fly0P + FlyOffsetAIm);
2590 t0r = fly0r + fly1r;
2591 t0i = fly0i - fly1i;
2592 t1r = fly0r - fly1r;
2593 t1i = fly0i + fly1i;
2595 fly0r = fly2r + fly3r;
2596 fly0i = fly2i - fly3i;
2597 fly1r = fly2r - fly3r;
2598 fly1i = fly2i + fly3i;
2601 fly3r = t1r * U0r;
2602 fly3r = fly3r - U0i * t1i;
2603 fly3i = t1r * U0i;
2604 fly3i = fly3i + U0r * t1i;
2606 t1r = fly1r * U0i;
2607 t1r = t1r - U0r * fly1i;
2608 t1i = fly1r * U0r;
2609 t1i = t1i + U0i * fly1i;
2612 fly2r = t0r - fly3i;
2613 fly2i = t0i + fly3r;
2614 fly1r = t0r + fly3i;
2615 fly1i = -t0i + fly3r;
2617 t0r = fly0r - t1i;
2618 t0i = fly0i + t1r;
2619 fly3r = fly0r + t1i;
2620 fly3i = -fly0i + t1r;
2623 *(fly0P) = fly2r;
2624 *(fly0P + 1) = fly2i;
2625 *(fly1P + FlyOffsetA) = fly1r;
2626 *(fly1P + FlyOffsetAIm) = fly1i;
2628 U0r = *(U0rP = (U0rP + 1));
2629 U0i = *(U0iP = (U0iP - 1));
2631 *(fly1P) = t0r;
2632 *(fly1P + 1) = t0i;
2633 *(fly0P + FlyOffsetA) = fly3r;
2634 *(fly0P + FlyOffsetAIm) = fly3i;
2636 fly0P += 2;
2637 fly1P -= 2;
2640 M=M-1; /* for RIFFT */
2642 FlyOffsetA = Ntbl[M] * 2/2;
2643 FlyOffsetAIm = FlyOffsetA + 1;
2644 FlyOffsetB = FlyOffsetA + 2;
2645 FlyOffsetBIm = FlyOffsetB + 1;
2647 /* BitrevR2 ** bit reverse shuffle and first radix 2 stage ******/
2649 scale = 1./Ntbl[M+1];
2650 for (stage = 0; stage < Ntbl[M-(M>>1)]*2; stage += Ntbl[M>>1]*2){
2651 for (LoopN = (Ntbl[(M>>1)-1]-1); LoopN >= 0; LoopN--){
2652 LoopCnt = (Ntbl[(M>>1)-1]-1);
2653 fly0P = ioptr + Nroot_1_ColInc + ((int)BRcnt[LoopN] >> BRshift)*(2*2) + stage;
2654 IOP = ioptr + (LoopN<<(M+1)/2) * 2 + stage;
2655 fly1P = IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2);
2656 fly0r = *(fly0P);
2657 fly0i = *(fly0P+1);
2658 fly1r = *(fly0P+FlyOffsetA);
2659 fly1i = *(fly0P+FlyOffsetAIm);
2660 for (; LoopCnt > LoopN;){
2661 fly2r = *(fly0P+2);
2662 fly2i = *(fly0P+(2+1));
2663 fly3r = *(fly0P+FlyOffsetB);
2664 fly3i = *(fly0P+FlyOffsetBIm);
2665 fly4r = *(fly1P);
2666 fly4i = *(fly1P+1);
2667 fly5r = *(fly1P+FlyOffsetA);
2668 fly5i = *(fly1P+FlyOffsetAIm);
2669 fly6r = *(fly1P+2);
2670 fly6i = *(fly1P+(2+1));
2671 fly7r = *(fly1P+FlyOffsetB);
2672 fly7i = *(fly1P+FlyOffsetBIm);
2674 t0r = fly0r + fly1r;
2675 t0i = fly0i + fly1i;
2676 fly1r = fly0r - fly1r;
2677 fly1i = fly0i - fly1i;
2678 t1r = fly2r + fly3r;
2679 t1i = fly2i + fly3i;
2680 fly3r = fly2r - fly3r;
2681 fly3i = fly2i - fly3i;
2682 fly0r = fly4r + fly5r;
2683 fly0i = fly4i + fly5i;
2684 fly5r = fly4r - fly5r;
2685 fly5i = fly4i - fly5i;
2686 fly2r = fly6r + fly7r;
2687 fly2i = fly6i + fly7i;
2688 fly7r = fly6r - fly7r;
2689 fly7i = fly6i - fly7i;
2691 *(fly1P) = scale*t0r;
2692 *(fly1P+1) = scale*t0i;
2693 *(fly1P+2) = scale*fly1r;
2694 *(fly1P+(2+1)) = scale*fly1i;
2695 *(fly1P+FlyOffsetA) = scale*t1r;
2696 *(fly1P+FlyOffsetAIm) = scale*t1i;
2697 *(fly1P+FlyOffsetB) = scale*fly3r;
2698 *(fly1P+FlyOffsetBIm) = scale*fly3i;
2699 *(fly0P) = scale*fly0r;
2700 *(fly0P+1) = scale*fly0i;
2701 *(fly0P+2) = scale*fly5r;
2702 *(fly0P+(2+1)) = scale*fly5i;
2703 *(fly0P+FlyOffsetA) = scale*fly2r;
2704 *(fly0P+FlyOffsetAIm) = scale*fly2i;
2705 *(fly0P+FlyOffsetB) = scale*fly7r;
2706 *(fly0P+FlyOffsetBIm) = scale*fly7i;
2708 fly0P -= Nrems2;
2709 fly0r = *(fly0P);
2710 fly0i = *(fly0P+1);
2711 fly1r = *(fly0P+FlyOffsetA);
2712 fly1i = *(fly0P+FlyOffsetAIm);
2713 LoopCnt -= 1;
2714 fly1P = (IOP + ((int)BRcnt[LoopCnt] >> BRshift)*(2*2));
2716 fly2r = *(fly0P+2);
2717 fly2i = *(fly0P+(2+1));
2718 fly3r = *(fly0P+FlyOffsetB);
2719 fly3i = *(fly0P+FlyOffsetBIm);
2721 t0r = fly0r + fly1r;
2722 t0i = fly0i + fly1i;
2723 fly1r = fly0r - fly1r;
2724 fly1i = fly0i - fly1i;
2725 t1r = fly2r + fly3r;
2726 t1i = fly2i + fly3i;
2727 fly3r = fly2r - fly3r;
2728 fly3i = fly2i - fly3i;
2730 *(fly0P) = scale*t0r;
2731 *(fly0P+1) = scale*t0i;
2732 *(fly0P+2) = scale*fly1r;
2733 *(fly0P+(2+1)) = scale*fly1i;
2734 *(fly0P+FlyOffsetA) = scale*t1r;
2735 *(fly0P+FlyOffsetAIm) = scale*t1i;
2736 *(fly0P+FlyOffsetB) = scale*fly3r;
2737 *(fly0P+FlyOffsetBIm) = scale*fly3i;
2742 /**** FFTC **************/
2744 scale = 2.0;
2746 NdiffU = 2;
2747 Flyinc = (NdiffU);
2749 NsameU4 = Ntbl[M+1]/4; /* for RIFFT */
2750 LoopN = Ntbl[M-3];
2752 stage = ((M-1)/3);
2753 if ( (M-1-(stage * 3)) != 0 ){
2754 FlyOffsetA = Flyinc << 2;
2755 FlyOffsetB = FlyOffsetA << 1;
2756 FlyOffsetAIm = FlyOffsetA + 1;
2757 FlyOffsetBIm = FlyOffsetB + 1;
2758 if ( (M-1-(stage * 3)) == 1 ){
2759 /* 1 radix 2 stage */
2761 IOP = ioptr;
2762 fly0P = IOP;
2763 fly1P = (IOP+Flyinc);
2764 fly2P = (fly1P+Flyinc);
2765 fly3P = (fly2P+Flyinc);
2767 /* Butterflys */
2769 t0 - - t0
2770 t1 - - t1
2771 f2 - 1- f5
2772 f1 - -i- f7
2773 f4 - - f4
2774 f0 - - f0
2775 f6 - 1- f2
2776 f3 - -i- f1
2779 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
2780 t0r = *(fly0P);
2781 t0i = *(fly0P + 1);
2782 t1r = *(fly1P);
2783 t1i = *(fly1P + 1);
2784 fly2r = *(fly2P);
2785 fly2i = *(fly2P + 1);
2786 fly1r = *(fly3P);
2787 fly1i = *(fly3P + 1);
2788 fly4r = *(fly0P + FlyOffsetA);
2789 fly4i = *(fly0P + FlyOffsetAIm);
2790 fly0r = *(fly1P + FlyOffsetA);
2791 fly0i = *(fly1P + FlyOffsetAIm);
2792 fly6r = *(fly2P + FlyOffsetA);
2793 fly6i = *(fly2P + FlyOffsetAIm);
2794 fly3r = *(fly3P + FlyOffsetA);
2795 fly3i = *(fly3P + FlyOffsetAIm);
2797 fly5r = t0r - fly2r;
2798 fly5i = t0i - fly2i;
2799 t0r = t0r + fly2r;
2800 t0i = t0i + fly2i;
2802 fly7r = t1r + fly1i;
2803 fly7i = t1i - fly1r;
2804 t1r = t1r - fly1i;
2805 t1i = t1i + fly1r;
2807 fly2r = fly4r - fly6r;
2808 fly2i = fly4i - fly6i;
2809 fly4r = fly4r + fly6r;
2810 fly4i = fly4i + fly6i;
2812 fly1r = fly0r + fly3i;
2813 fly1i = fly0i - fly3r;
2814 fly0r = fly0r - fly3i;
2815 fly0i = fly0i + fly3r;
2817 *(fly2P) = fly5r;
2818 *(fly2P + 1) = fly5i;
2819 *(fly0P) = t0r;
2820 *(fly0P + 1) = t0i;
2821 *(fly3P) = fly7r;
2822 *(fly3P + 1) = fly7i;
2823 *(fly1P) = t1r;
2824 *(fly1P + 1) = t1i;
2825 *(fly2P + FlyOffsetA) = fly2r;
2826 *(fly2P + FlyOffsetAIm) = fly2i;
2827 *(fly0P + FlyOffsetA) = fly4r;
2828 *(fly0P + FlyOffsetAIm) = fly4i;
2829 *(fly3P + FlyOffsetA) = fly1r;
2830 *(fly3P + FlyOffsetAIm) = fly1i;
2831 *(fly1P + FlyOffsetA) = fly0r;
2832 *(fly1P + FlyOffsetAIm) = fly0i;
2834 fly0P = (fly0P + FlyOffsetB);
2835 fly1P = (fly1P + FlyOffsetB);
2836 fly2P = (fly2P + FlyOffsetB);
2837 fly3P = (fly3P + FlyOffsetB);
2840 NsameU4 >>= 1;
2841 LoopN >>= 1;
2842 NdiffU <<= 1;
2843 Flyinc = Flyinc << 1;
2845 else{
2846 /* 1 radix 4 stage */
2847 IOP = ioptr;
2849 U3r = 0.7071067811865475244008443621; /* sqrt(0.5); */
2850 U3i = U3r;
2851 fly0P = IOP;
2852 fly1P = (IOP+Flyinc);
2853 fly2P = (fly1P+Flyinc);
2854 fly3P = (fly2P+Flyinc);
2856 /* Butterflys */
2858 t0 - - t0 - - t0
2859 t1 - - t1 - - t1
2860 f2 - 1- f5 - - f5
2861 f1 - -i- f7 - - f7
2862 f4 - - f4 - 1- f6
2863 f0 - - f0 -U3 - f3
2864 f6 - 1- f2 - -i- f4
2865 f3 - -i- f1 -U3a- f2
2868 for (LoopCnt = LoopN; LoopCnt > 0 ; LoopCnt--){
2869 t0r = *(fly0P);
2870 t0i = *(fly0P + 1);
2871 t1r = *(fly1P);
2872 t1i = *(fly1P + 1);
2873 fly2r = *(fly2P);
2874 fly2i = *(fly2P + 1);
2875 fly1r = *(fly3P);
2876 fly1i = *(fly3P + 1);
2877 fly4r = *(fly0P + FlyOffsetA);
2878 fly4i = *(fly0P + FlyOffsetAIm);
2879 fly0r = *(fly1P + FlyOffsetA);
2880 fly0i = *(fly1P + FlyOffsetAIm);
2881 fly6r = *(fly2P + FlyOffsetA);
2882 fly6i = *(fly2P + FlyOffsetAIm);
2883 fly3r = *(fly3P + FlyOffsetA);
2884 fly3i = *(fly3P + FlyOffsetAIm);
2886 fly5r = t0r - fly2r;
2887 fly5i = t0i - fly2i;
2888 t0r = t0r + fly2r;
2889 t0i = t0i + fly2i;
2891 fly7r = t1r + fly1i;
2892 fly7i = t1i - fly1r;
2893 t1r = t1r - fly1i;
2894 t1i = t1i + fly1r;
2896 fly2r = fly4r - fly6r;
2897 fly2i = fly4i - fly6i;
2898 fly4r = fly4r + fly6r;
2899 fly4i = fly4i + fly6i;
2901 fly1r = fly0r + fly3i;
2902 fly1i = fly0i - fly3r;
2903 fly0r = fly0r - fly3i;
2904 fly0i = fly0i + fly3r;
2906 fly6r = t0r - fly4r;
2907 fly6i = t0i - fly4i;
2908 t0r = t0r + fly4r;
2909 t0i = t0i + fly4i;
2911 fly3r = fly5r + fly2i;
2912 fly3i = fly5i - fly2r;
2913 fly5r = fly5r - fly2i;
2914 fly5i = fly5i + fly2r;
2916 fly4r = t1r - U3r * fly0r;
2917 fly4r = fly4r + U3i * fly0i;
2918 fly4i = t1i - U3i * fly0r;
2919 fly4i = fly4i - U3r * fly0i;
2920 t1r = scale * t1r - fly4r;
2921 t1i = scale * t1i - fly4i;
2923 fly2r = fly7r + U3i * fly1r;
2924 fly2r = fly2r + U3r * fly1i;
2925 fly2i = fly7i - U3r * fly1r;
2926 fly2i = fly2i + U3i * fly1i;
2927 fly7r = scale * fly7r - fly2r;
2928 fly7i = scale * fly7i - fly2i;
2930 *(fly0P + FlyOffsetA) = fly6r;
2931 *(fly0P + FlyOffsetAIm) = fly6i;
2932 *(fly0P) = t0r;
2933 *(fly0P + 1) = t0i;
2934 *(fly2P + FlyOffsetA) = fly3r;
2935 *(fly2P + FlyOffsetAIm) = fly3i;
2936 *(fly2P) = fly5r;
2937 *(fly2P + 1) = fly5i;
2938 *(fly1P + FlyOffsetA) = fly4r;
2939 *(fly1P + FlyOffsetAIm) = fly4i;
2940 *(fly1P) = t1r;
2941 *(fly1P + 1) = t1i;
2942 *(fly3P + FlyOffsetA) = fly2r;
2943 *(fly3P + FlyOffsetAIm) = fly2i;
2944 *(fly3P) = fly7r;
2945 *(fly3P + 1) = fly7i;
2947 fly0P = (fly0P + FlyOffsetB);
2948 fly1P = (fly1P + FlyOffsetB);
2949 fly2P = (fly2P + FlyOffsetB);
2950 fly3P = (fly3P + FlyOffsetB);
2954 NsameU4 >>= 2;
2955 LoopN >>= 2;
2956 NdiffU <<= 2;
2957 Flyinc = Flyinc << 2;
2961 NsameU2 = NsameU4 >> 1;
2962 NsameU1 = NsameU2 >> 1;
2963 Flyinc <<= 1;
2964 FlyOffsetA = Flyinc << 2;
2965 FlyOffsetB = FlyOffsetA << 1;
2966 FlyOffsetAIm = FlyOffsetA + 1;
2967 FlyOffsetBIm = FlyOffsetB + 1;
2968 LoopN >>= 1;
2970 /* ****** RADIX 8 Stages */
2971 for (stage = stage<<1; stage > 0 ; stage--){
2973 /* an fft stage is done in two parts to ease use of the single quadrant cos table */
2975 /* fftcalc1(iobuf, Utbl, N, NdiffU, LoopN); */
2976 if(!(stage&1)){
2977 U0rP = &Utbl[0];
2978 U0iP = &Utbl[Ntbl[M-1]]; /* for RIFFT */
2979 U1rP = U0rP;
2980 U1iP = U0iP;
2981 U2rP = U0rP;
2982 U2iP = U0iP;
2983 U3offset = (Ntbl[M+1]) / 8; /* for RIFFT */
2985 IOP = ioptr;
2987 U0r = *U0rP,
2988 U0i = *U0iP;
2989 U1r = *U1rP,
2990 U1i = *U1iP;
2991 U2r = *U2rP,
2992 U2i = *U2iP;
2993 U3r = *( U2rP + U3offset);
2994 U3i = *( U2iP - U3offset);
2997 fly0P = IOP;
2998 fly1P = (IOP+Flyinc);
2999 fly2P = (fly1P+Flyinc);
3000 fly3P = (fly2P+Flyinc);
3002 for (diffUcnt = (NdiffU)>>1; diffUcnt > 0; diffUcnt--){
3004 /* Butterflys */
3006 f0 - - t0 - - t0 - - t0
3007 f1 -U0 - t1 - - t1 - - t1
3008 f2 - - f2 -U1 - f5 - - f3
3009 f3 -U0 - f1 -U1a- f7 - - f7
3010 f4 - - f4 - - f4 -U2 - f6
3011 f5 -U0 - f0 - - f0 -U3 - f4
3012 f6 - - f6 -U1 - f2 -U2a- f2
3013 f7 -U0 - f3 -U1a- f1 -U3a- f5
3016 fly0r = *(IOP);
3017 fly0i = *(IOP+1);
3018 fly1r = *(fly1P);
3019 fly1i = *(fly1P+1);
3021 for (LoopCnt = LoopN-1; LoopCnt > 0 ; LoopCnt--){
3023 fly2r = *(fly2P);
3024 fly2i = *(fly2P + 1);
3025 fly3r = *(fly3P);
3026 fly3i = *(fly3P + 1);
3027 fly4r = *(fly0P + FlyOffsetA);
3028 fly4i = *(fly0P + FlyOffsetAIm);
3029 fly5r = *(fly1P + FlyOffsetA);
3030 fly5i = *(fly1P + FlyOffsetAIm);
3031 fly6r = *(fly2P + FlyOffsetA);
3032 fly6i = *(fly2P + FlyOffsetAIm);
3033 fly7r = *(fly3P + FlyOffsetA);
3034 fly7i = *(fly3P + FlyOffsetAIm);
3036 t1r = fly0r - U0r * fly1r;
3037 t1r = t1r + U0i * fly1i;
3038 t1i = fly0i - U0i * fly1r;
3039 t1i = t1i - U0r * fly1i;
3040 t0r = scale * fly0r - t1r;
3041 t0i = scale * fly0i - t1i;
3043 fly1r = fly2r - U0r * fly3r;
3044 fly1r = fly1r + U0i * fly3i;
3045 fly1i = fly2i - U0i * fly3r;
3046 fly1i = fly1i - U0r * fly3i;
3047 fly2r = scale * fly2r - fly1r;
3048 fly2i = scale * fly2i - fly1i;
3050 fly0r = fly4r - U0r * fly5r;
3051 fly0r = fly0r + U0i * fly5i;
3052 fly0i = fly4i - U0i * fly5r;
3053 fly0i = fly0i - U0r * fly5i;
3054 fly4r = scale * fly4r - fly0r;
3055 fly4i = scale * fly4i - fly0i;
3057 fly3r = fly6r - U0r * fly7r;
3058 fly3r = fly3r + U0i * fly7i;
3059 fly3i = fly6i - U0i * fly7r;
3060 fly3i = fly3i - U0r * fly7i;
3061 fly6r = scale * fly6r - fly3r;
3062 fly6i = scale * fly6i - fly3i;
3065 fly5r = t0r - U1r * fly2r;
3066 fly5r = fly5r + U1i * fly2i;
3067 fly5i = t0i - U1i * fly2r;
3068 fly5i = fly5i - U1r * fly2i;
3069 t0r = scale * t0r - fly5r;
3070 t0i = scale * t0i - fly5i;
3072 fly7r = t1r + U1i * fly1r;
3073 fly7r = fly7r + U1r * fly1i;
3074 fly7i = t1i - U1r * fly1r;
3075 fly7i = fly7i + U1i * fly1i;
3076 t1r = scale * t1r - fly7r;
3077 t1i = scale * t1i - fly7i;
3079 fly2r = fly4r - U1r * fly6r;
3080 fly2r = fly2r + U1i * fly6i;
3081 fly2i = fly4i - U1i * fly6r;
3082 fly2i = fly2i - U1r * fly6i;
3083 fly4r = scale * fly4r - fly2r;
3084 fly4i = scale * fly4i - fly2i;
3086 fly1r = fly0r + U1i * fly3r;
3087 fly1r = fly1r + U1r * fly3i;
3088 fly1i = fly0i - U1r * fly3r;
3089 fly1i = fly1i + U1i * fly3i;
3090 fly0r = scale * fly0r - fly1r;
3091 fly0i = scale * fly0i - fly1i;
3093 fly6r = t0r - U2r * fly4r;
3094 fly6r = fly6r + U2i * fly4i;
3095 fly6i = t0i - U2i * fly4r;
3096 fly6i = fly6i - U2r * fly4i;
3097 t0r = scale * t0r - fly6r;
3098 t0i = scale * t0i - fly6i;
3100 fly3r = fly5r - U2i * fly2r;
3101 fly3r = fly3r - U2r * fly2i;
3102 fly3i = fly5i + U2r * fly2r;
3103 fly3i = fly3i - U2i * fly2i;
3104 fly2r = scale * fly5r - fly3r;
3105 fly2i = scale * fly5i - fly3i;
3107 fly4r = t1r - U3r * fly0r;
3108 fly4r = fly4r + U3i * fly0i;
3109 fly4i = t1i - U3i * fly0r;
3110 fly4i = fly4i - U3r * fly0i;
3111 t1r = scale * t1r - fly4r;
3112 t1i = scale * t1i - fly4i;
3114 fly5r = fly7r + U3i * fly1r;
3115 fly5r = fly5r + U3r * fly1i;
3116 fly5i = fly7i - U3r * fly1r;
3117 fly5i = fly5i + U3i * fly1i;
3118 fly7r = scale * fly7r - fly5r;
3119 fly7i = scale * fly7i - fly5i;
3121 *(fly0P + FlyOffsetA) = fly6r;
3122 *(fly0P + FlyOffsetAIm) = fly6i;
3123 *(fly0P) = t0r;
3124 *(fly0P + 1) = t0i;
3125 *(fly2P) = fly3r;
3126 *(fly2P + 1) = fly3i;
3127 *(fly2P + FlyOffsetA) = fly2r;
3128 *(fly2P + FlyOffsetAIm) = fly2i;
3130 fly0r = *(fly0P + FlyOffsetB);
3131 fly0i = *(fly0P + FlyOffsetBIm);
3133 *(fly1P + FlyOffsetA) = fly4r;
3134 *(fly1P + FlyOffsetAIm) = fly4i;
3135 *(fly1P) = t1r;
3136 *(fly1P + 1) = t1i;
3138 fly1r = *(fly1P + FlyOffsetB);
3139 fly1i = *(fly1P + FlyOffsetBIm);
3141 *(fly3P + FlyOffsetA) = fly5r;
3142 *(fly3P + FlyOffsetAIm) = fly5i;
3143 *(fly3P) = fly7r;
3144 *(fly3P + 1) = fly7i;
3146 fly0P = (fly0P + FlyOffsetB);
3147 fly1P = (fly1P + FlyOffsetB);
3148 fly2P = (fly2P + FlyOffsetB);
3149 fly3P = (fly3P + FlyOffsetB);
3152 fly2r = *(fly2P);
3153 fly2i = *(fly2P + 1);
3154 fly3r = *(fly3P);
3155 fly3i = *(fly3P + 1);
3156 fly4r = *(fly0P + FlyOffsetA);
3157 fly4i = *(fly0P + FlyOffsetAIm);
3158 fly5r = *(fly1P + FlyOffsetA);
3159 fly5i = *(fly1P + FlyOffsetAIm);
3160 fly6r = *(fly2P + FlyOffsetA);
3161 fly6i = *(fly2P + FlyOffsetAIm);
3162 fly7r = *(fly3P + FlyOffsetA);
3163 fly7i = *(fly3P + FlyOffsetAIm);
3165 t1r = fly0r - U0r * fly1r;
3166 t1r = t1r + U0i * fly1i;
3167 t1i = fly0i - U0i * fly1r;
3168 t1i = t1i - U0r * fly1i;
3169 t0r = scale * fly0r - t1r;
3170 t0i = scale * fly0i - t1i;
3172 fly1r = fly2r - U0r * fly3r;
3173 fly1r = fly1r + U0i * fly3i;
3174 fly1i = fly2i - U0i * fly3r;
3175 fly1i = fly1i - U0r * fly3i;
3176 fly2r = scale * fly2r - fly1r;
3177 fly2i = scale * fly2i - fly1i;
3179 fly0r = fly4r - U0r * fly5r;
3180 fly0r = fly0r + U0i * fly5i;
3181 fly0i = fly4i - U0i * fly5r;
3182 fly0i = fly0i - U0r * fly5i;
3183 fly4r = scale * fly4r - fly0r;
3184 fly4i = scale * fly4i - fly0i;
3186 fly3r = fly6r - U0r * fly7r;
3187 fly3r = fly3r + U0i * fly7i;
3188 fly3i = fly6i - U0i * fly7r;
3189 fly3i = fly3i - U0r * fly7i;
3190 fly6r = scale * fly6r - fly3r;
3191 fly6i = scale * fly6i - fly3i;
3193 fly5r = t0r - U1r * fly2r;
3194 fly5r = fly5r + U1i * fly2i;
3195 fly5i = t0i - U1i * fly2r;
3196 fly5i = fly5i - U1r * fly2i;
3197 t0r = scale * t0r - fly5r;
3198 t0i = scale * t0i - fly5i;
3200 fly7r = t1r + U1i * fly1r;
3201 fly7r = fly7r + U1r * fly1i;
3202 fly7i = t1i - U1r * fly1r;
3203 fly7i = fly7i + U1i * fly1i;
3204 t1r = scale * t1r - fly7r;
3205 t1i = scale * t1i - fly7i;
3207 fly2r = fly4r - U1r * fly6r;
3208 fly2r = fly2r + U1i * fly6i;
3209 fly2i = fly4i - U1i * fly6r;
3210 fly2i = fly2i - U1r * fly6i;
3211 fly4r = scale * fly4r - fly2r;
3212 fly4i = scale * fly4i - fly2i;
3214 fly1r = fly0r + U1i * fly3r;
3215 fly1r = fly1r + U1r * fly3i;
3216 fly1i = fly0i - U1r * fly3r;
3217 fly1i = fly1i + U1i * fly3i;
3218 fly0r = scale * fly0r - fly1r;
3219 fly0i = scale * fly0i - fly1i;
3221 U0i = *(U0iP = (U0iP - NsameU4));
3222 U0r = *(U0rP = (U0rP + NsameU4));
3223 U1r = *(U1rP = (U1rP + NsameU2));
3224 U1i = *(U1iP = (U1iP - NsameU2));
3225 if(stage&1)
3226 U0r = - U0r;
3228 fly6r = t0r - U2r * fly4r;
3229 fly6r = fly6r + U2i * fly4i;
3230 fly6i = t0i - U2i * fly4r;
3231 fly6i = fly6i - U2r * fly4i;
3232 t0r = scale * t0r - fly6r;
3233 t0i = scale * t0i - fly6i;
3235 fly3r = fly5r - U2i * fly2r;
3236 fly3r = fly3r - U2r * fly2i;
3237 fly3i = fly5i + U2r * fly2r;
3238 fly3i = fly3i - U2i * fly2i;
3239 fly2r = scale * fly5r - fly3r;
3240 fly2i = scale * fly5i - fly3i;
3242 fly4r = t1r - U3r * fly0r;
3243 fly4r = fly4r + U3i * fly0i;
3244 fly4i = t1i - U3i * fly0r;
3245 fly4i = fly4i - U3r * fly0i;
3246 t1r = scale * t1r - fly4r;
3247 t1i = scale * t1i - fly4i;
3249 fly5r = fly7r + U3i * fly1r;
3250 fly5r = fly5r + U3r * fly1i;
3251 fly5i = fly7i - U3r * fly1r;
3252 fly5i = fly5i + U3i * fly1i;
3253 fly7r = scale * fly7r - fly5r;
3254 fly7i = scale * fly7i - fly5i;
3256 *(fly0P + FlyOffsetA) = fly6r;
3257 *(fly0P + FlyOffsetAIm) = fly6i;
3258 *(fly0P) = t0r;
3259 *(fly0P + 1) = t0i;
3261 U2r = *(U2rP = (U2rP + NsameU1));
3262 U2i = *(U2iP = (U2iP - NsameU1));
3264 *(fly2P) = fly3r;
3265 *(fly2P + 1) = fly3i;
3266 *(fly2P + FlyOffsetA) = fly2r;
3267 *(fly2P + FlyOffsetAIm) = fly2i;
3268 *(fly1P + FlyOffsetA) = fly4r;
3269 *(fly1P + FlyOffsetAIm) = fly4i;
3270 *(fly1P) = t1r;
3271 *(fly1P + 1) = t1i;
3273 U3r = *( U2rP + U3offset);
3274 U3i = *( U2iP - U3offset);
3276 *(fly3P + FlyOffsetA) = fly5r;
3277 *(fly3P + FlyOffsetAIm) = fly5i;
3278 *(fly3P) = fly7r;
3279 *(fly3P + 1) = fly7i;
3281 IOP = IOP + 2;
3282 fly0P = IOP;
3283 fly1P = (IOP+Flyinc);
3284 fly2P = (fly1P+Flyinc);
3285 fly3P = (fly2P+Flyinc);
3287 NsameU4 = -NsameU4;
3289 if(stage&1){
3290 LoopN >>= 3;
3291 NsameU1 >>= 3;
3292 NsameU2 >>= 3;
3293 NsameU4 >>= 3;
3294 NdiffU <<= 3;
3295 Flyinc = Flyinc << 3;
3296 FlyOffsetA <<= 3;
3297 FlyOffsetB <<= 3;
3298 FlyOffsetAIm = FlyOffsetA + 1;
3299 FlyOffsetBIm = FlyOffsetB + 1;
3302 M=M+1; /* for RIFFT */
3304 ioptr += Ntbl[M];