Merge branch 'mr/build' into pu
[sox/ew.git] / src / fft4g.c
blob38a8bcc002f510564db52672bdd840db328119ca
1 /* Copyright Takuya OOURA, 1996-2001.
3 You may use, copy, modify and distribute this code for any
4 purpose (include commercial use) and without fee. Please
5 refer to this package when you modify this code.
7 Package home: http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
9 Fast Fourier/Cosine/Sine Transform
10 dimension :one
11 data length :power of 2
12 decimation :frequency
13 radix :4, 2
14 data :inplace
15 table :use
16 functions
17 cdft: Complex Discrete Fourier Transform
18 rdft: Real Discrete Fourier Transform
19 ddct: Discrete Cosine Transform
20 ddst: Discrete Sine Transform
21 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
22 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
23 function prototypes
24 void cdft(int, int, double *, int *, double *);
25 void rdft(int, int, double *, int *, double *);
26 void ddct(int, int, double *, int *, double *);
27 void ddst(int, int, double *, int *, double *);
28 void dfct(int, double *, double *, int *, double *);
29 void dfst(int, double *, double *, int *, double *);
32 -------- Complex DFT (Discrete Fourier Transform) --------
33 [definition]
34 <case1>
35 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
36 <case2>
37 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
38 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
39 [usage]
40 <case1>
41 ip[0] = 0; // first time only
42 cdft(2*n, 1, a, ip, w);
43 <case2>
44 ip[0] = 0; // first time only
45 cdft(2*n, -1, a, ip, w);
46 [parameters]
47 2*n :data length (int)
48 n >= 1, n = power of 2
49 a[0...2*n-1] :input/output data (double *)
50 input data
51 a[2*j] = Re(x[j]),
52 a[2*j+1] = Im(x[j]), 0<=j<n
53 output data
54 a[2*k] = Re(X[k]),
55 a[2*k+1] = Im(X[k]), 0<=k<n
56 ip[0...*] :work area for bit reversal (int *)
57 length of ip >= 2+sqrt(n)
58 strictly,
59 length of ip >=
60 2+(1<<(int)(log(n+0.5)/log(2))/2).
61 ip[0],ip[1] are pointers of the cos/sin table.
62 w[0...n/2-1] :cos/sin table (double *)
63 w[],ip[] are initialized if ip[0] == 0.
64 [remark]
65 Inverse of
66 cdft(2*n, -1, a, ip, w);
67 is
68 cdft(2*n, 1, a, ip, w);
69 for (j = 0; j <= 2 * n - 1; j++) {
70 a[j] *= 1.0 / n;
75 -------- Real DFT / Inverse of Real DFT --------
76 [definition]
77 <case1> RDFT
78 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
79 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
80 <case2> IRDFT (excluding scale)
81 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
82 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
83 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
84 [usage]
85 <case1>
86 ip[0] = 0; // first time only
87 rdft(n, 1, a, ip, w);
88 <case2>
89 ip[0] = 0; // first time only
90 rdft(n, -1, a, ip, w);
91 [parameters]
92 n :data length (int)
93 n >= 2, n = power of 2
94 a[0...n-1] :input/output data (double *)
95 <case1>
96 output data
97 a[2*k] = R[k], 0<=k<n/2
98 a[2*k+1] = I[k], 0<k<n/2
99 a[1] = R[n/2]
100 <case2>
101 input data
102 a[2*j] = R[j], 0<=j<n/2
103 a[2*j+1] = I[j], 0<j<n/2
104 a[1] = R[n/2]
105 ip[0...*] :work area for bit reversal (int *)
106 length of ip >= 2+sqrt(n/2)
107 strictly,
108 length of ip >=
109 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
110 ip[0],ip[1] are pointers of the cos/sin table.
111 w[0...n/2-1] :cos/sin table (double *)
112 w[],ip[] are initialized if ip[0] == 0.
113 [remark]
114 Inverse of
115 rdft(n, 1, a, ip, w);
117 rdft(n, -1, a, ip, w);
118 for (j = 0; j <= n - 1; j++) {
119 a[j] *= 2.0 / n;
124 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
125 [definition]
126 <case1> IDCT (excluding scale)
127 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
128 <case2> DCT
129 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
130 [usage]
131 <case1>
132 ip[0] = 0; // first time only
133 ddct(n, 1, a, ip, w);
134 <case2>
135 ip[0] = 0; // first time only
136 ddct(n, -1, a, ip, w);
137 [parameters]
138 n :data length (int)
139 n >= 2, n = power of 2
140 a[0...n-1] :input/output data (double *)
141 output data
142 a[k] = C[k], 0<=k<n
143 ip[0...*] :work area for bit reversal (int *)
144 length of ip >= 2+sqrt(n/2)
145 strictly,
146 length of ip >=
147 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
148 ip[0],ip[1] are pointers of the cos/sin table.
149 w[0...n*5/4-1] :cos/sin table (double *)
150 w[],ip[] are initialized if ip[0] == 0.
151 [remark]
152 Inverse of
153 ddct(n, -1, a, ip, w);
155 a[0] *= 0.5;
156 ddct(n, 1, a, ip, w);
157 for (j = 0; j <= n - 1; j++) {
158 a[j] *= 2.0 / n;
163 -------- DST (Discrete Sine Transform) / Inverse of DST --------
164 [definition]
165 <case1> IDST (excluding scale)
166 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
167 <case2> DST
168 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
169 [usage]
170 <case1>
171 ip[0] = 0; // first time only
172 ddst(n, 1, a, ip, w);
173 <case2>
174 ip[0] = 0; // first time only
175 ddst(n, -1, a, ip, w);
176 [parameters]
177 n :data length (int)
178 n >= 2, n = power of 2
179 a[0...n-1] :input/output data (double *)
180 <case1>
181 input data
182 a[j] = A[j], 0<j<n
183 a[0] = A[n]
184 output data
185 a[k] = S[k], 0<=k<n
186 <case2>
187 output data
188 a[k] = S[k], 0<k<n
189 a[0] = S[n]
190 ip[0...*] :work area for bit reversal (int *)
191 length of ip >= 2+sqrt(n/2)
192 strictly,
193 length of ip >=
194 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
195 ip[0],ip[1] are pointers of the cos/sin table.
196 w[0...n*5/4-1] :cos/sin table (double *)
197 w[],ip[] are initialized if ip[0] == 0.
198 [remark]
199 Inverse of
200 ddst(n, -1, a, ip, w);
202 a[0] *= 0.5;
203 ddst(n, 1, a, ip, w);
204 for (j = 0; j <= n - 1; j++) {
205 a[j] *= 2.0 / n;
210 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
211 [definition]
212 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
213 [usage]
214 ip[0] = 0; // first time only
215 dfct(n, a, t, ip, w);
216 [parameters]
217 n :data length - 1 (int)
218 n >= 2, n = power of 2
219 a[0...n] :input/output data (double *)
220 output data
221 a[k] = C[k], 0<=k<=n
222 t[0...n/2] :work area (double *)
223 ip[0...*] :work area for bit reversal (int *)
224 length of ip >= 2+sqrt(n/4)
225 strictly,
226 length of ip >=
227 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
228 ip[0],ip[1] are pointers of the cos/sin table.
229 w[0...n*5/8-1] :cos/sin table (double *)
230 w[],ip[] are initialized if ip[0] == 0.
231 [remark]
232 Inverse of
233 a[0] *= 0.5;
234 a[n] *= 0.5;
235 dfct(n, a, t, ip, w);
237 a[0] *= 0.5;
238 a[n] *= 0.5;
239 dfct(n, a, t, ip, w);
240 for (j = 0; j <= n; j++) {
241 a[j] *= 2.0 / n;
246 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
247 [definition]
248 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
249 [usage]
250 ip[0] = 0; // first time only
251 dfst(n, a, t, ip, w);
252 [parameters]
253 n :data length + 1 (int)
254 n >= 2, n = power of 2
255 a[0...n-1] :input/output data (double *)
256 output data
257 a[k] = S[k], 0<k<n
258 (a[0] is used for work area)
259 t[0...n/2-1] :work area (double *)
260 ip[0...*] :work area for bit reversal (int *)
261 length of ip >= 2+sqrt(n/4)
262 strictly,
263 length of ip >=
264 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
265 ip[0],ip[1] are pointers of the cos/sin table.
266 w[0...n*5/8-1] :cos/sin table (double *)
267 w[],ip[] are initialized if ip[0] == 0.
268 [remark]
269 Inverse of
270 dfst(n, a, t, ip, w);
272 dfst(n, a, t, ip, w);
273 for (j = 1; j <= n - 1; j++) {
274 a[j] *= 2.0 / n;
279 Appendix :
280 The cos/sin table is recalculated when the larger table required.
281 w[] and ip[] are compatible with all routines.
285 #include <math.h>
286 #include "fft4g.h"
288 #ifdef FFT4G_FLOAT
289 #define double float
290 #define sin sinf
291 #define cos cosf
292 #define atan atanf
294 #define cdft lsx_cdft_f
295 #define rdft lsx_rdft_f
296 #define ddct lsx_ddct_f
297 #define ddst lsx_ddst_f
298 #define dfct lsx_dfct_f
299 #define dfst lsx_dfst_f
300 #else
301 #define cdft lsx_cdft
302 #define rdft lsx_rdft
303 #define ddct lsx_ddct
304 #define ddst lsx_ddst
305 #define dfct lsx_dfct
306 #define dfst lsx_dfst
307 #endif
309 static void bitrv2conj(int n, int *ip, double *a);
310 static void bitrv2(int n, int *ip, double *a);
311 static void cft1st(int n, double *a, double const *w);
312 static void cftbsub(int n, double *a, double const *w);
313 static void cftfsub(int n, double *a, double const *w);
314 static void cftmdl(int n, int l, double *a, double const *w);
315 static void dctsub(int n, double *a, int nc, double const *c);
316 static void dstsub(int n, double *a, int nc, double const *c);
317 static void makect(int nc, int *ip, double *c);
318 static void makewt(int nw, int *ip, double *w);
319 static void rftbsub(int n, double *a, int nc, double const *c);
320 static void rftfsub(int n, double *a, int nc, double const *c);
323 void cdft(int n, int isgn, double *a, int *ip, double *w)
325 if (n > (ip[0] << 2)) {
326 makewt(n >> 2, ip, w);
328 if (n > 4) {
329 if (isgn >= 0) {
330 bitrv2(n, ip + 2, a);
331 cftfsub(n, a, w);
332 } else {
333 bitrv2conj(n, ip + 2, a);
334 cftbsub(n, a, w);
336 } else if (n == 4) {
337 cftfsub(n, a, w);
342 void rdft(int n, int isgn, double *a, int *ip, double *w)
344 int nw, nc;
345 double xi;
347 nw = ip[0];
348 if (n > (nw << 2)) {
349 nw = n >> 2;
350 makewt(nw, ip, w);
352 nc = ip[1];
353 if (n > (nc << 2)) {
354 nc = n >> 2;
355 makect(nc, ip, w + nw);
357 if (isgn >= 0) {
358 if (n > 4) {
359 bitrv2(n, ip + 2, a);
360 cftfsub(n, a, w);
361 rftfsub(n, a, nc, w + nw);
362 } else if (n == 4) {
363 cftfsub(n, a, w);
365 xi = a[0] - a[1];
366 a[0] += a[1];
367 a[1] = xi;
368 } else {
369 a[1] = 0.5 * (a[0] - a[1]);
370 a[0] -= a[1];
371 if (n > 4) {
372 rftbsub(n, a, nc, w + nw);
373 bitrv2(n, ip + 2, a);
374 cftbsub(n, a, w);
375 } else if (n == 4) {
376 cftfsub(n, a, w);
382 void ddct(int n, int isgn, double *a, int *ip, double *w)
384 int j, nw, nc;
385 double xr;
387 nw = ip[0];
388 if (n > (nw << 2)) {
389 nw = n >> 2;
390 makewt(nw, ip, w);
392 nc = ip[1];
393 if (n > nc) {
394 nc = n;
395 makect(nc, ip, w + nw);
397 if (isgn < 0) {
398 xr = a[n - 1];
399 for (j = n - 2; j >= 2; j -= 2) {
400 a[j + 1] = a[j] - a[j - 1];
401 a[j] += a[j - 1];
403 a[1] = a[0] - xr;
404 a[0] += xr;
405 if (n > 4) {
406 rftbsub(n, a, nc, w + nw);
407 bitrv2(n, ip + 2, a);
408 cftbsub(n, a, w);
409 } else if (n == 4) {
410 cftfsub(n, a, w);
413 dctsub(n, a, nc, w + nw);
414 if (isgn >= 0) {
415 if (n > 4) {
416 bitrv2(n, ip + 2, a);
417 cftfsub(n, a, w);
418 rftfsub(n, a, nc, w + nw);
419 } else if (n == 4) {
420 cftfsub(n, a, w);
422 xr = a[0] - a[1];
423 a[0] += a[1];
424 for (j = 2; j < n; j += 2) {
425 a[j - 1] = a[j] - a[j + 1];
426 a[j] += a[j + 1];
428 a[n - 1] = xr;
433 void ddst(int n, int isgn, double *a, int *ip, double *w)
435 int j, nw, nc;
436 double xr;
438 nw = ip[0];
439 if (n > (nw << 2)) {
440 nw = n >> 2;
441 makewt(nw, ip, w);
443 nc = ip[1];
444 if (n > nc) {
445 nc = n;
446 makect(nc, ip, w + nw);
448 if (isgn < 0) {
449 xr = a[n - 1];
450 for (j = n - 2; j >= 2; j -= 2) {
451 a[j + 1] = -a[j] - a[j - 1];
452 a[j] -= a[j - 1];
454 a[1] = a[0] + xr;
455 a[0] -= xr;
456 if (n > 4) {
457 rftbsub(n, a, nc, w + nw);
458 bitrv2(n, ip + 2, a);
459 cftbsub(n, a, w);
460 } else if (n == 4) {
461 cftfsub(n, a, w);
464 dstsub(n, a, nc, w + nw);
465 if (isgn >= 0) {
466 if (n > 4) {
467 bitrv2(n, ip + 2, a);
468 cftfsub(n, a, w);
469 rftfsub(n, a, nc, w + nw);
470 } else if (n == 4) {
471 cftfsub(n, a, w);
473 xr = a[0] - a[1];
474 a[0] += a[1];
475 for (j = 2; j < n; j += 2) {
476 a[j - 1] = -a[j] - a[j + 1];
477 a[j] -= a[j + 1];
479 a[n - 1] = -xr;
484 void dfct(int n, double *a, double *t, int *ip, double *w)
486 int j, k, l, m, mh, nw, nc;
487 double xr, xi, yr, yi;
489 nw = ip[0];
490 if (n > (nw << 3)) {
491 nw = n >> 3;
492 makewt(nw, ip, w);
494 nc = ip[1];
495 if (n > (nc << 1)) {
496 nc = n >> 1;
497 makect(nc, ip, w + nw);
499 m = n >> 1;
500 yi = a[m];
501 xi = a[0] + a[n];
502 a[0] -= a[n];
503 t[0] = xi - yi;
504 t[m] = xi + yi;
505 if (n > 2) {
506 mh = m >> 1;
507 for (j = 1; j < mh; j++) {
508 k = m - j;
509 xr = a[j] - a[n - j];
510 xi = a[j] + a[n - j];
511 yr = a[k] - a[n - k];
512 yi = a[k] + a[n - k];
513 a[j] = xr;
514 a[k] = yr;
515 t[j] = xi - yi;
516 t[k] = xi + yi;
518 t[mh] = a[mh] + a[n - mh];
519 a[mh] -= a[n - mh];
520 dctsub(m, a, nc, w + nw);
521 if (m > 4) {
522 bitrv2(m, ip + 2, a);
523 cftfsub(m, a, w);
524 rftfsub(m, a, nc, w + nw);
525 } else if (m == 4) {
526 cftfsub(m, a, w);
528 a[n - 1] = a[0] - a[1];
529 a[1] = a[0] + a[1];
530 for (j = m - 2; j >= 2; j -= 2) {
531 a[2 * j + 1] = a[j] + a[j + 1];
532 a[2 * j - 1] = a[j] - a[j + 1];
534 l = 2;
535 m = mh;
536 while (m >= 2) {
537 dctsub(m, t, nc, w + nw);
538 if (m > 4) {
539 bitrv2(m, ip + 2, t);
540 cftfsub(m, t, w);
541 rftfsub(m, t, nc, w + nw);
542 } else if (m == 4) {
543 cftfsub(m, t, w);
545 a[n - l] = t[0] - t[1];
546 a[l] = t[0] + t[1];
547 k = 0;
548 for (j = 2; j < m; j += 2) {
549 k += l << 2;
550 a[k - l] = t[j] - t[j + 1];
551 a[k + l] = t[j] + t[j + 1];
553 l <<= 1;
554 mh = m >> 1;
555 for (j = 0; j < mh; j++) {
556 k = m - j;
557 t[j] = t[m + k] - t[m + j];
558 t[k] = t[m + k] + t[m + j];
560 t[mh] = t[m + mh];
561 m = mh;
563 a[l] = t[0];
564 a[n] = t[2] - t[1];
565 a[0] = t[2] + t[1];
566 } else {
567 a[1] = a[0];
568 a[2] = t[0];
569 a[0] = t[1];
574 void dfst(int n, double *a, double *t, int *ip, double *w)
576 int j, k, l, m, mh, nw, nc;
577 double xr, xi, yr, yi;
579 nw = ip[0];
580 if (n > (nw << 3)) {
581 nw = n >> 3;
582 makewt(nw, ip, w);
584 nc = ip[1];
585 if (n > (nc << 1)) {
586 nc = n >> 1;
587 makect(nc, ip, w + nw);
589 if (n > 2) {
590 m = n >> 1;
591 mh = m >> 1;
592 for (j = 1; j < mh; j++) {
593 k = m - j;
594 xr = a[j] + a[n - j];
595 xi = a[j] - a[n - j];
596 yr = a[k] + a[n - k];
597 yi = a[k] - a[n - k];
598 a[j] = xr;
599 a[k] = yr;
600 t[j] = xi + yi;
601 t[k] = xi - yi;
603 t[0] = a[mh] - a[n - mh];
604 a[mh] += a[n - mh];
605 a[0] = a[m];
606 dstsub(m, a, nc, w + nw);
607 if (m > 4) {
608 bitrv2(m, ip + 2, a);
609 cftfsub(m, a, w);
610 rftfsub(m, a, nc, w + nw);
611 } else if (m == 4) {
612 cftfsub(m, a, w);
614 a[n - 1] = a[1] - a[0];
615 a[1] = a[0] + a[1];
616 for (j = m - 2; j >= 2; j -= 2) {
617 a[2 * j + 1] = a[j] - a[j + 1];
618 a[2 * j - 1] = -a[j] - a[j + 1];
620 l = 2;
621 m = mh;
622 while (m >= 2) {
623 dstsub(m, t, nc, w + nw);
624 if (m > 4) {
625 bitrv2(m, ip + 2, t);
626 cftfsub(m, t, w);
627 rftfsub(m, t, nc, w + nw);
628 } else if (m == 4) {
629 cftfsub(m, t, w);
631 a[n - l] = t[1] - t[0];
632 a[l] = t[0] + t[1];
633 k = 0;
634 for (j = 2; j < m; j += 2) {
635 k += l << 2;
636 a[k - l] = -t[j] - t[j + 1];
637 a[k + l] = t[j] - t[j + 1];
639 l <<= 1;
640 mh = m >> 1;
641 for (j = 1; j < mh; j++) {
642 k = m - j;
643 t[j] = t[m + k] + t[m + j];
644 t[k] = t[m + k] - t[m + j];
646 t[0] = t[m + mh];
647 m = mh;
649 a[l] = t[0];
651 a[0] = 0;
655 /* -------- initializing routines -------- */
658 static void makewt(int nw, int *ip, double *w)
660 int j, nwh;
661 double delta, x, y;
663 ip[0] = nw;
664 ip[1] = 1;
665 if (nw > 2) {
666 nwh = nw >> 1;
667 delta = atan(1.0) / nwh;
668 w[0] = 1;
669 w[1] = 0;
670 w[nwh] = cos(delta * nwh);
671 w[nwh + 1] = w[nwh];
672 if (nwh > 2) {
673 for (j = 2; j < nwh; j += 2) {
674 x = cos(delta * j);
675 y = sin(delta * j);
676 w[j] = x;
677 w[j + 1] = y;
678 w[nw - j] = y;
679 w[nw - j + 1] = x;
681 bitrv2(nw, ip + 2, w);
687 static void makect(int nc, int *ip, double *c)
689 int j, nch;
690 double delta;
692 ip[1] = nc;
693 if (nc > 1) {
694 nch = nc >> 1;
695 delta = atan(1.0) / nch;
696 c[0] = cos(delta * nch);
697 c[nch] = 0.5 * c[0];
698 for (j = 1; j < nch; j++) {
699 c[j] = 0.5 * cos(delta * j);
700 c[nc - j] = 0.5 * sin(delta * j);
706 /* -------- child routines -------- */
709 static void bitrv2(int n, int *ip0, double *a)
711 int j, j1, k, k1, l, m, m2, ip[256];
712 double xr, xi, yr, yi;
714 (void)ip0;
715 ip[0] = 0;
716 l = n;
717 m = 1;
718 while ((m << 3) < l) {
719 l >>= 1;
720 for (j = 0; j < m; j++) {
721 ip[m + j] = ip[j] + l;
723 m <<= 1;
725 m2 = 2 * m;
726 if ((m << 3) == l) {
727 for (k = 0; k < m; k++) {
728 for (j = 0; j < k; j++) {
729 j1 = 2 * j + ip[k];
730 k1 = 2 * k + ip[j];
731 xr = a[j1];
732 xi = a[j1 + 1];
733 yr = a[k1];
734 yi = a[k1 + 1];
735 a[j1] = yr;
736 a[j1 + 1] = yi;
737 a[k1] = xr;
738 a[k1 + 1] = xi;
739 j1 += m2;
740 k1 += 2 * m2;
741 xr = a[j1];
742 xi = a[j1 + 1];
743 yr = a[k1];
744 yi = a[k1 + 1];
745 a[j1] = yr;
746 a[j1 + 1] = yi;
747 a[k1] = xr;
748 a[k1 + 1] = xi;
749 j1 += m2;
750 k1 -= m2;
751 xr = a[j1];
752 xi = a[j1 + 1];
753 yr = a[k1];
754 yi = a[k1 + 1];
755 a[j1] = yr;
756 a[j1 + 1] = yi;
757 a[k1] = xr;
758 a[k1 + 1] = xi;
759 j1 += m2;
760 k1 += 2 * m2;
761 xr = a[j1];
762 xi = a[j1 + 1];
763 yr = a[k1];
764 yi = a[k1 + 1];
765 a[j1] = yr;
766 a[j1 + 1] = yi;
767 a[k1] = xr;
768 a[k1 + 1] = xi;
770 j1 = 2 * k + m2 + ip[k];
771 k1 = j1 + m2;
772 xr = a[j1];
773 xi = a[j1 + 1];
774 yr = a[k1];
775 yi = a[k1 + 1];
776 a[j1] = yr;
777 a[j1 + 1] = yi;
778 a[k1] = xr;
779 a[k1 + 1] = xi;
781 } else {
782 for (k = 1; k < m; k++) {
783 for (j = 0; j < k; j++) {
784 j1 = 2 * j + ip[k];
785 k1 = 2 * k + ip[j];
786 xr = a[j1];
787 xi = a[j1 + 1];
788 yr = a[k1];
789 yi = a[k1 + 1];
790 a[j1] = yr;
791 a[j1 + 1] = yi;
792 a[k1] = xr;
793 a[k1 + 1] = xi;
794 j1 += m2;
795 k1 += m2;
796 xr = a[j1];
797 xi = a[j1 + 1];
798 yr = a[k1];
799 yi = a[k1 + 1];
800 a[j1] = yr;
801 a[j1 + 1] = yi;
802 a[k1] = xr;
803 a[k1 + 1] = xi;
810 static void bitrv2conj(int n, int *ip0, double *a)
812 int j, j1, k, k1, l, m, m2, ip[256];
813 double xr, xi, yr, yi;
815 (void)ip0;
816 ip[0] = 0;
817 l = n;
818 m = 1;
819 while ((m << 3) < l) {
820 l >>= 1;
821 for (j = 0; j < m; j++) {
822 ip[m + j] = ip[j] + l;
824 m <<= 1;
826 m2 = 2 * m;
827 if ((m << 3) == l) {
828 for (k = 0; k < m; k++) {
829 for (j = 0; j < k; j++) {
830 j1 = 2 * j + ip[k];
831 k1 = 2 * k + ip[j];
832 xr = a[j1];
833 xi = -a[j1 + 1];
834 yr = a[k1];
835 yi = -a[k1 + 1];
836 a[j1] = yr;
837 a[j1 + 1] = yi;
838 a[k1] = xr;
839 a[k1 + 1] = xi;
840 j1 += m2;
841 k1 += 2 * m2;
842 xr = a[j1];
843 xi = -a[j1 + 1];
844 yr = a[k1];
845 yi = -a[k1 + 1];
846 a[j1] = yr;
847 a[j1 + 1] = yi;
848 a[k1] = xr;
849 a[k1 + 1] = xi;
850 j1 += m2;
851 k1 -= m2;
852 xr = a[j1];
853 xi = -a[j1 + 1];
854 yr = a[k1];
855 yi = -a[k1 + 1];
856 a[j1] = yr;
857 a[j1 + 1] = yi;
858 a[k1] = xr;
859 a[k1 + 1] = xi;
860 j1 += m2;
861 k1 += 2 * m2;
862 xr = a[j1];
863 xi = -a[j1 + 1];
864 yr = a[k1];
865 yi = -a[k1 + 1];
866 a[j1] = yr;
867 a[j1 + 1] = yi;
868 a[k1] = xr;
869 a[k1 + 1] = xi;
871 k1 = 2 * k + ip[k];
872 a[k1 + 1] = -a[k1 + 1];
873 j1 = k1 + m2;
874 k1 = j1 + m2;
875 xr = a[j1];
876 xi = -a[j1 + 1];
877 yr = a[k1];
878 yi = -a[k1 + 1];
879 a[j1] = yr;
880 a[j1 + 1] = yi;
881 a[k1] = xr;
882 a[k1 + 1] = xi;
883 k1 += m2;
884 a[k1 + 1] = -a[k1 + 1];
886 } else {
887 a[1] = -a[1];
888 a[m2 + 1] = -a[m2 + 1];
889 for (k = 1; k < m; k++) {
890 for (j = 0; j < k; j++) {
891 j1 = 2 * j + ip[k];
892 k1 = 2 * k + ip[j];
893 xr = a[j1];
894 xi = -a[j1 + 1];
895 yr = a[k1];
896 yi = -a[k1 + 1];
897 a[j1] = yr;
898 a[j1 + 1] = yi;
899 a[k1] = xr;
900 a[k1 + 1] = xi;
901 j1 += m2;
902 k1 += m2;
903 xr = a[j1];
904 xi = -a[j1 + 1];
905 yr = a[k1];
906 yi = -a[k1 + 1];
907 a[j1] = yr;
908 a[j1 + 1] = yi;
909 a[k1] = xr;
910 a[k1 + 1] = xi;
912 k1 = 2 * k + ip[k];
913 a[k1 + 1] = -a[k1 + 1];
914 a[k1 + m2 + 1] = -a[k1 + m2 + 1];
920 static void cftfsub(int n, double *a, double const *w)
922 int j, j1, j2, j3, l;
923 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
925 l = 2;
926 if (n > 8) {
927 cft1st(n, a, w);
928 l = 8;
929 while ((l << 2) < n) {
930 cftmdl(n, l, a, w);
931 l <<= 2;
934 if ((l << 2) == n) {
935 for (j = 0; j < l; j += 2) {
936 j1 = j + l;
937 j2 = j1 + l;
938 j3 = j2 + l;
939 x0r = a[j] + a[j1];
940 x0i = a[j + 1] + a[j1 + 1];
941 x1r = a[j] - a[j1];
942 x1i = a[j + 1] - a[j1 + 1];
943 x2r = a[j2] + a[j3];
944 x2i = a[j2 + 1] + a[j3 + 1];
945 x3r = a[j2] - a[j3];
946 x3i = a[j2 + 1] - a[j3 + 1];
947 a[j] = x0r + x2r;
948 a[j + 1] = x0i + x2i;
949 a[j2] = x0r - x2r;
950 a[j2 + 1] = x0i - x2i;
951 a[j1] = x1r - x3i;
952 a[j1 + 1] = x1i + x3r;
953 a[j3] = x1r + x3i;
954 a[j3 + 1] = x1i - x3r;
956 } else {
957 for (j = 0; j < l; j += 2) {
958 j1 = j + l;
959 x0r = a[j] - a[j1];
960 x0i = a[j + 1] - a[j1 + 1];
961 a[j] += a[j1];
962 a[j + 1] += a[j1 + 1];
963 a[j1] = x0r;
964 a[j1 + 1] = x0i;
970 static void cftbsub(int n, double *a, double const *w)
972 int j, j1, j2, j3, l;
973 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
975 l = 2;
976 if (n > 8) {
977 cft1st(n, a, w);
978 l = 8;
979 while ((l << 2) < n) {
980 cftmdl(n, l, a, w);
981 l <<= 2;
984 if ((l << 2) == n) {
985 for (j = 0; j < l; j += 2) {
986 j1 = j + l;
987 j2 = j1 + l;
988 j3 = j2 + l;
989 x0r = a[j] + a[j1];
990 x0i = -a[j + 1] - a[j1 + 1];
991 x1r = a[j] - a[j1];
992 x1i = -a[j + 1] + a[j1 + 1];
993 x2r = a[j2] + a[j3];
994 x2i = a[j2 + 1] + a[j3 + 1];
995 x3r = a[j2] - a[j3];
996 x3i = a[j2 + 1] - a[j3 + 1];
997 a[j] = x0r + x2r;
998 a[j + 1] = x0i - x2i;
999 a[j2] = x0r - x2r;
1000 a[j2 + 1] = x0i + x2i;
1001 a[j1] = x1r - x3i;
1002 a[j1 + 1] = x1i - x3r;
1003 a[j3] = x1r + x3i;
1004 a[j3 + 1] = x1i + x3r;
1006 } else {
1007 for (j = 0; j < l; j += 2) {
1008 j1 = j + l;
1009 x0r = a[j] - a[j1];
1010 x0i = -a[j + 1] + a[j1 + 1];
1011 a[j] += a[j1];
1012 a[j + 1] = -a[j + 1] - a[j1 + 1];
1013 a[j1] = x0r;
1014 a[j1 + 1] = x0i;
1020 static void cft1st(int n, double *a, double const *w)
1022 int j, k1, k2;
1023 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1024 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1026 x0r = a[0] + a[2];
1027 x0i = a[1] + a[3];
1028 x1r = a[0] - a[2];
1029 x1i = a[1] - a[3];
1030 x2r = a[4] + a[6];
1031 x2i = a[5] + a[7];
1032 x3r = a[4] - a[6];
1033 x3i = a[5] - a[7];
1034 a[0] = x0r + x2r;
1035 a[1] = x0i + x2i;
1036 a[4] = x0r - x2r;
1037 a[5] = x0i - x2i;
1038 a[2] = x1r - x3i;
1039 a[3] = x1i + x3r;
1040 a[6] = x1r + x3i;
1041 a[7] = x1i - x3r;
1042 wk1r = w[2];
1043 x0r = a[8] + a[10];
1044 x0i = a[9] + a[11];
1045 x1r = a[8] - a[10];
1046 x1i = a[9] - a[11];
1047 x2r = a[12] + a[14];
1048 x2i = a[13] + a[15];
1049 x3r = a[12] - a[14];
1050 x3i = a[13] - a[15];
1051 a[8] = x0r + x2r;
1052 a[9] = x0i + x2i;
1053 a[12] = x2i - x0i;
1054 a[13] = x0r - x2r;
1055 x0r = x1r - x3i;
1056 x0i = x1i + x3r;
1057 a[10] = wk1r * (x0r - x0i);
1058 a[11] = wk1r * (x0r + x0i);
1059 x0r = x3i + x1r;
1060 x0i = x3r - x1i;
1061 a[14] = wk1r * (x0i - x0r);
1062 a[15] = wk1r * (x0i + x0r);
1063 k1 = 0;
1064 for (j = 16; j < n; j += 16) {
1065 k1 += 2;
1066 k2 = 2 * k1;
1067 wk2r = w[k1];
1068 wk2i = w[k1 + 1];
1069 wk1r = w[k2];
1070 wk1i = w[k2 + 1];
1071 wk3r = wk1r - 2 * wk2i * wk1i;
1072 wk3i = 2 * wk2i * wk1r - wk1i;
1073 x0r = a[j] + a[j + 2];
1074 x0i = a[j + 1] + a[j + 3];
1075 x1r = a[j] - a[j + 2];
1076 x1i = a[j + 1] - a[j + 3];
1077 x2r = a[j + 4] + a[j + 6];
1078 x2i = a[j + 5] + a[j + 7];
1079 x3r = a[j + 4] - a[j + 6];
1080 x3i = a[j + 5] - a[j + 7];
1081 a[j] = x0r + x2r;
1082 a[j + 1] = x0i + x2i;
1083 x0r -= x2r;
1084 x0i -= x2i;
1085 a[j + 4] = wk2r * x0r - wk2i * x0i;
1086 a[j + 5] = wk2r * x0i + wk2i * x0r;
1087 x0r = x1r - x3i;
1088 x0i = x1i + x3r;
1089 a[j + 2] = wk1r * x0r - wk1i * x0i;
1090 a[j + 3] = wk1r * x0i + wk1i * x0r;
1091 x0r = x1r + x3i;
1092 x0i = x1i - x3r;
1093 a[j + 6] = wk3r * x0r - wk3i * x0i;
1094 a[j + 7] = wk3r * x0i + wk3i * x0r;
1095 wk1r = w[k2 + 2];
1096 wk1i = w[k2 + 3];
1097 wk3r = wk1r - 2 * wk2r * wk1i;
1098 wk3i = 2 * wk2r * wk1r - wk1i;
1099 x0r = a[j + 8] + a[j + 10];
1100 x0i = a[j + 9] + a[j + 11];
1101 x1r = a[j + 8] - a[j + 10];
1102 x1i = a[j + 9] - a[j + 11];
1103 x2r = a[j + 12] + a[j + 14];
1104 x2i = a[j + 13] + a[j + 15];
1105 x3r = a[j + 12] - a[j + 14];
1106 x3i = a[j + 13] - a[j + 15];
1107 a[j + 8] = x0r + x2r;
1108 a[j + 9] = x0i + x2i;
1109 x0r -= x2r;
1110 x0i -= x2i;
1111 a[j + 12] = -wk2i * x0r - wk2r * x0i;
1112 a[j + 13] = -wk2i * x0i + wk2r * x0r;
1113 x0r = x1r - x3i;
1114 x0i = x1i + x3r;
1115 a[j + 10] = wk1r * x0r - wk1i * x0i;
1116 a[j + 11] = wk1r * x0i + wk1i * x0r;
1117 x0r = x1r + x3i;
1118 x0i = x1i - x3r;
1119 a[j + 14] = wk3r * x0r - wk3i * x0i;
1120 a[j + 15] = wk3r * x0i + wk3i * x0r;
1125 static void cftmdl(int n, int l, double *a, double const *w)
1127 int j, j1, j2, j3, k, k1, k2, m, m2;
1128 double wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1129 double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1131 m = l << 2;
1132 for (j = 0; j < l; j += 2) {
1133 j1 = j + l;
1134 j2 = j1 + l;
1135 j3 = j2 + l;
1136 x0r = a[j] + a[j1];
1137 x0i = a[j + 1] + a[j1 + 1];
1138 x1r = a[j] - a[j1];
1139 x1i = a[j + 1] - a[j1 + 1];
1140 x2r = a[j2] + a[j3];
1141 x2i = a[j2 + 1] + a[j3 + 1];
1142 x3r = a[j2] - a[j3];
1143 x3i = a[j2 + 1] - a[j3 + 1];
1144 a[j] = x0r + x2r;
1145 a[j + 1] = x0i + x2i;
1146 a[j2] = x0r - x2r;
1147 a[j2 + 1] = x0i - x2i;
1148 a[j1] = x1r - x3i;
1149 a[j1 + 1] = x1i + x3r;
1150 a[j3] = x1r + x3i;
1151 a[j3 + 1] = x1i - x3r;
1153 wk1r = w[2];
1154 for (j = m; j < l + m; j += 2) {
1155 j1 = j + l;
1156 j2 = j1 + l;
1157 j3 = j2 + l;
1158 x0r = a[j] + a[j1];
1159 x0i = a[j + 1] + a[j1 + 1];
1160 x1r = a[j] - a[j1];
1161 x1i = a[j + 1] - a[j1 + 1];
1162 x2r = a[j2] + a[j3];
1163 x2i = a[j2 + 1] + a[j3 + 1];
1164 x3r = a[j2] - a[j3];
1165 x3i = a[j2 + 1] - a[j3 + 1];
1166 a[j] = x0r + x2r;
1167 a[j + 1] = x0i + x2i;
1168 a[j2] = x2i - x0i;
1169 a[j2 + 1] = x0r - x2r;
1170 x0r = x1r - x3i;
1171 x0i = x1i + x3r;
1172 a[j1] = wk1r * (x0r - x0i);
1173 a[j1 + 1] = wk1r * (x0r + x0i);
1174 x0r = x3i + x1r;
1175 x0i = x3r - x1i;
1176 a[j3] = wk1r * (x0i - x0r);
1177 a[j3 + 1] = wk1r * (x0i + x0r);
1179 k1 = 0;
1180 m2 = 2 * m;
1181 for (k = m2; k < n; k += m2) {
1182 k1 += 2;
1183 k2 = 2 * k1;
1184 wk2r = w[k1];
1185 wk2i = w[k1 + 1];
1186 wk1r = w[k2];
1187 wk1i = w[k2 + 1];
1188 wk3r = wk1r - 2 * wk2i * wk1i;
1189 wk3i = 2 * wk2i * wk1r - wk1i;
1190 for (j = k; j < l + k; j += 2) {
1191 j1 = j + l;
1192 j2 = j1 + l;
1193 j3 = j2 + l;
1194 x0r = a[j] + a[j1];
1195 x0i = a[j + 1] + a[j1 + 1];
1196 x1r = a[j] - a[j1];
1197 x1i = a[j + 1] - a[j1 + 1];
1198 x2r = a[j2] + a[j3];
1199 x2i = a[j2 + 1] + a[j3 + 1];
1200 x3r = a[j2] - a[j3];
1201 x3i = a[j2 + 1] - a[j3 + 1];
1202 a[j] = x0r + x2r;
1203 a[j + 1] = x0i + x2i;
1204 x0r -= x2r;
1205 x0i -= x2i;
1206 a[j2] = wk2r * x0r - wk2i * x0i;
1207 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
1208 x0r = x1r - x3i;
1209 x0i = x1i + x3r;
1210 a[j1] = wk1r * x0r - wk1i * x0i;
1211 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1212 x0r = x1r + x3i;
1213 x0i = x1i - x3r;
1214 a[j3] = wk3r * x0r - wk3i * x0i;
1215 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1217 wk1r = w[k2 + 2];
1218 wk1i = w[k2 + 3];
1219 wk3r = wk1r - 2 * wk2r * wk1i;
1220 wk3i = 2 * wk2r * wk1r - wk1i;
1221 for (j = k + m; j < l + (k + m); j += 2) {
1222 j1 = j + l;
1223 j2 = j1 + l;
1224 j3 = j2 + l;
1225 x0r = a[j] + a[j1];
1226 x0i = a[j + 1] + a[j1 + 1];
1227 x1r = a[j] - a[j1];
1228 x1i = a[j + 1] - a[j1 + 1];
1229 x2r = a[j2] + a[j3];
1230 x2i = a[j2 + 1] + a[j3 + 1];
1231 x3r = a[j2] - a[j3];
1232 x3i = a[j2 + 1] - a[j3 + 1];
1233 a[j] = x0r + x2r;
1234 a[j + 1] = x0i + x2i;
1235 x0r -= x2r;
1236 x0i -= x2i;
1237 a[j2] = -wk2i * x0r - wk2r * x0i;
1238 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
1239 x0r = x1r - x3i;
1240 x0i = x1i + x3r;
1241 a[j1] = wk1r * x0r - wk1i * x0i;
1242 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1243 x0r = x1r + x3i;
1244 x0i = x1i - x3r;
1245 a[j3] = wk3r * x0r - wk3i * x0i;
1246 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1252 static void rftfsub(int n, double *a, int nc, double const *c)
1254 int j, k, kk, ks, m;
1255 double wkr, wki, xr, xi, yr, yi;
1257 m = n >> 1;
1258 ks = 2 * nc / m;
1259 kk = 0;
1260 for (j = 2; j < m; j += 2) {
1261 k = n - j;
1262 kk += ks;
1263 wkr = 0.5 - c[nc - kk];
1264 wki = c[kk];
1265 xr = a[j] - a[k];
1266 xi = a[j + 1] + a[k + 1];
1267 yr = wkr * xr - wki * xi;
1268 yi = wkr * xi + wki * xr;
1269 a[j] -= yr;
1270 a[j + 1] -= yi;
1271 a[k] += yr;
1272 a[k + 1] -= yi;
1277 static void rftbsub(int n, double *a, int nc, double const *c)
1279 int j, k, kk, ks, m;
1280 double wkr, wki, xr, xi, yr, yi;
1282 a[1] = -a[1];
1283 m = n >> 1;
1284 ks = 2 * nc / m;
1285 kk = 0;
1286 for (j = 2; j < m; j += 2) {
1287 k = n - j;
1288 kk += ks;
1289 wkr = 0.5 - c[nc - kk];
1290 wki = c[kk];
1291 xr = a[j] - a[k];
1292 xi = a[j + 1] + a[k + 1];
1293 yr = wkr * xr + wki * xi;
1294 yi = wkr * xi - wki * xr;
1295 a[j] -= yr;
1296 a[j + 1] = yi - a[j + 1];
1297 a[k] += yr;
1298 a[k + 1] = yi - a[k + 1];
1300 a[m + 1] = -a[m + 1];
1304 static void dctsub(int n, double *a, int nc, double const *c)
1306 int j, k, kk, ks, m;
1307 double wkr, wki, xr;
1309 m = n >> 1;
1310 ks = nc / n;
1311 kk = 0;
1312 for (j = 1; j < m; j++) {
1313 k = n - j;
1314 kk += ks;
1315 wkr = c[kk] - c[nc - kk];
1316 wki = c[kk] + c[nc - kk];
1317 xr = wki * a[j] - wkr * a[k];
1318 a[j] = wkr * a[j] + wki * a[k];
1319 a[k] = xr;
1321 a[m] *= c[0];
1325 static void dstsub(int n, double *a, int nc, double const *c)
1327 int j, k, kk, ks, m;
1328 double wkr, wki, xr;
1330 m = n >> 1;
1331 ks = nc / n;
1332 kk = 0;
1333 for (j = 1; j < m; j++) {
1334 k = n - j;
1335 kk += ks;
1336 wkr = c[kk] - c[nc - kk];
1337 wki = c[kk] + c[nc - kk];
1338 xr = wki * a[k] - wkr * a[j];
1339 a[k] = wkr * a[k] + wki * a[j];
1340 a[j] = xr;
1342 a[m] *= c[0];