2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * Compute transforms of prime sizes using Rader's trick: turn them
22 * into convolutions of size n - 1, which you then perform via a pair
31 #ifdef FFTW_USING_CILK
33 #include <cilk-compat.h>
37 #define WHEN_DEBUG(a) a
42 /* compute n^m mod p, where m >= 0 and p > 0. */
43 static int power_mod(int n
, int m
, int p
)
47 else if (m
% 2 == 0) {
48 int x
= power_mod(n
, m
/ 2, p
);
51 return ((n
* power_mod(n
, m
- 1, p
)) % p
);
55 * Find the period of n in the multiplicative group mod p (p prime).
56 * That is, return the smallest m such that n^m == 1 mod p.
58 static int period(int n
, int p
)
60 int prod
= n
, period
= 1;
63 prod
= (prod
* n
) % p
;
66 fftw_die("non-prime order in Rader\n");
71 /* find a generator for the multiplicative group mod p, where p is prime */
72 static int find_generator(int p
)
76 for (g
= 1; g
< p
; ++g
)
77 if (period(g
, p
) == p
- 1)
80 fftw_die("couldn't find generator for Rader\n");
84 /***************************************************************************/
86 static fftw_rader_data
*create_rader_aux(int p
, int flags
)
88 fftw_complex
*omega
, *work
;
91 FFTW_TRIG_REAL twoPiOverN
;
92 fftw_real scale
= 1.0 / (p
- 1); /* for convolution */
97 fftw_die("non-prime order in Rader\n");
99 flags
&= ~FFTW_IN_PLACE
;
101 d
= (fftw_rader_data
*) fftw_malloc(sizeof(fftw_rader_data
));
103 g
= find_generator(p
);
104 ginv
= power_mod(g
, p
- 2, p
);
106 omega
= (fftw_complex
*) fftw_malloc((p
- 1) * sizeof(fftw_complex
));
108 plan
= fftw_create_plan(p
- 1, FFTW_FORWARD
, flags
);
110 work
= (fftw_complex
*) fftw_malloc((p
- 1) * sizeof(fftw_complex
));
112 twoPiOverN
= FFTW_K2PI
/ (FFTW_TRIG_REAL
) p
;
114 for (i
= 0; i
< p
- 1; ++i
) {
115 c_re(work
[i
]) = scale
* FFTW_TRIG_COS(twoPiOverN
* gpower
);
116 c_im(work
[i
]) = FFTW_FORWARD
* scale
* FFTW_TRIG_SIN(twoPiOverN
* gpower
);
117 gpower
= (gpower
* ginv
) % p
;
120 /* fft permuted roots of unity */
121 fftw_executor_simple(p
- 1, work
, omega
, plan
->root
, 1, 1);
134 d
->cdesc
= (fftw_codelet_desc
*) fftw_malloc(sizeof(fftw_codelet_desc
));
135 d
->cdesc
->name
= NULL
;
136 d
->cdesc
->codelet
= NULL
;
138 d
->cdesc
->dir
= FFTW_FORWARD
;
139 d
->cdesc
->type
= FFTW_RADER
;
140 d
->cdesc
->signature
= g
;
141 d
->cdesc
->ntwiddle
= 0;
142 d
->cdesc
->twiddle_order
= NULL
;
146 /***************************************************************************/
148 static fftw_rader_data
*fftw_create_rader(int p
, int flags
)
150 fftw_rader_data
*d
= fftw_rader_top
;
152 flags
&= ~FFTW_IN_PLACE
;
153 while (d
&& (d
->p
!= p
|| d
->flags
!= flags
))
159 d
= create_rader_aux(p
, flags
);
160 d
->next
= fftw_rader_top
;
165 /***************************************************************************/
167 /* Compute the prime FFTs, premultiplied by twiddle factors. Below, we
168 * extensively use the identity that fft(x*)* = ifft(x) in order to
169 * share data between forward and backward transforms and to obviate
170 * the necessity of having separate forward and backward plans. */
172 void fftw_twiddle_rader(fftw_complex
*A
, const fftw_complex
*W
,
173 int m
, int r
, int stride
,
176 fftw_complex
*tmp
= (fftw_complex
*)
177 fftw_malloc((r
- 1) * sizeof(fftw_complex
));
178 int i
, k
, gpower
= 1, g
= d
->g
, ginv
= d
->ginv
;
180 fftw_complex
*omega
= d
->omega
;
182 for (i
= 0; i
< m
; ++i
, A
+= stride
, W
+= r
- 1) {
184 * Here, we fft W[k-1] * A[k*(m*stride)], using Rader.
185 * (Actually, W is pre-permuted to match the permutation that we
189 /* First, permute the input and multiply by W, storing in tmp: */
190 /* gpower == g^k mod r in the following loop */
191 for (k
= 0; k
< r
- 1; ++k
, gpower
= (gpower
* g
) % r
) {
192 fftw_real rA
, iA
, rW
, iW
;
195 rA
= c_re(A
[gpower
* (m
* stride
)]);
196 iA
= c_im(A
[gpower
* (m
* stride
)]);
197 c_re(tmp
[k
]) = rW
* rA
- iW
* iA
;
198 c_im(tmp
[k
]) = rW
* iA
+ iW
* rA
;
203 fftw_die("incorrect generator in Rader\n");
208 fftw_executor_simple(r
- 1, tmp
, A
+ (m
* stride
),
209 d
->plan
->root
, 1, m
* stride
);
211 /* set output DC component: */
214 c_re(A
[0]) += c_re(A
[(m
* stride
)]);
215 c_im(A
[0]) += c_im(A
[(m
* stride
)]);
217 /* now, multiply by omega: */
218 for (k
= 0; k
< r
- 1; ++k
) {
219 fftw_real rA
, iA
, rW
, iW
;
222 rA
= c_re(A
[(k
+ 1) * (m
* stride
)]);
223 iA
= c_im(A
[(k
+ 1) * (m
* stride
)]);
224 c_re(A
[(k
+ 1) * (m
* stride
)]) = rW
* rA
- iW
* iA
;
225 c_im(A
[(k
+ 1) * (m
* stride
)]) = -(rW
* iA
+ iW
* rA
);
228 /* this will add A[0] to all of the outputs after the ifft */
229 c_re(A
[(m
* stride
)]) += a0r
;
230 c_im(A
[(m
* stride
)]) -= a0i
;
233 fftw_executor_simple(r
- 1, A
+ (m
* stride
), tmp
,
234 d
->plan
->root
, m
* stride
, 1);
236 /* finally, do inverse permutation to unshuffle the output: */
237 for (k
= 0; k
< r
- 1; ++k
, gpower
= (gpower
* ginv
) % r
) {
238 c_re(A
[gpower
* (m
* stride
)]) = c_re(tmp
[k
]);
239 c_im(A
[gpower
* (m
* stride
)]) = -c_im(tmp
[k
]);
244 fftw_die("incorrect generator in Rader\n");
253 void fftwi_twiddle_rader(fftw_complex
*A
, const fftw_complex
*W
,
254 int m
, int r
, int stride
,
257 fftw_complex
*tmp
= (fftw_complex
*)
258 fftw_malloc((r
- 1) * sizeof(fftw_complex
));
259 int i
, k
, gpower
= 1, g
= d
->g
, ginv
= d
->ginv
;
261 fftw_complex
*omega
= d
->omega
;
263 for (i
= 0; i
< m
; ++i
, A
+= stride
, W
+= r
- 1) {
265 * Here, we fft W[k-1]* * A[k*(m*stride)], using Rader.
266 * (Actually, W is pre-permuted to match the permutation that
270 /* First, permute the input and multiply by W*, storing in tmp: */
271 /* gpower == g^k mod r in the following loop */
272 for (k
= 0; k
< r
- 1; ++k
, gpower
= (gpower
* g
) % r
) {
273 fftw_real rA
, iA
, rW
, iW
;
276 rA
= c_re(A
[gpower
* (m
* stride
)]);
277 iA
= c_im(A
[gpower
* (m
* stride
)]);
278 c_re(tmp
[k
]) = rW
* rA
+ iW
* iA
;
279 c_im(tmp
[k
]) = iW
* rA
- rW
* iA
;
284 fftw_die("incorrect generator in Rader\n");
289 fftw_executor_simple(r
- 1, tmp
, A
+ (m
* stride
),
290 d
->plan
->root
, 1, m
* stride
);
292 /* set output DC component: */
295 c_re(A
[0]) += c_re(A
[(m
* stride
)]);
296 c_im(A
[0]) -= c_im(A
[(m
* stride
)]);
298 /* now, multiply by omega: */
299 for (k
= 0; k
< r
- 1; ++k
) {
300 fftw_real rA
, iA
, rW
, iW
;
303 rA
= c_re(A
[(k
+ 1) * (m
* stride
)]);
304 iA
= c_im(A
[(k
+ 1) * (m
* stride
)]);
305 c_re(A
[(k
+ 1) * (m
* stride
)]) = rW
* rA
- iW
* iA
;
306 c_im(A
[(k
+ 1) * (m
* stride
)]) = -(rW
* iA
+ iW
* rA
);
309 /* this will add A[0] to all of the outputs after the ifft */
310 c_re(A
[(m
* stride
)]) += a0r
;
311 c_im(A
[(m
* stride
)]) += a0i
;
314 fftw_executor_simple(r
- 1, A
+ (m
* stride
), tmp
,
315 d
->plan
->root
, m
* stride
, 1);
317 /* finally, do inverse permutation to unshuffle the output: */
318 for (k
= 0; k
< r
- 1; ++k
, gpower
= (gpower
* ginv
) % r
) {
319 A
[gpower
* (m
* stride
)] = tmp
[k
];
324 fftw_die("incorrect generator in Rader\n");
332 /***************************************************************************/
335 * Make an FFTW_RADER plan node. Note that this function must go
336 * here, rather than in putils.c, because it indirectly calls the
337 * fftw_planner. If we included it in putils.c, which is also used
338 * by rfftw, then any program using rfftw would be linked with all
339 * of the FFTW codelets, even if they were not needed. I wish that the
340 * darn linkers operated on a function rather than a file granularity.
342 fftw_plan_node
*fftw_make_node_rader(int n
, int size
, fftw_direction dir
,
343 fftw_plan_node
*recurse
,
346 fftw_plan_node
*p
= fftw_make_node();
348 p
->type
= FFTW_RADER
;
349 p
->nodeu
.rader
.size
= size
;
350 p
->nodeu
.rader
.codelet
= dir
== FFTW_FORWARD
?
351 fftw_twiddle_rader
: fftwi_twiddle_rader
;
352 p
->nodeu
.rader
.rader_data
= fftw_create_rader(size
, flags
);
353 p
->nodeu
.rader
.recurse
= recurse
;
354 fftw_use_node(recurse
);
356 if (flags
& FFTW_MEASURE
)
358 fftw_create_twiddle(n
, p
->nodeu
.rader
.rader_data
->cdesc
);
360 p
->nodeu
.rader
.tw
= 0;