3 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 /********************** prototypes for rexec2 routines **********************/
28 extern void rfftw_real2c_aux(fftw_plan plan
, int howmany
,
29 fftw_real
*in
, int istride
, int idist
,
30 fftw_complex
*out
, int ostride
, int odist
,
32 extern void rfftw_c2real_aux(fftw_plan plan
, int howmany
,
33 fftw_complex
*in
, int istride
, int idist
,
34 fftw_real
*out
, int ostride
, int odist
,
36 extern void rfftw_real2c_overlap_aux(fftw_plan plan
, int howmany
,
37 fftw_real
*in
, int istride
, int idist
,
38 fftw_complex
*out
, int ostride
, int odist
,
40 extern void rfftw_c2real_overlap_aux(fftw_plan plan
, int howmany
,
41 fftw_complex
*in
, int istride
, int idist
,
42 fftw_real
*out
, int ostride
, int odist
,
45 /********************** Initializing the RFFTWND Plan ***********************/
48 * Create an fftwnd_plan specialized for specific arrays. (These
49 * arrays are ignored, however, if they are NULL or if the flags
50 * do not include FFTW_MEASURE.) The main advantage of being
51 * provided arrays like this is that we can do runtime timing
52 * measurements of our options, without worrying about allocating
53 * excessive scratch space.
55 fftwnd_plan
rfftwnd_create_plan_specific(int rank
, const int *n
,
56 fftw_direction dir
, int flags
,
57 fftw_real
*in
, int istride
,
58 fftw_real
*out
, int ostride
)
62 int rflags
= flags
& ~FFTW_IN_PLACE
;
63 /* note that we always do rfftw transforms out-of-place in rexec2.c */
65 if (flags
& FFTW_IN_PLACE
) {
69 istride
= ostride
= 1; /*
70 * strides don't work yet, since it is not
71 * clear whether they apply to real
75 if (!(p
= fftwnd_create_plan_aux(rank
, n
, dir
, flags
)))
78 for (i
= 0; i
< rank
- 1; ++i
)
79 p
->n_after
[i
] = (n
[rank
- 1]/2 + 1) * (p
->n_after
[i
] / n
[rank
- 1]);
81 p
->n
[rank
- 1] = n
[rank
- 1] / 2 + 1;
83 p
->plans
= fftwnd_new_plan_array(rank
);
84 if (rank
> 0 && !p
->plans
) {
85 rfftwnd_destroy_plan(p
);
89 p
->plans
[rank
- 1] = rfftw_create_plan(n
[rank
- 1], dir
, rflags
);
90 if (!p
->plans
[rank
- 1]) {
91 rfftwnd_destroy_plan(p
);
96 if (!(flags
& FFTW_MEASURE
) || in
== 0
97 || (!p
->is_in_place
&& out
== 0)) {
98 if (!fftwnd_create_plans_generic(p
->plans
, rank
- 1, n
,
99 dir
, flags
| FFTW_IN_PLACE
)) {
100 rfftwnd_destroy_plan(p
);
103 } else if (dir
== FFTW_COMPLEX_TO_REAL
|| (flags
& FFTW_IN_PLACE
)) {
104 if (!fftwnd_create_plans_specific(p
->plans
, rank
- 1, n
,
106 dir
, flags
| FFTW_IN_PLACE
,
110 rfftwnd_destroy_plan(p
);
114 if (!fftwnd_create_plans_specific(p
->plans
, rank
- 1, n
,
116 dir
, flags
| FFTW_IN_PLACE
,
117 (fftw_complex
*) out
,
120 rfftwnd_destroy_plan(p
);
126 p
->nwork
= fftwnd_work_size(rank
, p
->n
, flags
| FFTW_IN_PLACE
,
128 if (p
->nwork
&& !(flags
& FFTW_THREADSAFE
)) {
129 p
->work
= (fftw_complex
*) fftw_malloc(p
->nwork
130 * sizeof(fftw_complex
));
132 rfftwnd_destroy_plan(p
);
139 fftwnd_plan
rfftw2d_create_plan_specific(int nx
, int ny
,
140 fftw_direction dir
, int flags
,
141 fftw_real
*in
, int istride
,
142 fftw_real
*out
, int ostride
)
149 return rfftwnd_create_plan_specific(2, n
, dir
, flags
,
150 in
, istride
, out
, ostride
);
153 fftwnd_plan
rfftw3d_create_plan_specific(int nx
, int ny
, int nz
,
154 fftw_direction dir
, int flags
,
155 fftw_real
*in
, int istride
,
156 fftw_real
*out
, int ostride
)
164 return rfftwnd_create_plan_specific(3, n
, dir
, flags
,
165 in
, istride
, out
, ostride
);
168 /* Create a generic fftwnd plan: */
170 fftwnd_plan
rfftwnd_create_plan(int rank
, const int *n
,
171 fftw_direction dir
, int flags
)
173 return rfftwnd_create_plan_specific(rank
, n
, dir
, flags
, 0, 1, 0, 1);
176 fftwnd_plan
rfftw2d_create_plan(int nx
, int ny
,
177 fftw_direction dir
, int flags
)
179 return rfftw2d_create_plan_specific(nx
, ny
, dir
, flags
, 0, 1, 0, 1);
182 fftwnd_plan
rfftw3d_create_plan(int nx
, int ny
, int nz
,
183 fftw_direction dir
, int flags
)
185 return rfftw3d_create_plan_specific(nx
, ny
, nz
, dir
, flags
, 0, 1, 0, 1);
188 /************************ Freeing the RFFTWND Plan ************************/
190 void rfftwnd_destroy_plan(fftwnd_plan plan
)
192 fftwnd_destroy_plan(plan
);
195 /************************ Printing the RFFTWND Plan ************************/
197 void rfftwnd_fprint_plan(FILE *f
, fftwnd_plan plan
)
199 fftwnd_fprint_plan(f
, plan
);
202 void rfftwnd_print_plan(fftwnd_plan plan
)
204 rfftwnd_fprint_plan(stdout
, plan
);
207 /*********** Computing the N-Dimensional FFT: Auxiliary Routines ************/
209 void rfftwnd_real2c_aux(fftwnd_plan p
, int cur_dim
,
210 fftw_real
*in
, int istride
,
211 fftw_complex
*out
, int ostride
,
214 int n_after
= p
->n_after
[cur_dim
], n
= p
->n
[cur_dim
];
216 if (cur_dim
== p
->rank
- 2) {
217 /* just do the last dimension directly: */
219 rfftw_real2c_aux(p
->plans
[p
->rank
- 1], n
,
220 in
, istride
, (n_after
* istride
) * 2,
221 out
, istride
, n_after
* istride
,
224 rfftw_real2c_aux(p
->plans
[p
->rank
- 1], n
,
225 in
, istride
, p
->plans
[p
->rank
- 1]->n
* istride
,
226 out
, ostride
, n_after
* ostride
,
228 } else { /* we have at least two dimensions to go */
229 int nr
= p
->plans
[p
->rank
- 1]->n
;
230 int n_after_r
= p
->is_in_place
? n_after
* 2
231 : nr
* (n_after
/ (nr
/2 + 1));
235 * process the subsequent dimensions recursively, in hyperslabs,
236 * to get maximum locality:
238 for (i
= 0; i
< n
; ++i
)
239 rfftwnd_real2c_aux(p
, cur_dim
+ 1,
240 in
+ i
* n_after_r
* istride
, istride
,
241 out
+ i
* n_after
* ostride
, ostride
, work
);
244 /* do the current dimension (in-place): */
245 fftw(p
->plans
[cur_dim
], n_after
,
246 out
, n_after
* ostride
, ostride
,
247 (fftw_complex
*) work
, 1, 0);
248 /* I hate this cast */
251 void rfftwnd_c2real_aux(fftwnd_plan p
, int cur_dim
,
252 fftw_complex
*in
, int istride
,
253 fftw_real
*out
, int ostride
,
256 int n_after
= p
->n_after
[cur_dim
], n
= p
->n
[cur_dim
];
258 /* do the current dimension (in-place): */
259 fftw(p
->plans
[cur_dim
], n_after
,
260 in
, n_after
* istride
, istride
,
261 (fftw_complex
*) work
, 1, 0);
263 if (cur_dim
== p
->rank
- 2) {
264 /* just do the last dimension directly: */
266 rfftw_c2real_aux(p
->plans
[p
->rank
- 1], n
,
267 in
, istride
, n_after
* istride
,
268 out
, istride
, (n_after
* istride
) * 2,
271 rfftw_c2real_aux(p
->plans
[p
->rank
- 1], n
,
272 in
, istride
, n_after
* istride
,
273 out
, ostride
, p
->plans
[p
->rank
- 1]->n
* ostride
,
275 } else { /* we have at least two dimensions to go */
276 int nr
= p
->plans
[p
->rank
- 1]->n
;
277 int n_after_r
= p
->is_in_place
? n_after
* 2 :
278 nr
* (n_after
/ (nr
/2 + 1));
282 * process the subsequent dimensions recursively, in hyperslabs,
283 * to get maximum locality:
285 for (i
= 0; i
< n
; ++i
)
286 rfftwnd_c2real_aux(p
, cur_dim
+ 1,
287 in
+ i
* n_after
* istride
, istride
,
288 out
+ i
* n_after_r
* ostride
, ostride
, work
);
293 * alternate version of rfftwnd_aux -- this version pushes the howmany
294 * loop down to the leaves of the computation, for greater locality
295 * in cases where dist < stride. It is also required for correctness
296 * if in==out, and we must call a special version of the executor.
297 * Note that work must point to 'howmany' copies of its data
301 void rfftwnd_real2c_aux_howmany(fftwnd_plan p
, int cur_dim
,
303 fftw_real
*in
, int istride
, int idist
,
304 fftw_complex
*out
, int ostride
, int odist
,
307 int n_after
= p
->n_after
[cur_dim
], n
= p
->n
[cur_dim
];
310 if (cur_dim
== p
->rank
- 2) {
311 /* just do the last dimension directly: */
313 for (k
= 0; k
< n
; ++k
)
314 rfftw_real2c_overlap_aux(p
->plans
[p
->rank
- 1], howmany
,
315 in
+ (k
* n_after
* istride
) * 2,
317 out
+ (k
* n_after
* ostride
),
321 int nlast
= p
->plans
[p
->rank
- 1]->n
;
322 for (k
= 0; k
< n
; ++k
)
323 rfftw_real2c_aux(p
->plans
[p
->rank
- 1], howmany
,
324 in
+ k
* nlast
* istride
,
326 out
+ k
* n_after
* ostride
,
330 } else { /* we have at least two dimensions to go */
331 int nr
= p
->plans
[p
->rank
- 1]->n
;
332 int n_after_r
= p
->is_in_place
? n_after
* 2 :
333 nr
* (n_after
/ (nr
/2 + 1));
337 * process the subsequent dimensions recursively, in hyperslabs,
338 * to get maximum locality:
340 for (i
= 0; i
< n
; ++i
)
341 rfftwnd_real2c_aux_howmany(p
, cur_dim
+ 1, howmany
,
342 in
+ i
* n_after_r
* istride
, istride
, idist
,
343 out
+ i
* n_after
* ostride
, ostride
, odist
,
347 /* do the current dimension (in-place): */
348 for (k
= 0; k
< n_after
; ++k
)
349 fftw(p
->plans
[cur_dim
], howmany
,
350 out
+ k
* ostride
, n_after
* ostride
, odist
,
354 void rfftwnd_c2real_aux_howmany(fftwnd_plan p
, int cur_dim
,
356 fftw_complex
*in
, int istride
, int idist
,
357 fftw_real
*out
, int ostride
, int odist
,
360 int n_after
= p
->n_after
[cur_dim
], n
= p
->n
[cur_dim
];
363 /* do the current dimension (in-place): */
364 for (k
= 0; k
< n_after
; ++k
)
365 fftw(p
->plans
[cur_dim
], howmany
,
366 in
+ k
* istride
, n_after
* istride
, idist
,
369 if (cur_dim
== p
->rank
- 2) {
370 /* just do the last dimension directly: */
372 for (k
= 0; k
< n
; ++k
)
373 rfftw_c2real_overlap_aux(p
->plans
[p
->rank
- 1], howmany
,
374 in
+ (k
* n_after
* istride
),
376 out
+ (k
* n_after
* ostride
) * 2,
380 int nlast
= p
->plans
[p
->rank
- 1]->n
;
381 for (k
= 0; k
< n
; ++k
)
382 rfftw_c2real_aux(p
->plans
[p
->rank
- 1], howmany
,
383 in
+ k
* n_after
* istride
,
385 out
+ k
* nlast
* ostride
,
389 } else { /* we have at least two dimensions to go */
390 int nr
= p
->plans
[p
->rank
- 1]->n
;
391 int n_after_r
= p
->is_in_place
? n_after
* 2
392 : nr
* (n_after
/ (nr
/2 + 1));
396 * process the subsequent dimensions recursively, in hyperslabs,
397 * to get maximum locality:
399 for (i
= 0; i
< n
; ++i
)
400 rfftwnd_c2real_aux_howmany(p
, cur_dim
+ 1, howmany
,
401 in
+ i
* n_after
* istride
, istride
, idist
,
402 out
+ i
* n_after_r
* ostride
, ostride
, odist
,
407 /********** Computing the N-Dimensional FFT: User-Visible Routines **********/
409 void rfftwnd_real_to_complex(fftwnd_plan p
, int howmany
,
410 fftw_real
*in
, int istride
, int idist
,
411 fftw_complex
*out
, int ostride
, int odist
)
413 fftw_complex
*work
= p
->work
;
417 if (p
->dir
!= FFTW_REAL_TO_COMPLEX
)
418 fftw_die("rfftwnd_real_to_complex with complex-to-real plan");
421 if (p
->rank
> 0 && (p
->plans
[0]->flags
& FFTW_THREADSAFE
)
422 && p
->nwork
&& p
->work
)
423 fftw_die("bug with FFTW_THREADSAFE flag");
426 if (p
->is_in_place
) {
428 odist
= (idist
== 1) ? 1 : (idist
/ 2); /* ugh */
429 out
= (fftw_complex
*) in
;
430 if (howmany
> 1 && istride
> idist
&& rank
> 0) {
433 new_nwork
= p
->n
[rank
- 1] * howmany
;
434 if (new_nwork
> p
->nwork
) {
435 work
= (fftw_complex
*)
436 fftw_malloc(sizeof(fftw_complex
) * new_nwork
);
438 fftw_die("error allocating work array");
443 if (p
->nwork
&& !work
) {
444 work
= (fftw_complex
*) fftw_malloc(sizeof(fftw_complex
) * p
->nwork
);
451 if (p
->is_in_place
&& howmany
> 1 && istride
> idist
)
452 rfftw_real2c_overlap_aux(p
->plans
[0], howmany
,
457 rfftw_real2c_aux(p
->plans
[0], howmany
,
462 default: /* rank >= 2 */
464 if (howmany
> 1 && ostride
> odist
)
465 rfftwnd_real2c_aux_howmany(p
, 0, howmany
,
472 for (i
= 0; i
< howmany
; ++i
)
473 rfftwnd_real2c_aux(p
, 0,
474 in
+ i
* idist
, istride
,
475 out
+ i
* odist
, ostride
,
485 void rfftwnd_complex_to_real(fftwnd_plan p
, int howmany
,
486 fftw_complex
*in
, int istride
, int idist
,
487 fftw_real
*out
, int ostride
, int odist
)
489 fftw_complex
*work
= p
->work
;
493 if (p
->dir
!= FFTW_COMPLEX_TO_REAL
)
494 fftw_die("rfftwnd_complex_to_real with real-to-complex plan");
497 if (p
->rank
> 0 && (p
->plans
[0]->flags
& FFTW_THREADSAFE
)
498 && p
->nwork
&& p
->work
)
499 fftw_die("bug with FFTW_THREADSAFE flag");
502 if (p
->is_in_place
) {
505 odist
= (idist
== 1) ? 1 : (idist
* 2); /* ugh */
506 out
= (fftw_real
*) in
;
507 if (howmany
> 1 && istride
> idist
&& rank
> 0) {
508 int new_nwork
= p
->n
[rank
- 1] * howmany
;
509 if (new_nwork
> p
->nwork
) {
510 work
= (fftw_complex
*)
511 fftw_malloc(sizeof(fftw_complex
) * new_nwork
);
513 fftw_die("error allocating work array");
518 if (p
->nwork
&& !work
) {
519 work
= (fftw_complex
*) fftw_malloc(sizeof(fftw_complex
) * p
->nwork
);
526 if (p
->is_in_place
&& howmany
> 1 && istride
> idist
)
527 rfftw_c2real_overlap_aux(p
->plans
[0], howmany
,
532 rfftw_c2real_aux(p
->plans
[0], howmany
,
537 default: /* rank >= 2 */
539 if (howmany
> 1 && ostride
> odist
)
540 rfftwnd_c2real_aux_howmany(p
, 0, howmany
,
547 for (i
= 0; i
< howmany
; ++i
)
548 rfftwnd_c2real_aux(p
, 0,
549 in
+ i
* idist
, istride
,
550 out
+ i
* odist
, ostride
,
560 void rfftwnd_one_real_to_complex(fftwnd_plan p
,
561 fftw_real
*in
, fftw_complex
*out
)
563 rfftwnd_real_to_complex(p
, 1, in
, 1, 1, out
, 1, 1);
566 void rfftwnd_one_complex_to_real(fftwnd_plan p
,
567 fftw_complex
*in
, fftw_real
*out
)
569 rfftwnd_complex_to_real(p
, 1, in
, 1, 1, out
, 1, 1);