changed reading hint
[gromacs/adressmacs.git] / src / fftw / rfftwnd.c
blob0857109ea31a83fb92efe45e290beed58225930b
2 /*
3 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 /* $Id$ */
23 #include <fftw-int.h>
24 #include <rfftw.h>
26 /********************** prototypes for rexec2 routines **********************/
28 extern void rfftw_real2c_aux(fftw_plan plan, int howmany,
29 fftw_real *in, int istride, int idist,
30 fftw_complex *out, int ostride, int odist,
31 fftw_real *work);
32 extern void rfftw_c2real_aux(fftw_plan plan, int howmany,
33 fftw_complex *in, int istride, int idist,
34 fftw_real *out, int ostride, int odist,
35 fftw_real *work);
36 extern void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
37 fftw_real *in, int istride, int idist,
38 fftw_complex *out, int ostride, int odist,
39 fftw_real *work);
40 extern void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
41 fftw_complex *in, int istride, int idist,
42 fftw_real *out, int ostride, int odist,
43 fftw_real *work);
45 /********************** Initializing the RFFTWND Plan ***********************/
48 * Create an fftwnd_plan specialized for specific arrays. (These
49 * arrays are ignored, however, if they are NULL or if the flags
50 * do not include FFTW_MEASURE.) The main advantage of being
51 * provided arrays like this is that we can do runtime timing
52 * measurements of our options, without worrying about allocating
53 * excessive scratch space.
55 fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n,
56 fftw_direction dir, int flags,
57 fftw_real *in, int istride,
58 fftw_real *out, int ostride)
60 fftwnd_plan p;
61 int i;
62 int rflags = flags & ~FFTW_IN_PLACE;
63 /* note that we always do rfftw transforms out-of-place in rexec2.c */
65 if (flags & FFTW_IN_PLACE) {
66 out = NULL;
67 ostride = istride;
69 istride = ostride = 1; /*
70 * strides don't work yet, since it is not
71 * clear whether they apply to real
72 * or complex data
75 if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
76 return 0;
78 for (i = 0; i < rank - 1; ++i)
79 p->n_after[i] = (n[rank - 1]/2 + 1) * (p->n_after[i] / n[rank - 1]);
80 if (rank > 0)
81 p->n[rank - 1] = n[rank - 1] / 2 + 1;
83 p->plans = fftwnd_new_plan_array(rank);
84 if (rank > 0 && !p->plans) {
85 rfftwnd_destroy_plan(p);
86 return 0;
88 if (rank > 0) {
89 p->plans[rank - 1] = rfftw_create_plan(n[rank - 1], dir, rflags);
90 if (!p->plans[rank - 1]) {
91 rfftwnd_destroy_plan(p);
92 return 0;
95 if (rank > 1) {
96 if (!(flags & FFTW_MEASURE) || in == 0
97 || (!p->is_in_place && out == 0)) {
98 if (!fftwnd_create_plans_generic(p->plans, rank - 1, n,
99 dir, flags | FFTW_IN_PLACE)) {
100 rfftwnd_destroy_plan(p);
101 return 0;
103 } else if (dir == FFTW_COMPLEX_TO_REAL || (flags & FFTW_IN_PLACE)) {
104 if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
105 p->n_after,
106 dir, flags | FFTW_IN_PLACE,
107 (fftw_complex *) in,
108 istride,
109 0, 0)) {
110 rfftwnd_destroy_plan(p);
111 return 0;
113 } else {
114 if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
115 p->n_after,
116 dir, flags | FFTW_IN_PLACE,
117 (fftw_complex *) out,
118 ostride,
119 0, 0)) {
120 rfftwnd_destroy_plan(p);
121 return 0;
125 p->nbuffers = 0;
126 p->nwork = fftwnd_work_size(rank, p->n, flags | FFTW_IN_PLACE,
127 p->nbuffers + 1);
128 if (p->nwork && !(flags & FFTW_THREADSAFE)) {
129 p->work = (fftw_complex *) fftw_malloc(p->nwork
130 * sizeof(fftw_complex));
131 if (!p->work) {
132 rfftwnd_destroy_plan(p);
133 return 0;
136 return p;
139 fftwnd_plan rfftw2d_create_plan_specific(int nx, int ny,
140 fftw_direction dir, int flags,
141 fftw_real *in, int istride,
142 fftw_real *out, int ostride)
144 int n[2];
146 n[0] = nx;
147 n[1] = ny;
149 return rfftwnd_create_plan_specific(2, n, dir, flags,
150 in, istride, out, ostride);
153 fftwnd_plan rfftw3d_create_plan_specific(int nx, int ny, int nz,
154 fftw_direction dir, int flags,
155 fftw_real *in, int istride,
156 fftw_real *out, int ostride)
158 int n[3];
160 n[0] = nx;
161 n[1] = ny;
162 n[2] = nz;
164 return rfftwnd_create_plan_specific(3, n, dir, flags,
165 in, istride, out, ostride);
168 /* Create a generic fftwnd plan: */
170 fftwnd_plan rfftwnd_create_plan(int rank, const int *n,
171 fftw_direction dir, int flags)
173 return rfftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1);
176 fftwnd_plan rfftw2d_create_plan(int nx, int ny,
177 fftw_direction dir, int flags)
179 return rfftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1);
182 fftwnd_plan rfftw3d_create_plan(int nx, int ny, int nz,
183 fftw_direction dir, int flags)
185 return rfftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1);
188 /************************ Freeing the RFFTWND Plan ************************/
190 void rfftwnd_destroy_plan(fftwnd_plan plan)
192 fftwnd_destroy_plan(plan);
195 /************************ Printing the RFFTWND Plan ************************/
197 void rfftwnd_fprint_plan(FILE *f, fftwnd_plan plan)
199 fftwnd_fprint_plan(f, plan);
202 void rfftwnd_print_plan(fftwnd_plan plan)
204 rfftwnd_fprint_plan(stdout, plan);
207 /*********** Computing the N-Dimensional FFT: Auxiliary Routines ************/
209 void rfftwnd_real2c_aux(fftwnd_plan p, int cur_dim,
210 fftw_real *in, int istride,
211 fftw_complex *out, int ostride,
212 fftw_real *work)
214 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
216 if (cur_dim == p->rank - 2) {
217 /* just do the last dimension directly: */
218 if (p->is_in_place)
219 rfftw_real2c_aux(p->plans[p->rank - 1], n,
220 in, istride, (n_after * istride) * 2,
221 out, istride, n_after * istride,
222 work);
223 else
224 rfftw_real2c_aux(p->plans[p->rank - 1], n,
225 in, istride, p->plans[p->rank - 1]->n * istride,
226 out, ostride, n_after * ostride,
227 work);
228 } else { /* we have at least two dimensions to go */
229 int nr = p->plans[p->rank - 1]->n;
230 int n_after_r = p->is_in_place ? n_after * 2
231 : nr * (n_after / (nr/2 + 1));
232 int i;
235 * process the subsequent dimensions recursively, in hyperslabs,
236 * to get maximum locality:
238 for (i = 0; i < n; ++i)
239 rfftwnd_real2c_aux(p, cur_dim + 1,
240 in + i * n_after_r * istride, istride,
241 out + i * n_after * ostride, ostride, work);
244 /* do the current dimension (in-place): */
245 fftw(p->plans[cur_dim], n_after,
246 out, n_after * ostride, ostride,
247 (fftw_complex *) work, 1, 0);
248 /* I hate this cast */
251 void rfftwnd_c2real_aux(fftwnd_plan p, int cur_dim,
252 fftw_complex *in, int istride,
253 fftw_real *out, int ostride,
254 fftw_real *work)
256 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
258 /* do the current dimension (in-place): */
259 fftw(p->plans[cur_dim], n_after,
260 in, n_after * istride, istride,
261 (fftw_complex *) work, 1, 0);
263 if (cur_dim == p->rank - 2) {
264 /* just do the last dimension directly: */
265 if (p->is_in_place)
266 rfftw_c2real_aux(p->plans[p->rank - 1], n,
267 in, istride, n_after * istride,
268 out, istride, (n_after * istride) * 2,
269 work);
270 else
271 rfftw_c2real_aux(p->plans[p->rank - 1], n,
272 in, istride, n_after * istride,
273 out, ostride, p->plans[p->rank - 1]->n * ostride,
274 work);
275 } else { /* we have at least two dimensions to go */
276 int nr = p->plans[p->rank - 1]->n;
277 int n_after_r = p->is_in_place ? n_after * 2 :
278 nr * (n_after / (nr/2 + 1));
279 int i;
282 * process the subsequent dimensions recursively, in hyperslabs,
283 * to get maximum locality:
285 for (i = 0; i < n; ++i)
286 rfftwnd_c2real_aux(p, cur_dim + 1,
287 in + i * n_after * istride, istride,
288 out + i * n_after_r * ostride, ostride, work);
293 * alternate version of rfftwnd_aux -- this version pushes the howmany
294 * loop down to the leaves of the computation, for greater locality
295 * in cases where dist < stride. It is also required for correctness
296 * if in==out, and we must call a special version of the executor.
297 * Note that work must point to 'howmany' copies of its data
298 * if in == out.
301 void rfftwnd_real2c_aux_howmany(fftwnd_plan p, int cur_dim,
302 int howmany,
303 fftw_real *in, int istride, int idist,
304 fftw_complex *out, int ostride, int odist,
305 fftw_complex *work)
307 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
308 int k;
310 if (cur_dim == p->rank - 2) {
311 /* just do the last dimension directly: */
312 if (p->is_in_place)
313 for (k = 0; k < n; ++k)
314 rfftw_real2c_overlap_aux(p->plans[p->rank - 1], howmany,
315 in + (k * n_after * istride) * 2,
316 istride, idist,
317 out + (k * n_after * ostride),
318 ostride, odist,
319 (fftw_real *) work);
320 else {
321 int nlast = p->plans[p->rank - 1]->n;
322 for (k = 0; k < n; ++k)
323 rfftw_real2c_aux(p->plans[p->rank - 1], howmany,
324 in + k * nlast * istride,
325 istride, idist,
326 out + k * n_after * ostride,
327 ostride, odist,
328 (fftw_real *) work);
330 } else { /* we have at least two dimensions to go */
331 int nr = p->plans[p->rank - 1]->n;
332 int n_after_r = p->is_in_place ? n_after * 2 :
333 nr * (n_after / (nr/2 + 1));
334 int i;
337 * process the subsequent dimensions recursively, in hyperslabs,
338 * to get maximum locality:
340 for (i = 0; i < n; ++i)
341 rfftwnd_real2c_aux_howmany(p, cur_dim + 1, howmany,
342 in + i * n_after_r * istride, istride, idist,
343 out + i * n_after * ostride, ostride, odist,
344 work);
347 /* do the current dimension (in-place): */
348 for (k = 0; k < n_after; ++k)
349 fftw(p->plans[cur_dim], howmany,
350 out + k * ostride, n_after * ostride, odist,
351 work, 1, 0);
354 void rfftwnd_c2real_aux_howmany(fftwnd_plan p, int cur_dim,
355 int howmany,
356 fftw_complex *in, int istride, int idist,
357 fftw_real *out, int ostride, int odist,
358 fftw_complex *work)
360 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
361 int k;
363 /* do the current dimension (in-place): */
364 for (k = 0; k < n_after; ++k)
365 fftw(p->plans[cur_dim], howmany,
366 in + k * istride, n_after * istride, idist,
367 work, 1, 0);
369 if (cur_dim == p->rank - 2) {
370 /* just do the last dimension directly: */
371 if (p->is_in_place)
372 for (k = 0; k < n; ++k)
373 rfftw_c2real_overlap_aux(p->plans[p->rank - 1], howmany,
374 in + (k * n_after * istride),
375 istride, idist,
376 out + (k * n_after * ostride) * 2,
377 ostride, odist,
378 (fftw_real *) work);
379 else {
380 int nlast = p->plans[p->rank - 1]->n;
381 for (k = 0; k < n; ++k)
382 rfftw_c2real_aux(p->plans[p->rank - 1], howmany,
383 in + k * n_after * istride,
384 istride, idist,
385 out + k * nlast * ostride,
386 ostride, odist,
387 (fftw_real *) work);
389 } else { /* we have at least two dimensions to go */
390 int nr = p->plans[p->rank - 1]->n;
391 int n_after_r = p->is_in_place ? n_after * 2
392 : nr * (n_after / (nr/2 + 1));
393 int i;
396 * process the subsequent dimensions recursively, in hyperslabs,
397 * to get maximum locality:
399 for (i = 0; i < n; ++i)
400 rfftwnd_c2real_aux_howmany(p, cur_dim + 1, howmany,
401 in + i * n_after * istride, istride, idist,
402 out + i * n_after_r * ostride, ostride, odist,
403 work);
407 /********** Computing the N-Dimensional FFT: User-Visible Routines **********/
409 void rfftwnd_real_to_complex(fftwnd_plan p, int howmany,
410 fftw_real *in, int istride, int idist,
411 fftw_complex *out, int ostride, int odist)
413 fftw_complex *work = p->work;
414 int rank = p->rank;
415 int free_work = 0;
417 if (p->dir != FFTW_REAL_TO_COMPLEX)
418 fftw_die("rfftwnd_real_to_complex with complex-to-real plan");
420 #ifdef FFTW_DEBUG
421 if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
422 && p->nwork && p->work)
423 fftw_die("bug with FFTW_THREADSAFE flag");
424 #endif
426 if (p->is_in_place) {
427 ostride = istride;
428 odist = (idist == 1) ? 1 : (idist / 2); /* ugh */
429 out = (fftw_complex *) in;
430 if (howmany > 1 && istride > idist && rank > 0) {
431 int new_nwork;
433 new_nwork = p->n[rank - 1] * howmany;
434 if (new_nwork > p->nwork) {
435 work = (fftw_complex *)
436 fftw_malloc(sizeof(fftw_complex) * new_nwork);
437 if (!work)
438 fftw_die("error allocating work array");
439 free_work = 1;
443 if (p->nwork && !work) {
444 work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
445 free_work = 1;
447 switch (rank) {
448 case 0:
449 break;
450 case 1:
451 if (p->is_in_place && howmany > 1 && istride > idist)
452 rfftw_real2c_overlap_aux(p->plans[0], howmany,
453 in, istride, idist,
454 out, ostride, odist,
455 (fftw_real *) work);
456 else
457 rfftw_real2c_aux(p->plans[0], howmany,
458 in, istride, idist,
459 out, ostride, odist,
460 (fftw_real *) work);
461 break;
462 default: /* rank >= 2 */
464 if (howmany > 1 && ostride > odist)
465 rfftwnd_real2c_aux_howmany(p, 0, howmany,
466 in, istride, idist,
467 out, ostride, odist,
468 work);
469 else {
470 int i;
472 for (i = 0; i < howmany; ++i)
473 rfftwnd_real2c_aux(p, 0,
474 in + i * idist, istride,
475 out + i * odist, ostride,
476 (fftw_real *) work);
481 if (free_work)
482 fftw_free(work);
485 void rfftwnd_complex_to_real(fftwnd_plan p, int howmany,
486 fftw_complex *in, int istride, int idist,
487 fftw_real *out, int ostride, int odist)
489 fftw_complex *work = p->work;
490 int rank = p->rank;
491 int free_work = 0;
493 if (p->dir != FFTW_COMPLEX_TO_REAL)
494 fftw_die("rfftwnd_complex_to_real with real-to-complex plan");
496 #ifdef FFTW_DEBUG
497 if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
498 && p->nwork && p->work)
499 fftw_die("bug with FFTW_THREADSAFE flag");
500 #endif
502 if (p->is_in_place) {
503 ostride = istride;
504 odist = idist;
505 odist = (idist == 1) ? 1 : (idist * 2); /* ugh */
506 out = (fftw_real *) in;
507 if (howmany > 1 && istride > idist && rank > 0) {
508 int new_nwork = p->n[rank - 1] * howmany;
509 if (new_nwork > p->nwork) {
510 work = (fftw_complex *)
511 fftw_malloc(sizeof(fftw_complex) * new_nwork);
512 if (!work)
513 fftw_die("error allocating work array");
514 free_work = 1;
518 if (p->nwork && !work) {
519 work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
520 free_work = 1;
522 switch (rank) {
523 case 0:
524 break;
525 case 1:
526 if (p->is_in_place && howmany > 1 && istride > idist)
527 rfftw_c2real_overlap_aux(p->plans[0], howmany,
528 in, istride, idist,
529 out, ostride, odist,
530 (fftw_real *) work);
531 else
532 rfftw_c2real_aux(p->plans[0], howmany,
533 in, istride, idist,
534 out, ostride, odist,
535 (fftw_real *) work);
536 break;
537 default: /* rank >= 2 */
539 if (howmany > 1 && ostride > odist)
540 rfftwnd_c2real_aux_howmany(p, 0, howmany,
541 in, istride, idist,
542 out, ostride, odist,
543 work);
544 else {
545 int i;
547 for (i = 0; i < howmany; ++i)
548 rfftwnd_c2real_aux(p, 0,
549 in + i * idist, istride,
550 out + i * odist, ostride,
551 (fftw_real *) work);
556 if (free_work)
557 fftw_free(work);
560 void rfftwnd_one_real_to_complex(fftwnd_plan p,
561 fftw_real *in, fftw_complex *out)
563 rfftwnd_real_to_complex(p, 1, in, 1, 1, out, 1, 1);
566 void rfftwnd_one_complex_to_real(fftwnd_plan p,
567 fftw_complex *in, fftw_real *out)
569 rfftwnd_complex_to_real(p, 1, in, 1, 1, out, 1, 1);