changed reading hint
[gromacs/adressmacs.git] / src / fftw / rexec.c
bloba82bb932aa363b44b7143149e01bc897870f10ab
1 /*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * rexec.c -- execute the fft
24 /* $Id$ */
25 #include <stdio.h>
26 #include <stdlib.h>
28 #include <fftw-int.h>
29 #include <rfftw.h>
31 void rfftw_strided_copy(int n, fftw_real *in, int ostride,
32 fftw_real *out)
34 int i;
35 fftw_real r0, r1, r2, r3;
37 i = 0;
38 for (; i < (n & 3); ++i) {
39 out[i * ostride] = in[i];
41 for (; i < n; i += 4) {
42 r0 = in[i];
43 r1 = in[i + 1];
44 r2 = in[i + 2];
45 r3 = in[i + 3];
46 out[i * ostride] = r0;
47 out[(i + 1) * ostride] = r1;
48 out[(i + 2) * ostride] = r2;
49 out[(i + 3) * ostride] = r3;
53 void rfftw_executor_simple(int n, fftw_real *in,
54 fftw_real *out,
55 fftw_plan_node *p,
56 int istride,
57 int ostride)
59 switch (p->type) {
60 case FFTW_REAL2HC:
61 HACK_ALIGN_STACK_ODD();
62 (p->nodeu.real2hc.codelet) (in, out, out + n * ostride,
63 istride, ostride, -ostride);
64 break;
66 case FFTW_HC2REAL:
67 HACK_ALIGN_STACK_ODD();
68 (p->nodeu.hc2real.codelet) (in, in + n * istride, out,
69 istride, -istride, ostride);
70 break;
72 case FFTW_HC2HC:
74 int r = p->nodeu.hc2hc.size;
75 int m = n / r;
76 int i;
77 /*
78 * please do resist the temptation of initializing
79 * these variables here. Doing so forces the
80 * compiler to keep a live variable across the
81 * recursive call.
83 fftw_hc2hc_codelet *codelet;
84 fftw_complex *W;
86 switch (p->nodeu.hc2hc.dir) {
87 case FFTW_REAL_TO_COMPLEX:
88 for (i = 0; i < r; ++i)
89 rfftw_executor_simple(m, in + i * istride,
90 out + i * (m*ostride),
91 p->nodeu.hc2hc.recurse,
92 istride * r, ostride);
94 W = p->nodeu.hc2hc.tw->twarray;
95 codelet = p->nodeu.hc2hc.codelet;
96 HACK_ALIGN_STACK_EVEN();
97 codelet(out, W, m * ostride, m, ostride);
98 break;
99 case FFTW_COMPLEX_TO_REAL:
100 W = p->nodeu.hc2hc.tw->twarray;
101 codelet = p->nodeu.hc2hc.codelet;
102 HACK_ALIGN_STACK_EVEN();
103 codelet(in, W, m * istride, m, istride);
105 for (i = 0; i < r; ++i)
106 rfftw_executor_simple(m, in + i * (m*istride),
107 out + i * ostride,
108 p->nodeu.hc2hc.recurse,
109 istride, ostride * r);
110 break;
111 default:
112 goto bug;
115 break;
118 case FFTW_RGENERIC:
120 int r = p->nodeu.rgeneric.size;
121 int m = n / r;
122 int i;
123 fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
124 fftw_complex *W = p->nodeu.rgeneric.tw->twarray;
126 switch (p->nodeu.rgeneric.dir) {
127 case FFTW_REAL_TO_COMPLEX:
128 for (i = 0; i < r; ++i)
129 rfftw_executor_simple(m, in + i * istride,
130 out + i * (m * ostride),
131 p->nodeu.rgeneric.recurse,
132 istride * r, ostride);
134 codelet(out, W, m, r, n, ostride);
135 break;
136 case FFTW_COMPLEX_TO_REAL:
137 codelet(in, W, m, r, n, istride);
139 for (i = 0; i < r; ++i)
140 rfftw_executor_simple(m, in + i * m * istride,
141 out + i * ostride,
142 p->nodeu.rgeneric.recurse,
143 istride, ostride * r);
144 break;
145 default:
146 goto bug;
149 break;
152 default:
153 bug:
154 fftw_die("BUG in rexecutor: invalid plan\n");
155 break;
159 static void rexecutor_simple_inplace(int n, fftw_real *in,
160 fftw_real *out,
161 fftw_plan_node *p,
162 int istride)
164 switch (p->type) {
165 case FFTW_REAL2HC:
166 HACK_ALIGN_STACK_ODD();
167 (p->nodeu.real2hc.codelet) (in, in, in + n * istride,
168 istride, istride, -istride);
169 break;
171 case FFTW_HC2REAL:
172 HACK_ALIGN_STACK_ODD();
173 (p->nodeu.hc2real.codelet) (in, in + n * istride, in,
174 istride, -istride, istride);
175 break;
177 default:
179 fftw_real *tmp;
181 if (out)
182 tmp = out;
183 else
184 tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
186 rfftw_executor_simple(n, in, tmp, p, istride, 1);
187 rfftw_strided_copy(n, tmp, istride, in);
189 if (!out)
190 fftw_free(tmp);
195 static void rexecutor_many(int n, fftw_real *in,
196 fftw_real *out,
197 fftw_plan_node *p,
198 int istride,
199 int ostride,
200 int howmany, int idist, int odist)
202 switch (p->type) {
203 case FFTW_REAL2HC:
205 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
206 int s;
208 HACK_ALIGN_STACK_ODD();
209 for (s = 0; s < howmany; ++s)
210 codelet(in + s * idist, out + s * odist,
211 out + n * ostride + s * odist,
212 istride, ostride, -ostride);
213 break;
216 case FFTW_HC2REAL:
218 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
219 int s;
221 HACK_ALIGN_STACK_ODD();
222 for (s = 0; s < howmany; ++s)
223 codelet(in + s * idist, in + n * istride + s * idist,
224 out + s * odist,
225 istride, -istride, ostride);
226 break;
229 default:
231 int s;
232 for (s = 0; s < howmany; ++s) {
233 rfftw_executor_simple(n, in + s * idist,
234 out + s * odist,
235 p, istride, ostride);
241 static void rexecutor_many_inplace(int n, fftw_real *in,
242 fftw_real *out,
243 fftw_plan_node *p,
244 int istride,
245 int howmany, int idist)
247 switch (p->type) {
248 case FFTW_REAL2HC:
250 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
251 int s;
253 HACK_ALIGN_STACK_ODD();
254 for (s = 0; s < howmany; ++s)
255 codelet(in + s * idist, in + s * idist,
256 in + n * istride + s * idist,
257 istride, istride, -istride);
259 break;
262 case FFTW_HC2REAL:
264 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
265 int s;
267 HACK_ALIGN_STACK_ODD();
268 for (s = 0; s < howmany; ++s)
269 codelet(in + s * idist, in + n * istride + s * idist,
270 in + s * idist,
271 istride, -istride, istride);
273 break;
276 default:
278 int s;
279 fftw_real *tmp;
280 if (out)
281 tmp = out;
282 else
283 tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
285 for (s = 0; s < howmany; ++s) {
286 rfftw_executor_simple(n,
287 in + s * idist,
288 tmp,
289 p, istride, 1);
290 rfftw_strided_copy(n, tmp, istride, in + s * idist);
293 if (!out)
294 fftw_free(tmp);
299 /* user interface */
300 void rfftw(fftw_plan plan, int howmany, fftw_real *in, int istride,
301 int idist, fftw_real *out, int ostride, int odist)
303 int n = plan->n;
305 if (plan->flags & FFTW_IN_PLACE) {
306 if (howmany == 1) {
307 rexecutor_simple_inplace(n, in, out, plan->root, istride);
308 } else {
309 rexecutor_many_inplace(n, in, out, plan->root, istride, howmany,
310 idist);
312 } else {
313 if (howmany == 1) {
314 rfftw_executor_simple(n, in, out, plan->root, istride, ostride);
315 } else {
316 rexecutor_many(n, in, out, plan->root, istride, ostride,
317 howmany, idist, odist);
322 void rfftw_one(fftw_plan plan, fftw_real *in, fftw_real *out)
324 int n = plan->n;
326 if (plan->flags & FFTW_IN_PLACE)
327 rexecutor_simple_inplace(n, in, out, plan->root, 1);
328 else
329 rfftw_executor_simple(n, in, out, plan->root, 1, 1);