changed reading hint
[gromacs/adressmacs.git] / src / fftw / executor.c
blob9844c89eac22a4ba34d03c30bc1662d2342fc69b
1 /*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * executor.c -- execute the fft
24 /* $Id$ */
25 #include <fftw-int.h>
26 #include <stdio.h>
27 #include <stdlib.h>
29 const char *fftw_version = "FFTW V" FFTW_VERSION " ($Id$)";
32 * This function is called in other files, so we cannot declare
33 * it static.
35 void fftw_strided_copy(int n, fftw_complex *in, int ostride,
36 fftw_complex *out)
38 int i;
39 fftw_real r0, r1, i0, i1;
40 fftw_real r2, r3, i2, i3;
42 i = 0;
44 for (; i < (n & 3); ++i) {
45 out[i * ostride] = in[i];
48 for (; i < n; i += 4) {
49 r0 = c_re(in[i]);
50 i0 = c_im(in[i]);
51 r1 = c_re(in[i + 1]);
52 i1 = c_im(in[i + 1]);
53 r2 = c_re(in[i + 2]);
54 i2 = c_im(in[i + 2]);
55 r3 = c_re(in[i + 3]);
56 i3 = c_im(in[i + 3]);
57 c_re(out[i * ostride]) = r0;
58 c_im(out[i * ostride]) = i0;
59 c_re(out[(i + 1) * ostride]) = r1;
60 c_im(out[(i + 1) * ostride]) = i1;
61 c_re(out[(i + 2) * ostride]) = r2;
62 c_im(out[(i + 2) * ostride]) = i2;
63 c_re(out[(i + 3) * ostride]) = r3;
64 c_im(out[(i + 3) * ostride]) = i3;
70 * Do *not* declare simple executor static--we need to call it
71 * from executor_cilk.cilk...also, preface its name with "fftw_"
72 * to avoid any possible name collisions.
74 void fftw_executor_simple(int n, const fftw_complex *in,
75 fftw_complex *out,
76 fftw_plan_node *p,
77 int istride,
78 int ostride)
80 switch (p->type) {
81 case FFTW_NOTW:
82 HACK_ALIGN_STACK_ODD();
83 (p->nodeu.notw.codelet)(in, out, istride, ostride);
84 break;
86 case FFTW_TWIDDLE:
88 int r = p->nodeu.twiddle.size;
89 int m = n / r;
90 int i;
91 fftw_twiddle_codelet *codelet;
92 fftw_complex *W;
94 for (i = 0; i < r; ++i) {
95 fftw_executor_simple(m, in + i * istride,
96 out + i * (m * ostride),
97 p->nodeu.twiddle.recurse,
98 istride * r, ostride);
101 codelet = p->nodeu.twiddle.codelet;
102 W = p->nodeu.twiddle.tw->twarray;
104 HACK_ALIGN_STACK_EVEN();
105 codelet(out, W, m * ostride, m, ostride);
107 break;
110 case FFTW_GENERIC:
112 int r = p->nodeu.generic.size;
113 int m = n / r;
114 int i;
115 fftw_generic_codelet *codelet;
116 fftw_complex *W;
118 for (i = 0; i < r; ++i) {
119 fftw_executor_simple(m, in + i * istride,
120 out + i * (m * ostride),
121 p->nodeu.generic.recurse,
122 istride * r, ostride);
125 codelet = p->nodeu.generic.codelet;
126 W = p->nodeu.generic.tw->twarray;
127 codelet(out, W, m, r, n, ostride);
129 break;
132 case FFTW_RADER:
134 int r = p->nodeu.rader.size;
135 int m = n / r;
136 int i;
137 fftw_rader_codelet *codelet;
138 fftw_complex *W;
140 for (i = 0; i < r; ++i) {
141 fftw_executor_simple(m, in + i * istride,
142 out + i * (m * ostride),
143 p->nodeu.rader.recurse,
144 istride * r, ostride);
147 codelet = p->nodeu.rader.codelet;
148 W = p->nodeu.rader.tw->twarray;
149 codelet(out, W, m, r, ostride,
150 p->nodeu.rader.rader_data);
152 break;
155 default:
156 fftw_die("BUG in executor: invalid plan\n");
157 break;
161 static void executor_simple_inplace(int n, fftw_complex *in,
162 fftw_complex *out,
163 fftw_plan_node *p,
164 int istride)
166 switch (p->type) {
167 case FFTW_NOTW:
168 HACK_ALIGN_STACK_ODD();
169 (p->nodeu.notw.codelet)(in, in, istride, istride);
170 break;
172 default:
174 fftw_complex *tmp;
176 if (out)
177 tmp = out;
178 else
179 tmp = (fftw_complex *)
180 fftw_malloc(n * sizeof(fftw_complex));
182 fftw_executor_simple(n, in, tmp, p, istride, 1);
183 fftw_strided_copy(n, tmp, istride, in);
185 if (!out)
186 fftw_free(tmp);
191 static void executor_many(int n, const fftw_complex *in,
192 fftw_complex *out,
193 fftw_plan_node *p,
194 int istride,
195 int ostride,
196 int howmany, int idist, int odist)
198 switch (p->type) {
199 case FFTW_NOTW:
201 fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
202 int s;
204 HACK_ALIGN_STACK_ODD();
205 for (s = 0; s < howmany; ++s)
206 codelet(in + s * idist,
207 out + s * odist,
208 istride, ostride);
209 break;
212 default:
214 int s;
215 for (s = 0; s < howmany; ++s) {
216 fftw_executor_simple(n, in + s * idist,
217 out + s * odist,
218 p, istride, ostride);
224 static void executor_many_inplace(int n, fftw_complex *in,
225 fftw_complex *out,
226 fftw_plan_node *p,
227 int istride,
228 int howmany, int idist)
230 switch (p->type) {
231 case FFTW_NOTW:
233 fftw_notw_codelet *codelet = p->nodeu.notw.codelet;
234 int s;
236 HACK_ALIGN_STACK_ODD();
237 for (s = 0; s < howmany; ++s)
238 codelet(in + s * idist,
239 in + s * idist,
240 istride, istride);
241 break;
244 default:
246 int s;
247 fftw_complex *tmp;
248 if (out)
249 tmp = out;
250 else
251 tmp = (fftw_complex *)
252 fftw_malloc(n * sizeof(fftw_complex));
254 for (s = 0; s < howmany; ++s) {
255 fftw_executor_simple(n,
256 in + s * idist,
257 tmp,
258 p, istride, 1);
259 fftw_strided_copy(n, tmp, istride, in + s * idist);
262 if (!out)
263 fftw_free(tmp);
268 /* user interface */
269 void fftw(fftw_plan plan, int howmany, fftw_complex *in, int istride,
270 int idist, fftw_complex *out, int ostride, int odist)
272 int n = plan->n;
274 if (plan->flags & FFTW_IN_PLACE) {
275 if (howmany == 1) {
276 executor_simple_inplace(n, in, out, plan->root, istride);
277 } else {
278 executor_many_inplace(n, in, out, plan->root, istride, howmany,
279 idist);
281 } else {
282 if (howmany == 1) {
283 fftw_executor_simple(n, in, out, plan->root, istride, ostride);
284 } else {
285 executor_many(n, in, out, plan->root, istride, ostride,
286 howmany, idist, odist);
291 void fftw_one(fftw_plan plan, fftw_complex *in, fftw_complex *out)
293 int n = plan->n;
295 if (plan->flags & FFTW_IN_PLACE)
296 executor_simple_inplace(n, in, out, plan->root, 1);
297 else
298 fftw_executor_simple(n, in, out, plan->root, 1, 1);