2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * executor.c -- execute the fft
29 const char *fftw_version
= "FFTW V" FFTW_VERSION
" ($Id$)";
32 * This function is called in other files, so we cannot declare
35 void fftw_strided_copy(int n
, fftw_complex
*in
, int ostride
,
39 fftw_real r0
, r1
, i0
, i1
;
40 fftw_real r2
, r3
, i2
, i3
;
44 for (; i
< (n
& 3); ++i
) {
45 out
[i
* ostride
] = in
[i
];
48 for (; i
< n
; i
+= 4) {
57 c_re(out
[i
* ostride
]) = r0
;
58 c_im(out
[i
* ostride
]) = i0
;
59 c_re(out
[(i
+ 1) * ostride
]) = r1
;
60 c_im(out
[(i
+ 1) * ostride
]) = i1
;
61 c_re(out
[(i
+ 2) * ostride
]) = r2
;
62 c_im(out
[(i
+ 2) * ostride
]) = i2
;
63 c_re(out
[(i
+ 3) * ostride
]) = r3
;
64 c_im(out
[(i
+ 3) * ostride
]) = i3
;
70 * Do *not* declare simple executor static--we need to call it
71 * from executor_cilk.cilk...also, preface its name with "fftw_"
72 * to avoid any possible name collisions.
74 void fftw_executor_simple(int n
, const fftw_complex
*in
,
82 HACK_ALIGN_STACK_ODD();
83 (p
->nodeu
.notw
.codelet
)(in
, out
, istride
, ostride
);
88 int r
= p
->nodeu
.twiddle
.size
;
91 fftw_twiddle_codelet
*codelet
;
94 for (i
= 0; i
< r
; ++i
) {
95 fftw_executor_simple(m
, in
+ i
* istride
,
96 out
+ i
* (m
* ostride
),
97 p
->nodeu
.twiddle
.recurse
,
98 istride
* r
, ostride
);
101 codelet
= p
->nodeu
.twiddle
.codelet
;
102 W
= p
->nodeu
.twiddle
.tw
->twarray
;
104 HACK_ALIGN_STACK_EVEN();
105 codelet(out
, W
, m
* ostride
, m
, ostride
);
112 int r
= p
->nodeu
.generic
.size
;
115 fftw_generic_codelet
*codelet
;
118 for (i
= 0; i
< r
; ++i
) {
119 fftw_executor_simple(m
, in
+ i
* istride
,
120 out
+ i
* (m
* ostride
),
121 p
->nodeu
.generic
.recurse
,
122 istride
* r
, ostride
);
125 codelet
= p
->nodeu
.generic
.codelet
;
126 W
= p
->nodeu
.generic
.tw
->twarray
;
127 codelet(out
, W
, m
, r
, n
, ostride
);
134 int r
= p
->nodeu
.rader
.size
;
137 fftw_rader_codelet
*codelet
;
140 for (i
= 0; i
< r
; ++i
) {
141 fftw_executor_simple(m
, in
+ i
* istride
,
142 out
+ i
* (m
* ostride
),
143 p
->nodeu
.rader
.recurse
,
144 istride
* r
, ostride
);
147 codelet
= p
->nodeu
.rader
.codelet
;
148 W
= p
->nodeu
.rader
.tw
->twarray
;
149 codelet(out
, W
, m
, r
, ostride
,
150 p
->nodeu
.rader
.rader_data
);
156 fftw_die("BUG in executor: invalid plan\n");
161 static void executor_simple_inplace(int n
, fftw_complex
*in
,
168 HACK_ALIGN_STACK_ODD();
169 (p
->nodeu
.notw
.codelet
)(in
, in
, istride
, istride
);
179 tmp
= (fftw_complex
*)
180 fftw_malloc(n
* sizeof(fftw_complex
));
182 fftw_executor_simple(n
, in
, tmp
, p
, istride
, 1);
183 fftw_strided_copy(n
, tmp
, istride
, in
);
191 static void executor_many(int n
, const fftw_complex
*in
,
196 int howmany
, int idist
, int odist
)
201 fftw_notw_codelet
*codelet
= p
->nodeu
.notw
.codelet
;
204 HACK_ALIGN_STACK_ODD();
205 for (s
= 0; s
< howmany
; ++s
)
206 codelet(in
+ s
* idist
,
215 for (s
= 0; s
< howmany
; ++s
) {
216 fftw_executor_simple(n
, in
+ s
* idist
,
218 p
, istride
, ostride
);
224 static void executor_many_inplace(int n
, fftw_complex
*in
,
228 int howmany
, int idist
)
233 fftw_notw_codelet
*codelet
= p
->nodeu
.notw
.codelet
;
236 HACK_ALIGN_STACK_ODD();
237 for (s
= 0; s
< howmany
; ++s
)
238 codelet(in
+ s
* idist
,
251 tmp
= (fftw_complex
*)
252 fftw_malloc(n
* sizeof(fftw_complex
));
254 for (s
= 0; s
< howmany
; ++s
) {
255 fftw_executor_simple(n
,
259 fftw_strided_copy(n
, tmp
, istride
, in
+ s
* idist
);
269 void fftw(fftw_plan plan
, int howmany
, fftw_complex
*in
, int istride
,
270 int idist
, fftw_complex
*out
, int ostride
, int odist
)
274 if (plan
->flags
& FFTW_IN_PLACE
) {
276 executor_simple_inplace(n
, in
, out
, plan
->root
, istride
);
278 executor_many_inplace(n
, in
, out
, plan
->root
, istride
, howmany
,
283 fftw_executor_simple(n
, in
, out
, plan
->root
, istride
, ostride
);
285 executor_many(n
, in
, out
, plan
->root
, istride
, ostride
,
286 howmany
, idist
, odist
);
291 void fftw_one(fftw_plan plan
, fftw_complex
*in
, fftw_complex
*out
)
295 if (plan
->flags
& FFTW_IN_PLACE
)
296 executor_simple_inplace(n
, in
, out
, plan
->root
, 1);
298 fftw_executor_simple(n
, in
, out
, plan
->root
, 1, 1);