2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 * rexec.c -- execute the fft
31 void rfftw_strided_copy(int n
, fftw_real
*in
, int ostride
,
35 fftw_real r0
, r1
, r2
, r3
;
38 for (; i
< (n
& 3); ++i
) {
39 out
[i
* ostride
] = in
[i
];
41 for (; i
< n
; i
+= 4) {
46 out
[i
* ostride
] = r0
;
47 out
[(i
+ 1) * ostride
] = r1
;
48 out
[(i
+ 2) * ostride
] = r2
;
49 out
[(i
+ 3) * ostride
] = r3
;
53 void rfftw_executor_simple(int n
, fftw_real
*in
,
61 HACK_ALIGN_STACK_ODD();
62 (p
->nodeu
.real2hc
.codelet
) (in
, out
, out
+ n
* ostride
,
63 istride
, ostride
, -ostride
);
67 HACK_ALIGN_STACK_ODD();
68 (p
->nodeu
.hc2real
.codelet
) (in
, in
+ n
* istride
, out
,
69 istride
, -istride
, ostride
);
74 int r
= p
->nodeu
.hc2hc
.size
;
78 * please do resist the temptation of initializing
79 * these variables here. Doing so forces the
80 * compiler to keep a live variable across the
83 fftw_hc2hc_codelet
*codelet
;
86 switch (p
->nodeu
.hc2hc
.dir
) {
87 case FFTW_REAL_TO_COMPLEX
:
88 for (i
= 0; i
< r
; ++i
)
89 rfftw_executor_simple(m
, in
+ i
* istride
,
90 out
+ i
* (m
*ostride
),
91 p
->nodeu
.hc2hc
.recurse
,
92 istride
* r
, ostride
);
94 W
= p
->nodeu
.hc2hc
.tw
->twarray
;
95 codelet
= p
->nodeu
.hc2hc
.codelet
;
96 HACK_ALIGN_STACK_EVEN();
97 codelet(out
, W
, m
* ostride
, m
, ostride
);
99 case FFTW_COMPLEX_TO_REAL
:
100 W
= p
->nodeu
.hc2hc
.tw
->twarray
;
101 codelet
= p
->nodeu
.hc2hc
.codelet
;
102 HACK_ALIGN_STACK_EVEN();
103 codelet(in
, W
, m
* istride
, m
, istride
);
105 for (i
= 0; i
< r
; ++i
)
106 rfftw_executor_simple(m
, in
+ i
* (m
*istride
),
108 p
->nodeu
.hc2hc
.recurse
,
109 istride
, ostride
* r
);
120 int r
= p
->nodeu
.rgeneric
.size
;
123 fftw_rgeneric_codelet
*codelet
= p
->nodeu
.rgeneric
.codelet
;
124 fftw_complex
*W
= p
->nodeu
.rgeneric
.tw
->twarray
;
126 switch (p
->nodeu
.rgeneric
.dir
) {
127 case FFTW_REAL_TO_COMPLEX
:
128 for (i
= 0; i
< r
; ++i
)
129 rfftw_executor_simple(m
, in
+ i
* istride
,
130 out
+ i
* (m
* ostride
),
131 p
->nodeu
.rgeneric
.recurse
,
132 istride
* r
, ostride
);
134 codelet(out
, W
, m
, r
, n
, ostride
);
136 case FFTW_COMPLEX_TO_REAL
:
137 codelet(in
, W
, m
, r
, n
, istride
);
139 for (i
= 0; i
< r
; ++i
)
140 rfftw_executor_simple(m
, in
+ i
* m
* istride
,
142 p
->nodeu
.rgeneric
.recurse
,
143 istride
, ostride
* r
);
154 fftw_die("BUG in rexecutor: invalid plan\n");
159 static void rexecutor_simple_inplace(int n
, fftw_real
*in
,
166 HACK_ALIGN_STACK_ODD();
167 (p
->nodeu
.real2hc
.codelet
) (in
, in
, in
+ n
* istride
,
168 istride
, istride
, -istride
);
172 HACK_ALIGN_STACK_ODD();
173 (p
->nodeu
.hc2real
.codelet
) (in
, in
+ n
* istride
, in
,
174 istride
, -istride
, istride
);
184 tmp
= (fftw_real
*) fftw_malloc(n
* sizeof(fftw_real
));
186 rfftw_executor_simple(n
, in
, tmp
, p
, istride
, 1);
187 rfftw_strided_copy(n
, tmp
, istride
, in
);
195 static void rexecutor_many(int n
, fftw_real
*in
,
200 int howmany
, int idist
, int odist
)
205 fftw_real2hc_codelet
*codelet
= p
->nodeu
.real2hc
.codelet
;
208 HACK_ALIGN_STACK_ODD();
209 for (s
= 0; s
< howmany
; ++s
)
210 codelet(in
+ s
* idist
, out
+ s
* odist
,
211 out
+ n
* ostride
+ s
* odist
,
212 istride
, ostride
, -ostride
);
218 fftw_hc2real_codelet
*codelet
= p
->nodeu
.hc2real
.codelet
;
221 HACK_ALIGN_STACK_ODD();
222 for (s
= 0; s
< howmany
; ++s
)
223 codelet(in
+ s
* idist
, in
+ n
* istride
+ s
* idist
,
225 istride
, -istride
, ostride
);
232 for (s
= 0; s
< howmany
; ++s
) {
233 rfftw_executor_simple(n
, in
+ s
* idist
,
235 p
, istride
, ostride
);
241 static void rexecutor_many_inplace(int n
, fftw_real
*in
,
245 int howmany
, int idist
)
250 fftw_real2hc_codelet
*codelet
= p
->nodeu
.real2hc
.codelet
;
253 HACK_ALIGN_STACK_ODD();
254 for (s
= 0; s
< howmany
; ++s
)
255 codelet(in
+ s
* idist
, in
+ s
* idist
,
256 in
+ n
* istride
+ s
* idist
,
257 istride
, istride
, -istride
);
264 fftw_hc2real_codelet
*codelet
= p
->nodeu
.hc2real
.codelet
;
267 HACK_ALIGN_STACK_ODD();
268 for (s
= 0; s
< howmany
; ++s
)
269 codelet(in
+ s
* idist
, in
+ n
* istride
+ s
* idist
,
271 istride
, -istride
, istride
);
283 tmp
= (fftw_real
*) fftw_malloc(n
* sizeof(fftw_real
));
285 for (s
= 0; s
< howmany
; ++s
) {
286 rfftw_executor_simple(n
,
290 rfftw_strided_copy(n
, tmp
, istride
, in
+ s
* idist
);
300 void rfftw(fftw_plan plan
, int howmany
, fftw_real
*in
, int istride
,
301 int idist
, fftw_real
*out
, int ostride
, int odist
)
305 if (plan
->flags
& FFTW_IN_PLACE
) {
307 rexecutor_simple_inplace(n
, in
, out
, plan
->root
, istride
);
309 rexecutor_many_inplace(n
, in
, out
, plan
->root
, istride
, howmany
,
314 rfftw_executor_simple(n
, in
, out
, plan
->root
, istride
, ostride
);
316 rexecutor_many(n
, in
, out
, plan
->root
, istride
, ostride
,
317 howmany
, idist
, odist
);
322 void rfftw_one(fftw_plan plan
, fftw_real
*in
, fftw_real
*out
)
326 if (plan
->flags
& FFTW_IN_PLACE
)
327 rexecutor_simple_inplace(n
, in
, out
, plan
->root
, 1);
329 rfftw_executor_simple(n
, in
, out
, plan
->root
, 1, 1);