changed reading hint
[gromacs/adressmacs.git] / src / fftw / rexec2.c
blob67ad52a996a2320f4accf641081e8dbfd37b0aca
1 /*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 /* $Id$ */
22 * rexec2.c -- alternate rfftw executor, specifically designed for the
23 * multidimensional transforms. Given an extra work array,
24 * expects complex data in FFTW_COMPLEX format, and does
25 * not destroy the input in hc2real transforms.
28 #include <fftw-int.h>
29 #include <rfftw.h>
31 /* copies halfcomplex array in (contiguous) to fftw_complex array out. */
32 void rfftw_hc2c(int n, fftw_real *in, fftw_complex *out, int ostride)
34 int n2 = (n + 1) / 2;
35 int i = 1;
37 c_re(out[0]) = in[0];
38 c_im(out[0]) = 0.0;
39 for (; i < ((n2 - 1) & 3) + 1; ++i) {
40 c_re(out[i * ostride]) = in[i];
41 c_im(out[i * ostride]) = in[n - i];
43 for (; i < n2; i += 4) {
44 fftw_real r0, r1, r2, r3;
45 fftw_real i0, i1, i2, i3;
46 r0 = in[i];
47 r1 = in[i + 1];
48 r2 = in[i + 2];
49 r3 = in[i + 3];
50 i3 = in[n - (i + 3)];
51 i2 = in[n - (i + 2)];
52 i1 = in[n - (i + 1)];
53 i0 = in[n - i];
54 c_re(out[i * ostride]) = r0;
55 c_im(out[i * ostride]) = i0;
56 c_re(out[(i + 1) * ostride]) = r1;
57 c_im(out[(i + 1) * ostride]) = i1;
58 c_re(out[(i + 2) * ostride]) = r2;
59 c_im(out[(i + 2) * ostride]) = i2;
60 c_re(out[(i + 3) * ostride]) = r3;
61 c_im(out[(i + 3) * ostride]) = i3;
63 if ((n & 1) == 0) { /* store the Nyquist frequency */
64 c_re(out[n2 * ostride]) = in[n2];
65 c_im(out[n2 * ostride]) = 0.0;
69 /* reverse of rfftw_hc2c */
70 void rfftw_c2hc(int n, fftw_complex *in, int istride, fftw_real *out)
72 int n2 = (n + 1) / 2;
73 int i = 1;
75 out[0] = c_re(in[0]);
76 for (; i < ((n2 - 1) & 3) + 1; ++i) {
77 out[i] = c_re(in[i * istride]);
78 out[n - i] = c_im(in[i * istride]);
80 for (; i < n2; i += 4) {
81 fftw_real r0, r1, r2, r3;
82 fftw_real i0, i1, i2, i3;
83 r0 = c_re(in[i * istride]);
84 i0 = c_im(in[i * istride]);
85 r1 = c_re(in[(i + 1) * istride]);
86 i1 = c_im(in[(i + 1) * istride]);
87 r2 = c_re(in[(i + 2) * istride]);
88 i2 = c_im(in[(i + 2) * istride]);
89 r3 = c_re(in[(i + 3) * istride]);
90 i3 = c_im(in[(i + 3) * istride]);
91 out[i] = r0;
92 out[i + 1] = r1;
93 out[i + 2] = r2;
94 out[i + 3] = r3;
95 out[n - (i + 3)] = i3;
96 out[n - (i + 2)] = i2;
97 out[n - (i + 1)] = i1;
98 out[n - i] = i0;
100 if ((n & 1) == 0) /* store the Nyquist frequency */
101 out[n2] = c_re(in[n2 * istride]);
105 * in: array of n real numbers (* howmany).
106 * out: array of n/2 + 1 complex numbers (* howmany).
107 * work: array of n real numbers (stride 1)
109 * We must have out != in if dist < stride.
111 void rfftw_real2c_aux(fftw_plan plan, int howmany,
112 fftw_real *in, int istride, int idist,
113 fftw_complex *out, int ostride, int odist,
114 fftw_real *work)
116 fftw_plan_node *p = plan->root;
117 int j;
119 switch (p->type) {
120 case FFTW_REAL2HC:
122 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
123 int n = plan->n;
124 int n2 = (n & 1) ? 0 : (n + 1) / 2;
126 HACK_ALIGN_STACK_ODD();
127 for (j = 0; j < howmany; ++j, out += odist) {
128 codelet(in + j * idist,
129 &c_re(*out),
130 &c_im(*out),
131 istride, ostride * 2, ostride * 2);
132 c_im(out[0]) = 0.0;
133 c_im(out[n2 * ostride]) = 0.0;
135 break;
138 default:
140 int n = plan->n;
142 for (j = 0; j < howmany; ++j, in += idist, out += odist) {
143 rfftw_executor_simple(n, in, work, p, istride, 1);
144 rfftw_hc2c(n, work, out, ostride);
146 break;
152 * in: array of n/2 + 1 complex numbers (* howmany).
153 * out: array of n real numbers (* howmany).
154 * work: array of n real numbers (stride 1)
156 * We must have out != in if dist < stride.
158 void rfftw_c2real_aux(fftw_plan plan, int howmany,
159 fftw_complex *in, int istride, int idist,
160 fftw_real *out, int ostride, int odist,
161 fftw_real *work)
163 fftw_plan_node *p = plan->root;
165 switch (p->type) {
166 case FFTW_HC2REAL:
168 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
169 int j;
171 HACK_ALIGN_STACK_ODD();
172 for (j = 0; j < howmany; ++j)
173 codelet(&c_re(*(in + j * idist)),
174 &c_im(*(in + j * idist)),
175 out + j * odist,
176 istride * 2, istride * 2, ostride);
177 break;
180 default:
182 int j, n = plan->n;
184 for (j = 0; j < howmany; ++j, in += idist, out += odist) {
185 rfftw_c2hc(n, in, istride, work);
186 rfftw_executor_simple(n, work, out, p, 1, ostride);
188 break;
194 * The following two functions are similar to the ones above, BUT:
196 * work must contain n * howmany elements (stride 1)
198 * Can handle out == in for any stride/dist.
200 void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
201 fftw_real *in, int istride, int idist,
202 fftw_complex *out, int ostride, int odist,
203 fftw_real *work)
205 int n = plan->n;
206 int j;
208 rfftw(plan, howmany, in, istride, idist, work, 1, n);
210 /* copy from work to out: */
211 for (j = 0; j < howmany; ++j, work += n, out += odist)
212 rfftw_hc2c(n, work, out, ostride);
215 void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
216 fftw_complex *in, int istride, int idist,
217 fftw_real *out, int ostride, int odist,
218 fftw_real *work)
220 int n = plan->n;
221 int j;
223 /* copy from in to work: */
224 for (j = 0; j < howmany; ++j, in += idist)
225 rfftw_c2hc(n, in, istride, work + j * n);
227 rfftw(plan, howmany, work, 1, n, out, ostride, odist);