changed reading hint
[gromacs/adressmacs.git] / src / fftw / fftwnd_mpi.c
blobdbcedb0b57449630c5db90a3b8738dcc8be3e2c0
1 /*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include <stdlib.h>
22 #include <mpi.h>
24 #include <fftw_mpi.h>
26 /***************************** Plan Creation ****************************/
28 fftwnd_mpi_plan fftwnd_mpi_create_plan(MPI_Comm comm,
29 int rank, const int *n,
30 fftw_direction dir,
31 int flags)
33 fftwnd_mpi_plan p;
35 if (rank < 2)
36 return 0;
38 p = (fftwnd_mpi_plan) fftw_malloc(sizeof(fftwnd_mpi_plan_data));
39 p->p_fft_x = 0;
40 p->p_fft = 0;
41 p->p_transpose = 0;
42 p->p_transpose_inv = 0;
43 p->work = 0;
45 p->p_fft_x = fftw_create_plan(n[0], dir, flags | FFTW_IN_PLACE);
47 p->p_fft = fftwnd_create_plan(rank - 1, n + 1, dir, flags | FFTW_IN_PLACE);
48 if (!p->p_fft)
49 fftwnd_mpi_destroy_plan(p);
51 p->p_transpose = transpose_mpi_create_plan(n[0], n[1], comm);
52 if (!p->p_transpose)
53 fftwnd_mpi_destroy_plan(p);
55 p->p_transpose_inv = transpose_mpi_create_plan(n[1], n[0], comm);
56 if (!p->p_transpose_inv)
57 fftwnd_mpi_destroy_plan(p);
59 if (n[0] > p->p_fft->nwork)
60 p->work = (fftw_complex *) fftw_malloc(n[0] * sizeof(fftw_complex));
62 return p;
65 fftwnd_mpi_plan fftw2d_mpi_create_plan(MPI_Comm comm,
66 int nx, int ny,
67 fftw_direction dir, int flags)
69 int n[2];
71 n[0] = nx;
72 n[1] = ny;
74 return fftwnd_mpi_create_plan(comm, 2, n, dir, flags);
77 fftwnd_mpi_plan fftw3d_mpi_create_plan(MPI_Comm comm,
78 int nx, int ny, int nz,
79 fftw_direction dir, int flags)
81 int n[3];
83 n[0] = nx;
84 n[1] = ny;
85 n[2] = nz;
87 return fftwnd_mpi_create_plan(comm, 3, n, dir, flags);
90 /********************** Plan Destruction ************************/
92 void fftwnd_mpi_destroy_plan(fftwnd_mpi_plan p)
94 if (p) {
95 if (p->p_fft_x)
96 fftw_destroy_plan(p->p_fft_x);
97 if (p->p_fft)
98 fftwnd_destroy_plan(p->p_fft);
99 if (p->p_transpose)
100 transpose_mpi_destroy_plan(p->p_transpose);
101 if (p->p_transpose_inv)
102 transpose_mpi_destroy_plan(p->p_transpose_inv);
103 if (p->work)
104 fftw_free(p->work);
105 fftw_free(p);
109 void fftw_mpi_die(const char *error_string)
111 int my_pe;
113 MPI_Comm_rank(MPI_COMM_WORLD, &my_pe);
115 fprintf(stderr, "fftw process %d: %s", my_pe, error_string);
117 MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
120 /********************* Getting Local Size ***********************/
122 void fftwnd_mpi_local_sizes(fftwnd_mpi_plan p,
123 int *local_nx,
124 int *local_x_start,
125 int *local_ny_after_transpose,
126 int *local_y_start_after_transpose,
127 int *total_local_size)
129 if (p) {
130 transpose_mpi_get_local_size(p->p_transpose->nx,
131 p->p_transpose->my_pe,
132 p->p_transpose->n_pes,
133 local_nx,
134 local_x_start);
135 transpose_mpi_get_local_size(p->p_transpose->ny,
136 p->p_transpose->my_pe,
137 p->p_transpose->n_pes,
138 local_ny_after_transpose,
139 local_y_start_after_transpose);
140 *total_local_size =
141 transpose_mpi_get_local_storage_size(p->p_transpose->nx,
142 p->p_transpose->ny,
143 p->p_transpose->my_pe,
144 p->p_transpose->n_pes);
146 *total_local_size *= p->p_fft->n_after[0];
150 /******************** Computing the Transform *******************/
152 void fftwnd_mpi(fftwnd_mpi_plan p,
153 int n_fields, fftw_complex *local_data, fftw_complex *work,
154 fftwnd_mpi_output_order output_order)
156 int el_size = (sizeof(fftw_complex) / sizeof(TRANSPOSE_EL_TYPE))
157 * n_fields * p->p_fft->n_after[0];
159 if (n_fields <= 0)
160 return;
162 /* First, transform dimensions after the first, which are
163 local to this process: */
166 int local_nx = p->p_transpose->local_nx;
167 int n_after_x = p->p_fft->n[0] * p->p_fft->n_after[0];
169 if (n_fields > 1) {
170 fftwnd_plan p_fft = p->p_fft;
171 int fft_iter;
172 for (fft_iter = 0; fft_iter < local_nx; ++fft_iter)
173 fftwnd(p_fft, n_fields,
174 local_data + (n_after_x * n_fields) * fft_iter,
175 n_fields, 1,
176 NULL, 0, 0);
178 else
179 fftwnd(p->p_fft, local_nx,
180 local_data, 1, n_after_x, NULL, 0, 0);
183 /* Second, transpose the first dimension with the second dimension
184 to bring the x dimension local to this process: */
186 transpose_mpi(p->p_transpose, el_size,
187 (TRANSPOSE_EL_TYPE *) local_data,
188 (TRANSPOSE_EL_TYPE *) work);
190 /* Third, transform the x dimension, which is now local and contiguous: */
192 n_fields *= p->p_fft->n_after[0]; /* dimensions after y
193 no longer need be considered
194 separately from n_fields */
196 int local_ny = p->p_transpose->local_ny;
197 int nx = p->p_fft_x->n;
198 fftw_complex *work_1d = p->work ? p->work : p->p_fft->work;
200 if (n_fields > 1) {
201 fftw_plan p_fft_x = p->p_fft_x;
202 int fft_iter;
203 for (fft_iter = 0; fft_iter < local_ny; ++fft_iter)
204 fftw(p_fft_x, n_fields,
205 local_data + (nx * n_fields) * fft_iter, n_fields, 1,
206 work_1d, 1, 0);
208 else
209 fftw(p->p_fft_x, local_ny,
210 local_data, 1, nx, work_1d, 1, 0);
213 /* transpose back, if desired: */
214 if (output_order == FFTW_NORMAL_ORDER)
215 transpose_mpi(p->p_transpose_inv, el_size,
216 (TRANSPOSE_EL_TYPE *) local_data,
217 (TRANSPOSE_EL_TYPE *) work);