2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26 /***************************** Plan Creation ****************************/
28 fftwnd_mpi_plan
fftwnd_mpi_create_plan(MPI_Comm comm
,
29 int rank
, const int *n
,
38 p
= (fftwnd_mpi_plan
) fftw_malloc(sizeof(fftwnd_mpi_plan_data
));
42 p
->p_transpose_inv
= 0;
45 p
->p_fft_x
= fftw_create_plan(n
[0], dir
, flags
| FFTW_IN_PLACE
);
47 p
->p_fft
= fftwnd_create_plan(rank
- 1, n
+ 1, dir
, flags
| FFTW_IN_PLACE
);
49 fftwnd_mpi_destroy_plan(p
);
51 p
->p_transpose
= transpose_mpi_create_plan(n
[0], n
[1], comm
);
53 fftwnd_mpi_destroy_plan(p
);
55 p
->p_transpose_inv
= transpose_mpi_create_plan(n
[1], n
[0], comm
);
56 if (!p
->p_transpose_inv
)
57 fftwnd_mpi_destroy_plan(p
);
59 if (n
[0] > p
->p_fft
->nwork
)
60 p
->work
= (fftw_complex
*) fftw_malloc(n
[0] * sizeof(fftw_complex
));
65 fftwnd_mpi_plan
fftw2d_mpi_create_plan(MPI_Comm comm
,
67 fftw_direction dir
, int flags
)
74 return fftwnd_mpi_create_plan(comm
, 2, n
, dir
, flags
);
77 fftwnd_mpi_plan
fftw3d_mpi_create_plan(MPI_Comm comm
,
78 int nx
, int ny
, int nz
,
79 fftw_direction dir
, int flags
)
87 return fftwnd_mpi_create_plan(comm
, 3, n
, dir
, flags
);
90 /********************** Plan Destruction ************************/
92 void fftwnd_mpi_destroy_plan(fftwnd_mpi_plan p
)
96 fftw_destroy_plan(p
->p_fft_x
);
98 fftwnd_destroy_plan(p
->p_fft
);
100 transpose_mpi_destroy_plan(p
->p_transpose
);
101 if (p
->p_transpose_inv
)
102 transpose_mpi_destroy_plan(p
->p_transpose_inv
);
109 void fftw_mpi_die(const char *error_string
)
113 MPI_Comm_rank(MPI_COMM_WORLD
, &my_pe
);
115 fprintf(stderr
, "fftw process %d: %s", my_pe
, error_string
);
117 MPI_Abort(MPI_COMM_WORLD
, EXIT_FAILURE
);
120 /********************* Getting Local Size ***********************/
122 void fftwnd_mpi_local_sizes(fftwnd_mpi_plan p
,
125 int *local_ny_after_transpose
,
126 int *local_y_start_after_transpose
,
127 int *total_local_size
)
130 transpose_mpi_get_local_size(p
->p_transpose
->nx
,
131 p
->p_transpose
->my_pe
,
132 p
->p_transpose
->n_pes
,
135 transpose_mpi_get_local_size(p
->p_transpose
->ny
,
136 p
->p_transpose
->my_pe
,
137 p
->p_transpose
->n_pes
,
138 local_ny_after_transpose
,
139 local_y_start_after_transpose
);
141 transpose_mpi_get_local_storage_size(p
->p_transpose
->nx
,
143 p
->p_transpose
->my_pe
,
144 p
->p_transpose
->n_pes
);
146 *total_local_size
*= p
->p_fft
->n_after
[0];
150 /******************** Computing the Transform *******************/
152 void fftwnd_mpi(fftwnd_mpi_plan p
,
153 int n_fields
, fftw_complex
*local_data
, fftw_complex
*work
,
154 fftwnd_mpi_output_order output_order
)
156 int el_size
= (sizeof(fftw_complex
) / sizeof(TRANSPOSE_EL_TYPE
))
157 * n_fields
* p
->p_fft
->n_after
[0];
162 /* First, transform dimensions after the first, which are
163 local to this process: */
166 int local_nx
= p
->p_transpose
->local_nx
;
167 int n_after_x
= p
->p_fft
->n
[0] * p
->p_fft
->n_after
[0];
170 fftwnd_plan p_fft
= p
->p_fft
;
172 for (fft_iter
= 0; fft_iter
< local_nx
; ++fft_iter
)
173 fftwnd(p_fft
, n_fields
,
174 local_data
+ (n_after_x
* n_fields
) * fft_iter
,
179 fftwnd(p
->p_fft
, local_nx
,
180 local_data
, 1, n_after_x
, NULL
, 0, 0);
183 /* Second, transpose the first dimension with the second dimension
184 to bring the x dimension local to this process: */
186 transpose_mpi(p
->p_transpose
, el_size
,
187 (TRANSPOSE_EL_TYPE
*) local_data
,
188 (TRANSPOSE_EL_TYPE
*) work
);
190 /* Third, transform the x dimension, which is now local and contiguous: */
192 n_fields
*= p
->p_fft
->n_after
[0]; /* dimensions after y
193 no longer need be considered
194 separately from n_fields */
196 int local_ny
= p
->p_transpose
->local_ny
;
197 int nx
= p
->p_fft_x
->n
;
198 fftw_complex
*work_1d
= p
->work
? p
->work
: p
->p_fft
->work
;
201 fftw_plan p_fft_x
= p
->p_fft_x
;
203 for (fft_iter
= 0; fft_iter
< local_ny
; ++fft_iter
)
204 fftw(p_fft_x
, n_fields
,
205 local_data
+ (nx
* n_fields
) * fft_iter
, n_fields
, 1,
209 fftw(p
->p_fft_x
, local_ny
,
210 local_data
, 1, nx
, work_1d
, 1, 0);
213 /* transpose back, if desired: */
214 if (output_order
== FFTW_NORMAL_ORDER
)
215 transpose_mpi(p
->p_transpose_inv
, el_size
,
216 (TRANSPOSE_EL_TYPE
*) local_data
,
217 (TRANSPOSE_EL_TYPE
*) work
);