changed reading hint
[gromacs/adressmacs.git] / src / fftw / test_transpose_mpi.c
blob68a796946d9046e112095374ce1d4186eeda9f15
1 /*
2 * Copyright (c) 1997-1999 Massachusetts Institute of Technology
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 #include <stdlib.h>
21 #include <stdio.h>
22 #include <time.h>
23 #include <mpi.h>
25 #include <fftw_mpi.h>
27 #define NX 14
28 #define NY 10
30 void perform_transpose_async(transpose_mpi_plan tp, int el_size,
31 TRANSPOSE_EL_TYPE *data,
32 int local_nx, int local_x_start, int ny)
34 int i,j,el;
35 TRANSPOSE_EL_TYPE *send_buf;
36 int step;
38 for (i = 0; i < local_nx * ny * el_size; ++i)
39 data[i] = 0.0;
41 send_buf = transpose_allocate_send_buf(tp,el_size);
43 transpose_in_place_local(tp, el_size, data, BEFORE_TRANSPOSE);
45 for (step = 0; step < tp->num_steps; ++step) {
46 int block_y_start, block_ny;
48 /* initialize block for step here (overlap initialization
49 and communication): */
50 transpose_get_send_block(tp, step,
51 &block_y_start, &block_ny);
52 for (i = 0; i < local_nx; ++i)
53 for (j = block_y_start; j < block_y_start + block_ny; ++j)
54 for (el = 0; el < el_size; ++el)
55 data[el+el_size*(i+j*local_nx)]
56 = ny * (i + local_x_start) + j;
58 transpose_finish_exchange_step(tp, step - 1);
59 transpose_start_exchange_step(tp,el_size,data,send_buf,step,
60 TRANSPOSE_ASYNC);
63 transpose_finish_exchange_step(tp, step - 1);
65 transpose_in_place_local(tp, el_size, data, AFTER_TRANSPOSE);
67 if (send_buf)
68 fftw_free(send_buf);
71 void test_transpose_plan(int nx, int ny, int el_size, int verbose,
72 transpose_mpi_plan tp,
73 TRANSPOSE_EL_TYPE *data,
74 TRANSPOSE_EL_TYPE *work,
75 int async)
77 int i,j,el;
78 int my_pe,n_pes;
79 int local_x_start,local_nx,local_y_start,local_ny, xbsize,ybsize;
81 MPI_Comm_rank(MPI_COMM_WORLD,&my_pe);
82 MPI_Comm_size(MPI_COMM_WORLD,&n_pes);
84 transpose_mpi_get_local_size(nx,my_pe,n_pes,
85 &local_nx,&local_x_start);
86 transpose_mpi_get_local_size(ny,my_pe,n_pes,
87 &local_ny,&local_y_start);
89 if (async) {
90 perform_transpose_async(tp,el_size,data,local_nx,local_x_start,ny);
92 else {
93 for (i = 0; i < local_nx; ++i)
94 for (j = 0; j < ny; ++j)
95 for (el = 0; el < el_size; ++el)
96 data[el+el_size*(i*ny+j)]
97 = ny * (i + local_x_start) + j;
99 transpose_mpi(tp,el_size,data,work);
102 /* print out result matrix if it is small: */
103 if (verbose && local_ny < 16 && nx < 16) {
104 int pe;
106 for (pe = 0; pe < n_pes; ++pe) {
107 MPI_Barrier(MPI_COMM_WORLD);
108 if (pe == my_pe) {
109 printf("\nprocess %d result: \n", my_pe);
110 for (j = 0; j < nx; ++j) {
111 for (i = 0; i < local_ny; ++i)
112 printf("%4.0f",data[el_size*(i*nx + j)]);
113 printf("\n");
117 MPI_Barrier(MPI_COMM_WORLD);
120 if (verbose)
121 printf("Checking result on process %d...\n",my_pe);
123 for (i = 0; i < local_ny; ++i)
124 for (j = 0; j < nx; ++j)
125 for (el = 0; el < el_size; ++el) {
126 if (data[el+el_size*(i*nx + j)] !=
127 (TRANSPOSE_EL_TYPE) (j*ny + i+local_y_start)) {
128 fprintf(stderr,
129 "Error with x=%d, y=%d on process %d!\n"
130 " -- is %g rather than %g\n",
131 j,i+local_y_start,my_pe,
132 data[el+el_size*(i*nx + j)],
133 (TRANSPOSE_EL_TYPE) (j*ny + i+local_y_start));
134 fftw_die("incorrect result.\n");
138 if (verbose)
139 printf("Process %d okay!\n",my_pe);
142 void test_transpose(int nx, int ny, int el_size, int verbose)
144 int my_pe,n_pes;
145 int i,j;
146 int local_x_start,local_nx,local_y_start,local_ny, xbsize,ybsize;
147 int total_local_size;
148 TRANSPOSE_EL_TYPE *data, *work;
149 transpose_mpi_plan tp;
150 int el;
152 MPI_Comm_rank(MPI_COMM_WORLD,&my_pe);
153 MPI_Comm_size(MPI_COMM_WORLD,&n_pes);
155 if (my_pe == 0)
156 printf("\nTesting transpose of %dx%d matrix (%d elements)...\n",
157 nx,ny,el_size);
159 transpose_mpi_get_local_size(nx,my_pe,n_pes,
160 &local_nx,&local_x_start);
161 transpose_mpi_get_local_size(ny,my_pe,n_pes,
162 &local_ny,&local_y_start);
163 total_local_size = transpose_mpi_get_local_storage_size(nx,ny,
164 my_pe,n_pes);
166 if (total_local_size == 0)
167 data = work = 0;
168 else {
169 data = (TRANSPOSE_EL_TYPE *)
170 fftw_malloc(total_local_size*el_size*sizeof(TRANSPOSE_EL_TYPE));
171 work = (TRANSPOSE_EL_TYPE *)
172 fftw_malloc(total_local_size*el_size*sizeof(TRANSPOSE_EL_TYPE));
175 tp = transpose_mpi_create_plan(nx,ny,MPI_COMM_WORLD);
177 /* output plan data, one process at a time: */
178 if (verbose)
179 for (j = 0; j < n_pes; ++j) {
180 if (j == my_pe) {
181 printf("Plan for process %d:\n",j);
182 printf(" nx = %d, ny = %d\n",tp->nx,tp->ny);
183 printf(" local_nx = %d, local_ny = %d\n",
184 tp->local_nx,tp->local_ny);
186 if (local_nx > 0 || local_ny > 0) {
187 printf(" send/recv block_size = %d/%d, "
188 "num_steps = %d\n",
189 tp->send_block_size, tp->recv_block_size,
190 tp->num_steps);
191 for (i = 0; i < tp->num_steps; ++i)
192 printf(" exchange[%d]: block = %d, dest_pe = %d, "
193 "send_size = %d, recv_size = %d\n",
194 i,tp->exchange[i].block_num,
195 tp->exchange[i].dest_pe,
196 tp->exchange[i].send_size,
197 tp->exchange[i].recv_size);
198 printf(" perm_block_size = %d, num_perm_blocks = %d\n",
199 tp->perm_block_size, tp->num_perm_blocks);
200 #if 0
201 for (i = 0; i < tp->num_perm_blocks; ++i)
202 printf(" perm_block_dest[%d] = %d\n",
203 i,tp->perm_block_dest[i]);
204 #endif
207 printf("\n");
209 MPI_Barrier(MPI_COMM_WORLD);
212 /* testing blocking, in-place transpose */
213 if (verbose && my_pe == 0)
214 printf("\nTesting blocking in-place transpose:\n");
215 test_transpose_plan(nx,ny,el_size,verbose,tp,data,NULL,0);
216 MPI_Barrier(MPI_COMM_WORLD);
218 /* Initialize data and check a second time, to make sure that
219 we can reuse plans:*/
220 if (verbose && my_pe == 0) printf("\nTesting again:\n");
221 test_transpose_plan(nx,ny,el_size,verbose,tp,data,NULL,0);
223 /* testing blocking, out-of-place transpose */
224 if (verbose && my_pe == 0)
225 printf("\nTesting blocking out-of-place transpose:\n");
226 test_transpose_plan(nx,ny,el_size,verbose,tp,data,work,0);
227 MPI_Barrier(MPI_COMM_WORLD);
229 /* Initialize data and check a second time, to make sure that
230 we can reuse plans:*/
231 if (verbose && my_pe == 0) printf("\nTesting again:\n");
232 test_transpose_plan(nx,ny,el_size,verbose,tp,data,work,0);
234 /* Test non-blocking (asynchronous) transpose calls: */
235 if (verbose && my_pe == 0) printf("\nTesting non-blocking transpose:\n");
237 test_transpose_plan(nx,ny,el_size,verbose,tp,data,NULL,1);
238 MPI_Barrier(MPI_COMM_WORLD);
240 /* Initialize data and check a second time, to make sure that
241 we can reuse plans:*/
242 if (verbose && my_pe == 0) printf("\nTesting again:\n");
243 test_transpose_plan(nx,ny,el_size,verbose,tp,data,NULL,1);
245 transpose_mpi_destroy_plan(tp);
247 if (data)
248 fftw_free(data);
249 if (work)
250 fftw_free(work);
252 if (verbose)
253 printf("Process %d okay!\n",my_pe);
255 fftw_check_memory_leaks();
257 if (my_pe == 0)
258 printf("okay!\n");
261 int main(int argc, char **argv)
263 int my_pe;
264 int nx = -1,ny = -1;
265 int el_size = 1;
266 int seed;
268 MPI_Init(&argc,&argv);
270 fftw_die_hook = fftw_mpi_die;
272 seed = time(NULL);
273 MPI_Bcast(&seed, 1, MPI_INT, 0, MPI_COMM_WORLD);
274 srand(seed);
276 MPI_Comm_rank(MPI_COMM_WORLD,&my_pe);
278 /* only process 0 is guaranteed to have the correct args */
279 if (my_pe == 0) {
280 if (argc >= 3) {
281 nx = atoi(argv[1]);
282 ny = atoi(argv[2]);
285 if (argc >= 4)
286 el_size = atoi(argv[3]);
288 /* broadcast args to other processes: */
289 MPI_Bcast(&nx, 1, MPI_INT, 0, MPI_COMM_WORLD);
290 MPI_Bcast(&ny, 1, MPI_INT, 0, MPI_COMM_WORLD);
291 MPI_Bcast(&el_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
293 if (nx > 0 && ny > 0 && el_size > 0)
294 test_transpose(nx,ny,el_size,1);
295 else {
296 if (my_pe == 0)
297 printf("Doing random tests (does not terminate)...\n");
298 while (1) {
299 test_transpose(rand() % 10 + 1, rand() % 10 + 1,
300 rand() % 10 + 1, 0);
301 test_transpose(rand() % 100 + 1, rand() % 100 + 1,
302 rand() % 10 + 1, 0);
303 test_transpose(rand() % 1000 + 1, rand() % 1000 + 1,
304 rand() % 10 + 1, 0);
308 MPI_Finalize();
310 return 0;