1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
11 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
12 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
13 * Copyright (c) 2001-2004, The GROMACS development team,
14 * check out http://www.gromacs.org for more information.
16 * This program is free software; you can redistribute it and/or
17 * modify it under the terms of the GNU General Public License
18 * as published by the Free Software Foundation; either version 2
19 * of the License, or (at your option) any later version.
21 * If you want to redistribute modifications, please consider that
22 * scientific software is very special. Version control is crucial -
23 * bugs must be traceable. We will be happy to consider code for
24 * inclusion in the official distribution, but derived work must not
25 * be called official GROMACS. Details are found in the README & COPYING
26 * files - if they are missing, get the official version at www.gromacs.org.
28 * To help us fund GROMACS development, we humbly ask that you cite
29 * the papers on the package - you can find them in the top README file.
31 * For more info, check our website at http://www.gromacs.org
34 * GROwing Monsters And Cloning Shrimps
36 /* This file is completely threadsafe - keep it that way! */
52 #include "gmx_parallel_3dfft.h"
57 static void print_parfft(FILE *fp
,char *title
,t_parfft
*pfft
)
59 fprintf(fp
,"PARALLEL FFT DATA:\n"
60 " local_nx: %3d local_x_start: %3d\n"
61 " local_ny_after_transpose: %3d local_y_start_after_transpose %3d\n",
62 pfft
->local_nx
,pfft
->local_x_start
,pfft
->local_ny_after_transpose
,
63 pfft
->local_y_start_after_transpose
);
69 gmx_alloc_aligned(size_t size
)
77 gmx_fatal(FARGS
,"Failed to allocated %u bytes of aligned memory.",size
+32);
80 p
= (void *) (((size_t) p0
+ 32) & (~((size_t) 31)));
82 /* Yeah, yeah, we cannot free this pointer, but who cares... */
86 t_fftgrid
*mk_fftgrid(int nx
,
94 /* parallel runs with non-parallel ffts haven't been tested yet */
96 int x1
,y1
,maxlocalsize
;
102 if (cr
&& cr
->nnodes
> 1) {
103 MPI_Comm_size(cr
->mpi_comm_mygroup
,&nnodes
);
111 grid
->nxyz
= nx
*ny
*nz
;
112 grid
->bParallel
= (nnodes
> 1);
116 grid
->la2r
= (nz
/2+1)*2;
123 grid
->la2c
= (nz
/2+1);
125 grid
->la12r
= ny
*grid
->la2r
;
129 grid
->la12c
= nx
*grid
->la2c
;
133 grid
->la12c
= ny
*grid
->la2c
;
136 /* This code assumes that the when the grid is not divisble by nnodes,
137 * the maximum difference in local grid sizes is 1.
139 x1
= (nx
% nnodes
== 0 ? 0 : 1);
140 y1
= (ny
% nnodes
== 0 ? 0 : 1);
142 grid
->nptr
= (nx
+ x1
)*(ny
+ y1
)*grid
->la2c
*2;
147 gmx_parallel_3dfft_init(&grid
->mpi_fft_setup
,nx
,ny
,nz
,
148 node2slab
,slab2grid_x
,cr
->mpi_comm_mygroup
,
151 gmx_parallel_3dfft_limits(grid
->mpi_fft_setup
,
152 &(grid
->pfft
.local_x_start
),
153 &(grid
->pfft
.local_nx
),
154 &(grid
->pfft
.local_y_start_after_transpose
),
155 &(grid
->pfft
.local_ny_after_transpose
));
157 gmx_fatal(FARGS
,"Parallel FFT supported with MPI only!");
162 gmx_fft_init_3d_real(&grid
->fft_setup
,nx
,ny
,nz
,bReproducible
? GMX_FFT_FLAG_CONSERVATIVE
: GMX_FFT_FLAG_NONE
);
164 grid
->ptr
= (real
*)gmx_alloc_aligned(grid
->nptr
*sizeof(*(grid
->ptr
)));
167 if (grid
->bParallel
&& debug
)
169 print_parfft(debug
,"Plan", &grid
->pfft
);
173 maxlocalsize
= max((nx
/nnodes
+ x1
)*ny
*grid
->la2c
*2,
174 (ny
/nnodes
+ y1
)*nx
*grid
->la2c
*2);
175 grid
->workspace
= (real
*)
176 gmx_alloc_aligned(maxlocalsize
*sizeof(*(grid
->workspace
)));
181 (real
*)gmx_alloc_aligned(grid
->nptr
*sizeof(*(grid
->workspace
)));
184 grid
->workspace
= (real
*)gmx_alloc_aligned(grid
->nptr
*sizeof(*(grid
->workspace
)));
191 pr_fftgrid(FILE *fp
,char *title
,t_fftgrid
*grid
)
193 int i
,j
,k
,index_x
,index_xy
,ntot
=0;
194 int nx
,ny
,nz
,nx2
,ny2
,nz2
,la12
,la2
;
197 /* Unpack structure */
198 unpack_fftgrid(grid
,&nx
,&ny
,&nz
,&nx2
,&ny2
,&nz2
,&la2
,&la12
,TRUE
,&ptr
);
199 for(i
=0; (i
<nx
); i
++) {
201 for(j
=0; (j
<ny
); j
++) {
202 index_xy
= index_x
+ la2
*j
;
203 for(k
=0; (k
<nz
); k
++) {
204 if (ptr
[index_xy
+k
] != 0) {
205 fprintf(fp
,"%-12s %5d %5d %5d %12.5e\n",
206 title
,i
,j
,k
,ptr
[index_xy
+k
]);
212 fprintf(fp
,"%d non zero elements in %s\n",ntot
,title
);
215 void done_fftgrid(t_fftgrid
*grid
)
217 /* memory can't be freed because it is allocated by gmx_alloc_aligned */
219 /* sfree(grid->ptr); */
223 if (grid
->workspace
) {
224 /* sfree(grid->workspace); */
225 grid
->workspace
=NULL
;
230 void gmxfft3D(t_fftgrid
*grid
,enum gmx_fft_direction dir
,t_commrec
*cr
)
237 if( dir
== GMX_FFT_REAL_TO_COMPLEX
|| dir
== GMX_FFT_COMPLEX_TO_REAL
)
239 gmx_parallel_3dfft(grid
->mpi_fft_setup
,dir
,grid
->ptr
,grid
->ptr
);
243 gmx_fatal(FARGS
,"Invalid direction for FFT: %d",dir
);
246 gmx_fatal(FARGS
,"Parallel FFT supported with MPI only!");
251 if( dir
== GMX_FFT_REAL_TO_COMPLEX
|| dir
== GMX_FFT_COMPLEX_TO_REAL
)
253 gmx_fft_3d_real(grid
->fft_setup
,dir
,grid
->ptr
,grid
->workspace
);
255 grid
->ptr
= grid
->workspace
;
256 grid
->workspace
= tmp
;
260 gmx_fatal(FARGS
,"Invalid direction for FFT: %d",dir
);
265 void clear_fftgrid(t_fftgrid
*grid
)
267 /* clears the whole grid */
274 for (i
=0; (i
<ngrid
); i
++) {
279 void unpack_fftgrid(t_fftgrid
*grid
,int *nx
,int *ny
,int *nz
,
280 int *nx2
,int *ny2
,int *nz2
,
281 int *la2
,int *la12
,bool bReal
,real
**ptr
)
302 /*****************************************************************
304 * For backward compatibility (for testing the ewald code vs. PPPM etc)
305 * some old grid routines are retained here.
307 ************************************************************************/
309 real
***mk_rgrid(int nx
,int ny
,int nz
)
321 for(i
=0; (i
<nx
); i
++) {
323 for(j
=0; (j
<ny
); j
++,n2
++) {
324 ptr2
[n2
] = &(ptr1
[n3
]);
331 void free_rgrid(real
***grid
,int nx
,int ny
)
336 for(i
=0; (i
<nx
); i
++) {
342 real
print_rgrid(FILE *fp
,char *title
,int nx
,int ny
,int nz
,real
***grid
)
349 fprintf(fp
,"Printing all non-zero real elements of %s\n",title
);
350 for(ix
=0; (ix
<nx
); ix
++)
351 for(iy
=0; (iy
<ny
); iy
++)
352 for(iz
=0; (iz
<nz
); iz
++) {
355 fprintf(fp
,"%s[%2d][%2d][%2d] = %12.5e\n",title
,ix
,iy
,iz
,g
);
361 void print_rgrid_pdb(char *fn
,int nx
,int ny
,int nz
,real
***grid
)
368 fp
=gmx_fio_fopen(fn
,"w");
369 for(ix
=0; (ix
<nx
); ix
++) {
370 for(iy
=0; (iy
<ny
); iy
++) {
371 for(iz
=0; (iz
<nz
); iz
++) {
374 if ((ig
!= 0) || (1)) {
378 fprintf(fp
,"ATOM %5d Na Na 1 %8.3f%8.3f%8.3f%6.2f%6.2f\n",
387 void clear_rgrid(int nx
,int ny
,int nz
,real
***grid
)
391 for(i
=0; (i
<nx
); i
++)
392 for(j
=0; (j
<ny
); j
++)
393 for(k
=0; (k
<nz
); k
++)
397 void clear_cgrid(int nx
,int ny
,int nz
,t_complex
***grid
)
401 for(i
=0; (i
<nx
); i
++)
402 for(j
=0; (j
<ny
); j
++)
403 for(k
=0; (k
<nz
); k
++)
404 grid
[i
][j
][k
] = cnul
;
407 t_complex
***mk_cgrid(int nx
,int ny
,int nz
)
419 for(i
=0; (i
<nx
); i
++) {
421 for(j
=0; (j
<ny
); j
++,n2
++) {
422 ptr2
[n2
] = &(ptr1
[n3
]);
429 void free_cgrid(t_complex
***grid
,int nx
,int ny
)
434 for(i
=0; (i
<nx
); i
++)
439 t_complex
print_cgrid(FILE *fp
,char *title
,int nx
,int ny
,int nz
,
447 fprintf(fp
,"Printing all non-zero complex elements of %s\n",title
);
448 for(ix
=0; (ix
<nx
); ix
++)
449 for(iy
=0; (iy
<ny
); iy
++)
450 for(iz
=0; (iz
<nz
); iz
++) {
452 if (fp
&& ((g
.re
!= 0) || (g
.im
!= 0)))
453 fprintf(fp
,"%s[%2d][%2d][%2d] = %12.5e + i %12.5e\n",
454 title
,ix
,iy
,iz
,g
.re
,g
.im
);
460 void print_cgrid_pdb(char *fn
,int nx
,int ny
,int nz
,t_complex
***grid
)
467 fp
=gmx_fio_fopen(fn
,"w");
468 for(ix
=0; (ix
<nx
); ix
++) {
469 for(iy
=0; (iy
<ny
); iy
++) {
470 for(iz
=0; (iz
<nz
); iz
++) {
471 g
=grid
[ix
][iy
][iz
].re
;
476 fprintf(fp
,"ATOM %5d Na Na 1 %8.3f%8.3f%8.3f%6.2f%6.2f\n",