/*
 * This source code is part of
 * GROningen MAchine for Chemical Simulations
 *
 * Copyright (c) 1991-1999
 * BIOSON Research Institute, Dept. of Biophysical Chemistry
 * University of Groningen, The Netherlands
 *
 * GROMACS: A message-passing parallel molecular dynamics implementation
 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
 * Comp. Phys. Comm. 91, 43-56 (1995)
 *
 * Also check out our WWW page:
 * http://md.chem.rug.nl/~gmx
 *
 * GRowing Old MAkes el Chrono Sweat
 */
static char *SRCID_pme_c = "$Id$";
#include "shift_util.h"
#include "ewald_util.h"
typedef real *splinevec[DIM];
static void calc_idx(int natoms,rvec invh,
                     rvec x[],ivec idx[],int nx,int ny,int nz,
                     int nnx[],int nny[],int nnz[])
  for(i=0; (i<natoms); i++) {
    idxptr[XX] = nnx[(int)(nx+xptr[XX]*ix)];
    idxptr[YY] = nny[(int)(ny+xptr[YY]*iy)];
    idxptr[ZZ] = nnz[(int)(nz+xptr[ZZ]*iz)];
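    /* Index wrapping: nnx/nny/nnz are modulo lookup tables of length 3*nx,
     * 3*ny and 3*nz (filled in do_pme below), so the offset nx+xptr[XX]*ix
     * always lands inside the table and no explicit "% nx" is needed per
     * atom.  Here ix/iy/iz are presumably local copies of the scaled
     * inverse grid spacings from invh, set up earlier in this routine.
     */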
void sum_qgrid(t_commrec *cr,t_nsborder *nsb,t_fftgrid *grid,bool bForward)
  static bool bFirst=TRUE;

  localsize = grid->la12r*grid->pfft.local_nx;
  maxproc   = grid->nx/grid->pfft.local_nx;
  /* NOTE: FFTW doesn't necessarily use all processors for the FFT;
   * above I assume that the ones that do have equal amounts of data.
   * This is bad since it's not guaranteed by FFTW, but it works for now...
   * This will be fixed in the next release.
   */
  if(bForward) { /* sum contributions to local grid */
    for(i=0;i<maxproc;i++) {
      MPI_Reduce(grid->ptr+i*localsize, /* ptr arithm. */
                 tmp,localsize,
                 GMX_MPI_REAL,MPI_SUM,i,MPI_COMM_WORLD);
      memcpy(grid->ptr+cr->pid*localsize,tmp,localsize*sizeof(t_fft_r));
  else { /* distribute local grid to all processors */
    for(i=0;i<maxproc;i++)
      MPI_Bcast(grid->ptr+i*localsize, /* ptr arithm. */
                localsize,
                GMX_MPI_REAL,i,MPI_COMM_WORLD);
  fatal_error(0,"Parallel grid summation requires MPI.\n");
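  /* Communication pattern: in the forward direction each x-slab of the
   * charge grid is summed over all processors with the slab owner as the
   * MPI_Reduce root, after which the owner copies the result back into its
   * part of grid->ptr; in the backward direction each owner broadcasts its
   * slab so that every processor again holds the full grid.
   */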
void spread_q_bsplines(t_fftgrid *grid,rvec invh,t_nsborder *nsb,
                       ivec idx[],real *charge,splinevec theta,int order,
                       int nnx[],int nny[],int nnz[])
  /* spread charges from home atoms to local grid */
  int i,j,k,n,*i0,*j0,*k0,*ii0,*jj0,*kk0,ithx,ithy,ithz;
  int nx,ny,nz,la2,la12,xidx,yidx,zidx;
  int start,nr,norder,norder1,*idxptr,ind0;
  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,TRUE,&ptr);

  ii0 = nnx+nx+1-order;
  jj0 = nny+ny+1-order;
  kk0 = nnz+nz+1-order;
  for(n=0; (n<nr); n++) {
    qn = charge[start+n];
      i0      = ii0+xidx; /* Pointer arithmetic */
      norder1 = norder+order;
      for(ithx=norder; (ithx<norder1); ithx++,i0++) {
        j0 = jj0+yidx; /* Pointer arithmetic */
        for(ithy=norder; (ithy<norder1); ithy++,j0++) {
          k0    = kk0+zidx; /* Pointer arithmetic */
          valxy = valx*thy[ithy];
          for(ithz=norder; (ithz<norder1); ithz++,k0++) {
            ptr[ind0+k] += valxy*thz[ithz];
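  /* What the triple loop does, schematically (cf. Essmann et al.,
   * J. Chem. Phys. 103, 8577 (1995)): each charge qn is spread over
   * order^3 grid points with separable B-spline weights,
   *
   *   Q[kx][ky][kz] += qn * Mx[ithx] * My[ithy] * Mz[ithz]
   *
   * where the spline values come from theta and the wrapped grid indices
   * from ii0/jj0/kk0; valx is presumably qn*thx[ithx] from the outer x-loop.
   */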
real solve_pme(t_fftgrid *grid,real ewaldcoeff,real vol,
               splinevec bsp_mod,rvec invh,matrix vir,t_commrec *cr)
  /* do recip sum over local cells in grid */
  int  nx,ny,nz,la2,la12;
  int  kx,ky,kz,idx,idx0,maxkx,maxky,maxkz,kystart,kyend;
  real factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
  real ets2,struct2,vfactor,ets2vf;
  real eterm,d1,d2,energy=0;
  real virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,FALSE,(t_fft_r **)&ptr);
  if (bPar) { /* transpose X & Y and only sum local cells */
    kystart = grid->pfft.local_y_start_after_transpose;
    kyend   = kystart+grid->pfft.local_ny_after_transpose;
      fprintf(debug,"solve_pme: kystart = %d, kyend=%d\n",kystart,kyend);
    fatal_error(0,"Parallel PME attempted without MPI");
#endif /* end of parallel case */
  for(ky=kystart; (ky<kyend); ky++) {  /* our local cells */
    by = M_PI*vol*bsp_mod[YY][ky];
    for(kx=0; (kx<nx); kx++) {
      bx = bsp_mod[XX][kx];
      if (bPar)
        p0 = ptr + INDEX(ky,kx,0); /* Pointer arithmetic */
      else
        p0 = ptr + INDEX(kx,ky,0); /* Pointer arithmetic */
      for(kz=0; (kz<maxkz); kz++,p0++) {
        if ((kx==0) && (ky==0) && (kz==0))
        m2      = mx*mx+my*my+mz*mz;
        denom   = m2*bx*by*bsp_mod[ZZ][kz];
        eterm   = ONE_4PI_EPS0*exp(-factor*m2)/denom;
        struct2 = d1*d1+d2*d2;
        if ((kz > 0) && (kz < (nz+1)/2))
        ets2     = eterm*struct2;
        vfactor  = (factor*m2+1)*2.0/m2;
        ets2vf   = ets2*vfactor;
        virxx   += ets2vf*mx*mx-ets2;
        virxy   += ets2vf*mx*my;
        virxz   += ets2vf*mx*mz;
        viryy   += ets2vf*my*my-ets2;
        viryz   += ets2vf*my*mz;
        virzz   += ets2vf*mz*mz-ets2;
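        /* Schematically, per wave vector m != 0 this loop accumulates the
         * smooth-PME reciprocal-space sum of Essmann et al. (1995),
         *
         *   E += C * exp(-pi^2 m^2/ewaldcoeff^2) / (pi V m^2 B(m)) * |S(m)|^2
         *
         * where B(m) = bsp_mod[XX][kx]*bsp_mod[YY][ky]*bsp_mod[ZZ][kz] is the
         * B-spline modulus, |S(m)|^2 = d1*d1+d2*d2 comes from the transformed
         * charge grid, and C is the Coulomb prefactor ONE_4PI_EPS0; the
         * customary overall factor 1/2 is presumably applied where the
         * returned energy is used.  The vir** accumulators build up the
         * corresponding reciprocal-space virial with
         * vfactor = 2*(1+pi^2 m^2/ewaldcoeff^2)/m^2.
         */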
  /* Update virial with local values */
  for(nx=0;nx<DIM;nx++)
    for(ny=0;ny<DIM;ny++)

  /* This virial seems OK for isotropic scaling, but I'm
   * experiencing problems on semi-isotropic membranes.
   */

  /* This energy should be corrected for a charged system. */
void gather_f_bsplines(t_fftgrid *grid,rvec invh,t_nsborder *nsb,
                       ivec idx[],rvec f[],real *charge,splinevec theta,
                       splinevec dtheta,int order,
                       int nnx[],int nny[],int nnz[])
  /* sum forces for local particles */
  int  i,j,k,n,*i0,*j0,*k0,*ii0,*jj0,*kk0,ithx,ithy,ithz;
  int  nx,ny,nz,la2,la12;
  real fx,fy,fz,gval,tgz;
  real *thx,*thy,*thz,*dthx,*dthy,*dthz;
  int  start,nr,sn,norder,norder1,*idxptr,ind0;
  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,TRUE,&ptr);

  ii0 = nnx+nx+1-order;
  jj0 = nny+ny+1-order;
  kk0 = nnz+nz+1-order;
  for(n=0; (n<nr); n++) {
      i0      = ii0+xidx; /* Pointer arithmetic */
      norder1 = norder+order;
      for(ithx=norder; (ithx<norder1); ithx++,i0++) {
        j0 = jj0+yidx; /* Pointer arithmetic */
        for(ithy=norder; (ithy<norder1); ithy++,j0++) {
          k0 = kk0+zidx; /* Pointer arithmetic */
          for(ithz=norder; (ithz<norder1); ithz++,k0++) {
            tgz  = thz[ithz]*gval;
            fz  -= tx*ty*dthz[ithz]*gval;
    f[sn][XX] += invh[XX]*fx*qn;
    f[sn][YY] += invh[YY]*fy*qn;
    f[sn][ZZ] += invh[ZZ]*fz*qn;
  /* Since the energy and not the forces are interpolated,
   * the net force might not be exactly zero.
   * This can be solved by also interpolating F, but
   * that comes at a cost.
   * A better hack is to remove the net force every
   * step, but that must be done at a higher level
   * since this routine doesn't see all atoms if running
   * in parallel.  I don't know how important it is.  EL 990726
   */
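  /* Force interpolation, schematically: the back-transformed grid holds the
   * reciprocal-space potential, and the x-component of the force on an atom
   * is obtained by contracting it with the spline derivative in x and the
   * spline values in y and z,
   *
   *   F_x = -q * sum_ijk dMx[i]*My[j]*Mz[k]*phi[i][j][k] * invh[XX]
   *
   * (and likewise for y and z), which is what the dthx/dthy/dthz terms, the
   * "-=" accumulation and the final invh scaling implement.
   */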
void make_bsplines(splinevec theta,splinevec dtheta,int order,int nx,
                   int ny,int nz,rvec x[],real *charge,
                   t_nsborder *nsb,rvec invh)
  /* construct splines for local atoms */
  real *data,*ddata,*xptr;

  rcons = max(nx,max(ny,nz))+1;
  for(i=0; (i<nr); i++) {
    if (charge[sn] != 0.0) {
      for(j=0; (j<DIM); j++) {
        dr = rcons+xptr[j]*invh[j];
        /* dr is relative offset from lower cell limit */
        data=&(theta[j][i*order]);
        for(k=3; (k<order); k++) {
          data[k-1]=div*dr*data[k-2];
          for(l=1; (l<(k-1)); l++)
            data[k-l-1]=div*((dr+l)*data[k-l-2]+(k-l-dr)*
                             data[k-l-1]);
          data[0]=div*(1-dr)*data[0];
        ddata = &(dtheta[j][i*order]);
        for(k=1; (k<order); k++)
          ddata[k]=data[k-1]-data[k];

        data[order-1]=div*dr*data[order-2];
        for(l=1; (l<(order-1)); l++)
          data[order-l-1]=div*((dr+l)*data[order-l-2]+
                               (order-l-dr)*data[order-l-1]);
        data[0]=div*(1-dr)*data[0];
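        /* The recursion built up in data[] is the standard cardinal B-spline
         * recursion (Essmann et al. 1995),
         *
         *   M_n(u) = ( u*M_{n-1}(u) + (n-u)*M_{n-1}(u-1) ) / (n-1)
         *
         * with div presumably 1/(n-1), while the exact derivative follows
         * from
         *
         *   dM_n(u)/du = M_{n-1}(u) - M_{n-1}(u-1)
         *
         * which is what ddata[k]=data[k-1]-data[k] evaluates.
         */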
void make_dft_mod(real *mod,real *data,int ndata)

  for(i=0;i<ndata;i++) {
    for(j=0;j<ndata;j++) {
      arg = (2.0*M_PI*i*j)/ndata;
      sc += data[j]*cos(arg);
      ss += data[j]*sin(arg);

    mod[i]=(mod[i-1]+mod[i+1])*0.5;
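  /* mod[i] is presumably set to sc*sc+ss*ss, i.e. the squared modulus of the
   * discrete Fourier transform of the spline values,
   *
   *   mod[i] = (sum_j data[j]*cos(2*pi*i*j/ndata))^2
   *          + (sum_j data[j]*sin(2*pi*i*j/ndata))^2
   *
   * which is |b(m)|^2 in the SPME formulation; entries that come out
   * (numerically) zero are then replaced by the average of their
   * neighbours, which is what the mod[i]=(mod[i-1]+mod[i+1])*0.5 line does.
   */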
void make_bspline_moduli(splinevec bsp_mod,int nx,int ny,int nz,int order)

  int  nmax=max(nx,max(ny,nz));
  real *data,*ddata,*bsp_data;

  for(k=3;k<order;k++) {
      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);

    ddata[k]=data[k-1]-data[k];

  for(l=1;l<(order-1);l++)
    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);

  for(i=1;i<=order;i++)
    bsp_data[i]=data[i-1];

  make_dft_mod(bsp_mod[XX],bsp_data,nx);
  make_dft_mod(bsp_mod[YY],bsp_data,ny);
  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);
static t_fftgrid *grid=NULL;

static splinevec theta;
static splinevec dtheta;
static splinevec bsp_mod;
void init_pme(FILE *log,t_commrec *cr,t_nsborder *nsb,t_inputrec *ir)

  fprintf(log,"Will do PME sum in reciprocal space.\n");

  if (PAR(cr) && cr->nprocs>1) {
    fprintf(log,"Parallelized PME sum used.\n");
    if(nx % (cr->nprocs) != 0)
      fprintf(log,"Warning: For load balance, "
              "fourier_nx should be divisible by NPROCS\n");

  /* allocate space for things */
  snew(bsp_mod[XX],nx);
  snew(bsp_mod[YY],ny);
  snew(bsp_mod[ZZ],nz);
    snew(theta[i],ir->pme_order*HOMENR(nsb));
    snew(dtheta[i],ir->pme_order*HOMENR(nsb));

  grid=mk_fftgrid(log,PAR(cr),nx,ny,nz,ir->bOptFFT);
  make_bspline_moduli(bsp_mod,nx,ny,nz,ir->pme_order);
real do_pme(FILE *logfile,   bool bVerbose,
            t_inputrec *ir,  rvec x[],
            rvec f[],        real charge[],
            rvec box,        t_commrec *cr,
            t_nsborder *nsb, t_nrnb *nrnb,
            matrix vir,      real ewaldcoeff)
  static ivec *idx=NULL;
  static int  *nnx,*nny,*nnz;

  calc_invh(box,nx,ny,nz,invh);

  vol = box[XX]*box[YY]*box[ZZ];
  /* Compute the fftgrid index for all atoms, with the help of some extra
   * variables.
   */
    snew(idx,HOMENR(nsb));
    /* periodic wrap tables covering three grid periods */
    for(i=0; (i<3*nx); i++)
      nnx[i] = i % nx;
    for(i=0; (i<3*ny); i++)
      nny[i] = i % ny;
    for(i=0; (i<3*nz); i++)
      nnz[i] = i % nz;

  calc_idx(HOMENR(nsb),invh,x+START(nsb),idx,nx,ny,nz,nnx,nny,nnz);
  /* make local bsplines */
  make_bsplines(theta,dtheta,ir->pme_order,nx,ny,nz,x,charge,nsb,invh);

  /* put local atoms on grid */
  spread_q_bsplines(grid,invh,nsb,idx,charge,theta,ir->pme_order,nnx,nny,nnz);
  inc_nrnb(nrnb,eNR_SPREADQBSP,
           ir->pme_order*ir->pme_order*ir->pme_order*HOMENR(nsb));
  /* sum contributions to local grid from other processors */
  sum_qgrid(cr,nsb,grid,TRUE);

  gmxfft3D(grid,FFTW_FORWARD,cr);

  /* solve in k-space for our local cells */
  energy=solve_pme(grid,ewaldcoeff,vol,bsp_mod,invh,vir,cr);
  inc_nrnb(nrnb,eNR_SOLVEPME,nx*ny*nz*0.5);

  gmxfft3D(grid,FFTW_BACKWARD,cr);

  /* distribute local grid to all processors */
  sum_qgrid(cr,nsb,grid,FALSE);
  /* interpolate forces for our local atoms */
  gather_f_bsplines(grid,invh,nsb,idx,f,charge,theta,dtheta,ir->pme_order,
                    nnx,nny,nnz);
  /* gather_f_bsplines(grid,invh,nsb,x,f,charge,theta,dtheta,ir->pme_order); */
  inc_nrnb(nrnb,eNR_GATHERFBSP,
           ir->pme_order*ir->pme_order*ir->pme_order*HOMENR(nsb));
  npme = ntot*log((real)ntot)/(cr->nprocs*log(2.0));
  inc_nrnb(nrnb,eNR_FFT,2*npme);
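  /* Flop accounting: with ntot presumably nx*ny*nz, npme estimates the
   * N*log2(N)/nprocs work of one 3D FFT on this processor, and the factor 2
   * presumably covers the forward and backward transforms performed above.
   */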