/*
 * $Id$
 *
 * This source code is part of
 *
 *      G R O M A C S
 *
 * GROningen MAchine for Chemical Simulations
 *
 *          VERSION 2.0
 *
 * Copyright (c) 1991-1999
 * BIOSON Research Institute, Dept. of Biophysical Chemistry
 * University of Groningen, The Netherlands
 *
 * Please refer to:
 * GROMACS: A message-passing parallel molecular dynamics implementation
 * H.J.C. Berendsen, D. van der Spoel and R. van Drunen
 * Comp. Phys. Comm. 91, 43-56 (1995)
 *
 * Also check out our WWW page:
 * http://md.chem.rug.nl/~gmx
 * or e-mail to:
 * gromacs@chem.rug.nl
 *
 * And Hey:
 * GRowing Old MAkes el Chrono Sweat
 */
static char *SRCID_pme_c = "$Id$";
#include <stdio.h>
#include <math.h>
#include "typedefs.h"
#include "txtdump.h"
#include "vec.h"
#include "complex.h"
#include "smalloc.h"
#include "futil.h"
#include "shift_util.h"
#include "ewald_util.h"
#include "fftgrid.h"
#include "fatal.h"
#include "ewald.h"
#include "pme.h"
#include "pppm.h"
#include "network.h"
#include "physics.h"
#include "nrnb.h"

#define DFT_TOL 1e-7

typedef real *splinevec[DIM];
static void calc_idx(int natoms,rvec invh,
                     rvec x[],ivec idx[],int nx,int ny,int nz,
                     int nnx[],int nny[],int nnz[])
{
  int  i;
  int  *idxptr;
  real *xptr,ix,iy,iz;

  ix = invh[XX];
  iy = invh[YY];
  iz = invh[ZZ];
  for(i=0; (i<natoms); i++) {
    xptr   = x[i];
    idxptr = idx[i];
    idxptr[XX] = nnx[(int)(nx+xptr[XX]*ix)];
    idxptr[YY] = nny[(int)(ny+xptr[YY]*iy)];
    idxptr[ZZ] = nnz[(int)(nz+xptr[ZZ]*iz)];
  }
}
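/* Note on the lookup tables (added comment): nnx/nny/nnz are filled in
 * do_pme() as nnx[i] = i % nx (and likewise for y and z), i.e. periodic
 * wrap-around tables of length 3*nx.  Adding nx before the lookup keeps
 * the table argument non-negative for coordinates slightly outside the
 * box: with nx = 10 and xptr[XX]*ix = -0.3, the index is
 * nnx[(int)(10.0-0.3)] = nnx[9] = 9, the last grid cell.
 */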
void sum_qgrid(t_commrec *cr,t_nsborder *nsb,t_fftgrid *grid,bool bForward)
{
  static bool bFirst=TRUE;
  static t_fft_r *tmp;
  int i;
  static int localsize;
  static int maxproc;

#ifdef USE_MPI
  if(bFirst) {
    localsize=grid->la12r*grid->pfft.local_nx;
    if(!grid->workspace)
      snew(tmp,localsize);
    maxproc=grid->nx/grid->pfft.local_nx;
    /* NOTE: FFTW doesn't necessarily use all processors for the fft;
     * above I assume that the ones that do have equal amounts of data.
     * This is bad since it's not guaranteed by fftw, but works for now...
     * This will be fixed in the next release.
     */
    bFirst=FALSE;
  }
  if(grid->workspace)
    tmp=grid->workspace;
  if(bForward) { /* sum contributions to local grid */
    for(i=0;i<maxproc;i++) {
      MPI_Reduce(grid->ptr+i*localsize, /* ptr arithm. */
                 tmp,localsize,
                 GMX_MPI_REAL,MPI_SUM,i,MPI_COMM_WORLD);
    }
    if(cr->pid<maxproc)
      memcpy(grid->ptr+cr->pid*localsize,tmp,localsize*sizeof(t_fft_r));
  }
  else { /* distribute local grid to all processors */
    for(i=0;i<maxproc;i++)
      MPI_Bcast(grid->ptr+i*localsize, /* ptr arithm */
                localsize,
                GMX_MPI_REAL,i,MPI_COMM_WORLD);
  }
#else
  fatal_error(0,"Parallel grid summation requires MPI.\n");
#endif
}
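/* Illustrative check of the equal-slab assumption noted above (added
 * comment, not in the original code): at setup time one could verify that
 * the FFTW slab decomposition really is uniform, e.g.
 *
 *   if ((grid->nx % grid->pfft.local_nx) != 0)
 *     fatal_error(0,"sum_qgrid assumes equally sized FFTW slabs");
 *
 * using the same plan fields as above.
 */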
void spread_q_bsplines(t_fftgrid *grid,rvec invh,t_nsborder *nsb,
                       ivec idx[],real *charge,splinevec theta,int order,
                       int nnx[],int nny[],int nnz[])
{
  /* spread charges from home atoms to local grid */
  t_fft_r *ptr;
  int  i,j,k,n,*i0,*j0,*k0,*ii0,*jj0,*kk0,ithx,ithy,ithz;
  int  nx,ny,nz,la2,la12,xidx,yidx,zidx;
  int  start,nr,norder,norder1,*idxptr,ind0;
  real valx,valxy,qn;
  real *thx,*thy,*thz;

  clear_fftgrid(grid);
  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,TRUE,&ptr);
  start = START(nsb);
  nr    = HOMENR(nsb);
  ii0   = nnx+nx+1-order;
  jj0   = nny+ny+1-order;
  kk0   = nnz+nz+1-order;
  thx   = theta[XX];
  thy   = theta[YY];
  thz   = theta[ZZ];

  for(n=0; (n<nr); n++) {
    qn     = charge[start+n];
    idxptr = idx[n];

    if (qn != 0) {
      xidx    = idxptr[XX];
      yidx    = idxptr[YY];
      zidx    = idxptr[ZZ];
      i0      = ii0+xidx; /* Pointer arithmetic */
      norder  = n*order;
      norder1 = norder+order;

      for(ithx=norder; (ithx<norder1); ithx++,i0++) {
        i    = *i0;
        j0   = jj0+yidx; /* Pointer arithmetic */
        valx = qn*thx[ithx];

        for(ithy=norder; (ithy<norder1); ithy++,j0++) {
          j     = *j0;
          k0    = kk0+zidx; /* Pointer arithmetic */
          valxy = valx*thy[ithy];
          ind0  = INDEX(i,j,0);

          for(ithz=norder; (ithz<norder1); ithz++,k0++) {
            k = *k0;
            ptr[ind0+k] += valxy*thz[ithz];
          }
        }
      }
    }
  }
}
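/* For reference (added comment): the loops above build the smooth-PME
 * charge array of Essmann et al., J. Chem. Phys. 103, 8577 (1995),
 * roughly
 *
 *   Q(k1,k2,k3) = sum_n q_n * Mx_n(k1) * My_n(k2) * Mz_n(k3)
 *
 * where the M's are the cardinal B-spline weights of order 'order'
 * stored in theta[] for atom n, and (k1,k2,k3) runs over the order^3
 * grid cells assigned to that atom (with periodic wrapping through the
 * nnx/nny/nnz tables).
 */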
real solve_pme(t_fftgrid *grid,real ewaldcoeff,real vol,
               splinevec bsp_mod,rvec invh,matrix vir,t_commrec *cr)
{
  /* do recip sum over local cells in grid */
  t_fft_c *ptr,*p0;
  int  nx,ny,nz,la2,la12;
  int  kx,ky,kz,idx,idx0,maxkx,maxky,maxkz,kystart,kyend;
  real m2,mx,my,mz;
  real factor=M_PI*M_PI/(ewaldcoeff*ewaldcoeff);
  real ets2,struct2,vfactor,ets2vf;
  real eterm,d1,d2,energy=0;
  real denom;
  real bx,by;
  real invx,invy,invz;
  real virxx=0,virxy=0,virxz=0,viryy=0,viryz=0,virzz=0;
  bool bPar = PAR(cr);

  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,FALSE,(t_fft_r **)&ptr);
  clear_mat(vir);

  invx = invh[XX]/nx;
  invy = invh[YY]/ny;
  invz = invh[ZZ]/nz;

  maxkx = (nx+1)/2;
  maxky = (ny+1)/2;
  maxkz = nz/2+1;

  if (bPar) { /* transpose X & Y and only sum local cells */
#ifdef USE_MPI
    kystart = grid->pfft.local_y_start_after_transpose;
    kyend   = kystart+grid->pfft.local_ny_after_transpose;
    if (debug)
      fprintf(debug,"solve_pme: kystart = %d, kyend=%d\n",kystart,kyend);
#else
    fatal_error(0,"Parallel PME attempted without MPI");
#endif /* end of the parallel case */
  }
  else {
    kystart = 0;
    kyend   = ny;
  }

  for(ky=kystart; (ky<kyend); ky++) { /* our local cells */
    if(ky<maxky)
      my = ky*invy;
    else
      my = (ky-ny)*invy;
    by = M_PI*vol*bsp_mod[YY][ky];

    for(kx=0; (kx<nx); kx++) {
      if(kx < maxkx)
        mx = kx*invx;
      else
        mx = (kx-nx)*invx;
      bx = bsp_mod[XX][kx];
      if (bPar)
        p0 = ptr + INDEX(ky,kx,0); /* Pointer arithmetic */
      else
        p0 = ptr + INDEX(kx,ky,0); /* Pointer arithmetic */
      for(kz=0; (kz<maxkz); kz++,p0++) {
        if ((kx==0) && (ky==0) && (kz==0))
          continue;
        d1    = p0->re;
        d2    = p0->im;
        mz    = kz*invz;
        m2    = mx*mx+my*my+mz*mz;
        denom = m2*bx*by*bsp_mod[ZZ][kz];
        eterm = ONE_4PI_EPS0*exp(-factor*m2)/denom;
        p0->re = d1*eterm;
        p0->im = d2*eterm;

        struct2 = d1*d1+d2*d2;
        if ((kz > 0) && (kz < (nz+1)/2))
          struct2*=2;
        ets2    = eterm*struct2;
        vfactor = (factor*m2+1)*2.0/m2;
        energy += ets2;

        ets2vf  = ets2*vfactor;
        virxx  += ets2vf*mx*mx-ets2;
        virxy  += ets2vf*mx*my;
        virxz  += ets2vf*mx*mz;
        viryy  += ets2vf*my*my-ets2;
        viryz  += ets2vf*my*mz;
        virzz  += ets2vf*mz*mz-ets2;
      }
    }
  }

  /* Update virial with local values */
  vir[XX][XX] = virxx;
  vir[XX][YY] = virxy;
  vir[XX][ZZ] = virxz;
  vir[YY][XX] = virxy;
  vir[YY][YY] = viryy;
  vir[YY][ZZ] = viryz;
  vir[ZZ][XX] = virxz;
  vir[ZZ][YY] = viryz;
  vir[ZZ][ZZ] = virzz;

  for(nx=0;nx<DIM;nx++)
    for(ny=0;ny<DIM;ny++)
      vir[nx][ny]*=0.25;
  /* This virial seems ok for isotropic scaling, but I'm
   * experiencing problems on semiisotropic membranes.
   */

  /* This energy should be corrected for a charged system. */
  return(0.5*energy);
}
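/* For reference (added comment): the loop above evaluates the smooth-PME
 * reciprocal-space energy of Essmann et al. (1995),
 *
 *   E_rec = 1/(2*pi*V) * sum_{m!=0} exp(-pi^2*m^2/beta^2)/m^2 * B(m) * |F(Q)(m)|^2
 *
 * with beta = ewaldcoeff; the division by bsp_mod[] provides the B(m)
 * factor.  For a system with net charge q_tot the correction mentioned
 * above would be an additional constant term of roughly
 * -ONE_4PI_EPS0*pi/(2*V*beta^2)*q_tot^2, the standard Ewald
 * neutralizing-background term (stated here as a note, not implemented).
 */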
void gather_f_bsplines(t_fftgrid *grid,rvec invh,t_nsborder *nsb,
                       ivec idx[],rvec f[],real *charge,splinevec theta,
                       splinevec dtheta,int order,
                       int nnx[],int nny[],int nnz[])
{
  /* sum forces for local particles */
  int  i,j,k,n,*i0,*j0,*k0,*ii0,*jj0,*kk0,ithx,ithy,ithz;
  int  nx,ny,nz,la2,la12;
  t_fft_r *ptr;
  int  xidx,yidx,zidx;
  real tx,ty,dx,dy,qn;
  real fx,fy,fz,gval,tgz;
  real *thx,*thy,*thz,*dthx,*dthy,*dthz;
  int  start,nr,sn,norder,norder1,*idxptr,ind0;

  start = START(nsb);
  nr    = HOMENR(nsb);
  unpack_fftgrid(grid,&nx,&ny,&nz,&la2,&la12,TRUE,&ptr);

  thx  = theta[XX];
  thy  = theta[YY];
  thz  = theta[ZZ];
  dthx = dtheta[XX];
  dthy = dtheta[YY];
  dthz = dtheta[ZZ];
  ii0  = nnx+nx+1-order;
  jj0  = nny+ny+1-order;
  kk0  = nnz+nz+1-order;

  for(n=0; (n<nr); n++) {
    sn     = start+n;
    qn     = charge[sn];
    idxptr = idx[n];

    if (qn != 0) {
      xidx = idxptr[XX];
      yidx = idxptr[YY];
      zidx = idxptr[ZZ];

      fx = 0.0;
      fy = 0.0;
      fz = 0.0;
      i0 = ii0+xidx; /* Pointer arithmetic */
      norder  = n*order;
      norder1 = norder+order;
      for(ithx=norder; (ithx<norder1); ithx++,i0++) {
        i  = *i0;
        tx = thx[ithx];
        dx = dthx[ithx];
        j0 = jj0+yidx; /* Pointer arithmetic */

        for(ithy=norder; (ithy<norder1); ithy++,j0++) {
          j  = *j0;
          ty = thy[ithy];
          dy = dthy[ithy];
          k0 = kk0+zidx; /* Pointer arithmetic */
          ind0 = INDEX(i,j,0);

          for(ithz=norder; (ithz<norder1); ithz++,k0++) {
            k    = *k0;
            gval = ptr[ind0+k];
            tgz  = thz[ithz]*gval;
            fx  -= dx*ty*tgz;
            fy  -= tx*dy*tgz;
            fz  -= tx*ty*dthz[ithz]*gval;
          }
        }
      }
      f[sn][XX] += invh[XX]*fx*qn;
      f[sn][YY] += invh[YY]*fy*qn;
      f[sn][ZZ] += invh[ZZ]*fz*qn;
    }
  }
  /* Since the energy and not the forces are interpolated,
   * the net force might not be exactly zero.
   * This can be solved by also interpolating F, but
   * that comes at a cost.
   * A better hack is to remove the net force every
   * step, but that must be done at a higher level
   * since this routine doesn't see all atoms if running
   * in parallel. Don't know how important it is?  EL 990726
   */
}
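/* Illustrative sketch of the net-force removal suggested in the comment
 * above (added example; the helper below is hypothetical and is not called
 * anywhere in this file).  At a level where the forces of all atoms are
 * available, the mean force could simply be subtracted:
 */
static void remove_net_force(int natoms,rvec f[])
{
  int  i,d;
  real fsum[DIM];

  /* accumulate the total force */
  for(d=0; (d<DIM); d++)
    fsum[d] = 0;
  for(i=0; (i<natoms); i++)
    for(d=0; (d<DIM); d++)
      fsum[d] += f[i][d];
  /* subtract the mean force from every atom */
  for(d=0; (d<DIM); d++)
    fsum[d] /= natoms;
  for(i=0; (i<natoms); i++)
    for(d=0; (d<DIM); d++)
      f[i][d] -= fsum[d];
}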
void make_bsplines(splinevec theta,splinevec dtheta,int order,int nx,
                   int ny,int nz,rvec x[],real *charge,
                   t_nsborder *nsb,rvec invh)
{
  /* construct splines for local atoms */
  int  i,j,k,l;
  real dr,div,rcons;
  real *data,*ddata,*xptr;
  int  start,nr,sn;
  rvec nk;

  start = START(nsb);
  nr    = HOMENR(nsb);
  nk[XX] = nx;
  nk[YY] = ny;
  nk[ZZ] = nz;
  rcons  = max(nx,max(ny,nz))+1;

  for(i=0; (i<nr); i++) {
    sn = start+i;
    if (charge[sn] != 0.0) {
      xptr = x[sn];
      for(j=0; (j<DIM); j++) {
        dr  = rcons+xptr[j]*invh[j];
        dr -= (int)dr;

        /* dr is relative offset from lower cell limit */
        data=&(theta[j][i*order]);
        data[order-1]=0;
        data[1]=dr;
        data[0]=1-dr;

        for(k=3; (k<order); k++) {
          div=1.0/(k-1.0);
          data[k-1]=div*dr*data[k-2];
          for(l=1; (l<(k-1)); l++)
            data[k-l-1]=div*((dr+l)*data[k-l-2]+(k-l-dr)*
                             data[k-l-1]);
          data[0]=div*(1-dr)*data[0];
        }
        /* differentiate */
        ddata    = &(dtheta[j][i*order]);
        ddata[0] = -data[0];
        for(k=1; (k<order); k++)
          ddata[k]=data[k-1]-data[k];

        div=1.0/(order-1);
        data[order-1]=div*dr*data[order-2];
        for(l=1; (l<(order-1)); l++)
          data[order-l-1]=div*((dr+l)*data[order-l-2]+
                               (order-l-dr)*data[order-l-1]);
        data[0]=div*(1-dr)*data[0];
      }
    }
  }
}
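/* For reference (added comment): the construction above is the cardinal
 * B-spline recursion used in smooth PME (Essmann et al., 1995),
 *
 *   M_k(u)      = u/(k-1) * M_{k-1}(u) + (k-u)/(k-1) * M_{k-1}(u-1)
 *   d/du M_k(u) = M_{k-1}(u) - M_{k-1}(u-1)
 *
 * data[] holds the 'order' spline weights for the grid points around the
 * atom (arguments dr, dr+1, ..., dr+order-1) and ddata[] the corresponding
 * derivatives, which the difference ddata[k] = data[k-1] - data[k]
 * implements.
 */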
void make_dft_mod(real *mod,real *data,int ndata)
{
  int i,j;
  real sc,ss,arg;

  for(i=0;i<ndata;i++) {
    sc=ss=0;
    for(j=0;j<ndata;j++) {
      arg=(2.0*M_PI*i*j)/ndata;
      sc+=data[j]*cos(arg);
      ss+=data[j]*sin(arg);
    }
    mod[i]=sc*sc+ss*ss;
  }
  for(i=0;i<ndata;i++)
    if(mod[i]<1e-7)
      mod[i]=(mod[i-1]+mod[i+1])*0.5;
}
void make_bspline_moduli(splinevec bsp_mod,int nx,int ny,int nz,int order)
{
  int  nmax=max(nx,max(ny,nz));
  real *data,*ddata,*bsp_data;
  int  i,k,l;
  real div;

  snew(data,order);
  snew(ddata,order);
  snew(bsp_data,nmax);

  data[order-1]=0;
  data[1]=0;
  data[0]=1;

  for(k=3;k<order;k++) {
    div=1.0/(k-1.0);
    data[k-1]=0;
    for(l=1;l<(k-1);l++)
      data[k-l-1]=div*(l*data[k-l-2]+(k-l)*data[k-l-1]);
    data[0]=div*data[0];
  }
  /* differentiate */
  ddata[0]=-data[0];
  for(k=1;k<order;k++)
    ddata[k]=data[k-1]-data[k];
  div=1.0/(order-1);
  data[order-1]=0;
  for(l=1;l<(order-1);l++)
    data[order-l-1]=div*(l*data[order-l-2]+(order-l)*data[order-l-1]);
  data[0]=div*data[0];

  for(i=0;i<nmax;i++)
    bsp_data[i]=0;
  for(i=1;i<=order;i++)
    bsp_data[i]=data[i-1];

  make_dft_mod(bsp_mod[XX],bsp_data,nx);
  make_dft_mod(bsp_mod[YY],bsp_data,ny);
  make_dft_mod(bsp_mod[ZZ],bsp_data,nz);

  sfree(data);
  sfree(ddata);
  sfree(bsp_data);
}
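/* For reference (added comment): make_dft_mod() computes
 *
 *   mod[m] = | sum_j data[j] * exp(2*pi*I*m*j/ndata) |^2    (I = sqrt(-1))
 *
 * so bsp_mod[dim][m] is the squared modulus of the discrete Fourier
 * transform of the B-spline sampled at the integer grid points (bsp_data).
 * Dividing by these moduli in solve_pme() supplies the |b(m)|^2
 * interpolation factors of smooth PME; near-zero moduli are patched by
 * averaging their neighbours to avoid division by zero.
 */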
static t_fftgrid *grid=NULL;
static int       nx,ny,nz;
static splinevec theta;
static splinevec dtheta;
static splinevec bsp_mod;
void init_pme(FILE *log,t_commrec *cr,t_nsborder *nsb,t_inputrec *ir)
{
  int i;

  fprintf(log,"Will do PME sum in reciprocal space.\n");
  nx = ir->nkx;
  ny = ir->nky;
  nz = ir->nkz;

  if (PAR(cr) && cr->nprocs>1) {
    fprintf(log,"Parallelized PME sum used.\n");
    if(nx%(cr->nprocs)!=0)
      fprintf(log,"Warning: For load balance, "
              "fourier_nx should be divisible by NPROCS\n");
  }

  /* allocate space for things */
  snew(bsp_mod[XX],nx);
  snew(bsp_mod[YY],ny);
  snew(bsp_mod[ZZ],nz);
  for(i=0;i<DIM;i++) {
    snew(theta[i],ir->pme_order*HOMENR(nsb));
    snew(dtheta[i],ir->pme_order*HOMENR(nsb));
  }

  grid=mk_fftgrid(log,PAR(cr),nx,ny,nz,ir->bOptFFT);
  make_bspline_moduli(bsp_mod,nx,ny,nz,ir->pme_order);
}
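/* Note on the allocations above (added comment): theta[d] and dtheta[d]
 * hold pme_order spline weights per home atom, stored contiguously per
 * atom, so atom n occupies entries [n*pme_order .. (n+1)*pme_order-1].
 * This matches the norder = n*order indexing used in spread_q_bsplines()
 * and gather_f_bsplines().
 */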
real do_pme(FILE *logfile,   bool bVerbose,
            t_inputrec *ir,  rvec x[],
            rvec f[],        real charge[],
            rvec box,        t_commrec *cr,
            t_nsborder *nsb, t_nrnb *nrnb,
            matrix vir,      real ewaldcoeff)
{
  static ivec *idx=NULL;
  static int  *nnx,*nny,*nnz;
  int  i,ntot,npme;
  rvec invh;
  real energy,vol;

  energy=0;
  calc_invh(box,nx,ny,nz,invh);

  vol=box[XX]*box[YY]*box[ZZ];

  /* Compute fftgrid index for all atoms, with help of some extra variables */
  if (!idx) {
    snew(idx,HOMENR(nsb));
    snew(nnx,3*nx);
    snew(nny,3*ny);
    snew(nnz,3*nz);
    for(i=0; (i<3*nx); i++)
      nnx[i] = i % nx;
    for(i=0; (i<3*ny); i++)
      nny[i] = i % ny;
    for(i=0; (i<3*nz); i++)
      nnz[i] = i % nz;
  }
  calc_idx(HOMENR(nsb),invh,x+START(nsb),idx,nx,ny,nz,nnx,nny,nnz);

  /* make local bsplines */
  make_bsplines(theta,dtheta,ir->pme_order,nx,ny,nz,x,charge,nsb,invh);

  /* put local atoms on grid. */
  spread_q_bsplines(grid,invh,nsb,idx,charge,theta,ir->pme_order,nnx,nny,nnz);

  inc_nrnb(nrnb,eNR_SPREADQBSP,
           ir->pme_order*ir->pme_order*ir->pme_order*HOMENR(nsb));

  /* sum contributions to local grid from other processors */
  if (PAR(cr))
    sum_qgrid(cr,nsb,grid,TRUE);

  /* do 3d-fft */
  gmxfft3D(grid,FFTW_FORWARD,cr);

  /* solve in k-space for our local cells */
  energy=solve_pme(grid,ewaldcoeff,vol,bsp_mod,invh,vir,cr);
  inc_nrnb(nrnb,eNR_SOLVEPME,nx*ny*nz*0.5);

  /* do 3d-invfft */
  gmxfft3D(grid,FFTW_BACKWARD,cr);

  /* distribute local grid to all processors */
  if (PAR(cr))
    sum_qgrid(cr,nsb,grid,FALSE);

  /* interpolate forces for our local atoms */
  gather_f_bsplines(grid,invh,nsb,idx,f,charge,theta,dtheta,ir->pme_order,
                    nnx,nny,nnz);
  /* gather_f_bsplines(grid,invh,nsb,x,f,charge,theta,dtheta,ir->pme_order); */

  inc_nrnb(nrnb,eNR_GATHERFBSP,
           ir->pme_order*ir->pme_order*ir->pme_order*HOMENR(nsb));

  ntot = grid->nxyz;
  npme = ntot*log((real)ntot)/(cr->nprocs*log(2.0));
  inc_nrnb(nrnb,eNR_FFT,2*npme);

  return energy;
}
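/* Worked example for the FFT flop-count estimate above (added comment,
 * numbers are illustrative only): for a 32x32x32 grid on one processor,
 * ntot = 32768 and log(ntot)/log(2) = 15, so npme = 32768*15 = 491520;
 * the factor 2 in inc_nrnb(nrnb,eNR_FFT,2*npme) covers the forward and
 * the backward 3D FFT.
 */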