1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
54 #include "gmx_fatal.h"
55 #include "mtop_util.h"
67 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_X86_64_SSE2) )
69 #include "genborn_sse2_double.h"
71 #include "genborn_sse2_single.h"
72 #include "genborn_allvsall_sse2_single.h"
73 #endif /* GMX_DOUBLE */
76 #include "genborn_allvsall.h"
87 typedef struct gbtmpnbls
{
93 /* This function is exactly the same as the one in bondfree.c. The reason
94 * it is copied here is that the bonded gb-interactions are evaluated
95 * not in calc_bonds, but rather in calc_gb_forces
97 static int pbc_rvec_sub(const t_pbc
*pbc
,const rvec xi
,const rvec xj
,rvec dx
)
100 return pbc_dx_aiuc(pbc
,xi
,xj
,dx
);
108 int init_gb_nblist(int natoms
, t_nblist
*nl
)
110 nl
->maxnri
= natoms
*4;
120 /*nl->nltype = nltype;*/
122 srenew(nl
->iinr
, nl
->maxnri
);
123 srenew(nl
->gid
, nl
->maxnri
);
124 srenew(nl
->shift
, nl
->maxnri
);
125 srenew(nl
->jindex
, nl
->maxnri
+1);
132 int print_nblist(int natoms
, t_nblist
*nl
)
134 int i
,k
,ai
,aj
,nj0
,nj1
;
136 printf("genborn.c: print_nblist, natoms=%d\n",nl
->nri
);
138 for(i
=0;i
<nl
->nri
;i
++)
147 printf("ai=%d, aj=%d\n",ai
,aj
);
159 void fill_log_table(const int n
, real
*table
)
165 int incr
= 1 << (23-n
);
168 logfactor
= 1.0/log(2.0);
170 log_table
.exp
= 0x3F800000;
174 /* log2(numlog)=log(numlog)/log(2.0) */
175 table
[i
]=log(log_table
.numlog
)*logfactor
;
181 real
table_log(real val
, const real
*table
, const int n
)
183 int *const exp_ptr
= ((int*)&val
);
185 const int log_2
= ((x
>>23) & 255) - 127;
189 return ((val
+log_2
)*0.69314718);
192 void gb_pd_send(t_commrec
*cr
, real
*send_data
, int nr
)
196 int *index
,*sendc
,*disp
;
198 snew(sendc
,cr
->nnodes
);
199 snew(disp
,cr
->nnodes
);
201 index
= pd_index(cr
);
204 /* Setup count/index arrays */
205 for(i
=0;i
<cr
->nnodes
;i
++)
207 sendc
[i
] = index
[i
+1]-index
[i
];
211 /* Do communication */
212 MPI_Gatherv(send_data
+index
[cur
],sendc
[cur
],GMX_MPI_REAL
,send_data
,sendc
,
213 disp
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
214 MPI_Bcast(send_data
,nr
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
220 int init_gb_plist(t_params
*p_list
)
223 p_list
->param
= NULL
;
230 int init_gb_still(const t_commrec
*cr
, t_forcerec
*fr
,
231 const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
232 gmx_genborn_t
*born
,int natoms
)
235 int i
,j
,i1
,i2
,k
,m
,nbond
,nang
,ia
,ib
,ic
,id
,nb
,idx
,idx2
,at
;
239 real r
,ri
,rj
,ri2
,ri3
,rj2
,r2
,r3
,r4
,rk
,ratio
,term
,h
,doffset
;
240 real p1
,p2
,p3
,factor
,cosine
,rab
,rbc
;
247 snew(born
->gpol_still_work
,natoms
+3);
253 pd_at_range(cr
,&at0
,&at1
);
255 for(i
=0;i
<natoms
;i
++)
272 doffset
= born
->gb_doffset
;
274 for(i
=0;i
<natoms
;i
++)
276 born
->gpol_globalindex
[i
]=born
->vsolv_globalindex
[i
]=
277 born
->gb_radius_globalindex
[i
]=0;
280 /* Compute atomic solvation volumes for Still method */
281 for(i
=0;i
<natoms
;i
++)
283 ri
=atype
->gb_radius
[atoms
->atom
[i
].type
];
284 born
->gb_radius_globalindex
[i
] = ri
;
286 born
->vsolv_globalindex
[i
]=(4*M_PI
/3)*r3
;
289 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
291 m
=idef
->il
[F_GB12
].iatoms
[j
];
292 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
293 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
295 r
=1.01*idef
->iparams
[m
].gb
.st
;
297 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
298 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
304 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
306 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
314 born
->vsolv_globalindex
[ia
] -= term
;
317 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
319 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
327 born
->vsolv_globalindex
[ib
] -= term
;
333 gmx_sum(natoms
,vsol
,cr
);
335 for(i
=0;i
<natoms
;i
++)
337 born
->vsolv_globalindex
[i
]=born
->vsolv_globalindex
[i
]-vsol
[i
];
341 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
344 for(j
=0;j
<natoms
;j
++)
346 if(born
->use_globalindex
[j
]==1)
348 born
->gpol_globalindex
[j
]=-0.5*ONE_4PI_EPS0
/
349 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
354 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
356 m
=idef
->il
[F_GB12
].iatoms
[j
];
357 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
358 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
360 r
=idef
->iparams
[m
].gb
.st
;
366 gp
[ia
]+=STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
367 gp
[ib
]+=STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
371 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
372 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
373 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
374 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
379 for(j
=0;j
<idef
->il
[F_GB13
].nr
;j
+=3)
381 m
=idef
->il
[F_GB13
].iatoms
[j
];
382 ia
=idef
->il
[F_GB13
].iatoms
[j
+1];
383 ib
=idef
->il
[F_GB13
].iatoms
[j
+2];
385 r
=idef
->iparams
[m
].gb
.st
;
390 gp
[ia
]+=STILL_P3
*born
->vsolv
[ib
]/r4
;
391 gp
[ib
]+=STILL_P3
*born
->vsolv
[ia
]/r4
;
395 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
396 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
397 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
398 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
404 gmx_sum(natoms
,gp
,cr
);
406 for(i
=0;i
<natoms
;i
++)
408 born
->gpol_globalindex
[i
]=born
->gpol_globalindex
[i
]+gp
[i
];
420 #define LOG_TABLE_ACCURACY 15 /* Accuracy of the table logarithm */
423 /* Initialize all GB datastructs and compute polarization energies */
424 int init_gb(gmx_genborn_t
**p_born
,
425 const t_commrec
*cr
, t_forcerec
*fr
, const t_inputrec
*ir
,
426 const gmx_mtop_t
*mtop
, real rgbradii
, int gb_algorithm
)
428 int i
,j
,m
,ai
,aj
,jj
,natoms
,nalloc
;
429 real rai
,sk
,p
,doffset
;
433 gmx_localtop_t
*localtop
;
435 natoms
= mtop
->natoms
;
437 atoms
= gmx_mtop_global_atoms(mtop
);
438 localtop
= gmx_mtop_generate_local_top(mtop
,ir
);
443 born
->nr
= fr
->natoms_force
;
446 snew(born
->drobc
, natoms
);
447 snew(born
->bRad
, natoms
);
449 /* Allocate memory for the global data arrays */
450 snew(born
->param_globalindex
, natoms
+3);
451 snew(born
->gpol_globalindex
, natoms
+3);
452 snew(born
->vsolv_globalindex
, natoms
+3);
453 snew(born
->gb_radius_globalindex
, natoms
+3);
454 snew(born
->use_globalindex
, natoms
+3);
456 snew(fr
->invsqrta
, natoms
);
457 snew(fr
->dvda
, natoms
);
460 fr
->dadx_rawptr
= NULL
;
462 born
->gpol_still_work
= NULL
;
463 born
->gpol_hct_work
= NULL
;
465 /* snew(born->asurf,natoms); */
466 /* snew(born->dasurf,natoms); */
468 /* Initialize the gb neighbourlist */
469 init_gb_nblist(natoms
,&(fr
->gblist
));
471 /* Do the Vsites exclusions (if any) */
472 for(i
=0;i
<natoms
;i
++)
474 jj
= atoms
.atom
[i
].type
;
475 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
477 born
->use_globalindex
[i
] = 1;
481 born
->use_globalindex
[i
] = 0;
484 /* If we have a Vsite, put vs_globalindex[i]=0 */
485 if (C6 (fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
486 C12(fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
487 atoms
.atom
[i
].q
== 0)
489 born
->use_globalindex
[i
]=0;
493 /* Copy algorithm parameters from inputrecord to local structure */
494 born
->obc_alpha
= ir
->gb_obc_alpha
;
495 born
->obc_beta
= ir
->gb_obc_beta
;
496 born
->obc_gamma
= ir
->gb_obc_gamma
;
497 born
->gb_doffset
= ir
->gb_dielectric_offset
;
498 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
499 born
->epsilon_r
= ir
->epsilon_r
;
501 doffset
= born
->gb_doffset
;
503 /* If Still model, initialise the polarisation energies */
504 if(gb_algorithm
==egbSTILL
)
506 init_gb_still(cr
, fr
,&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
511 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
512 else if(gb_algorithm
==egbHCT
|| gb_algorithm
==egbOBC
)
515 snew(born
->gpol_hct_work
, natoms
+3);
517 for(i
=0;i
<natoms
;i
++)
519 if(born
->use_globalindex
[i
]==1)
521 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
522 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
523 born
->param_globalindex
[i
] = sk
;
524 born
->gb_radius_globalindex
[i
] = rai
;
528 born
->param_globalindex
[i
] = 0;
529 born
->gb_radius_globalindex
[i
] = 0;
534 /* Init the logarithm table */
535 p
=pow(2,LOG_TABLE_ACCURACY
);
536 snew(born
->log_table
, p
);
538 fill_log_table(LOG_TABLE_ACCURACY
, born
->log_table
);
540 /* Allocate memory for work arrays for temporary use */
541 snew(born
->work
,natoms
+4);
542 snew(born
->count
,natoms
);
543 snew(born
->nblist_work
,natoms
);
545 /* Domain decomposition specific stuff */
548 snew(born
->dd_work
,natoms
);
549 born
->nlocal
= cr
->dd
->nat_tot
; /* cr->dd->nat_tot will be zero here */
558 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
559 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
560 gmx_genborn_t
*born
,t_mdatoms
*md
)
562 int i
,k
,n
,nj0
,nj1
,ai
,aj
,type
;
565 real gpi
,dr
,dr2
,dr4
,idr4
,rvdw
,ratio
,ccf
,theta
,term
,rai
,raj
;
566 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
567 real rinv
,idr2
,idr6
,vaj
,dccf
,cosq
,sinq
,prod
,gpi2
;
569 real vai
, prod_ai
, icf4
,icf6
;
571 factor
= 0.5*ONE_4PI_EPS0
;
574 for(i
=0;i
<born
->nr
;i
++)
576 born
->gpol_still_work
[i
]=0;
579 for(i
=0;i
<nl
->nri
;i
++ )
584 nj1
= nl
->jindex
[i
+1];
586 /* Load shifts for this list */
587 shift
= nl
->shift
[i
];
588 shX
= fr
->shift_vec
[shift
][0];
589 shY
= fr
->shift_vec
[shift
][1];
590 shZ
= fr
->shift_vec
[shift
][2];
594 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
595 vai
= born
->vsolv
[ai
];
596 prod_ai
= STILL_P4
*vai
;
598 /* Load atom i coordinates, add shift vectors */
599 ix1
= shX
+ x
[ai
][0];
600 iy1
= shY
+ x
[ai
][1];
601 iz1
= shZ
+ x
[ai
][2];
614 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
615 rinv
= gmx_invsqrt(dr2
);
620 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
624 ratio
= dr2
/ (rvdw
* rvdw
);
625 vaj
= born
->vsolv
[aj
];
627 if(ratio
>STILL_P5INV
)
634 theta
= ratio
*STILL_PIP5
;
636 term
= 0.5*(1.0-cosq
);
638 sinq
= 1.0 - cosq
*cosq
;
639 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
644 icf6
= (4*ccf
-dccf
)*idr6
;
646 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
649 /* Save ai->aj and aj->ai chain rule terms */
650 fr
->dadx
[n
++] = prod
*icf6
;
651 fr
->dadx
[n
++] = prod_ai
*icf6
;
653 born
->gpol_still_work
[ai
]+=gpi
;
656 /* Parallel summations */
659 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
661 else if(DOMAINDECOMP(cr
))
663 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
666 /* Calculate the radii */
667 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
669 if(born
->use
[i
] != 0)
672 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
674 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
675 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
679 /* Extra communication required for DD */
682 dd_atom_spread_real(cr
->dd
, born
->bRad
);
683 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
692 calc_gb_rad_hct(t_commrec
*cr
,t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
693 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
694 gmx_genborn_t
*born
,t_mdatoms
*md
)
696 int i
,k
,n
,ai
,aj
,nj0
,nj1
,at0
,at1
;
699 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk_ai
,sk2
,sk2_ai
,lij
,uij
,diff2
,tmp
,sum_ai
;
700 real rad
,min_rad
,rinv
,rai_inv
;
701 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
702 real lij2
, uij2
, lij3
, uij3
, t1
,t2
,t3
;
703 real lij_inv
,dlij
,duij
,sk2_rinv
,prod
,log_term
;
704 real doffset
,raj_inv
,dadx_val
;
707 doffset
= born
->gb_doffset
;
708 gb_radius
= born
->gb_radius
;
710 for(i
=0;i
<born
->nr
;i
++)
712 born
->gpol_hct_work
[i
] = 0;
715 /* Keep the compiler happy */
719 for(i
=0;i
<nl
->nri
;i
++)
723 nj0
= nl
->jindex
[ai
];
724 nj1
= nl
->jindex
[ai
+1];
726 /* Load shifts for this list */
727 shift
= nl
->shift
[i
];
728 shX
= fr
->shift_vec
[shift
][0];
729 shY
= fr
->shift_vec
[shift
][1];
730 shZ
= fr
->shift_vec
[shift
][2];
735 sk_ai
= born
->param
[ai
];
736 sk2_ai
= sk_ai
*sk_ai
;
738 /* Load atom i coordinates, add shift vectors */
739 ix1
= shX
+ x
[ai
][0];
740 iy1
= shY
+ x
[ai
][1];
741 iz1
= shZ
+ x
[ai
][2];
757 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
758 rinv
= gmx_invsqrt(dr2
);
761 sk
= born
->param
[aj
];
764 /* aj -> ai interaction */
785 lij_inv
= gmx_invsqrt(lij2
);
788 prod
= 0.25*sk2_rinv
;
790 /* log_term = table_log(uij*lij_inv,born->log_table,
791 LOG_TABLE_ACCURACY); */
792 log_term
= log(uij
*lij_inv
);
794 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
799 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
802 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
803 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
804 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
806 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
807 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
808 /* rb2 is moved to chainrule */
816 fr
->dadx
[n
++] = dadx_val
;
819 /* ai -> aj interaction */
822 lij
= 1.0/(dr
-sk_ai
);
835 uij
= 1.0/(dr
+sk_ai
);
841 lij_inv
= gmx_invsqrt(lij2
);
842 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
844 prod
= 0.25 * sk2_rinv
;
846 /* log_term = table_log(uij*lij_inv,born->log_table,
847 LOG_TABLE_ACCURACY); */
848 log_term
= log(uij
*lij_inv
);
850 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
855 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
859 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
860 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
861 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
863 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
864 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
866 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
872 fr
->dadx
[n
++] = dadx_val
;
875 born
->gpol_hct_work
[ai
] += sum_ai
;
878 /* Parallel summations */
881 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
883 else if(DOMAINDECOMP(cr
))
885 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
888 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
890 if(born
->use
[i
] != 0)
892 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
893 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
894 min_rad
= rai
+ doffset
;
897 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
898 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
902 /* Extra communication required for DD */
905 dd_atom_spread_real(cr
->dd
, born
->bRad
);
906 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
914 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_localtop_t
*top
,
915 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
)
917 int i
,k
,ai
,aj
,nj0
,nj1
,n
,at0
,at1
;
920 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk2
,lij
,uij
,diff2
,tmp
,sum_ai
;
921 real rad
, min_rad
,sum_ai2
,sum_ai3
,tsum
,tchain
,rinv
,rai_inv
,lij_inv
,rai_inv2
;
922 real log_term
,prod
,sk2_rinv
,sk_ai
,sk2_ai
;
923 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
924 real lij2
,uij2
,lij3
,uij3
,dlij
,duij
,t1
,t2
,t3
;
925 real doffset
,raj_inv
,dadx_val
;
928 /* Keep the compiler happy */
933 doffset
= born
->gb_doffset
;
934 gb_radius
= born
->gb_radius
;
936 for(i
=0;i
<born
->nr
;i
++)
938 born
->gpol_hct_work
[i
] = 0;
941 for(i
=0;i
<nl
->nri
;i
++)
946 nj1
= nl
->jindex
[i
+1];
948 /* Load shifts for this list */
949 shift
= nl
->shift
[i
];
950 shX
= fr
->shift_vec
[shift
][0];
951 shY
= fr
->shift_vec
[shift
][1];
952 shZ
= fr
->shift_vec
[shift
][2];
957 sk_ai
= born
->param
[ai
];
958 sk2_ai
= sk_ai
*sk_ai
;
960 /* Load atom i coordinates, add shift vectors */
961 ix1
= shX
+ x
[ai
][0];
962 iy1
= shY
+ x
[ai
][1];
963 iz1
= shZ
+ x
[ai
][2];
979 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
980 rinv
= gmx_invsqrt(dr2
);
983 /* sk is precalculated in init_gb() */
984 sk
= born
->param
[aj
];
987 /* aj -> ai interaction */
1007 lij_inv
= gmx_invsqrt(lij2
);
1009 sk2_rinv
= sk2
*rinv
;
1010 prod
= 0.25*sk2_rinv
;
1012 log_term
= log(uij
*lij_inv
);
1014 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1015 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1019 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
1023 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1024 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1025 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1027 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1035 fr
->dadx
[n
++] = dadx_val
;
1037 /* ai -> aj interaction */
1038 if(raj
< dr
+ sk_ai
)
1040 lij
= 1.0/(dr
-sk_ai
);
1053 uij
= 1.0/(dr
+sk_ai
);
1059 lij_inv
= gmx_invsqrt(lij2
);
1060 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
1061 sk2_rinv
= sk2
*rinv
;
1062 prod
= 0.25 * sk2_rinv
;
1064 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1065 log_term
= log(uij
*lij_inv
);
1067 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1071 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
1074 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1075 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1076 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1078 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1080 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
1087 fr
->dadx
[n
++] = dadx_val
;
1090 born
->gpol_hct_work
[ai
] += sum_ai
;
1094 /* Parallel summations */
1097 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
1099 else if(DOMAINDECOMP(cr
))
1101 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
1104 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
1106 if(born
->use
[i
] != 0)
1108 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
1112 sum_ai
= rai
* born
->gpol_hct_work
[i
];
1113 sum_ai2
= sum_ai
* sum_ai
;
1114 sum_ai3
= sum_ai2
* sum_ai
;
1116 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
1117 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
1118 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
1120 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
1122 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
1123 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
1127 /* Extra (local) communication required for DD */
1128 if(DOMAINDECOMP(cr
))
1130 dd_atom_spread_real(cr
->dd
, born
->bRad
);
1131 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
1132 dd_atom_spread_real(cr
->dd
, born
->drobc
);
1141 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
,gmx_localtop_t
*top
,
1142 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
,t_nrnb
*nrnb
)
1148 if(fr
->bAllvsAll
&& fr
->dadx
==NULL
)
1150 /* We might need up to 8 atoms of padding before and after,
1151 * and another 4 units to guarantee SSE alignment.
1153 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
1154 snew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1155 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1159 /* In the SSE-enabled gb-loops, when writing to dadx, we
1160 * always write 2*4 elements at a time, even in the case with only
1161 * 1-3 j particles, where we only really need to write 2*(1-3)
1162 * elements. This is because we want dadx to be aligned to a 16-
1163 * byte boundary, and being able to use _mm_store/load_ps
1165 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
1167 /* First, reallocate the dadx array, we need 3 extra for SSE */
1168 if (ndadx
+ 3 > fr
->nalloc_dadx
)
1170 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
1171 srenew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1172 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1179 cnt
= md
->homenr
*(md
->nr
/2+1);
1181 if(ir
->gb_algorithm
==egbSTILL
)
1183 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1184 genborn_allvsall_calc_still_radii_sse2_single(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1186 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1188 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_STILL
,cnt
);
1190 else if(ir
->gb_algorithm
==egbHCT
|| ir
->gb_algorithm
==egbOBC
)
1192 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1193 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1195 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1197 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_HCT_OBC
,cnt
);
1201 gmx_fatal(FARGS
,"Bad gb algorithm for all-vs-all interactions");
1203 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1209 /* Switch for determining which algorithm to use for Born radii calculation */
1212 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1213 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1214 switch(ir
->gb_algorithm
)
1217 calc_gb_rad_still_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1220 calc_gb_rad_hct_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1223 calc_gb_rad_obc_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1227 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1230 switch(ir
->gb_algorithm
)
1233 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1236 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1239 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1243 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1250 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ) )
1251 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1252 switch(ir
->gb_algorithm
)
1255 calc_gb_rad_still_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1259 calc_gb_rad_hct_obc_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1263 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1267 switch(ir
->gb_algorithm
)
1270 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1273 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1276 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1280 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1283 #endif /* Single precision sse */
1285 #endif /* Double or single precision */
1287 if(fr
->bAllvsAll
==FALSE
)
1289 switch(ir
->gb_algorithm
)
1292 inc_nrnb(nrnb
,eNR_BORN_RADII_STILL
,nl
->nrj
);
1296 inc_nrnb(nrnb
,eNR_BORN_RADII_HCT_OBC
,nl
->nrj
);
1302 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,nl
->nri
);
1310 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1311 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
, real epsilon_r
,
1312 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1314 int i
,j
,n0
,m
,nnn
,type
,ai
,aj
;
1320 real isaprod
,qq
,gbscale
,gbtabscale
,Y
,F
,Geps
,Heps2
,Fp
,VV
,FF
,rt
,eps
,eps2
;
1321 real vgb
,fgb
,vcoul
,fijC
,dvdatmp
,fscal
,dvdaj
;
1327 t_iatom
*forceatoms
;
1329 /* Scale the electrostatics by gb_epsilon_solvent */
1330 facel
= facel
* ((1.0/epsilon_r
) - 1.0/gb_epsilon_solvent
);
1332 gbtabscale
=*p_gbtabscale
;
1335 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1337 forceatoms
= idef
->il
[j
].iatoms
;
1339 for(i
=0;i
<idef
->il
[j
].nr
; )
1341 /* To avoid reading in the interaction type, we just increment i to pass over
1342 * the types in the forceatoms array, this saves some memory accesses
1345 ai
= forceatoms
[i
++];
1346 aj
= forceatoms
[i
++];
1348 ki
= pbc_rvec_sub(pbc
,x
[ai
],x
[aj
],dx
);
1349 rsq11
= iprod(dx
,dx
);
1351 isai
= invsqrta
[ai
];
1352 iq
= (-1)*facel
*charge
[ai
];
1354 rinv11
= gmx_invsqrt(rsq11
);
1355 isaj
= invsqrta
[aj
];
1356 isaprod
= isai
*isaj
;
1357 qq
= isaprod
*iq
*charge
[aj
];
1358 gbscale
= isaprod
*gbtabscale
;
1367 Geps
= eps
*GBtab
[nnn
+2];
1368 Heps2
= eps2
*GBtab
[nnn
+3];
1371 FF
= Fp
+Geps
+2.0*Heps2
;
1373 fijC
= qq
*FF
*gbscale
;
1374 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1375 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1376 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1377 vctot
= vctot
+ vgb
;
1378 fgb
= -(fijC
)*rinv11
;
1381 ivec_sub(SHIFT_IVEC(graph
,ai
),SHIFT_IVEC(graph
,aj
),dt
);
1385 for (m
=0; (m
<DIM
); m
++) { /* 15 */
1389 fshift
[ki
][m
]+=fscal
;
1390 fshift
[CENTRAL
][m
]-=fscal
;
1398 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1399 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, t_mdatoms
*md
, double facel
)
1402 real rai
,e
,derb
,q
,q2
,fi
,rai_inv
,vtot
;
1406 pd_at_range(cr
,&at0
,&at1
);
1408 else if(DOMAINDECOMP(cr
))
1411 at1
=cr
->dd
->nat_home
;
1420 /* Scale the electrostatics by gb_epsilon_solvent */
1421 facel
= facel
* ((1.0/born
->epsilon_r
) - 1.0/born
->gb_epsilon_solvent
);
1425 /* Apply self corrections */
1426 for(i
=at0
;i
<at1
;i
++)
1430 if(born
->use
[ai
]==1)
1432 rai
= born
->bRad
[ai
];
1438 derb
= 0.5*e
*rai_inv
*rai_inv
;
1439 dvda
[ai
] += derb
*rai
;
1448 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
,int natoms
,gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1449 const t_atomtypes
*atype
, real
*dvda
,int gb_algorithm
, t_mdatoms
*md
)
1452 real e
,es
,rai
,rbi
,term
,probe
,tmp
,factor
;
1453 real rbi_inv
,rbi_inv2
;
1455 /* To keep the compiler happy */
1460 pd_at_range(cr
,&at0
,&at1
);
1462 else if(DOMAINDECOMP(cr
))
1465 at1
= cr
->dd
->nat_home
;
1473 /* The surface area factor is 0.0049 for Still model, 0.0054 for HCT/OBC */
1474 if(gb_algorithm
==egbSTILL
)
1476 factor
=0.0049*100*CAL2JOULE
;
1480 factor
=0.0054*100*CAL2JOULE
;
1483 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1489 for(i
=at0
;i
<at1
;i
++)
1493 if(born
->use
[ai
]==1)
1495 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1496 rbi_inv
= fr
->invsqrta
[ai
];
1497 rbi_inv2
= rbi_inv
* rbi_inv
;
1498 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1500 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1501 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1511 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1512 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
, t_mdatoms
*md
)
1514 int i
,k
,n
,ai
,aj
,nj0
,nj1
,n0
,n1
;
1517 real fgb
,fij
,rb2
,rbi
,fix1
,fiy1
,fiz1
;
1518 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
,rsq11
;
1519 real rinv11
,tx
,ty
,tz
,rbai
,rbaj
,fgb_ai
;
1528 n1
= md
->start
+md
->homenr
+1+natoms
/2;
1530 if(gb_algorithm
==egbSTILL
)
1535 rbi
= born
->bRad
[k
];
1536 rb
[k
] = (2 * rbi
* rbi
* dvda
[k
])/ONE_4PI_EPS0
;
1539 else if(gb_algorithm
==egbHCT
)
1544 rbi
= born
->bRad
[k
];
1545 rb
[k
] = rbi
* rbi
* dvda
[k
];
1548 else if(gb_algorithm
==egbOBC
)
1553 rbi
= born
->bRad
[k
];
1554 rb
[k
] = rbi
* rbi
* born
->drobc
[k
] * dvda
[k
];
1558 for(i
=0;i
<nl
->nri
;i
++)
1562 nj0
= nl
->jindex
[ai
];
1563 nj1
= nl
->jindex
[ai
+1];
1565 /* Load shifts for this list */
1566 shift
= nl
->shift
[i
];
1567 shX
= shift_vec
[shift
][0];
1568 shY
= shift_vec
[shift
][1];
1569 shZ
= shift_vec
[shift
][2];
1571 /* Load atom i coordinates, add shift vectors */
1572 ix1
= shX
+ x
[ai
][0];
1573 iy1
= shY
+ x
[ai
][1];
1574 iz1
= shZ
+ x
[ai
][2];
1582 for(k
=nj0
;k
<nj1
;k
++)
1596 fgb
= rbai
*dadx
[n
++];
1597 fgb_ai
= rbaj
*dadx
[n
++];
1599 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1610 /* Update force on atom aj */
1611 t
[aj
][0] = t
[aj
][0] - tx
;
1612 t
[aj
][1] = t
[aj
][1] - ty
;
1613 t
[aj
][2] = t
[aj
][2] - tz
;
1616 /* Update force and shift forces on atom ai */
1617 t
[ai
][0] = t
[ai
][0] + fix1
;
1618 t
[ai
][1] = t
[ai
][1] + fiy1
;
1619 t
[ai
][2] = t
[ai
][2] + fiz1
;
1621 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1622 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1623 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1631 real
calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
, const t_atomtypes
*atype
,
1632 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, t_nrnb
*nrnb
, bool bRad
,
1633 const t_pbc
*pbc
, const t_graph
*graph
)
1639 const t_pbc
*pbc_null
;
1648 /* Do a simple ACE type approximation for the non-polar solvation */
1649 v
+= calc_gb_nonpolar(cr
, fr
,born
->nr
, born
, top
, atype
, fr
->dvda
, gb_algorithm
,md
);
1651 /* Calculate the bonded GB-interactions using either table or analytical formula */
1653 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1654 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1656 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) ) /*
1657 v += gb_bonds_analytic(x[0],f[0],md->chargeA,born->bRad,fr->dvda,idef,born->epsilon_r,born->gb_epsilon_solvent,fr->epsfac);
1659 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1660 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1663 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1664 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->epsilon_r
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1668 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1669 v
+= calc_gb_selfcorrections(cr
,born
->nr
,md
->chargeA
, born
, fr
->dvda
, md
, fr
->epsfac
);
1671 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1674 gmx_sum(md
->nr
,fr
->dvda
, cr
);
1676 else if(DOMAINDECOMP(cr
))
1678 dd_atom_sum_real(cr
->dd
,fr
->dvda
);
1679 dd_atom_spread_real(cr
->dd
,fr
->dvda
);
1685 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1686 genborn_allvsall_calc_chainrule_sse2_single(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1688 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1690 cnt
= md
->homenr
*(md
->nr
/2+1);
1691 inc_nrnb(nrnb
,eNR_BORN_AVA_CHAINRULE
,cnt
);
1692 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1699 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1700 calc_gb_chainrule_sse2_double(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1701 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1702 gb_algorithm
, born
);
1704 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1705 x
, f
, fr
->fshift
, fr
->shift_vec
,
1706 gb_algorithm
, born
, md
);
1711 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ))
1712 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1713 calc_gb_chainrule_sse(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1714 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1715 gb_algorithm
, born
);
1717 /* Calculate the forces due to chain rule terms with non sse code */
1718 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1719 x
, f
, fr
->fshift
, fr
->shift_vec
,
1720 gb_algorithm
, born
, md
);
1726 inc_nrnb(nrnb
,eNR_BORN_CHAINRULE
,fr
->gblist
.nrj
);
1727 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,fr
->gblist
.nri
);
1735 static void add_j_to_gblist(gbtmpnbl_t
*list
,int aj
)
1737 if (list
->naj
>= list
->aj_nalloc
)
1739 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1740 srenew(list
->aj
,list
->aj_nalloc
);
1743 list
->aj
[list
->naj
++] = aj
;
1746 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
,int shift
)
1750 /* Search the list with the same shift, if there is one */
1752 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1756 if (ind
== lists
->nlist
)
1758 if (lists
->nlist
== lists
->list_nalloc
)
1760 lists
->list_nalloc
++;
1761 srenew(lists
->list
,lists
->list_nalloc
);
1762 for(i
=lists
->nlist
; i
<lists
->list_nalloc
; i
++)
1764 lists
->list
[i
].aj
= NULL
;
1765 lists
->list
[i
].aj_nalloc
= 0;
1770 lists
->list
[lists
->nlist
].shift
= shift
;
1771 lists
->list
[lists
->nlist
].naj
= 0;
1775 return &lists
->list
[ind
];
1778 static void add_bondeds_to_gblist(t_ilist
*il
,
1779 bool bMolPBC
,t_pbc
*pbc
,t_graph
*g
,rvec
*x
,
1780 struct gbtmpnbls
*nls
)
1782 int ind
,j
,ai
,aj
,shift
,found
;
1788 for(ind
=0; ind
<il
->nr
; ind
+=3)
1790 ai
= il
->iatoms
[ind
+1];
1791 aj
= il
->iatoms
[ind
+2];
1796 rvec_sub(x
[ai
],x
[aj
],dx
);
1797 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
1798 shift
= IVEC2IS(dt
);
1802 shift
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
1805 /* Find the list for this shift or create one */
1806 list
= find_gbtmplist(&nls
[ai
],shift
);
1810 /* So that we do not add the same bond twice.
1811 * This happens with some constraints between 1-3 atoms
1812 * that are in the bond-list but should not be in the GB nb-list */
1813 for(j
=0;j
<list
->naj
;j
++)
1815 if (list
->aj
[j
] == aj
)
1825 gmx_incons("ai == aj");
1828 add_j_to_gblist(list
,aj
);
1834 compare_int (const void * a
, const void * b
)
1836 return ( *(int*)a
- *(int*)b
);
1841 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
, real gbcut
,
1842 rvec x
[], matrix box
,
1843 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1845 int i
,l
,ii
,j
,k
,n
,nj0
,nj1
,ai
,aj
,at0
,at1
,found
,shift
,s
;
1850 struct gbtmpnbls
*nls
;
1851 gbtmpnbl_t
*list
=NULL
;
1853 nls
= born
->nblist_work
;
1855 for(i
=0;i
<born
->nr
;i
++)
1862 set_pbc_dd(&pbc
,fr
->ePBC
,cr
->dd
,TRUE
,box
);
1865 switch (gb_algorithm
)
1869 /* Loop over 1-2, 1-3 and 1-4 interactions */
1870 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1872 add_bondeds_to_gblist(&idef
->il
[j
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1876 /* Loop over 1-4 interactions */
1877 add_bondeds_to_gblist(&idef
->il
[F_GB14
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1880 gmx_incons("Unknown GB algorithm");
1883 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1884 for(n
=0; (n
<fr
->nnblists
); n
++)
1886 for(i
=0; (i
<eNL_NR
); i
++)
1888 nblist
=&(fr
->nblists
[n
].nlist_sr
[i
]);
1890 if (nblist
->nri
> 0 && (i
==eNL_VDWQQ
|| i
==eNL_QQ
))
1892 for(j
=0;j
<nblist
->nri
;j
++)
1894 ai
= nblist
->iinr
[j
];
1895 shift
= nblist
->shift
[j
];
1897 /* Find the list for this shift or create one */
1898 list
= find_gbtmplist(&nls
[ai
],shift
);
1900 nj0
= nblist
->jindex
[j
];
1901 nj1
= nblist
->jindex
[j
+1];
1903 /* Add all the j-atoms in the non-bonded list to the GB list */
1904 for(k
=nj0
;k
<nj1
;k
++)
1906 add_j_to_gblist(list
,nblist
->jjnr
[k
]);
1913 /* Zero out some counters */
1917 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1919 for(i
=0;i
<fr
->natoms_force
;i
++)
1921 for(s
=0; s
<nls
[i
].nlist
; s
++)
1923 list
= &nls
[i
].list
[s
];
1925 /* Only add those atoms that actually have neighbours */
1926 if (born
->use
[i
] != 0)
1928 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1929 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1932 for(k
=0; k
<list
->naj
; k
++)
1934 /* Memory allocation for jjnr */
1935 if(fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1937 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1941 fprintf(debug
,"Increasing GB neighbourlist j size to %d\n",fr
->gblist
.maxnrj
);
1944 srenew(fr
->gblist
.jjnr
,fr
->gblist
.maxnrj
);
1948 if(i
== list
->aj
[k
])
1950 gmx_incons("i == list->aj[k]");
1952 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1955 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
1962 for(i
=0;i
<fr
->gblist
.nri
;i
++)
1964 nj0
= fr
->gblist
.jindex
[i
];
1965 nj1
= fr
->gblist
.jindex
[i
+1];
1966 ai
= fr
->gblist
.iinr
[i
];
1969 for(j
=nj0
;j
<nj1
;j
++)
1971 if(fr
->gblist
.jjnr
[j
]<ai
)
1972 fr
->gblist
.jjnr
[j
]+=fr
->natoms_force
;
1974 qsort(fr
->gblist
.jjnr
+nj0
,nj1
-nj0
,sizeof(int),compare_int
);
1976 for(j
=nj0
;j
<nj1
;j
++)
1978 if(fr
->gblist
.jjnr
[j
]>=fr
->natoms_force
)
1979 fr
->gblist
.jjnr
[j
]-=fr
->natoms_force
;
1988 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
1991 gmx_domdec_t
*dd
=NULL
;
1993 if(DOMAINDECOMP(cr
))
2001 /* Single node or particle decomp (global==local), just copy pointers and return */
2002 if(gb_algorithm
==egbSTILL
)
2004 born
->gpol
= born
->gpol_globalindex
;
2005 born
->vsolv
= born
->vsolv_globalindex
;
2006 born
->gb_radius
= born
->gb_radius_globalindex
;
2010 born
->param
= born
->param_globalindex
;
2011 born
->gb_radius
= born
->gb_radius_globalindex
;
2014 born
->use
= born
->use_globalindex
;
2019 /* Reallocation of local arrays if necessary */
2020 if(born
->nlocal
< dd
->nat_tot
)
2022 born
->nlocal
= dd
->nat_tot
;
2024 /* Arrays specific to different gb algorithms */
2025 if(gb_algorithm
==egbSTILL
)
2027 srenew(born
->gpol
, born
->nlocal
+3);
2028 srenew(born
->vsolv
, born
->nlocal
+3);
2029 srenew(born
->gb_radius
, born
->nlocal
+3);
2033 srenew(born
->param
, born
->nlocal
+3);
2034 srenew(born
->gb_radius
, born
->nlocal
+3);
2037 /* All gb-algorithms use the array for vsites exclusions */
2038 srenew(born
->use
, born
->nlocal
+3);
2041 /* With dd, copy algorithm specific arrays */
2042 if(gb_algorithm
==egbSTILL
)
2044 for(i
=at0
;i
<at1
;i
++)
2046 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
2047 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
2048 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2049 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
2054 for(i
=at0
;i
<at1
;i
++)
2056 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
2057 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2058 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];