1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This source code is part of
8 * GROningen MAchine for Chemical Simulations
10 * Written by David van der Spoel, Erik Lindahl, Berk Hess, and others.
11 * Copyright (c) 1991-2000, University of Groningen, The Netherlands.
12 * Copyright (c) 2001-2008, The GROMACS development team,
13 * check out http://www.gromacs.org for more information.
15 * This program is free software; you can redistribute it and/or
16 * modify it under the terms of the GNU General Public License
17 * as published by the Free Software Foundation; either version 2
18 * of the License, or (at your option) any later version.
20 * If you want to redistribute modifications, please consider that
21 * scientific software is very special. Version control is crucial -
22 * bugs must be traceable. We will be happy to consider code for
23 * inclusion in the official distribution, but derived work must not
24 * be called official GROMACS. Details are found in the README & COPYING
25 * files - if they are missing, get the official version at www.gromacs.org.
27 * To help us fund GROMACS development, we humbly ask that you cite
28 * the papers on the package - you can find them in the top README file.
30 * For more info, check our website at http://www.gromacs.org
33 * Gallium Rubidium Oxygen Manganese Argon Carbon Silicon
54 #include "gmx_fatal.h"
55 #include "mtop_util.h"
67 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
69 #include "genborn_sse2_double.h"
71 #include "genborn_sse2_single.h"
72 #include "genborn_allvsall_sse2_single.h"
73 #endif /* GMX_DOUBLE */
76 #include "genborn_allvsall.h"
87 typedef struct gbtmpnbls
{
93 /* This function is exactly the same as the one in bondfree.c. The reason
94 * it is copied here is that the bonded gb-interactions are evaluated
95 * not in calc_bonds, but rather in calc_gb_forces
97 static int pbc_rvec_sub(const t_pbc
*pbc
,const rvec xi
,const rvec xj
,rvec dx
)
100 return pbc_dx_aiuc(pbc
,xi
,xj
,dx
);
108 int init_gb_nblist(int natoms
, t_nblist
*nl
)
110 nl
->maxnri
= natoms
*4;
120 /*nl->nltype = nltype;*/
122 srenew(nl
->iinr
, nl
->maxnri
);
123 srenew(nl
->gid
, nl
->maxnri
);
124 srenew(nl
->shift
, nl
->maxnri
);
125 srenew(nl
->jindex
, nl
->maxnri
+1);
132 int print_nblist(int natoms
, t_nblist
*nl
)
134 int i
,k
,ai
,aj
,nj0
,nj1
;
136 printf("genborn.c: print_nblist, natoms=%d\n",nl
->nri
);
138 for(i
=0;i
<nl
->nri
;i
++)
147 printf("ai=%d, aj=%d\n",ai
,aj
);
159 void fill_log_table(const int n
, real
*table
)
165 int incr
= 1 << (23-n
);
168 logfactor
= 1.0/log(2.0);
170 log_table
.exp
= 0x3F800000;
174 /* log2(numlog)=log(numlog)/log(2.0) */
175 table
[i
]=log(log_table
.numlog
)*logfactor
;
181 real
table_log(real val
, const real
*table
, const int n
)
183 int *const exp_ptr
= ((int*)&val
);
185 const int log_2
= ((x
>>23) & 255) - 127;
189 return ((val
+log_2
)*0.69314718);
192 void gb_pd_send(t_commrec
*cr
, real
*send_data
, int nr
)
196 int *index
,*sendc
,*disp
;
198 snew(sendc
,cr
->nnodes
);
199 snew(disp
,cr
->nnodes
);
201 index
= pd_index(cr
);
204 /* Setup count/index arrays */
205 for(i
=0;i
<cr
->nnodes
;i
++)
207 sendc
[i
] = index
[i
+1]-index
[i
];
211 /* Do communication */
212 MPI_Gatherv(send_data
+index
[cur
],sendc
[cur
],GMX_MPI_REAL
,send_data
,sendc
,
213 disp
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
214 MPI_Bcast(send_data
,nr
,GMX_MPI_REAL
,0,cr
->mpi_comm_mygroup
);
220 int init_gb_plist(t_params
*p_list
)
223 p_list
->param
= NULL
;
230 int init_gb_still(const t_commrec
*cr
, t_forcerec
*fr
,
231 const t_atomtypes
*atype
, t_idef
*idef
, t_atoms
*atoms
,
232 gmx_genborn_t
*born
,int natoms
)
235 int i
,j
,i1
,i2
,k
,m
,nbond
,nang
,ia
,ib
,ic
,id
,nb
,idx
,idx2
,at
;
239 real r
,ri
,rj
,ri2
,ri3
,rj2
,r2
,r3
,r4
,rk
,ratio
,term
,h
,doffset
;
240 real p1
,p2
,p3
,factor
,cosine
,rab
,rbc
;
247 snew(born
->gpol_still_work
,natoms
+3);
253 pd_at_range(cr
,&at0
,&at1
);
255 for(i
=0;i
<natoms
;i
++)
272 doffset
= born
->gb_doffset
;
274 for(i
=0;i
<natoms
;i
++)
276 born
->gpol_globalindex
[i
]=born
->vsolv_globalindex
[i
]=
277 born
->gb_radius_globalindex
[i
]=0;
280 /* Compute atomic solvation volumes for Still method */
281 for(i
=0;i
<natoms
;i
++)
283 ri
=atype
->gb_radius
[atoms
->atom
[i
].type
];
284 born
->gb_radius_globalindex
[i
] = ri
;
286 born
->vsolv_globalindex
[i
]=(4*M_PI
/3)*r3
;
289 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
291 m
=idef
->il
[F_GB12
].iatoms
[j
];
292 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
293 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
295 r
=1.01*idef
->iparams
[m
].gb
.st
;
297 ri
= atype
->gb_radius
[atoms
->atom
[ia
].type
];
298 rj
= atype
->gb_radius
[atoms
->atom
[ib
].type
];
304 ratio
= (rj2
-ri2
-r
*r
)/(2*ri
*r
);
306 term
= (M_PI
/3.0)*h
*h
*(3.0*ri
-h
);
314 born
->vsolv_globalindex
[ia
] -= term
;
317 ratio
= (ri2
-rj2
-r
*r
)/(2*rj
*r
);
319 term
= (M_PI
/3.0)*h
*h
*(3.0*rj
-h
);
327 born
->vsolv_globalindex
[ib
] -= term
;
333 gmx_sum(natoms
,vsol
,cr
);
335 for(i
=0;i
<natoms
;i
++)
337 born
->vsolv_globalindex
[i
]=born
->vsolv_globalindex
[i
]-vsol
[i
];
341 /* Get the self-, 1-2 and 1-3 polarization energies for analytical Still
344 for(j
=0;j
<natoms
;j
++)
346 if(born
->use_globalindex
[j
]==1)
348 born
->gpol_globalindex
[j
]=-0.5*ONE_4PI_EPS0
/
349 (atype
->gb_radius
[atoms
->atom
[j
].type
]-doffset
+STILL_P1
);
354 for(j
=0;j
<idef
->il
[F_GB12
].nr
;j
+=3)
356 m
=idef
->il
[F_GB12
].iatoms
[j
];
357 ia
=idef
->il
[F_GB12
].iatoms
[j
+1];
358 ib
=idef
->il
[F_GB12
].iatoms
[j
+2];
360 r
=idef
->iparams
[m
].gb
.st
;
366 gp
[ia
]+=STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
367 gp
[ib
]+=STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
371 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
372 STILL_P2
*born
->vsolv_globalindex
[ib
]/r4
;
373 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
374 STILL_P2
*born
->vsolv_globalindex
[ia
]/r4
;
379 for(j
=0;j
<idef
->il
[F_GB13
].nr
;j
+=3)
381 m
=idef
->il
[F_GB13
].iatoms
[j
];
382 ia
=idef
->il
[F_GB13
].iatoms
[j
+1];
383 ib
=idef
->il
[F_GB13
].iatoms
[j
+2];
385 r
=idef
->iparams
[m
].gb
.st
;
390 gp
[ia
]+=STILL_P3
*born
->vsolv
[ib
]/r4
;
391 gp
[ib
]+=STILL_P3
*born
->vsolv
[ia
]/r4
;
395 born
->gpol_globalindex
[ia
]=born
->gpol_globalindex
[ia
]+
396 STILL_P3
*born
->vsolv_globalindex
[ib
]/r4
;
397 born
->gpol_globalindex
[ib
]=born
->gpol_globalindex
[ib
]+
398 STILL_P3
*born
->vsolv_globalindex
[ia
]/r4
;
404 gmx_sum(natoms
,gp
,cr
);
406 for(i
=0;i
<natoms
;i
++)
408 born
->gpol_globalindex
[i
]=born
->gpol_globalindex
[i
]+gp
[i
];
420 #define LOG_TABLE_ACCURACY 15 /* Accuracy of the table logarithm */
423 /* Initialize all GB datastructs and compute polarization energies */
424 int init_gb(gmx_genborn_t
**p_born
,
425 const t_commrec
*cr
, t_forcerec
*fr
, const t_inputrec
*ir
,
426 const gmx_mtop_t
*mtop
, real rgbradii
, int gb_algorithm
)
428 int i
,j
,m
,ai
,aj
,jj
,natoms
,nalloc
;
429 real rai
,sk
,p
,doffset
;
433 gmx_localtop_t
*localtop
;
435 natoms
= mtop
->natoms
;
437 atoms
= gmx_mtop_global_atoms(mtop
);
438 localtop
= gmx_mtop_generate_local_top(mtop
,ir
);
443 born
->nr
= fr
->natoms_force
;
446 snew(born
->drobc
, natoms
);
447 snew(born
->bRad
, natoms
);
449 /* Allocate memory for the global data arrays */
450 snew(born
->param_globalindex
, natoms
+3);
451 snew(born
->gpol_globalindex
, natoms
+3);
452 snew(born
->vsolv_globalindex
, natoms
+3);
453 snew(born
->gb_radius_globalindex
, natoms
+3);
454 snew(born
->use_globalindex
, natoms
+3);
456 snew(fr
->invsqrta
, natoms
);
457 snew(fr
->dvda
, natoms
);
460 fr
->dadx_rawptr
= NULL
;
462 born
->gpol_still_work
= NULL
;
463 born
->gpol_hct_work
= NULL
;
465 /* snew(born->asurf,natoms); */
466 /* snew(born->dasurf,natoms); */
468 /* Initialize the gb neighbourlist */
469 init_gb_nblist(natoms
,&(fr
->gblist
));
471 /* Do the Vsites exclusions (if any) */
472 for(i
=0;i
<natoms
;i
++)
474 jj
= atoms
.atom
[i
].type
;
475 if (mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
] > 0)
477 born
->use_globalindex
[i
] = 1;
481 born
->use_globalindex
[i
] = 0;
484 /* If we have a Vsite, put vs_globalindex[i]=0 */
485 if (C6 (fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
486 C12(fr
->nbfp
,fr
->ntype
,jj
,jj
) == 0 &&
487 atoms
.atom
[i
].q
== 0)
489 born
->use_globalindex
[i
]=0;
493 /* Copy algorithm parameters from inputrecord to local structure */
494 born
->obc_alpha
= ir
->gb_obc_alpha
;
495 born
->obc_beta
= ir
->gb_obc_beta
;
496 born
->obc_gamma
= ir
->gb_obc_gamma
;
497 born
->gb_doffset
= ir
->gb_dielectric_offset
;
498 born
->gb_epsilon_solvent
= ir
->gb_epsilon_solvent
;
500 doffset
= born
->gb_doffset
;
502 /* If Still model, initialise the polarisation energies */
503 if(gb_algorithm
==egbSTILL
)
505 init_gb_still(cr
, fr
,&(mtop
->atomtypes
), &(localtop
->idef
), &atoms
,
510 /* If HCT/OBC, precalculate the sk*atype->S_hct factors */
511 else if(gb_algorithm
==egbHCT
|| gb_algorithm
==egbOBC
)
514 snew(born
->gpol_hct_work
, natoms
+3);
516 for(i
=0;i
<natoms
;i
++)
518 if(born
->use_globalindex
[i
]==1)
520 rai
= mtop
->atomtypes
.gb_radius
[atoms
.atom
[i
].type
]-doffset
;
521 sk
= rai
* mtop
->atomtypes
.S_hct
[atoms
.atom
[i
].type
];
522 born
->param_globalindex
[i
] = sk
;
523 born
->gb_radius_globalindex
[i
] = rai
;
527 born
->param_globalindex
[i
] = 0;
528 born
->gb_radius_globalindex
[i
] = 0;
533 /* Init the logarithm table */
534 p
=pow(2,LOG_TABLE_ACCURACY
);
535 snew(born
->log_table
, p
);
537 fill_log_table(LOG_TABLE_ACCURACY
, born
->log_table
);
539 /* Allocate memory for work arrays for temporary use */
540 snew(born
->work
,natoms
+4);
541 snew(born
->count
,natoms
);
542 snew(born
->nblist_work
,natoms
);
544 /* Domain decomposition specific stuff */
547 snew(born
->dd_work
,natoms
);
548 born
->nlocal
= cr
->dd
->nat_tot
; /* cr->dd->nat_tot will be zero here */
557 calc_gb_rad_still(t_commrec
*cr
, t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
558 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
559 gmx_genborn_t
*born
,t_mdatoms
*md
)
561 int i
,k
,n
,nj0
,nj1
,ai
,aj
,type
;
564 real gpi
,dr
,dr2
,dr4
,idr4
,rvdw
,ratio
,ccf
,theta
,term
,rai
,raj
;
565 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
566 real rinv
,idr2
,idr6
,vaj
,dccf
,cosq
,sinq
,prod
,gpi2
;
568 real vai
, prod_ai
, icf4
,icf6
;
570 factor
= 0.5*ONE_4PI_EPS0
;
573 for(i
=0;i
<born
->nr
;i
++)
575 born
->gpol_still_work
[i
]=0;
578 for(i
=0;i
<nl
->nri
;i
++ )
583 nj1
= nl
->jindex
[i
+1];
585 /* Load shifts for this list */
586 shift
= nl
->shift
[i
];
587 shX
= fr
->shift_vec
[shift
][0];
588 shY
= fr
->shift_vec
[shift
][1];
589 shZ
= fr
->shift_vec
[shift
][2];
593 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
594 vai
= born
->vsolv
[ai
];
595 prod_ai
= STILL_P4
*vai
;
597 /* Load atom i coordinates, add shift vectors */
598 ix1
= shX
+ x
[ai
][0];
599 iy1
= shY
+ x
[ai
][1];
600 iz1
= shZ
+ x
[ai
][2];
613 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
614 rinv
= gmx_invsqrt(dr2
);
619 raj
= top
->atomtypes
.gb_radius
[md
->typeA
[aj
]];
623 ratio
= dr2
/ (rvdw
* rvdw
);
624 vaj
= born
->vsolv
[aj
];
626 if(ratio
>STILL_P5INV
)
633 theta
= ratio
*STILL_PIP5
;
635 term
= 0.5*(1.0-cosq
);
637 sinq
= 1.0 - cosq
*cosq
;
638 dccf
= 2.0*term
*sinq
*gmx_invsqrt(sinq
)*theta
;
643 icf6
= (4*ccf
-dccf
)*idr6
;
645 born
->gpol_still_work
[aj
] += prod_ai
*icf4
;
648 /* Save ai->aj and aj->ai chain rule terms */
649 fr
->dadx
[n
++] = prod
*icf6
;
650 fr
->dadx
[n
++] = prod_ai
*icf6
;
652 born
->gpol_still_work
[ai
]+=gpi
;
655 /* Parallel summations */
658 gmx_sum(natoms
, born
->gpol_still_work
, cr
);
660 else if(DOMAINDECOMP(cr
))
662 dd_atom_sum_real(cr
->dd
, born
->gpol_still_work
);
665 /* Calculate the radii */
666 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
668 if(born
->use
[i
] != 0)
671 gpi
= born
->gpol
[i
]+born
->gpol_still_work
[i
];
673 born
->bRad
[i
] = factor
*gmx_invsqrt(gpi2
);
674 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
678 /* Extra communication required for DD */
681 dd_atom_spread_real(cr
->dd
, born
->bRad
);
682 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
691 calc_gb_rad_hct(t_commrec
*cr
,t_forcerec
*fr
,int natoms
, gmx_localtop_t
*top
,
692 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
,
693 gmx_genborn_t
*born
,t_mdatoms
*md
)
695 int i
,k
,n
,ai
,aj
,nj0
,nj1
,at0
,at1
;
698 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk_ai
,sk2
,sk2_ai
,lij
,uij
,diff2
,tmp
,sum_ai
;
699 real rad
,min_rad
,rinv
,rai_inv
;
700 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
701 real lij2
, uij2
, lij3
, uij3
, t1
,t2
,t3
;
702 real lij_inv
,dlij
,duij
,sk2_rinv
,prod
,log_term
;
703 real doffset
,raj_inv
,dadx_val
;
706 doffset
= born
->gb_doffset
;
707 gb_radius
= born
->gb_radius
;
709 for(i
=0;i
<born
->nr
;i
++)
711 born
->gpol_hct_work
[i
] = 0;
714 /* Keep the compiler happy */
718 for(i
=0;i
<nl
->nri
;i
++)
722 nj0
= nl
->jindex
[ai
];
723 nj1
= nl
->jindex
[ai
+1];
725 /* Load shifts for this list */
726 shift
= nl
->shift
[i
];
727 shX
= fr
->shift_vec
[shift
][0];
728 shY
= fr
->shift_vec
[shift
][1];
729 shZ
= fr
->shift_vec
[shift
][2];
734 sk_ai
= born
->param
[ai
];
735 sk2_ai
= sk_ai
*sk_ai
;
737 /* Load atom i coordinates, add shift vectors */
738 ix1
= shX
+ x
[ai
][0];
739 iy1
= shY
+ x
[ai
][1];
740 iz1
= shZ
+ x
[ai
][2];
756 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
757 rinv
= gmx_invsqrt(dr2
);
760 sk
= born
->param
[aj
];
763 /* aj -> ai interaction */
784 lij_inv
= gmx_invsqrt(lij2
);
787 prod
= 0.25*sk2_rinv
;
789 /* log_term = table_log(uij*lij_inv,born->log_table,
790 LOG_TABLE_ACCURACY); */
791 log_term
= log(uij
*lij_inv
);
793 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
798 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
801 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
802 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
803 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
805 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
806 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */
807 /* rb2 is moved to chainrule */
815 fr
->dadx
[n
++] = dadx_val
;
818 /* ai -> aj interaction */
821 lij
= 1.0/(dr
-sk_ai
);
834 uij
= 1.0/(dr
+sk_ai
);
840 lij_inv
= gmx_invsqrt(lij2
);
841 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
843 prod
= 0.25 * sk2_rinv
;
845 /* log_term = table_log(uij*lij_inv,born->log_table,
846 LOG_TABLE_ACCURACY); */
847 log_term
= log(uij
*lij_inv
);
849 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+
854 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
858 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
859 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
860 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
862 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
863 /* fr->dadx[n++] = (dlij*t1+duij*t2+t3)*rinv; */ /* rb2 is moved to chainrule */
865 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
871 fr
->dadx
[n
++] = dadx_val
;
874 born
->gpol_hct_work
[ai
] += sum_ai
;
877 /* Parallel summations */
880 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
882 else if(DOMAINDECOMP(cr
))
884 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
887 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
889 if(born
->use
[i
] != 0)
891 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]]-doffset
;
892 sum_ai
= 1.0/rai
- born
->gpol_hct_work
[i
];
893 min_rad
= rai
+ doffset
;
896 born
->bRad
[i
] = rad
> min_rad
? rad
: min_rad
;
897 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
901 /* Extra communication required for DD */
904 dd_atom_spread_real(cr
->dd
, born
->bRad
);
905 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
913 calc_gb_rad_obc(t_commrec
*cr
, t_forcerec
*fr
, int natoms
, gmx_localtop_t
*top
,
914 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
)
916 int i
,k
,ai
,aj
,nj0
,nj1
,n
,at0
,at1
;
919 real rai
,raj
,gpi
,dr2
,dr
,sk
,sk2
,lij
,uij
,diff2
,tmp
,sum_ai
;
920 real rad
, min_rad
,sum_ai2
,sum_ai3
,tsum
,tchain
,rinv
,rai_inv
,lij_inv
,rai_inv2
;
921 real log_term
,prod
,sk2_rinv
,sk_ai
,sk2_ai
;
922 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
;
923 real lij2
,uij2
,lij3
,uij3
,dlij
,duij
,t1
,t2
,t3
;
924 real doffset
,raj_inv
,dadx_val
;
927 /* Keep the compiler happy */
932 doffset
= born
->gb_doffset
;
933 gb_radius
= born
->gb_radius
;
935 for(i
=0;i
<born
->nr
;i
++)
937 born
->gpol_hct_work
[i
] = 0;
940 for(i
=0;i
<nl
->nri
;i
++)
945 nj1
= nl
->jindex
[i
+1];
947 /* Load shifts for this list */
948 shift
= nl
->shift
[i
];
949 shX
= fr
->shift_vec
[shift
][0];
950 shY
= fr
->shift_vec
[shift
][1];
951 shZ
= fr
->shift_vec
[shift
][2];
956 sk_ai
= born
->param
[ai
];
957 sk2_ai
= sk_ai
*sk_ai
;
959 /* Load atom i coordinates, add shift vectors */
960 ix1
= shX
+ x
[ai
][0];
961 iy1
= shY
+ x
[ai
][1];
962 iz1
= shZ
+ x
[ai
][2];
978 dr2
= dx11
*dx11
+dy11
*dy11
+dz11
*dz11
;
979 rinv
= gmx_invsqrt(dr2
);
982 /* sk is precalculated in init_gb() */
983 sk
= born
->param
[aj
];
986 /* aj -> ai interaction */
1006 lij_inv
= gmx_invsqrt(lij2
);
1008 sk2_rinv
= sk2
*rinv
;
1009 prod
= 0.25*sk2_rinv
;
1011 log_term
= log(uij
*lij_inv
);
1013 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1014 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1018 tmp
= tmp
+ 2.0 * (rai_inv
-lij
);
1022 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1023 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1024 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1026 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1034 fr
->dadx
[n
++] = dadx_val
;
1036 /* ai -> aj interaction */
1037 if(raj
< dr
+ sk_ai
)
1039 lij
= 1.0/(dr
-sk_ai
);
1052 uij
= 1.0/(dr
+sk_ai
);
1058 lij_inv
= gmx_invsqrt(lij2
);
1059 sk2
= sk2_ai
; /* sk2_ai = sk_ai * sk_ai in i loop above */
1060 sk2_rinv
= sk2
*rinv
;
1061 prod
= 0.25 * sk2_rinv
;
1063 /* log_term = table_log(uij*lij_inv,born->log_table,LOG_TABLE_ACCURACY); */
1064 log_term
= log(uij
*lij_inv
);
1066 tmp
= lij
-uij
+ 0.25*dr
*diff2
+ (0.5*rinv
)*log_term
+ prod
*(-diff2
);
1070 tmp
= tmp
+ 2.0 * (raj_inv
-lij
);
1073 t1
= 0.5*lij2
+ prod
*lij3
- 0.25*(lij
*rinv
+lij3
*dr
);
1074 t2
= -0.5*uij2
- 0.25*sk2_rinv
*uij3
+ 0.25*(uij
*rinv
+uij3
*dr
);
1075 t3
= 0.125*(1.0+sk2_rinv
*rinv
)*(-diff2
)+0.25*log_term
*rinv
*rinv
;
1077 dadx_val
= (dlij
*t1
+t2
+t3
)*rinv
; /* rb2 is moved to chainrule */
1079 born
->gpol_hct_work
[aj
] += 0.5*tmp
;
1086 fr
->dadx
[n
++] = dadx_val
;
1089 born
->gpol_hct_work
[ai
] += sum_ai
;
1093 /* Parallel summations */
1096 gmx_sum(natoms
, born
->gpol_hct_work
, cr
);
1098 else if(DOMAINDECOMP(cr
))
1100 dd_atom_sum_real(cr
->dd
, born
->gpol_hct_work
);
1103 for(i
=0;i
<fr
->natoms_force
;i
++) /* PELA born->nr */
1105 if(born
->use
[i
] != 0)
1107 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[i
]];
1111 sum_ai
= rai
* born
->gpol_hct_work
[i
];
1112 sum_ai2
= sum_ai
* sum_ai
;
1113 sum_ai3
= sum_ai2
* sum_ai
;
1115 tsum
= tanh(born
->obc_alpha
*sum_ai
-born
->obc_beta
*sum_ai2
+born
->obc_gamma
*sum_ai3
);
1116 born
->bRad
[i
] = rai_inv
- tsum
*rai_inv2
;
1117 born
->bRad
[i
] = 1.0 / born
->bRad
[i
];
1119 fr
->invsqrta
[i
] = gmx_invsqrt(born
->bRad
[i
]);
1121 tchain
= rai
* (born
->obc_alpha
-2*born
->obc_beta
*sum_ai
+3*born
->obc_gamma
*sum_ai2
);
1122 born
->drobc
[i
] = (1.0-tsum
*tsum
)*tchain
*rai_inv2
;
1126 /* Extra (local) communication required for DD */
1127 if(DOMAINDECOMP(cr
))
1129 dd_atom_spread_real(cr
->dd
, born
->bRad
);
1130 dd_atom_spread_real(cr
->dd
, fr
->invsqrta
);
1131 dd_atom_spread_real(cr
->dd
, born
->drobc
);
1140 int calc_gb_rad(t_commrec
*cr
, t_forcerec
*fr
, t_inputrec
*ir
,gmx_localtop_t
*top
,
1141 const t_atomtypes
*atype
, rvec x
[], t_nblist
*nl
, gmx_genborn_t
*born
,t_mdatoms
*md
,t_nrnb
*nrnb
)
1147 if(fr
->bAllvsAll
&& fr
->dadx
==NULL
)
1149 /* We might need up to 8 atoms of padding before and after,
1150 * and another 4 units to guarantee SSE alignment.
1152 fr
->nalloc_dadx
= 2*(md
->homenr
+12)*(md
->nr
/2+1+12);
1153 snew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1154 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1158 /* In the SSE-enabled gb-loops, when writing to dadx, we
1159 * always write 2*4 elements at a time, even in the case with only
1160 * 1-3 j particles, where we only really need to write 2*(1-3)
1161 * elements. This is because we want dadx to be aligned to a 16-
1162 * byte boundary, and being able to use _mm_store/load_ps
1164 ndadx
= 2 * (nl
->nrj
+ 3*nl
->nri
);
1166 /* First, reallocate the dadx array, we need 3 extra for SSE */
1167 if (ndadx
+ 3 > fr
->nalloc_dadx
)
1169 fr
->nalloc_dadx
= over_alloc_large(ndadx
) + 3;
1170 srenew(fr
->dadx_rawptr
,fr
->nalloc_dadx
);
1171 fr
->dadx
= (real
*) (((size_t) fr
->dadx_rawptr
+ 16) & (~((size_t) 15)));
1178 cnt
= md
->homenr
*(md
->nr
/2+1);
1180 if(ir
->gb_algorithm
==egbSTILL
)
1182 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1183 genborn_allvsall_calc_still_radii_sse2_single(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1185 genborn_allvsall_calc_still_radii(fr
,md
,born
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1187 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_STILL
,cnt
);
1189 else if(ir
->gb_algorithm
==egbHCT
|| ir
->gb_algorithm
==egbOBC
)
1191 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1192 genborn_allvsall_calc_hct_obc_radii_sse2_single(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1194 genborn_allvsall_calc_hct_obc_radii(fr
,md
,born
,ir
->gb_algorithm
,top
,x
[0],cr
,&fr
->AllvsAll_workgb
);
1196 inc_nrnb(nrnb
,eNR_BORN_AVA_RADII_HCT_OBC
,cnt
);
1200 gmx_fatal(FARGS
,"Bad gb algorithm for all-vs-all interactions");
1202 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1208 /* Switch for determining which algorithm to use for Born radii calculation */
1211 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1212 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1213 switch(ir
->gb_algorithm
)
1216 calc_gb_rad_still_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1219 calc_gb_rad_hct_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1222 calc_gb_rad_obc_sse2_double(cr
,fr
,md
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1226 gmx_fatal(FARGS
, "Unknown double precision sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1229 switch(ir
->gb_algorithm
)
1232 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1235 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1238 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1242 gmx_fatal(FARGS
, "Unknown double precision algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1249 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ) )
1250 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1251 switch(ir
->gb_algorithm
)
1254 calc_gb_rad_still_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
);
1258 calc_gb_rad_hct_obc_sse(cr
,fr
,born
->nr
,top
, atype
, x
[0], nl
, born
, md
, ir
->gb_algorithm
);
1262 gmx_fatal(FARGS
, "Unknown sse-enabled algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1266 switch(ir
->gb_algorithm
)
1269 calc_gb_rad_still(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1272 calc_gb_rad_hct(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1275 calc_gb_rad_obc(cr
,fr
,born
->nr
,top
,atype
,x
,nl
,born
,md
);
1279 gmx_fatal(FARGS
, "Unknown algorithm for Born radii calculation: %d",ir
->gb_algorithm
);
1282 #endif /* Single precision sse */
1284 #endif /* Double or single precision */
1286 if(fr
->bAllvsAll
==FALSE
)
1288 switch(ir
->gb_algorithm
)
1291 inc_nrnb(nrnb
,eNR_BORN_RADII_STILL
,nl
->nrj
);
1295 inc_nrnb(nrnb
,eNR_BORN_RADII_HCT_OBC
,nl
->nrj
);
1301 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,nl
->nri
);
1309 real
gb_bonds_tab(rvec x
[], rvec f
[], rvec fshift
[], real
*charge
, real
*p_gbtabscale
,
1310 real
*invsqrta
, real
*dvda
, real
*GBtab
, t_idef
*idef
,
1311 real gb_epsilon_solvent
, real facel
, const t_pbc
*pbc
, const t_graph
*graph
)
1313 int i
,j
,n0
,m
,nnn
,type
,ai
,aj
;
1319 real isaprod
,qq
,gbscale
,gbtabscale
,Y
,F
,Geps
,Heps2
,Fp
,VV
,FF
,rt
,eps
,eps2
;
1320 real vgb
,fgb
,vcoul
,fijC
,dvdatmp
,fscal
,dvdaj
;
1326 t_iatom
*forceatoms
;
1328 /* Scale the electrostatics by gb_epsilon_solvent */
1329 facel
= facel
* (1.0 - 1.0/gb_epsilon_solvent
);
1331 gbtabscale
=*p_gbtabscale
;
1334 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1336 forceatoms
= idef
->il
[j
].iatoms
;
1338 for(i
=0;i
<idef
->il
[j
].nr
; )
1340 /* To avoid reading in the interaction type, we just increment i to pass over
1341 * the types in the forceatoms array, this saves some memory accesses
1344 ai
= forceatoms
[i
++];
1345 aj
= forceatoms
[i
++];
1347 ki
= pbc_rvec_sub(pbc
,x
[ai
],x
[aj
],dx
);
1348 rsq11
= iprod(dx
,dx
);
1350 isai
= invsqrta
[ai
];
1351 iq
= (-1)*facel
*charge
[ai
];
1353 rinv11
= gmx_invsqrt(rsq11
);
1354 isaj
= invsqrta
[aj
];
1355 isaprod
= isai
*isaj
;
1356 qq
= isaprod
*iq
*charge
[aj
];
1357 gbscale
= isaprod
*gbtabscale
;
1366 Geps
= eps
*GBtab
[nnn
+2];
1367 Heps2
= eps2
*GBtab
[nnn
+3];
1370 FF
= Fp
+Geps
+2.0*Heps2
;
1372 fijC
= qq
*FF
*gbscale
;
1373 dvdatmp
= -(vgb
+fijC
*r
)*0.5;
1374 dvda
[aj
] = dvda
[aj
] + dvdatmp
*isaj
*isaj
;
1375 dvda
[ai
] = dvda
[ai
] + dvdatmp
*isai
*isai
;
1376 vctot
= vctot
+ vgb
;
1377 fgb
= -(fijC
)*rinv11
;
1380 ivec_sub(SHIFT_IVEC(graph
,ai
),SHIFT_IVEC(graph
,aj
),dt
);
1384 for (m
=0; (m
<DIM
); m
++) { /* 15 */
1388 fshift
[ki
][m
]+=fscal
;
1389 fshift
[CENTRAL
][m
]-=fscal
;
1397 real
calc_gb_selfcorrections(t_commrec
*cr
, int natoms
,
1398 real
*charge
, gmx_genborn_t
*born
, real
*dvda
, t_mdatoms
*md
, double facel
)
1401 real rai
,e
,derb
,q
,q2
,fi
,rai_inv
,vtot
;
1405 pd_at_range(cr
,&at0
,&at1
);
1407 else if(DOMAINDECOMP(cr
))
1410 at1
=cr
->dd
->nat_home
;
1419 /* Scale the electrostatics by gb_epsilon_solvent */
1420 facel
= facel
* (1.0 - 1.0/born
->gb_epsilon_solvent
);
1424 /* Apply self corrections */
1425 for(i
=at0
;i
<at1
;i
++)
1429 if(born
->use
[ai
]==1)
1431 rai
= born
->bRad
[ai
];
1437 derb
= 0.5*e
*rai_inv
*rai_inv
;
1438 dvda
[ai
] += derb
*rai
;
1447 real
calc_gb_nonpolar(t_commrec
*cr
, t_forcerec
*fr
,int natoms
,gmx_genborn_t
*born
, gmx_localtop_t
*top
,
1448 const t_atomtypes
*atype
, real
*dvda
,int gb_algorithm
, t_mdatoms
*md
)
1451 real e
,es
,rai
,rbi
,term
,probe
,tmp
,factor
;
1452 real rbi_inv
,rbi_inv2
;
1454 /* To keep the compiler happy */
1459 pd_at_range(cr
,&at0
,&at1
);
1461 else if(DOMAINDECOMP(cr
))
1464 at1
= cr
->dd
->nat_home
;
1472 /* The surface area factor is 0.0049 for Still model, 0.0054 for HCT/OBC */
1473 if(gb_algorithm
==egbSTILL
)
1475 factor
=0.0049*100*CAL2JOULE
;
1479 factor
=0.0054*100*CAL2JOULE
;
1482 /* if(gb_algorithm==egbHCT || gb_algorithm==egbOBC) */
1488 for(i
=at0
;i
<at1
;i
++)
1492 if(born
->use
[ai
]==1)
1494 rai
= top
->atomtypes
.gb_radius
[md
->typeA
[ai
]];
1495 rbi_inv
= fr
->invsqrta
[ai
];
1496 rbi_inv2
= rbi_inv
* rbi_inv
;
1497 tmp
= (rai
*rbi_inv2
)*(rai
*rbi_inv2
);
1499 e
= factor
*term
*(rai
+probe
)*(rai
+probe
)*tmp
;
1500 dvda
[ai
] = dvda
[ai
] - 6*e
*rbi_inv2
;
1510 real
calc_gb_chainrule(int natoms
, t_nblist
*nl
, real
*dadx
, real
*dvda
, rvec x
[], rvec t
[], rvec fshift
[],
1511 rvec shift_vec
[], int gb_algorithm
, gmx_genborn_t
*born
, t_mdatoms
*md
)
1513 int i
,k
,n
,ai
,aj
,nj0
,nj1
,n0
,n1
;
1516 real fgb
,fij
,rb2
,rbi
,fix1
,fiy1
,fiz1
;
1517 real ix1
,iy1
,iz1
,jx1
,jy1
,jz1
,dx11
,dy11
,dz11
,rsq11
;
1518 real rinv11
,tx
,ty
,tz
,rbai
,rbaj
,fgb_ai
;
1527 n1
= md
->start
+md
->homenr
+1+natoms
/2;
1529 if(gb_algorithm
==egbSTILL
)
1534 rbi
= born
->bRad
[k
];
1535 rb
[k
] = (2 * rbi
* rbi
* dvda
[k
])/ONE_4PI_EPS0
;
1538 else if(gb_algorithm
==egbHCT
)
1543 rbi
= born
->bRad
[k
];
1544 rb
[k
] = rbi
* rbi
* dvda
[k
];
1547 else if(gb_algorithm
==egbOBC
)
1552 rbi
= born
->bRad
[k
];
1553 rb
[k
] = rbi
* rbi
* born
->drobc
[k
] * dvda
[k
];
1557 for(i
=0;i
<nl
->nri
;i
++)
1561 nj0
= nl
->jindex
[ai
];
1562 nj1
= nl
->jindex
[ai
+1];
1564 /* Load shifts for this list */
1565 shift
= nl
->shift
[i
];
1566 shX
= shift_vec
[shift
][0];
1567 shY
= shift_vec
[shift
][1];
1568 shZ
= shift_vec
[shift
][2];
1570 /* Load atom i coordinates, add shift vectors */
1571 ix1
= shX
+ x
[ai
][0];
1572 iy1
= shY
+ x
[ai
][1];
1573 iz1
= shZ
+ x
[ai
][2];
1581 for(k
=nj0
;k
<nj1
;k
++)
1595 fgb
= rbai
*dadx
[n
++];
1596 fgb_ai
= rbaj
*dadx
[n
++];
1598 /* Total force between ai and aj is the sum of ai->aj and aj->ai */
1609 /* Update force on atom aj */
1610 t
[aj
][0] = t
[aj
][0] - tx
;
1611 t
[aj
][1] = t
[aj
][1] - ty
;
1612 t
[aj
][2] = t
[aj
][2] - tz
;
1615 /* Update force and shift forces on atom ai */
1616 t
[ai
][0] = t
[ai
][0] + fix1
;
1617 t
[ai
][1] = t
[ai
][1] + fiy1
;
1618 t
[ai
][2] = t
[ai
][2] + fiz1
;
1620 fshift
[shift
][0] = fshift
[shift
][0] + fix1
;
1621 fshift
[shift
][1] = fshift
[shift
][1] + fiy1
;
1622 fshift
[shift
][2] = fshift
[shift
][2] + fiz1
;
1630 real
calc_gb_forces(t_commrec
*cr
, t_mdatoms
*md
, gmx_genborn_t
*born
, gmx_localtop_t
*top
, const t_atomtypes
*atype
,
1631 rvec x
[], rvec f
[], t_forcerec
*fr
, t_idef
*idef
, int gb_algorithm
, t_nrnb
*nrnb
, bool bRad
,
1632 const t_pbc
*pbc
, const t_graph
*graph
)
1638 const t_pbc
*pbc_null
;
1647 /* Do a simple ACE type approximation for the non-polar solvation */
1648 v
+= calc_gb_nonpolar(cr
, fr
,born
->nr
, born
, top
, atype
, fr
->dvda
, gb_algorithm
,md
);
1650 /* Calculate the bonded GB-interactions using either table or analytical formula */
1652 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1653 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1655 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1656 v
+= gb_bonds_analytic(x
[0],f
[0],md
->chargeA
,born
->bRad
,fr
->dvda
,idef
,born
->gb_epsilon_solvent
,fr
->epsfac
);
1658 v
+= gb_bonds_tab(x
,f
,fr
->fshift
, md
->chargeA
,&(fr
->gbtabscale
),
1659 fr
->invsqrta
,fr
->dvda
,fr
->gbtab
.tab
,idef
,born
->gb_epsilon_solvent
, fr
->epsfac
, pbc_null
, graph
);
1663 /* Calculate self corrections to the GB energies - currently only A state used! (FIXME) */
1664 v
+= calc_gb_selfcorrections(cr
,born
->nr
,md
->chargeA
, born
, fr
->dvda
, md
, fr
->epsfac
);
1666 /* If parallel, sum the derivative of the potential w.r.t the born radii */
1669 gmx_sum(md
->nr
,fr
->dvda
, cr
);
1671 else if(DOMAINDECOMP(cr
))
1673 dd_atom_sum_real(cr
->dd
,fr
->dvda
);
1674 dd_atom_spread_real(cr
->dd
,fr
->dvda
);
1680 #if ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) )
1681 genborn_allvsall_calc_chainrule_sse2_single(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1683 genborn_allvsall_calc_chainrule(fr
,md
,born
,x
[0],f
[0],gb_algorithm
,fr
->AllvsAll_workgb
);
1685 cnt
= md
->homenr
*(md
->nr
/2+1);
1686 inc_nrnb(nrnb
,eNR_BORN_AVA_CHAINRULE
,cnt
);
1687 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,md
->homenr
);
1694 #if ( defined(GMX_IA32_SSE2) || defined(GMX_X86_64_SSE2) || defined(GMX_SSE2) )
1695 calc_gb_chainrule_sse2_double(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1696 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1697 gb_algorithm
, born
);
1699 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1700 x
, f
, fr
->fshift
, fr
->shift_vec
,
1701 gb_algorithm
, born
, md
);
1706 #if (!defined DISABLE_SSE && ( defined(GMX_IA32_SSE) || defined(GMX_X86_64_SSE) || defined(GMX_SSE2) ))
1707 /* x86 or x86-64 with GCC inline assembly and/or SSE intrinsics */
1708 calc_gb_chainrule_sse(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1709 x
[0], f
[0], fr
->fshift
[0], fr
->shift_vec
[0],
1710 gb_algorithm
, born
, md
);
1712 /* Calculate the forces due to chain rule terms with non sse code */
1713 calc_gb_chainrule(born
->nr
, &(fr
->gblist
), fr
->dadx
, fr
->dvda
,
1714 x
, f
, fr
->fshift
, fr
->shift_vec
,
1715 gb_algorithm
, born
, md
);
1721 inc_nrnb(nrnb
,eNR_BORN_CHAINRULE
,fr
->gblist
.nrj
);
1722 inc_nrnb(nrnb
,eNR_NBKERNEL_OUTER
,fr
->gblist
.nri
);
1730 static void add_j_to_gblist(gbtmpnbl_t
*list
,int aj
)
1732 if (list
->naj
>= list
->aj_nalloc
)
1734 list
->aj_nalloc
= over_alloc_large(list
->naj
+1);
1735 srenew(list
->aj
,list
->aj_nalloc
);
1738 list
->aj
[list
->naj
++] = aj
;
1741 static gbtmpnbl_t
*find_gbtmplist(struct gbtmpnbls
*lists
,int shift
)
1745 /* Search the list with the same shift, if there is one */
1747 while (ind
< lists
->nlist
&& shift
!= lists
->list
[ind
].shift
)
1751 if (ind
== lists
->nlist
)
1753 if (lists
->nlist
== lists
->list_nalloc
)
1755 lists
->list_nalloc
++;
1756 srenew(lists
->list
,lists
->list_nalloc
);
1757 for(i
=lists
->nlist
; i
<lists
->list_nalloc
; i
++)
1759 lists
->list
[i
].aj
= NULL
;
1760 lists
->list
[i
].aj_nalloc
= 0;
1765 lists
->list
[lists
->nlist
].shift
= shift
;
1766 lists
->list
[lists
->nlist
].naj
= 0;
1770 return &lists
->list
[ind
];
1773 static void add_bondeds_to_gblist(t_ilist
*il
,
1774 bool bMolPBC
,t_pbc
*pbc
,t_graph
*g
,rvec
*x
,
1775 struct gbtmpnbls
*nls
)
1777 int ind
,j
,ai
,aj
,shift
,found
;
1783 for(ind
=0; ind
<il
->nr
; ind
+=3)
1785 ai
= il
->iatoms
[ind
+1];
1786 aj
= il
->iatoms
[ind
+2];
1791 rvec_sub(x
[ai
],x
[aj
],dx
);
1792 ivec_sub(SHIFT_IVEC(g
,ai
),SHIFT_IVEC(g
,aj
),dt
);
1793 shift
= IVEC2IS(dt
);
1797 shift
= pbc_dx_aiuc(pbc
,x
[ai
],x
[aj
],dx
);
1800 /* Find the list for this shift or create one */
1801 list
= find_gbtmplist(&nls
[ai
],shift
);
1805 /* So that we do not add the same bond twice.
1806 * This happens with some constraints between 1-3 atoms
1807 * that are in the bond-list but should not be in the GB nb-list */
1808 for(j
=0;j
<list
->naj
;j
++)
1810 if (list
->aj
[j
] == aj
)
1820 gmx_incons("ai == aj");
1823 add_j_to_gblist(list
,aj
);
1829 compare_int (const void * a
, const void * b
)
1831 return ( *(int*)a
- *(int*)b
);
1836 int make_gb_nblist(t_commrec
*cr
, int gb_algorithm
, real gbcut
,
1837 rvec x
[], matrix box
,
1838 t_forcerec
*fr
, t_idef
*idef
, t_graph
*graph
, gmx_genborn_t
*born
)
1840 int i
,l
,ii
,j
,k
,n
,nj0
,nj1
,ai
,aj
,at0
,at1
,found
,shift
,s
;
1845 struct gbtmpnbls
*nls
;
1846 gbtmpnbl_t
*list
=NULL
;
1848 nls
= born
->nblist_work
;
1850 for(i
=0;i
<born
->nr
;i
++)
1857 set_pbc_dd(&pbc
,fr
->ePBC
,cr
->dd
,TRUE
,box
);
1860 switch (gb_algorithm
)
1864 /* Loop over 1-2, 1-3 and 1-4 interactions */
1865 for(j
=F_GB12
;j
<=F_GB14
;j
++)
1867 add_bondeds_to_gblist(&idef
->il
[j
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1871 /* Loop over 1-4 interactions */
1872 add_bondeds_to_gblist(&idef
->il
[F_GB14
],fr
->bMolPBC
,&pbc
,graph
,x
,nls
);
1875 gmx_incons("Unknown GB algorithm");
1878 /* Loop over the VDWQQ and VDW nblists to set up the nonbonded part of the GB list */
1879 for(n
=0; (n
<fr
->nnblists
); n
++)
1881 for(i
=0; (i
<eNL_NR
); i
++)
1883 nblist
=&(fr
->nblists
[n
].nlist_sr
[i
]);
1885 if (nblist
->nri
> 0 && (i
==eNL_VDWQQ
|| i
==eNL_QQ
))
1887 for(j
=0;j
<nblist
->nri
;j
++)
1889 ai
= nblist
->iinr
[j
];
1890 shift
= nblist
->shift
[j
];
1892 /* Find the list for this shift or create one */
1893 list
= find_gbtmplist(&nls
[ai
],shift
);
1895 nj0
= nblist
->jindex
[j
];
1896 nj1
= nblist
->jindex
[j
+1];
1898 /* Add all the j-atoms in the non-bonded list to the GB list */
1899 for(k
=nj0
;k
<nj1
;k
++)
1901 add_j_to_gblist(list
,nblist
->jjnr
[k
]);
1908 /* Zero out some counters */
1912 fr
->gblist
.jindex
[0] = fr
->gblist
.nri
;
1914 for(i
=0;i
<fr
->natoms_force
;i
++)
1916 for(s
=0; s
<nls
[i
].nlist
; s
++)
1918 list
= &nls
[i
].list
[s
];
1920 /* Only add those atoms that actually have neighbours */
1921 if (born
->use
[i
] != 0)
1923 fr
->gblist
.iinr
[fr
->gblist
.nri
] = i
;
1924 fr
->gblist
.shift
[fr
->gblist
.nri
] = list
->shift
;
1927 for(k
=0; k
<list
->naj
; k
++)
1929 /* Memory allocation for jjnr */
1930 if(fr
->gblist
.nrj
>= fr
->gblist
.maxnrj
)
1932 fr
->gblist
.maxnrj
+= over_alloc_large(fr
->gblist
.maxnrj
);
1936 fprintf(debug
,"Increasing GB neighbourlist j size to %d\n",fr
->gblist
.maxnrj
);
1939 srenew(fr
->gblist
.jjnr
,fr
->gblist
.maxnrj
);
1943 if(i
== list
->aj
[k
])
1945 gmx_incons("i == list->aj[k]");
1947 fr
->gblist
.jjnr
[fr
->gblist
.nrj
++] = list
->aj
[k
];
1950 fr
->gblist
.jindex
[fr
->gblist
.nri
] = fr
->gblist
.nrj
;
1957 for(i
=0;i
<fr
->gblist
.nri
;i
++)
1959 nj0
= fr
->gblist
.jindex
[i
];
1960 nj1
= fr
->gblist
.jindex
[i
+1];
1961 ai
= fr
->gblist
.iinr
[i
];
1964 for(j
=nj0
;j
<nj1
;j
++)
1966 if(fr
->gblist
.jjnr
[j
]<ai
)
1967 fr
->gblist
.jjnr
[j
]+=fr
->natoms_force
;
1969 qsort(fr
->gblist
.jjnr
+nj0
,nj1
-nj0
,sizeof(int),compare_int
);
1971 for(j
=nj0
;j
<nj1
;j
++)
1973 if(fr
->gblist
.jjnr
[j
]>=fr
->natoms_force
)
1974 fr
->gblist
.jjnr
[j
]-=fr
->natoms_force
;
1983 void make_local_gb(const t_commrec
*cr
, gmx_genborn_t
*born
, int gb_algorithm
)
1986 gmx_domdec_t
*dd
=NULL
;
1988 if(DOMAINDECOMP(cr
))
1996 /* Single node or particle decomp (global==local), just copy pointers and return */
1997 if(gb_algorithm
==egbSTILL
)
1999 born
->gpol
= born
->gpol_globalindex
;
2000 born
->vsolv
= born
->vsolv_globalindex
;
2001 born
->gb_radius
= born
->gb_radius_globalindex
;
2005 born
->param
= born
->param_globalindex
;
2006 born
->gb_radius
= born
->gb_radius_globalindex
;
2009 born
->use
= born
->use_globalindex
;
2014 /* Reallocation of local arrays if necessary */
2015 if(born
->nlocal
< dd
->nat_tot
)
2017 born
->nlocal
= dd
->nat_tot
;
2019 /* Arrays specific to different gb algorithms */
2020 if(gb_algorithm
==egbSTILL
)
2022 srenew(born
->gpol
, born
->nlocal
+3);
2023 srenew(born
->vsolv
, born
->nlocal
+3);
2024 srenew(born
->gb_radius
, born
->nlocal
+3);
2028 srenew(born
->param
, born
->nlocal
+3);
2029 srenew(born
->gb_radius
, born
->nlocal
+3);
2032 /* All gb-algorithms use the array for vsites exclusions */
2033 srenew(born
->use
, born
->nlocal
+3);
2036 /* With dd, copy algorithm specific arrays */
2037 if(gb_algorithm
==egbSTILL
)
2039 for(i
=at0
;i
<at1
;i
++)
2041 born
->gpol
[i
] = born
->gpol_globalindex
[dd
->gatindex
[i
]];
2042 born
->vsolv
[i
] = born
->vsolv_globalindex
[dd
->gatindex
[i
]];
2043 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2044 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];
2049 for(i
=at0
;i
<at1
;i
++)
2051 born
->param
[i
] = born
->param_globalindex
[dd
->gatindex
[i
]];
2052 born
->gb_radius
[i
] = born
->gb_radius_globalindex
[dd
->gatindex
[i
]];
2053 born
->use
[i
] = born
->use_globalindex
[dd
->gatindex
[i
]];