2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* When calculating RF or Ewald interactions we calculate the electrostatic
37 * forces and energies on excluded atom pairs here in the non-bonded loops.
39 #if defined CHECK_EXCLS && (defined CALC_COULOMB || defined LJ_EWALD)
53 egp_cj
= nbatParams
.energrp
[cj
];
55 for (i
= 0; i
< UNROLLI
; i
++)
63 type_i_off
= type
[ai
]*ntype2
;
65 for (j
= 0; j
< UNROLLJ
; j
++)
72 real FrLJ6
= 0, FrLJ12
= 0, frLJ
= 0;
74 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
93 /* A multiply mask used to zero an interaction
94 * when either the distance cutoff is exceeded, or
95 * (if appropriate) the i and j indices are
96 * unsuitable for this kind of inner loop. */
100 /* A multiply mask used to zero an interaction
101 * when that interaction should be excluded
102 * (e.g. because of bonding). */
105 interact
= ((l_cj
[cjind
].excl
>>(i
*UNROLLI
+ j
)) & 1);
109 skipmask
= (cj
== ci_sh
&& j
<= i
) ? 0.0 : 1.0;
120 dx
= xi
[i
*XI_STRIDE
+XX
] - x
[aj
*X_STRIDE
+XX
];
121 dy
= xi
[i
*XI_STRIDE
+YY
] - x
[aj
*X_STRIDE
+YY
];
122 dz
= xi
[i
*XI_STRIDE
+ZZ
] - x
[aj
*X_STRIDE
+ZZ
];
124 rsq
= dx
*dx
+ dy
*dy
+ dz
*dz
;
126 /* Prepare to enforce the cut-off. */
127 skipmask
= (rsq
>= rcut2
) ? 0 : skipmask
;
128 /* 9 flops for r^2 + cut-off check */
130 // Ensure the distances do not fall below the limit where r^-12 overflows.
131 // This should never happen for normal interactions.
132 rsq
= std::max(rsq
, NBNXN_MIN_RSQ
);
138 rinv
= gmx::invsqrt(rsq
);
139 /* 5 flops for invsqrt */
141 /* Partially enforce the cut-off (and perhaps
142 * exclusions) to avoid possible overflow of
143 * rinvsix when computing LJ, and/or overflowing
144 * the Coulomb table during lookup. */
145 rinv
= rinv
* skipmask
;
153 c6
= nbfp
[type_i_off
+type
[aj
]*2 ];
154 c12
= nbfp
[type_i_off
+type
[aj
]*2+1];
156 #if defined LJ_CUT || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
157 rinvsix
= interact
*rinvsq
*rinvsq
*rinvsq
;
159 FrLJ12
= c12
*rinvsix
*rinvsix
;
160 frLJ
= FrLJ12
- FrLJ6
;
161 /* 7 flops for r^-2 + LJ force */
162 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
163 VLJ
= (FrLJ12
+ c12
*ic
->repulsion_shift
.cpot
)/12 -
164 (FrLJ6
+ c6
*ic
->dispersion_shift
.cpot
)/6;
165 /* 7 flops for LJ energy */
169 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
170 /* Force or potential switching from ic->rvdw_switch */
172 rsw
= r
- ic
->rvdw_switch
;
173 rsw
= (rsw
>= 0.0 ? rsw
: 0.0);
175 #ifdef LJ_FORCE_SWITCH
177 -c6
*(ic
->dispersion_shift
.c2
+ ic
->dispersion_shift
.c3
*rsw
)*rsw
*rsw
*r
178 + c12
*(ic
->repulsion_shift
.c2
+ ic
->repulsion_shift
.c3
*rsw
)*rsw
*rsw
*r
;
179 #if defined CALC_ENERGIES
181 -c6
*(-ic
->dispersion_shift
.c2
/3 - ic
->dispersion_shift
.c3
/4*rsw
)*rsw
*rsw
*rsw
182 + c12
*(-ic
->repulsion_shift
.c2
/3 - ic
->repulsion_shift
.c3
/4*rsw
)*rsw
*rsw
*rsw
;
186 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
187 /* Masking should be done after force switching,
188 * but before potential switching.
190 /* Need to zero the interaction if there should be exclusion. */
191 VLJ
= VLJ
* interact
;
198 sw
= 1.0 + (swV3
+ (swV4
+ swV5
*rsw
)*rsw
)*rsw
*rsw
*rsw
;
199 dsw
= (swF2
+ (swF3
+ swF4
*rsw
)*rsw
)*rsw
*rsw
;
201 frLJ
= frLJ
*sw
- r
*VLJ
*dsw
;
208 real c6grid
, rinvsix_nm
, cr2
, expmcr2
, poly
;
213 #ifdef LJ_EWALD_COMB_GEOM
214 c6grid
= ljc
[type
[ai
]*2]*ljc
[type
[aj
]*2];
215 #elif defined LJ_EWALD_COMB_LB
217 real sigma
, sigma2
, epsilon
;
219 /* These sigma and epsilon are scaled to give 6*C6 */
220 sigma
= ljc
[type
[ai
]*2] + ljc
[type
[aj
]*2];
221 epsilon
= ljc
[type
[ai
]*2+1]*ljc
[type
[aj
]*2+1];
223 sigma2
= sigma
*sigma
;
224 c6grid
= epsilon
*sigma2
*sigma2
*sigma2
;
227 #error "No LJ Ewald combination rule defined"
231 /* Recalculate rinvsix without exclusion mask */
232 rinvsix_nm
= rinvsq
*rinvsq
*rinvsq
;
234 rinvsix_nm
= rinvsix
;
236 cr2
= lje_coeff2
*rsq
;
240 expmcr2
= expf(-cr2
);
242 poly
= 1 + cr2
+ 0.5*cr2
*cr2
;
244 /* Subtract the grid force from the total LJ force */
245 frLJ
+= c6grid
*(rinvsix_nm
- expmcr2
*(rinvsix_nm
*poly
+ lje_coeff6_6
));
247 /* Shift should only be applied to real LJ pairs */
248 sh_mask
= lje_vc
*interact
;
250 VLJ
+= c6grid
/6*(rinvsix_nm
*(1 - expmcr2
*poly
) + sh_mask
);
253 #endif /* LJ_EWALD */
255 #ifdef VDW_CUTOFF_CHECK
256 /* Mask for VdW cut-off shorter than Coulomb cut-off */
260 skipmask_rvdw
= (rsq
< rvdw2
) ? 1.0 : 0.0;
261 frLJ
*= skipmask_rvdw
;
263 VLJ
*= skipmask_rvdw
;
267 #if defined CALC_ENERGIES
268 /* Need to zero the interaction if r >= rcut */
269 VLJ
= VLJ
* skipmask
;
270 /* 1 more flop for LJ energy */
272 #endif /* VDW_CUTOFF_CHECK */
277 Vvdw
[egp_sh_i
[i
] + ((egp_cj
>> (nbatParams
.neg_2log
*j
)) & egp_mask
)] += VLJ
;
280 /* 1 flop for LJ energy addition */
286 /* Enforce the cut-off and perhaps exclusions. In
287 * those cases, rinv is zero because of skipmask,
288 * but fcoul and vcoul will later be non-zero (in
289 * both RF and table cases) because of the
290 * contributions that do not depend on rinv. These
291 * contributions cannot be allowed to accumulate
292 * to the force and potential, and the easiest way
293 * to do this is to zero the charges in
295 qq
= skipmask
* qi
[i
] * q
[aj
];
298 fcoul
= qq
*(interact
*rinv
*rinvsq
- k_rf2
);
299 /* 4 flops for RF force */
301 vcoul
= qq
*(interact
*rinv
+ k_rf
*rsq
- c_rf
);
302 /* 4 flops for RF energy */
307 rs
= rsq
*rinv
*tab_coul_scale
;
311 /* fexcl = F_i + frac * (F_(i+1)-F_i) */
312 fexcl
= tab_coul_FDV0
[ri
*4] + frac
*tab_coul_FDV0
[ri
*4+1];
314 /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
315 fexcl
= (1 - frac
)*tab_coul_F
[ri
] + frac
*tab_coul_F
[ri
+1];
317 fcoul
= interact
*rinvsq
- fexcl
;
318 /* 7 flops for float 1/r-table force */
321 vcoul
= qq
*(interact
*(rinv
- ic
->sh_ewald
)
322 -(tab_coul_FDV0
[ri
*4+2]
323 -halfsp
*frac
*(tab_coul_FDV0
[ri
*4] + fexcl
)));
324 /* 7 flops for float 1/r-table energy (8 with excls) */
326 vcoul
= qq
*(interact
*(rinv
- ic
->sh_ewald
)
328 -halfsp
*frac
*(tab_coul_F
[ri
] + fexcl
)));
336 Vc
[egp_sh_i
[i
] + ((egp_cj
>> (nbatParams
.neg_2log
*j
)) & egp_mask
)] += vcoul
;
339 /* 1 flop for Coulomb energy addition */
349 fscal
= frLJ
*rinvsq
+ fcoul
;
350 /* 2 flops for scalar LJ+Coulomb force */
365 /* Increment i-atom force */
366 fi
[i
*FI_STRIDE
+XX
] += fx
;
367 fi
[i
*FI_STRIDE
+YY
] += fy
;
368 fi
[i
*FI_STRIDE
+ZZ
] += fz
;
369 /* Decrement j-atom force */
370 f
[aj
*F_STRIDE
+XX
] -= fx
;
371 f
[aj
*F_STRIDE
+YY
] -= fy
;
372 f
[aj
*F_STRIDE
+ZZ
] -= fz
;
373 /* 9 flops for force addition */