Introduce SimulatorBuilder
[gromacs.git] / src / gromacs / nbnxm / kernels_reference / kernel_ref_inner.h
blob62e1356b3cb41d53a01fa26e2cbb8a1ea744421a
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* When calculating RF or Ewald interactions we calculate the electrostatic
37 * forces and energies on excluded atom pairs here in the non-bonded loops.
39 #if defined CHECK_EXCLS && (defined CALC_COULOMB || defined LJ_EWALD)
40 #define EXCL_FORCES
41 #endif
44 int cj;
45 #ifdef ENERGY_GROUPS
46 int egp_cj;
47 #endif
48 int i;
50 cj = l_cj[cjind].cj;
52 #ifdef ENERGY_GROUPS
53 egp_cj = nbatParams.energrp[cj];
54 #endif
55 for (i = 0; i < UNROLLI; i++)
57 int ai;
58 int type_i_off;
59 int j;
61 ai = ci*UNROLLI + i;
63 type_i_off = type[ai]*ntype2;
65 for (j = 0; j < UNROLLJ; j++)
67 int aj;
68 real dx, dy, dz;
69 real rsq, rinv;
70 real rinvsq, rinvsix;
71 real c6, c12;
72 real FrLJ6 = 0, FrLJ12 = 0, frLJ = 0;
73 real VLJ gmx_unused;
74 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
75 real r, rsw;
76 #endif
78 #ifdef CALC_COULOMB
79 real qq;
80 real fcoul;
81 #ifdef CALC_COUL_TAB
82 real rs, frac;
83 int ri;
84 real fexcl;
85 #endif
86 #ifdef CALC_ENERGIES
87 real vcoul;
88 #endif
89 #endif
90 real fscal;
91 real fx, fy, fz;
93 /* A multiply mask used to zero an interaction
94 * when either the distance cutoff is exceeded, or
95 * (if appropriate) the i and j indices are
96 * unsuitable for this kind of inner loop. */
97 real skipmask;
99 #ifdef CHECK_EXCLS
100 /* A multiply mask used to zero an interaction
101 * when that interaction should be excluded
102 * (e.g. because of bonding). */
103 int interact;
105 interact = ((l_cj[cjind].excl>>(i*UNROLLI + j)) & 1);
106 #ifndef EXCL_FORCES
107 skipmask = interact;
108 #else
109 skipmask = (cj == ci_sh && j <= i) ? 0.0 : 1.0;
110 #endif
111 #else
112 #define interact 1.0
113 skipmask = 1.0;
114 #endif
116 VLJ = 0;
118 aj = cj*UNROLLJ + j;
120 dx = xi[i*XI_STRIDE+XX] - x[aj*X_STRIDE+XX];
121 dy = xi[i*XI_STRIDE+YY] - x[aj*X_STRIDE+YY];
122 dz = xi[i*XI_STRIDE+ZZ] - x[aj*X_STRIDE+ZZ];
124 rsq = dx*dx + dy*dy + dz*dz;
126 /* Prepare to enforce the cut-off. */
127 skipmask = (rsq >= rcut2) ? 0 : skipmask;
128 /* 9 flops for r^2 + cut-off check */
130 // Ensure the distances do not fall below the limit where r^-12 overflows.
131 // This should never happen for normal interactions.
132 rsq = std::max(rsq, NBNXN_MIN_RSQ);
134 #ifdef COUNT_PAIRS
135 npair++;
136 #endif
138 rinv = gmx::invsqrt(rsq);
139 /* 5 flops for invsqrt */
141 /* Partially enforce the cut-off (and perhaps
142 * exclusions) to avoid possible overflow of
143 * rinvsix when computing LJ, and/or overflowing
144 * the Coulomb table during lookup. */
145 rinv = rinv * skipmask;
147 rinvsq = rinv*rinv;
149 #ifdef HALF_LJ
150 if (i < UNROLLI/2)
151 #endif
153 c6 = nbfp[type_i_off+type[aj]*2 ];
154 c12 = nbfp[type_i_off+type[aj]*2+1];
156 #if defined LJ_CUT || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
157 rinvsix = interact*rinvsq*rinvsq*rinvsq;
158 FrLJ6 = c6*rinvsix;
159 FrLJ12 = c12*rinvsix*rinvsix;
160 frLJ = FrLJ12 - FrLJ6;
161 /* 7 flops for r^-2 + LJ force */
162 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
163 VLJ = (FrLJ12 + c12*ic->repulsion_shift.cpot)/12 -
164 (FrLJ6 + c6*ic->dispersion_shift.cpot)/6;
165 /* 7 flops for LJ energy */
166 #endif
167 #endif
169 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
170 /* Force or potential switching from ic->rvdw_switch */
171 r = rsq*rinv;
172 rsw = r - ic->rvdw_switch;
173 rsw = (rsw >= 0.0 ? rsw : 0.0);
174 #endif
175 #ifdef LJ_FORCE_SWITCH
176 frLJ +=
177 -c6*(ic->dispersion_shift.c2 + ic->dispersion_shift.c3*rsw)*rsw*rsw*r
178 + c12*(ic->repulsion_shift.c2 + ic->repulsion_shift.c3*rsw)*rsw*rsw*r;
179 #if defined CALC_ENERGIES
180 VLJ +=
181 -c6*(-ic->dispersion_shift.c2/3 - ic->dispersion_shift.c3/4*rsw)*rsw*rsw*rsw
182 + c12*(-ic->repulsion_shift.c2/3 - ic->repulsion_shift.c3/4*rsw)*rsw*rsw*rsw;
183 #endif
184 #endif
186 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
187 /* Masking should be done after force switching,
188 * but before potential switching.
190 /* Need to zero the interaction if there should be exclusion. */
191 VLJ = VLJ * interact;
192 #endif
194 #ifdef LJ_POT_SWITCH
196 real sw, dsw;
198 sw = 1.0 + (swV3 + (swV4+ swV5*rsw)*rsw)*rsw*rsw*rsw;
199 dsw = (swF2 + (swF3 + swF4*rsw)*rsw)*rsw*rsw;
201 frLJ = frLJ*sw - r*VLJ*dsw;
202 VLJ *= sw;
204 #endif
206 #ifdef LJ_EWALD
208 real c6grid, rinvsix_nm, cr2, expmcr2, poly;
209 #ifdef CALC_ENERGIES
210 real sh_mask;
211 #endif
213 #ifdef LJ_EWALD_COMB_GEOM
214 c6grid = ljc[type[ai]*2]*ljc[type[aj]*2];
215 #elif defined LJ_EWALD_COMB_LB
217 real sigma, sigma2, epsilon;
219 /* These sigma and epsilon are scaled to give 6*C6 */
220 sigma = ljc[type[ai]*2] + ljc[type[aj]*2];
221 epsilon = ljc[type[ai]*2+1]*ljc[type[aj]*2+1];
223 sigma2 = sigma*sigma;
224 c6grid = epsilon*sigma2*sigma2*sigma2;
226 #else
227 #error "No LJ Ewald combination rule defined"
228 #endif
230 #ifdef CHECK_EXCLS
231 /* Recalculate rinvsix without exclusion mask */
232 rinvsix_nm = rinvsq*rinvsq*rinvsq;
233 #else
234 rinvsix_nm = rinvsix;
235 #endif
236 cr2 = lje_coeff2*rsq;
237 #if GMX_DOUBLE
238 expmcr2 = exp(-cr2);
239 #else
240 expmcr2 = expf(-cr2);
241 #endif
242 poly = 1 + cr2 + 0.5*cr2*cr2;
244 /* Subtract the grid force from the total LJ force */
245 frLJ += c6grid*(rinvsix_nm - expmcr2*(rinvsix_nm*poly + lje_coeff6_6));
246 #ifdef CALC_ENERGIES
247 /* Shift should only be applied to real LJ pairs */
248 sh_mask = lje_vc*interact;
250 VLJ += c6grid/6*(rinvsix_nm*(1 - expmcr2*poly) + sh_mask);
251 #endif
253 #endif /* LJ_EWALD */
255 #ifdef VDW_CUTOFF_CHECK
256 /* Mask for VdW cut-off shorter than Coulomb cut-off */
258 real skipmask_rvdw;
260 skipmask_rvdw = (rsq < rvdw2) ? 1.0 : 0.0;
261 frLJ *= skipmask_rvdw;
262 #ifdef CALC_ENERGIES
263 VLJ *= skipmask_rvdw;
264 #endif
266 #else
267 #if defined CALC_ENERGIES
268 /* Need to zero the interaction if r >= rcut */
269 VLJ = VLJ * skipmask;
270 /* 1 more flop for LJ energy */
271 #endif
272 #endif /* VDW_CUTOFF_CHECK */
275 #ifdef CALC_ENERGIES
276 #ifdef ENERGY_GROUPS
277 Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += VLJ;
278 #else
279 Vvdw_ci += VLJ;
280 /* 1 flop for LJ energy addition */
281 #endif
282 #endif
285 #ifdef CALC_COULOMB
286 /* Enforce the cut-off and perhaps exclusions. In
287 * those cases, rinv is zero because of skipmask,
288 * but fcoul and vcoul will later be non-zero (in
289 * both RF and table cases) because of the
290 * contributions that do not depend on rinv. These
291 * contributions cannot be allowed to accumulate
292 * to the force and potential, and the easiest way
293 * to do this is to zero the charges in
294 * advance. */
295 qq = skipmask * qi[i] * q[aj];
297 #ifdef CALC_COUL_RF
298 fcoul = qq*(interact*rinv*rinvsq - k_rf2);
299 /* 4 flops for RF force */
300 #ifdef CALC_ENERGIES
301 vcoul = qq*(interact*rinv + k_rf*rsq - c_rf);
302 /* 4 flops for RF energy */
303 #endif
304 #endif
306 #ifdef CALC_COUL_TAB
307 rs = rsq*rinv*tab_coul_scale;
308 ri = int(rs);
309 frac = rs - ri;
310 #if !GMX_DOUBLE
311 /* fexcl = F_i + frac * (F_(i+1)-F_i) */
312 fexcl = tab_coul_FDV0[ri*4] + frac*tab_coul_FDV0[ri*4+1];
313 #else
314 /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
315 fexcl = (1 - frac)*tab_coul_F[ri] + frac*tab_coul_F[ri+1];
316 #endif
317 fcoul = interact*rinvsq - fexcl;
318 /* 7 flops for float 1/r-table force */
319 #ifdef CALC_ENERGIES
320 #if !GMX_DOUBLE
321 vcoul = qq*(interact*(rinv - ic->sh_ewald)
322 -(tab_coul_FDV0[ri*4+2]
323 -halfsp*frac*(tab_coul_FDV0[ri*4] + fexcl)));
324 /* 7 flops for float 1/r-table energy (8 with excls) */
325 #else
326 vcoul = qq*(interact*(rinv - ic->sh_ewald)
327 -(tab_coul_V[ri]
328 -halfsp*frac*(tab_coul_F[ri] + fexcl)));
329 #endif
330 #endif
331 fcoul *= qq*rinv;
332 #endif
334 #ifdef CALC_ENERGIES
335 #ifdef ENERGY_GROUPS
336 Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += vcoul;
337 #else
338 Vc_ci += vcoul;
339 /* 1 flop for Coulomb energy addition */
340 #endif
341 #endif
342 #endif
344 #ifdef CALC_COULOMB
345 #ifdef HALF_LJ
346 if (i < UNROLLI/2)
347 #endif
349 fscal = frLJ*rinvsq + fcoul;
350 /* 2 flops for scalar LJ+Coulomb force */
352 #ifdef HALF_LJ
353 else
355 fscal = fcoul;
357 #endif
358 #else
359 fscal = frLJ*rinvsq;
360 #endif
361 fx = fscal*dx;
362 fy = fscal*dy;
363 fz = fscal*dz;
365 /* Increment i-atom force */
366 fi[i*FI_STRIDE+XX] += fx;
367 fi[i*FI_STRIDE+YY] += fy;
368 fi[i*FI_STRIDE+ZZ] += fz;
369 /* Decrement j-atom force */
370 f[aj*F_STRIDE+XX] -= fx;
371 f[aj*F_STRIDE+YY] -= fy;
372 f[aj*F_STRIDE+ZZ] -= fz;
373 /* 9 flops for force addition */
378 #undef interact
379 #undef EXCL_FORCES