src/gromacs/nbnxm/kernels_reference/kernel_ref_inner.h

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2018,2019, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /* When calculating RF or Ewald interactions we calculate the electrostatic
  37  * forces and energies on excluded atom pairs here in the non-bonded loops.
  38  */
  39 #if defined CHECK_EXCLS && (defined CALC_COULOMB || defined LJ_EWALD)
  40 #define EXCL_FORCES
  41 #endif
  42
  43 {
  44     int cj;
  45 #ifdef ENERGY_GROUPS
  46     int egp_cj;
  47 #endif
  48     int i;
  49
  50     cj = l_cj[cjind].cj;
  51
  52 #ifdef ENERGY_GROUPS
  53     egp_cj = nbatParams.energrp[cj];
  54 #endif
  55     for (i = 0; i < UNROLLI; i++)
  56     {
  57         int ai;
  58         int type_i_off;
  59         int j;
  60
  61         ai = ci*UNROLLI + i;
  62
  63         type_i_off = type[ai]*ntype2;
  64
  65         for (j = 0; j < UNROLLJ; j++)
  66         {
  67             int             aj;
  68             real            dx, dy, dz;
  69             real            rsq, rinv;
  70             real            rinvsq, rinvsix;
  71             real            c6, c12;
  72             real            FrLJ6 = 0, FrLJ12 = 0, frLJ = 0;
  73             real            VLJ gmx_unused;
  74 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
  75             real            r, rsw;
  76 #endif
  77
  78 #ifdef CALC_COULOMB
  79             real qq;
  80             real fcoul;
  81 #ifdef CALC_COUL_TAB
  82             real rs, frac;
  83             int  ri;
  84             real fexcl;
  85 #endif
  86 #ifdef CALC_ENERGIES
  87             real vcoul;
  88 #endif
  89 #endif
  90             real fscal;
  91             real fx, fy, fz;
  92
  93             /* A multiply mask used to zero an interaction
  94              * when either the distance cutoff is exceeded, or
  95              * (if appropriate) the i and j indices are
  96              * unsuitable for this kind of inner loop. */
  97             real skipmask;
  98
  99 #ifdef CHECK_EXCLS
 100             /* A multiply mask used to zero an interaction
 101              * when that interaction should be excluded
 102              * (e.g. because of bonding). */
 103             int interact;
 104
 105             interact = ((l_cj[cjind].excl>>(i*UNROLLI + j)) & 1);
 106 #ifndef EXCL_FORCES
 107             skipmask = interact;
 108 #else
 109             skipmask = (cj == ci_sh && j <= i) ? 0.0 : 1.0;
 110 #endif
 111 #else
 112 #define interact 1.0
 113             skipmask = 1.0;
 114 #endif
 115
 116             VLJ = 0;
 117
 118             aj = cj*UNROLLJ + j;
 119
 120             dx  = xi[i*XI_STRIDE+XX] - x[aj*X_STRIDE+XX];
 121             dy  = xi[i*XI_STRIDE+YY] - x[aj*X_STRIDE+YY];
 122             dz  = xi[i*XI_STRIDE+ZZ] - x[aj*X_STRIDE+ZZ];
 123
 124             rsq = dx*dx + dy*dy + dz*dz;
 125
 126             /* Prepare to enforce the cut-off. */
 127             skipmask = (rsq >= rcut2) ? 0 : skipmask;
 128             /* 9 flops for r^2 + cut-off check */
 129
 130             // Ensure the distances do not fall below the limit where r^-12 overflows.
 131             // This should never happen for normal interactions.
 132             rsq = std::max(rsq, NBNXN_MIN_RSQ);
 133
 134 #ifdef COUNT_PAIRS
 135             npair++;
 136 #endif
 137
 138             rinv = gmx::invsqrt(rsq);
 139             /* 5 flops for invsqrt */
 140
 141             /* Partially enforce the cut-off (and perhaps
 142              * exclusions) to avoid possible overflow of
 143              * rinvsix when computing LJ, and/or overflowing
 144              * the Coulomb table during lookup. */
 145             rinv = rinv * skipmask;
 146
 147             rinvsq  = rinv*rinv;
 148
 149 #ifdef HALF_LJ
 150             if (i < UNROLLI/2)
 151 #endif
 152             {
 153                 c6      = nbfp[type_i_off+type[aj]*2  ];
 154                 c12     = nbfp[type_i_off+type[aj]*2+1];
 155
 156 #if defined LJ_CUT || defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
 157                 rinvsix = interact*rinvsq*rinvsq*rinvsq;
 158                 FrLJ6   = c6*rinvsix;
 159                 FrLJ12  = c12*rinvsix*rinvsix;
 160                 frLJ    = FrLJ12 - FrLJ6;
 161                 /* 7 flops for r^-2 + LJ force */
 162 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
 163                 VLJ     = (FrLJ12 + c12*ic->repulsion_shift.cpot)/12 -
 164                     (FrLJ6 + c6*ic->dispersion_shift.cpot)/6;
 165                 /* 7 flops for LJ energy */
 166 #endif
 167 #endif
 168
 169 #if defined LJ_FORCE_SWITCH || defined LJ_POT_SWITCH
 170                 /* Force or potential switching from ic->rvdw_switch */
 171                 r       = rsq*rinv;
 172                 rsw     = r - ic->rvdw_switch;
 173                 rsw     = (rsw >= 0.0 ? rsw : 0.0);
 174 #endif
 175 #ifdef LJ_FORCE_SWITCH
 176                 frLJ   +=
 177                     -c6*(ic->dispersion_shift.c2 + ic->dispersion_shift.c3*rsw)*rsw*rsw*r
 178                     + c12*(ic->repulsion_shift.c2 + ic->repulsion_shift.c3*rsw)*rsw*rsw*r;
 179 #if defined CALC_ENERGIES
 180                 VLJ    +=
 181                     -c6*(-ic->dispersion_shift.c2/3 - ic->dispersion_shift.c3/4*rsw)*rsw*rsw*rsw
 182                     + c12*(-ic->repulsion_shift.c2/3 - ic->repulsion_shift.c3/4*rsw)*rsw*rsw*rsw;
 183 #endif
 184 #endif
 185
 186 #if defined CALC_ENERGIES || defined LJ_POT_SWITCH
 187                 /* Masking should be done after force switching,
 188                  * but before potential switching.
 189                  */
 190                 /* Need to zero the interaction if there should be exclusion. */
 191                 VLJ     = VLJ * interact;
 192 #endif
 193
 194 #ifdef LJ_POT_SWITCH
 195                 {
 196                     real sw, dsw;
 197
 198                     sw    = 1.0 + (swV3 + (swV4+ swV5*rsw)*rsw)*rsw*rsw*rsw;
 199                     dsw   = (swF2 + (swF3 + swF4*rsw)*rsw)*rsw*rsw;
 200
 201                     frLJ  = frLJ*sw - r*VLJ*dsw;
 202                     VLJ  *= sw;
 203                 }
 204 #endif
 205
 206 #ifdef LJ_EWALD
 207                 {
 208                     real            c6grid, rinvsix_nm, cr2, expmcr2, poly;
 209 #ifdef CALC_ENERGIES
 210                     real            sh_mask;
 211 #endif
 212
 213 #ifdef LJ_EWALD_COMB_GEOM
 214                     c6grid       = ljc[type[ai]*2]*ljc[type[aj]*2];
 215 #elif defined LJ_EWALD_COMB_LB
 216                     {
 217                         real sigma, sigma2, epsilon;
 218
 219                         /* These sigma and epsilon are scaled to give 6*C6 */
 220                         sigma   = ljc[type[ai]*2] + ljc[type[aj]*2];
 221                         epsilon = ljc[type[ai]*2+1]*ljc[type[aj]*2+1];
 222
 223                         sigma2  = sigma*sigma;
 224                         c6grid  = epsilon*sigma2*sigma2*sigma2;
 225                     }
 226 #else
 227 #error "No LJ Ewald combination rule defined"
 228 #endif
 229
 230 #ifdef CHECK_EXCLS
 231                     /* Recalculate rinvsix without exclusion mask */
 232                     rinvsix_nm   = rinvsq*rinvsq*rinvsq;
 233 #else
 234                     rinvsix_nm   = rinvsix;
 235 #endif
 236                     cr2          = lje_coeff2*rsq;
 237 #if GMX_DOUBLE
 238                     expmcr2      = exp(-cr2);
 239 #else
 240                     expmcr2      = expf(-cr2);
 241 #endif
 242                     poly         = 1 + cr2 + 0.5*cr2*cr2;
 243
 244                     /* Subtract the grid force from the total LJ force */
 245                     frLJ        += c6grid*(rinvsix_nm - expmcr2*(rinvsix_nm*poly + lje_coeff6_6));
 246 #ifdef CALC_ENERGIES
 247                     /* Shift should only be applied to real LJ pairs */
 248                     sh_mask      = lje_vc*interact;
 249
 250                     VLJ         += c6grid/6*(rinvsix_nm*(1 - expmcr2*poly) + sh_mask);
 251 #endif
 252                 }
 253 #endif          /* LJ_EWALD */
 254
 255 #ifdef VDW_CUTOFF_CHECK
 256                 /* Mask for VdW cut-off shorter than Coulomb cut-off */
 257                 {
 258                     real skipmask_rvdw;
 259
 260                     skipmask_rvdw = (rsq < rvdw2) ? 1.0 : 0.0;
 261                     frLJ         *= skipmask_rvdw;
 262 #ifdef CALC_ENERGIES
 263                     VLJ    *= skipmask_rvdw;
 264 #endif
 265                 }
 266 #else
 267 #if defined CALC_ENERGIES
 268                 /* Need to zero the interaction if r >= rcut */
 269                 VLJ     = VLJ * skipmask;
 270                 /* 1 more flop for LJ energy */
 271 #endif
 272 #endif          /* VDW_CUTOFF_CHECK */
 273
 274
 275 #ifdef CALC_ENERGIES
 276 #ifdef ENERGY_GROUPS
 277                 Vvdw[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += VLJ;
 278 #else
 279                 Vvdw_ci += VLJ;
 280                 /* 1 flop for LJ energy addition */
 281 #endif
 282 #endif
 283             }
 284
 285 #ifdef CALC_COULOMB
 286             /* Enforce the cut-off and perhaps exclusions. In
 287              * those cases, rinv is zero because of skipmask,
 288              * but fcoul and vcoul will later be non-zero (in
 289              * both RF and table cases) because of the
 290              * contributions that do not depend on rinv. These
 291              * contributions cannot be allowed to accumulate
 292              * to the force and potential, and the easiest way
 293              * to do this is to zero the charges in
 294              * advance. */
 295             qq = skipmask * qi[i] * q[aj];
 296
 297 #ifdef CALC_COUL_RF
 298             fcoul  = qq*(interact*rinv*rinvsq - k_rf2);
 299             /* 4 flops for RF force */
 300 #ifdef CALC_ENERGIES
 301             vcoul  = qq*(interact*rinv + k_rf*rsq - c_rf);
 302             /* 4 flops for RF energy */
 303 #endif
 304 #endif
 305
 306 #ifdef CALC_COUL_TAB
 307             rs     = rsq*rinv*tab_coul_scale;
 308             ri     = int(rs);
 309             frac   = rs - ri;
 310 #if !GMX_DOUBLE
 311             /* fexcl = F_i + frac * (F_(i+1)-F_i) */
 312             fexcl  = tab_coul_FDV0[ri*4] + frac*tab_coul_FDV0[ri*4+1];
 313 #else
 314             /* fexcl = (1-frac) * F_i + frac * F_(i+1) */
 315             fexcl  = (1 - frac)*tab_coul_F[ri] + frac*tab_coul_F[ri+1];
 316 #endif
 317             fcoul  = interact*rinvsq - fexcl;
 318             /* 7 flops for float 1/r-table force */
 319 #ifdef CALC_ENERGIES
 320 #if !GMX_DOUBLE
 321             vcoul  = qq*(interact*(rinv - ic->sh_ewald)
 322                          -(tab_coul_FDV0[ri*4+2]
 323                            -halfsp*frac*(tab_coul_FDV0[ri*4] + fexcl)));
 324             /* 7 flops for float 1/r-table energy (8 with excls) */
 325 #else
 326             vcoul  = qq*(interact*(rinv - ic->sh_ewald)
 327                          -(tab_coul_V[ri]
 328                            -halfsp*frac*(tab_coul_F[ri] + fexcl)));
 329 #endif
 330 #endif
 331             fcoul *= qq*rinv;
 332 #endif
 333
 334 #ifdef CALC_ENERGIES
 335 #ifdef ENERGY_GROUPS
 336             Vc[egp_sh_i[i] + ((egp_cj >> (nbatParams.neg_2log*j)) & egp_mask)] += vcoul;
 337 #else
 338             Vc_ci += vcoul;
 339             /* 1 flop for Coulomb energy addition */
 340 #endif
 341 #endif
 342 #endif
 343
 344 #ifdef CALC_COULOMB
 345 #ifdef HALF_LJ
 346             if (i < UNROLLI/2)
 347 #endif
 348             {
 349                 fscal = frLJ*rinvsq + fcoul;
 350                 /* 2 flops for scalar LJ+Coulomb force */
 351             }
 352 #ifdef HALF_LJ
 353             else
 354             {
 355                 fscal = fcoul;
 356             }
 357 #endif
 358 #else
 359             fscal = frLJ*rinvsq;
 360 #endif
 361             fx = fscal*dx;
 362             fy = fscal*dy;
 363             fz = fscal*dz;
 364
 365             /* Increment i-atom force */
 366             fi[i*FI_STRIDE+XX] += fx;
 367             fi[i*FI_STRIDE+YY] += fy;
 368             fi[i*FI_STRIDE+ZZ] += fz;
 369             /* Decrement j-atom force */
 370             f[aj*F_STRIDE+XX]  -= fx;
 371             f[aj*F_STRIDE+YY]  -= fy;
 372             f[aj*F_STRIDE+ZZ]  -= fz;
 373             /* 9 flops for force addition */
 374         }
 375     }
 376 }
 377
 378 #undef interact
 379 #undef EXCL_FORCES