Propose a post-submit matrix
[gromacs.git] / src / gromacs / mdlib / nbnxn_kernels / simd_4xn / nbnxn_kernel_simd_4xn.cpp
blob8f062dfc312444f49235d47802ef31fca3676aa0
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the Verlet kernel generator for
37 * kernel type 4xn.
40 #include "gmxpre.h"
42 #include "config.h"
44 #include "gromacs/math/vectypes.h"
45 #include "gromacs/mdlib/nb_verlet.h"
46 #include "gromacs/mdlib/nbnxn_simd.h"
47 #include "gromacs/mdtypes/interaction_const.h"
48 #include "gromacs/mdtypes/md_enums.h"
50 #ifdef GMX_NBNXN_SIMD_4XN
52 #include "gromacs/simd/vector_operations.h"
54 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
55 #error "unsupported SIMD width"
56 #endif
58 #define GMX_SIMD_J_UNROLL_SIZE 1
59 #include "nbnxn_kernel_simd_4xn.h"
61 #include "gromacs/mdlib/force_flags.h"
62 #include "gromacs/mdlib/gmx_omp_nthreads.h"
63 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/utility/fatalerror.h"
66 #include "gromacs/utility/real.h"
68 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
70 enum {
71 coulktRF, coulktTAB, coulktTAB_TWIN, coulktEWALD, coulktEWALD_TWIN, coulktNR
74 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
76 enum {
77 vdwktLJCUT_COMBGEOM, vdwktLJCUT_COMBLB, vdwktLJCUT_COMBNONE, vdwktLJFORCESWITCH, vdwktLJPOTSWITCH, vdwktLJEWALDCOMBGEOM, vdwktNR
80 /* Declare and define the kernel function pointer lookup tables.
81 * The minor index of the array goes over both the LJ combination rules,
82 * which is only supported by plain cut-off, and the LJ switch/PME functions.
84 static p_nbk_func_noener p_nbk_noener[coulktNR][vdwktNR] =
87 nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn,
88 nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn,
89 nbnxn_kernel_ElecRF_VdwLJ_F_4xn,
90 nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn,
91 nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn,
92 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn,
95 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn,
96 nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn,
97 nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn,
98 nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn,
99 nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn,
100 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn,
103 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn,
104 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn,
105 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn,
106 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn,
107 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn,
108 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn,
111 nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn,
112 nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn,
113 nbnxn_kernel_ElecEw_VdwLJ_F_4xn,
114 nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn,
115 nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn,
116 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn,
119 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn,
120 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn,
121 nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn,
122 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn,
123 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn,
124 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn,
128 static p_nbk_func_ener p_nbk_ener[coulktNR][vdwktNR] =
131 nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn,
132 nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn,
133 nbnxn_kernel_ElecRF_VdwLJ_VF_4xn,
134 nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn,
135 nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn,
136 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn,
139 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn,
140 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn,
141 nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn,
142 nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn,
143 nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn,
144 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn,
147 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn,
148 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn,
149 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn,
150 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn,
151 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn,
152 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn,
155 nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn,
156 nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn,
157 nbnxn_kernel_ElecEw_VdwLJ_VF_4xn,
158 nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn,
159 nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn,
160 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn,
163 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn,
164 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn,
165 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn,
166 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn,
167 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn,
168 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn,
172 static p_nbk_func_ener p_nbk_energrp[coulktNR][vdwktNR] =
175 nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn,
176 nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn,
177 nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn,
178 nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn,
179 nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn,
180 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn,
183 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn,
184 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn,
185 nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn,
186 nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn,
187 nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn,
188 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn,
191 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn,
192 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn,
193 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn,
194 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn,
195 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn,
196 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
199 nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn,
200 nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn,
201 nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn,
202 nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn,
203 nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn,
204 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn,
207 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn,
208 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn,
209 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn,
210 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn,
211 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn,
212 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn,
217 static void
218 reduce_group_energies(int ng, int ng_2log,
219 const real *VSvdw, const real *VSc,
220 real *Vvdw, real *Vc)
222 const int unrollj = GMX_SIMD_REAL_WIDTH/GMX_SIMD_J_UNROLL_SIZE;
223 const int unrollj_half = unrollj/2;
224 int ng_p2, i, j, j0, j1, c, s;
226 ng_p2 = (1<<ng_2log);
228 /* The size of the x86 SIMD energy group buffer array is:
229 * ng*ng*ng_p2*unrollj_half*simd_width
231 for (i = 0; i < ng; i++)
233 for (j = 0; j < ng; j++)
235 Vvdw[i*ng+j] = 0;
236 Vc[i*ng+j] = 0;
239 for (j1 = 0; j1 < ng; j1++)
241 for (j0 = 0; j0 < ng; j0++)
243 c = ((i*ng + j1)*ng_p2 + j0)*unrollj_half*unrollj;
244 for (s = 0; s < unrollj_half; s++)
246 Vvdw[i*ng+j0] += VSvdw[c+0];
247 Vvdw[i*ng+j1] += VSvdw[c+1];
248 Vc [i*ng+j0] += VSc [c+0];
249 Vc [i*ng+j1] += VSc [c+1];
250 c += unrollj + 2;
257 #else /* GMX_NBNXN_SIMD_4XN */
259 #include "gromacs/utility/fatalerror.h"
261 #endif /* GMX_NBNXN_SIMD_4XN */
263 void
264 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused *nbl_list,
265 const nbnxn_atomdata_t gmx_unused *nbat,
266 const interaction_const_t gmx_unused *ic,
267 int gmx_unused ewald_excl,
268 rvec gmx_unused *shift_vec,
269 int gmx_unused force_flags,
270 int gmx_unused clearF,
271 real gmx_unused *fshift,
272 real gmx_unused *Vc,
273 real gmx_unused *Vvdw)
274 #ifdef GMX_NBNXN_SIMD_4XN
276 int nnbl;
277 nbnxn_pairlist_t **nbl;
278 int coulkt, vdwkt = 0;
279 int nb, nthreads gmx_unused;
281 nnbl = nbl_list->nnbl;
282 nbl = nbl_list->nbl;
284 if (EEL_RF(ic->eeltype) || ic->eeltype == eelCUT)
286 coulkt = coulktRF;
288 else
290 if (ewald_excl == ewaldexclTable)
292 if (ic->rcoulomb == ic->rvdw)
294 coulkt = coulktTAB;
296 else
298 coulkt = coulktTAB_TWIN;
301 else
303 if (ic->rcoulomb == ic->rvdw)
305 coulkt = coulktEWALD;
307 else
309 coulkt = coulktEWALD_TWIN;
314 if (ic->vdwtype == evdwCUT)
316 switch (ic->vdw_modifier)
318 case eintmodNONE:
319 case eintmodPOTSHIFT:
320 switch (nbat->comb_rule)
322 case ljcrGEOM: vdwkt = vdwktLJCUT_COMBGEOM; break;
323 case ljcrLB: vdwkt = vdwktLJCUT_COMBLB; break;
324 case ljcrNONE: vdwkt = vdwktLJCUT_COMBNONE; break;
325 default: gmx_incons("Unknown combination rule");
327 break;
328 case eintmodFORCESWITCH:
329 vdwkt = vdwktLJFORCESWITCH;
330 break;
331 case eintmodPOTSWITCH:
332 vdwkt = vdwktLJPOTSWITCH;
333 break;
334 default:
335 gmx_incons("Unsupported VdW interaction modifier");
338 else if (ic->vdwtype == evdwPME)
340 if (ic->ljpme_comb_rule == eljpmeLB)
342 gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
344 vdwkt = vdwktLJEWALDCOMBGEOM;
346 else
348 gmx_incons("Unsupported VdW interaction type");
350 // cppcheck-suppress unreadVariable
351 nthreads = gmx_omp_nthreads_get(emntNonbonded);
352 #pragma omp parallel for schedule(static) num_threads(nthreads)
353 for (nb = 0; nb < nnbl; nb++)
355 // Presently, the kernels do not call C++ code that can throw, so
356 // no need for a try/catch pair in this OpenMP region.
357 nbnxn_atomdata_output_t *out;
358 real *fshift_p;
360 out = &nbat->out[nb];
362 if (clearF == enbvClearFYes)
364 clear_f(nbat, nb, out->f);
367 if ((force_flags & GMX_FORCE_VIRIAL) && nnbl == 1)
369 fshift_p = fshift;
371 else
373 fshift_p = out->fshift;
375 if (clearF == enbvClearFYes)
377 clear_fshift(fshift_p);
381 if (!(force_flags & GMX_FORCE_ENERGY))
383 /* Don't calculate energies */
384 p_nbk_noener[coulkt][vdwkt](nbl[nb], nbat,
386 shift_vec,
387 out->f,
388 fshift_p);
390 else if (out->nV == 1)
392 /* No energy groups */
393 out->Vvdw[0] = 0;
394 out->Vc[0] = 0;
396 p_nbk_ener[coulkt][vdwkt](nbl[nb], nbat,
398 shift_vec,
399 out->f,
400 fshift_p,
401 out->Vvdw,
402 out->Vc);
404 else
406 /* Calculate energy group contributions */
407 int i;
409 for (i = 0; i < out->nVS; i++)
411 out->VSvdw[i] = 0;
413 for (i = 0; i < out->nVS; i++)
415 out->VSc[i] = 0;
418 p_nbk_energrp[coulkt][vdwkt](nbl[nb], nbat,
420 shift_vec,
421 out->f,
422 fshift_p,
423 out->VSvdw,
424 out->VSc);
426 reduce_group_energies(nbat->nenergrp, nbat->neg_2log,
427 out->VSvdw, out->VSc,
428 out->Vvdw, out->Vc);
432 if (force_flags & GMX_FORCE_ENERGY)
434 reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc);
437 #else
439 gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
440 " are not enabled.");
442 #endif
443 #undef GMX_SIMD_J_UNROLL_SIZE