2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * Note: this file was generated by the Verlet kernel generator for
44 #include "gromacs/math/vectypes.h"
45 #include "gromacs/mdlib/nb_verlet.h"
46 #include "gromacs/mdlib/nbnxn_simd.h"
47 #include "gromacs/mdtypes/interaction_const.h"
48 #include "gromacs/mdtypes/md_enums.h"
50 #ifdef GMX_NBNXN_SIMD_4XN
52 #include "gromacs/simd/vector_operations.h"
54 #if !(GMX_SIMD_REAL_WIDTH == 2 || GMX_SIMD_REAL_WIDTH == 4 || GMX_SIMD_REAL_WIDTH == 8)
55 #error "unsupported SIMD width"
58 #define GMX_SIMD_J_UNROLL_SIZE 1
59 #include "nbnxn_kernel_simd_4xn.h"
61 #include "gromacs/mdlib/force_flags.h"
62 #include "gromacs/mdlib/gmx_omp_nthreads.h"
63 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_common.h"
64 #include "gromacs/simd/simd.h"
65 #include "gromacs/utility/fatalerror.h"
66 #include "gromacs/utility/real.h"
// Enumerators naming the electrostatics treatment a kernel implements.
// coulktNR is listed last and is used as the first dimension of the
// kernel lookup tables p_nbk_noener, p_nbk_ener and p_nbk_energrp.
// NOTE(review): the enclosing enum declaration and the end of the doc
// comment are not visible in this listing -- confirm against upstream.
68 /*! \brief Kinds of electrostatic treatments in SIMD Verlet kernels
71 coulktRF
, coulktTAB
, coulktTAB_TWIN
, coulktEWALD
, coulktEWALD_TWIN
, coulktNR
// Enumerators naming the Van der Waals treatment a kernel implements.
// vdwktNR is listed last and is used as the second dimension of the
// kernel lookup tables p_nbk_noener, p_nbk_ener and p_nbk_energrp.
// NOTE(review): the enclosing enum declaration and the end of the doc
// comment are not visible in this listing -- confirm against upstream.
74 /*! \brief Kinds of Van der Waals treatments in SIMD Verlet kernels
77 vdwktLJCUT_COMBGEOM
, vdwktLJCUT_COMBLB
, vdwktLJCUT_COMBNONE
, vdwktLJFORCESWITCH
, vdwktLJPOTSWITCH
, vdwktLJEWALDCOMBGEOM
, vdwktNR
// Lookup table of the force-only kernels (names ending in _F_4xn),
// declared with dimensions [coulktNR][vdwktNR].
// nbnxn_kernel_simd_4xn indexes it as p_nbk_noener[coulkt][vdwkt] when
// GMX_FORCE_ENERGY is not set in force_flags.
// Entries appear in runs of six per electrostatics kind, in the order
// ElecRF, ElecQSTab, ElecQSTabTwinCut, ElecEw, ElecEwTwinCut; inside a run
// the order is LJCombGeom, LJCombLB, LJ, LJFSw, LJPSw, LJEwCombGeom.
// Both orders follow the coulkt and vdwkt enumerator lists of this file.
// NOTE(review): the initializer braces are not visible in this listing.
80 /* Declare and define the kernel function pointer lookup tables.
81 * The minor index of the array goes over both the LJ combination rules,
82 * which is only supported by plain cut-off, and the LJ switch/PME functions.
84 static p_nbk_func_noener p_nbk_noener
[coulktNR
][vdwktNR
] =
87 nbnxn_kernel_ElecRF_VdwLJCombGeom_F_4xn
,
88 nbnxn_kernel_ElecRF_VdwLJCombLB_F_4xn
,
89 nbnxn_kernel_ElecRF_VdwLJ_F_4xn
,
90 nbnxn_kernel_ElecRF_VdwLJFSw_F_4xn
,
91 nbnxn_kernel_ElecRF_VdwLJPSw_F_4xn
,
92 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_F_4xn
,
95 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_F_4xn
,
96 nbnxn_kernel_ElecQSTab_VdwLJCombLB_F_4xn
,
97 nbnxn_kernel_ElecQSTab_VdwLJ_F_4xn
,
98 nbnxn_kernel_ElecQSTab_VdwLJFSw_F_4xn
,
99 nbnxn_kernel_ElecQSTab_VdwLJPSw_F_4xn
,
100 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_F_4xn
,
103 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_F_4xn
,
104 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_F_4xn
,
105 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_F_4xn
,
106 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_F_4xn
,
107 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_F_4xn
,
108 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_F_4xn
,
111 nbnxn_kernel_ElecEw_VdwLJCombGeom_F_4xn
,
112 nbnxn_kernel_ElecEw_VdwLJCombLB_F_4xn
,
113 nbnxn_kernel_ElecEw_VdwLJ_F_4xn
,
114 nbnxn_kernel_ElecEw_VdwLJFSw_F_4xn
,
115 nbnxn_kernel_ElecEw_VdwLJPSw_F_4xn
,
116 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_F_4xn
,
119 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_F_4xn
,
120 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_F_4xn
,
121 nbnxn_kernel_ElecEwTwinCut_VdwLJ_F_4xn
,
122 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_F_4xn
,
123 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_F_4xn
,
124 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_F_4xn
,
// Lookup table of the kernels whose names end in _VF_4xn, declared with
// dimensions [coulktNR][vdwktNR].
// nbnxn_kernel_simd_4xn indexes it as p_nbk_ener[coulkt][vdwkt] in the
// branch taken when out->nV == 1 (commented there as no energy groups).
// Entry order matches p_nbk_noener: runs of six per electrostatics kind
// (ElecRF, ElecQSTab, ElecQSTabTwinCut, ElecEw, ElecEwTwinCut), each run
// ordered LJCombGeom, LJCombLB, LJ, LJFSw, LJPSw, LJEwCombGeom.
// NOTE(review): the initializer braces are not visible in this listing.
128 static p_nbk_func_ener p_nbk_ener
[coulktNR
][vdwktNR
] =
131 nbnxn_kernel_ElecRF_VdwLJCombGeom_VF_4xn
,
132 nbnxn_kernel_ElecRF_VdwLJCombLB_VF_4xn
,
133 nbnxn_kernel_ElecRF_VdwLJ_VF_4xn
,
134 nbnxn_kernel_ElecRF_VdwLJFSw_VF_4xn
,
135 nbnxn_kernel_ElecRF_VdwLJPSw_VF_4xn
,
136 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VF_4xn
,
139 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VF_4xn
,
140 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VF_4xn
,
141 nbnxn_kernel_ElecQSTab_VdwLJ_VF_4xn
,
142 nbnxn_kernel_ElecQSTab_VdwLJFSw_VF_4xn
,
143 nbnxn_kernel_ElecQSTab_VdwLJPSw_VF_4xn
,
144 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VF_4xn
,
147 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VF_4xn
,
148 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VF_4xn
,
149 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VF_4xn
,
150 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VF_4xn
,
151 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VF_4xn
,
152 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VF_4xn
,
155 nbnxn_kernel_ElecEw_VdwLJCombGeom_VF_4xn
,
156 nbnxn_kernel_ElecEw_VdwLJCombLB_VF_4xn
,
157 nbnxn_kernel_ElecEw_VdwLJ_VF_4xn
,
158 nbnxn_kernel_ElecEw_VdwLJFSw_VF_4xn
,
159 nbnxn_kernel_ElecEw_VdwLJPSw_VF_4xn
,
160 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VF_4xn
,
163 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VF_4xn
,
164 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VF_4xn
,
165 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VF_4xn
,
166 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VF_4xn
,
167 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VF_4xn
,
168 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VF_4xn
,
// Lookup table of the kernels whose names end in _VgrpF_4xn, declared
// with dimensions [coulktNR][vdwktNR]. It uses the same function pointer
// type (p_nbk_func_ener) as the p_nbk_ener table.
// nbnxn_kernel_simd_4xn indexes it as p_nbk_energrp[coulkt][vdwkt] in the
// energy group branch, and calls reduce_group_energies afterwards.
// Entry order matches the other two tables: runs of six per electrostatics
// kind (ElecRF, ElecQSTab, ElecQSTabTwinCut, ElecEw, ElecEwTwinCut), each
// run ordered LJCombGeom, LJCombLB, LJ, LJFSw, LJPSw, LJEwCombGeom.
// NOTE(review): the initializer braces are not visible in this listing.
172 static p_nbk_func_ener p_nbk_energrp
[coulktNR
][vdwktNR
] =
175 nbnxn_kernel_ElecRF_VdwLJCombGeom_VgrpF_4xn
,
176 nbnxn_kernel_ElecRF_VdwLJCombLB_VgrpF_4xn
,
177 nbnxn_kernel_ElecRF_VdwLJ_VgrpF_4xn
,
178 nbnxn_kernel_ElecRF_VdwLJFSw_VgrpF_4xn
,
179 nbnxn_kernel_ElecRF_VdwLJPSw_VgrpF_4xn
,
180 nbnxn_kernel_ElecRF_VdwLJEwCombGeom_VgrpF_4xn
,
183 nbnxn_kernel_ElecQSTab_VdwLJCombGeom_VgrpF_4xn
,
184 nbnxn_kernel_ElecQSTab_VdwLJCombLB_VgrpF_4xn
,
185 nbnxn_kernel_ElecQSTab_VdwLJ_VgrpF_4xn
,
186 nbnxn_kernel_ElecQSTab_VdwLJFSw_VgrpF_4xn
,
187 nbnxn_kernel_ElecQSTab_VdwLJPSw_VgrpF_4xn
,
188 nbnxn_kernel_ElecQSTab_VdwLJEwCombGeom_VgrpF_4xn
,
191 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombGeom_VgrpF_4xn
,
192 nbnxn_kernel_ElecQSTabTwinCut_VdwLJCombLB_VgrpF_4xn
,
193 nbnxn_kernel_ElecQSTabTwinCut_VdwLJ_VgrpF_4xn
,
194 nbnxn_kernel_ElecQSTabTwinCut_VdwLJFSw_VgrpF_4xn
,
195 nbnxn_kernel_ElecQSTabTwinCut_VdwLJPSw_VgrpF_4xn
,
196 nbnxn_kernel_ElecQSTabTwinCut_VdwLJEwCombGeom_VgrpF_4xn
,
199 nbnxn_kernel_ElecEw_VdwLJCombGeom_VgrpF_4xn
,
200 nbnxn_kernel_ElecEw_VdwLJCombLB_VgrpF_4xn
,
201 nbnxn_kernel_ElecEw_VdwLJ_VgrpF_4xn
,
202 nbnxn_kernel_ElecEw_VdwLJFSw_VgrpF_4xn
,
203 nbnxn_kernel_ElecEw_VdwLJPSw_VgrpF_4xn
,
204 nbnxn_kernel_ElecEw_VdwLJEwCombGeom_VgrpF_4xn
,
207 nbnxn_kernel_ElecEwTwinCut_VdwLJCombGeom_VgrpF_4xn
,
208 nbnxn_kernel_ElecEwTwinCut_VdwLJCombLB_VgrpF_4xn
,
209 nbnxn_kernel_ElecEwTwinCut_VdwLJ_VgrpF_4xn
,
210 nbnxn_kernel_ElecEwTwinCut_VdwLJFSw_VgrpF_4xn
,
211 nbnxn_kernel_ElecEwTwinCut_VdwLJPSw_VgrpF_4xn
,
212 nbnxn_kernel_ElecEwTwinCut_VdwLJEwCombGeom_VgrpF_4xn
,
// Adds the contents of the SIMD energy group buffers VSvdw and VSc into
// the energy arrays Vvdw and Vc, which are indexed as [i*ng + j].
//
// Parameters:
//   ng       loop bound for i, j, j0 and j1; the caller in this file
//            passes nbat->nenergrp.
//   ng_2log  shift count used to form ng_p2 = 1 << ng_2log; the caller
//            passes nbat->neg_2log.
//   VSvdw    read-only input buffer, read at offsets c+0 and c+1.
//   VSc      read-only input buffer, read at offsets c+0 and c+1.
//   Vvdw     output array, accumulated into with +=.
//   Vc       output array, accumulated into with +=.
//
// NOTE(review): the embedded original line numbers jump from 233 to 239
// and from 249 to 257, so the body of the j loop and whatever follows the
// fourth accumulation are not visible in this listing. Presumably the j
// loop clears Vvdw and Vc and the s loop advances c -- confirm against
// upstream before relying on either.
218 reduce_group_energies(int ng
, int ng_2log
,
219 const real
*VSvdw
, const real
*VSc
,
220 real
*Vvdw
, real
*Vc
)
// unrollj is the SIMD width divided by the j unroll size that this file
// defines as 1; unrollj_half bounds the innermost s loop.
222 const int unrollj
= GMX_SIMD_REAL_WIDTH
/GMX_SIMD_J_UNROLL_SIZE
;
223 const int unrollj_half
= unrollj
/2;
224 int ng_p2
, i
, j
, j0
, j1
, c
, s
;
226 ng_p2
= (1<<ng_2log
);
228 /* The size of the x86 SIMD energy group buffer array is:
229 * ng*ng*ng_p2*unrollj_half*simd_width
231 for (i
= 0; i
< ng
; i
++)
233 for (j
= 0; j
< ng
; j
++)
239 for (j1
= 0; j1
< ng
; j1
++)
241 for (j0
= 0; j0
< ng
; j0
++)
// c is the buffer offset of the (i, j1, j0) entry: the j0 stride is
// unrollj_half*unrollj elements, the j1 stride is ng_p2 times that and
// the i stride is ng*ng_p2 times that.
243 c
= ((i
*ng
+ j1
)*ng_p2
+ j0
)*unrollj_half
*unrollj
;
244 for (s
= 0; s
< unrollj_half
; s
++)
// Element c+0 is added to column j0 and element c+1 to column j1 of row i.
246 Vvdw
[i
*ng
+j0
] += VSvdw
[c
+0];
247 Vvdw
[i
*ng
+j1
] += VSvdw
[c
+1];
248 Vc
[i
*ng
+j0
] += VSc
[c
+0];
249 Vc
[i
*ng
+j1
] += VSc
[c
+1];
257 #else /* GMX_NBNXN_SIMD_4XN */
259 #include "gromacs/utility/fatalerror.h"
261 #endif /* GMX_NBNXN_SIMD_4XN */
// Entry point that runs the 4xN SIMD non-bonded kernels over the pair
// lists in nbl_list.
//
// With GMX_NBNXN_SIMD_4XN defined, the visible code:
//   1. picks coulkt from ic->eeltype, ewald_excl and a comparison of
//      ic->rcoulomb with ic->rvdw;
//   2. picks vdwkt from ic->vdwtype, ic->vdw_modifier, nbat->comb_rule
//      and ic->ljpme_comb_rule;
//   3. loops over nb in [0, nnbl) inside an OpenMP parallel for and calls
//      one kernel per pair list:
//        p_nbk_noener  when GMX_FORCE_ENERGY is not set in force_flags,
//        p_nbk_ener    when out->nV == 1,
//        p_nbk_energrp in the energy group case, followed by a call to
//                      reduce_group_energies;
//   4. calls reduce_energies_over_lists(nbat, nnbl, Vvdw, Vc) when
//      GMX_FORCE_ENERGY is set.
// The final gmx_incons call reports that the function was called while
// these kernels are not enabled; presumably it sits in the #else branch
// of the #ifdef, which is not visible in this listing.
//
// Parameters (all tagged gmx_unused, presumably because the body is
// compiled out without GMX_NBNXN_SIMD_4XN -- confirm):
//   nbl_list     pair list set; nnbl is read from it.
//   nbat         atom data; comb_rule, out[], nenergrp, neg_2log are read.
//   ic           interaction constants, read as listed in steps 1 and 2.
//   ewald_excl   compared with ewaldexclTable.
//   shift_vec    not referenced in the visible lines.
//   force_flags  tested against GMX_FORCE_VIRIAL and GMX_FORCE_ENERGY.
//   clearF       compared with enbvClearFYes before clearing buffers.
//   fshift       not referenced in the visible lines.
//   Vvdw         passed to reduce_energies_over_lists.
// NOTE(review): Vc is used in the body but its parameter line, the return
// type, the braces, the else lines, several assignments and the trailing
// kernel call arguments are missing from this listing.
264 nbnxn_kernel_simd_4xn(nbnxn_pairlist_set_t gmx_unused
*nbl_list
,
265 const nbnxn_atomdata_t gmx_unused
*nbat
,
266 const interaction_const_t gmx_unused
*ic
,
267 int gmx_unused ewald_excl
,
268 rvec gmx_unused
*shift_vec
,
269 int gmx_unused force_flags
,
270 int gmx_unused clearF
,
271 real gmx_unused
*fshift
,
273 real gmx_unused
*Vvdw
)
274 #ifdef GMX_NBNXN_SIMD_4XN
277 nbnxn_pairlist_t
**nbl
;
278 int coulkt
, vdwkt
= 0;
279 int nb
, nthreads gmx_unused
;
281 nnbl
= nbl_list
->nnbl
;
// Electrostatics kernel kind. The first test covers reaction-field and
// plain cut-off electrostatics. The remaining tests separate tabulated
// from the other Ewald exclusion treatment and compare rcoulomb with rvdw.
// NOTE(review): the assignments for the reaction-field and equal-cut-off
// tabulated cases and the else lines are not visible; presumably the
// _TWIN kinds are assigned when rcoulomb differs from rvdw -- confirm.
284 if (EEL_RF(ic
->eeltype
) || ic
->eeltype
== eelCUT
)
290 if (ewald_excl
== ewaldexclTable
)
292 if (ic
->rcoulomb
== ic
->rvdw
)
298 coulkt
= coulktTAB_TWIN
;
303 if (ic
->rcoulomb
== ic
->rvdw
)
305 coulkt
= coulktEWALD
;
309 coulkt
= coulktEWALD_TWIN
;
// Van der Waals kernel kind. For cut-off VdW the interaction modifier is
// switched on, and under the potential-shift case the LJ combination rule
// picks one of the three LJCUT kinds. LJ-PME with the LB combination rule
// is reported through gmx_incons.
314 if (ic
->vdwtype
== evdwCUT
)
316 switch (ic
->vdw_modifier
)
319 case eintmodPOTSHIFT
:
320 switch (nbat
->comb_rule
)
322 case ljcrGEOM
: vdwkt
= vdwktLJCUT_COMBGEOM
; break;
323 case ljcrLB
: vdwkt
= vdwktLJCUT_COMBLB
; break;
324 case ljcrNONE
: vdwkt
= vdwktLJCUT_COMBNONE
; break;
325 default: gmx_incons("Unknown combination rule");
328 case eintmodFORCESWITCH
:
329 vdwkt
= vdwktLJFORCESWITCH
;
331 case eintmodPOTSWITCH
:
332 vdwkt
= vdwktLJPOTSWITCH
;
335 gmx_incons("Unsupported VdW interaction modifier");
338 else if (ic
->vdwtype
== evdwPME
)
340 if (ic
->ljpme_comb_rule
== eljpmeLB
)
342 gmx_incons("The nbnxn SIMD kernels don't support LJ-PME with LB");
344 vdwkt
= vdwktLJEWALDCOMBGEOM
;
348 gmx_incons("Unsupported VdW interaction type");
// The thread count for the parallel loop below comes from
// gmx_omp_nthreads_get(emntNonbonded); each iteration handles one pair
// list together with the output entry nbat->out[nb].
350 // cppcheck-suppress unreadVariable
351 nthreads
= gmx_omp_nthreads_get(emntNonbonded
);
352 #pragma omp parallel for schedule(static) num_threads(nthreads)
353 for (nb
= 0; nb
< nnbl
; nb
++)
355 // Presently, the kernels do not call C++ code that can throw, so
356 // no need for a try/catch pair in this OpenMP region.
357 nbnxn_atomdata_output_t
*out
;
360 out
= &nbat
->out
[nb
];
362 if (clearF
== enbvClearFYes
)
364 clear_f(nbat
, nb
, out
->f
);
// Choice of the shift force destination. The visible assignment uses the
// per-output buffer out->fshift and clears it when clearF requests it.
// NOTE(review): the branch taken when the virial is requested and there
// is a single pair list is not visible; presumably fshift_p is set to the
// fshift argument there -- confirm against upstream.
367 if ((force_flags
& GMX_FORCE_VIRIAL
) && nnbl
== 1)
373 fshift_p
= out
->fshift
;
375 if (clearF
== enbvClearFYes
)
377 clear_fshift(fshift_p
);
// Kernel dispatch through the three lookup tables, all indexed as
// [coulkt][vdwkt]. The trailing arguments of each call are not visible.
381 if (!(force_flags
& GMX_FORCE_ENERGY
))
383 /* Don't calculate energies */
384 p_nbk_noener
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
390 else if (out
->nV
== 1)
392 /* No energy groups */
396 p_nbk_ener
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
406 /* Calculate energy group contributions */
// Two loops over out->nVS entries precede the kernel call; their bodies
// are not visible (presumably they clear out->VSvdw and out->VSc).
409 for (i
= 0; i
< out
->nVS
; i
++)
413 for (i
= 0; i
< out
->nVS
; i
++)
418 p_nbk_energrp
[coulkt
][vdwkt
](nbl
[nb
], nbat
,
426 reduce_group_energies(nbat
->nenergrp
, nbat
->neg_2log
,
427 out
->VSvdw
, out
->VSc
,
// After the loop over pair lists: when energies were requested, combine
// the per-list results through reduce_energies_over_lists.
432 if (force_flags
& GMX_FORCE_ENERGY
)
434 reduce_energies_over_lists(nbat
, nnbl
, Vvdw
, Vc
);
439 gmx_incons("nbnxn_kernel_simd_4xn called when such kernels "
440 " are not enabled.");
443 #undef GMX_SIMD_J_UNROLL_SIZE