2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* Get the half-width SIMD stuff from the kernel utils files */
37 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h"
40 #if GMX_SIMD_REAL_WIDTH >= 2*NBNXN_CPU_CLUSTER_I_SIZE
41 #define STRIDE_S (GMX_SIMD_REAL_WIDTH/2)
43 #define STRIDE_S NBNXN_CPU_CLUSTER_I_SIZE
46 static gmx_inline gmx_simd_real_t
gmx_load_hpr_hilo_pr(const real
*a
)
49 gmx_simd_real_t a_a_S
;
51 gmx_load_hpr(&a_S
, a
);
53 gmx_2hpr_to_pr(a_S
, a_S
, &a_a_S
);
58 static gmx_inline gmx_simd_real_t
gmx_set_2real_shift_pr(const real
*a
, real shift
)
60 gmx_mm_hpr a0_S
, a1_S
;
61 gmx_simd_real_t a0_a1_S
;
63 gmx_set1_hpr(&a0_S
, a
[0] + shift
);
64 gmx_set1_hpr(&a1_S
, a
[1] + shift
);
66 gmx_2hpr_to_pr(a0_S
, a1_S
, &a0_a1_S
);
71 /* Copies PBC shifted i-cell packed atom coordinates to working array */
72 static gmx_inline
void
73 icell_set_x_simd_2xnn(int ci
,
74 real shx
, real shy
, real shz
,
76 int gmx_unused stride
, const real
*x
,
77 nbnxn_list_work_t
*work
)
80 nbnxn_x_ci_simd_2xnn_t
*x_ci
;
82 x_ci
= work
->x_ci_simd_2xnn
;
84 ia
= X_IND_CI_SIMD_2XNN(ci
);
86 x_ci
->ix_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 0*STRIDE_S
+ 0, shx
);
87 x_ci
->iy_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 1*STRIDE_S
+ 0, shy
);
88 x_ci
->iz_S0
= gmx_set_2real_shift_pr(x
+ ia
+ 2*STRIDE_S
+ 0, shz
);
89 x_ci
->ix_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 0*STRIDE_S
+ 2, shx
);
90 x_ci
->iy_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 1*STRIDE_S
+ 2, shy
);
91 x_ci
->iz_S2
= gmx_set_2real_shift_pr(x
+ ia
+ 2*STRIDE_S
+ 2, shz
);
94 /* SIMD code for making a pair list of cell ci vs cell cjf-cjl
95 * for coordinates in packed format.
96 * Checks bouding box distances and possibly atom pair distances.
97 * This is an accelerated version of make_cluster_list_simple.
99 static gmx_inline
void
100 make_cluster_list_simd_2xnn(const nbnxn_grid_t
*gridj
,
101 nbnxn_pairlist_t
*nbl
,
102 int ci
, int cjf
, int cjl
,
103 gmx_bool remove_sub_diag
,
105 real rl2
, float rbb2
,
108 const nbnxn_x_ci_simd_2xnn_t
*work
;
109 const nbnxn_bb_t
*bb_ci
;
111 gmx_simd_real_t jx_S
, jy_S
, jz_S
;
113 gmx_simd_real_t dx_S0
, dy_S0
, dz_S0
;
114 gmx_simd_real_t dx_S2
, dy_S2
, dz_S2
;
116 gmx_simd_real_t rsq_S0
;
117 gmx_simd_real_t rsq_S2
;
119 gmx_simd_bool_t wco_S0
;
120 gmx_simd_bool_t wco_S2
;
121 gmx_simd_bool_t wco_any_S
;
123 gmx_simd_real_t rc2_S
;
127 int xind_f
, xind_l
, cj
;
129 cjf
= CI_TO_CJ_SIMD_2XNN(cjf
);
130 cjl
= CI_TO_CJ_SIMD_2XNN(cjl
+1) - 1;
132 work
= nbl
->work
->x_ci_simd_2xnn
;
134 bb_ci
= nbl
->work
->bb_ci
;
136 rc2_S
= gmx_simd_set1_r(rl2
);
139 while (!InRange
&& cjf
<= cjl
)
141 #ifdef NBNXN_SEARCH_BB_SIMD4
142 d2
= subc_bb_dist2_simd4(0, bb_ci
, cjf
, gridj
->bbj
);
144 d2
= subc_bb_dist2(0, bb_ci
, cjf
, gridj
->bbj
);
148 /* Check if the distance is within the distance where
149 * we use only the bounding box distance rbb,
150 * or within the cut-off and there is at least one atom pair
151 * within the cut-off.
159 xind_f
= X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cjf
);
161 jx_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+0*STRIDE_S
);
162 jy_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+1*STRIDE_S
);
163 jz_S
= gmx_load_hpr_hilo_pr(x_j
+xind_f
+2*STRIDE_S
);
165 /* Calculate distance */
166 dx_S0
= gmx_simd_sub_r(work
->ix_S0
, jx_S
);
167 dy_S0
= gmx_simd_sub_r(work
->iy_S0
, jy_S
);
168 dz_S0
= gmx_simd_sub_r(work
->iz_S0
, jz_S
);
169 dx_S2
= gmx_simd_sub_r(work
->ix_S2
, jx_S
);
170 dy_S2
= gmx_simd_sub_r(work
->iy_S2
, jy_S
);
171 dz_S2
= gmx_simd_sub_r(work
->iz_S2
, jz_S
);
173 /* rsq = dx*dx+dy*dy+dz*dz */
174 rsq_S0
= gmx_simd_calc_rsq_r(dx_S0
, dy_S0
, dz_S0
);
175 rsq_S2
= gmx_simd_calc_rsq_r(dx_S2
, dy_S2
, dz_S2
);
177 wco_S0
= gmx_simd_cmplt_r(rsq_S0
, rc2_S
);
178 wco_S2
= gmx_simd_cmplt_r(rsq_S2
, rc2_S
);
180 wco_any_S
= gmx_simd_or_b(wco_S0
, wco_S2
);
182 InRange
= gmx_simd_anytrue_b(wco_any_S
);
184 *ndistc
+= 2*GMX_SIMD_REAL_WIDTH
;
197 while (!InRange
&& cjl
> cjf
)
199 #ifdef NBNXN_SEARCH_BB_SIMD4
200 d2
= subc_bb_dist2_simd4(0, bb_ci
, cjl
, gridj
->bbj
);
202 d2
= subc_bb_dist2(0, bb_ci
, cjl
, gridj
->bbj
);
206 /* Check if the distance is within the distance where
207 * we use only the bounding box distance rbb,
208 * or within the cut-off and there is at least one atom pair
209 * within the cut-off.
217 xind_l
= X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cjl
);
219 jx_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+0*STRIDE_S
);
220 jy_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+1*STRIDE_S
);
221 jz_S
= gmx_load_hpr_hilo_pr(x_j
+xind_l
+2*STRIDE_S
);
223 /* Calculate distance */
224 dx_S0
= gmx_simd_sub_r(work
->ix_S0
, jx_S
);
225 dy_S0
= gmx_simd_sub_r(work
->iy_S0
, jy_S
);
226 dz_S0
= gmx_simd_sub_r(work
->iz_S0
, jz_S
);
227 dx_S2
= gmx_simd_sub_r(work
->ix_S2
, jx_S
);
228 dy_S2
= gmx_simd_sub_r(work
->iy_S2
, jy_S
);
229 dz_S2
= gmx_simd_sub_r(work
->iz_S2
, jz_S
);
231 /* rsq = dx*dx+dy*dy+dz*dz */
232 rsq_S0
= gmx_simd_calc_rsq_r(dx_S0
, dy_S0
, dz_S0
);
233 rsq_S2
= gmx_simd_calc_rsq_r(dx_S2
, dy_S2
, dz_S2
);
235 wco_S0
= gmx_simd_cmplt_r(rsq_S0
, rc2_S
);
236 wco_S2
= gmx_simd_cmplt_r(rsq_S2
, rc2_S
);
238 wco_any_S
= gmx_simd_or_b(wco_S0
, wco_S2
);
240 InRange
= gmx_simd_anytrue_b(wco_any_S
);
242 *ndistc
+= 2*GMX_SIMD_REAL_WIDTH
;
252 for (cj
= cjf
; cj
<= cjl
; cj
++)
254 /* Store cj and the interaction mask */
255 nbl
->cj
[nbl
->ncj
].cj
= CI_TO_CJ_SIMD_2XNN(gridj
->cell0
) + cj
;
256 nbl
->cj
[nbl
->ncj
].excl
= get_imask_simd_2xnn(remove_sub_diag
, ci
, cj
);
259 /* Increase the closing index in i super-cell list */
260 nbl
->ci
[nbl
->nci
].cj_ind_end
= nbl
->ncj
;