Move main.*, splitter.*, gmx_omp_nthreads.* to mdlib
[gromacs.git] / src / gromacs / mdlib / nbnxn_search_simd_2xnn.h
blob2ff7bc991e32ad57953e57a7c25d98d9c5ebc0e4
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /* Get the half-width SIMD stuff from the kernel utils files */
37 #include "gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_simd_utils.h"
40 #if GMX_SIMD_REAL_WIDTH >= 2*NBNXN_CPU_CLUSTER_I_SIZE
41 #define STRIDE_S (GMX_SIMD_REAL_WIDTH/2)
42 #else
43 #define STRIDE_S NBNXN_CPU_CLUSTER_I_SIZE
44 #endif
46 static gmx_inline gmx_simd_real_t gmx_load_hpr_hilo_pr(const real *a)
48 gmx_mm_hpr a_S;
49 gmx_simd_real_t a_a_S;
51 gmx_load_hpr(&a_S, a);
53 gmx_2hpr_to_pr(a_S, a_S, &a_a_S);
55 return a_a_S;
58 static gmx_inline gmx_simd_real_t gmx_set_2real_shift_pr(const real *a, real shift)
60 gmx_mm_hpr a0_S, a1_S;
61 gmx_simd_real_t a0_a1_S;
63 gmx_set1_hpr(&a0_S, a[0] + shift);
64 gmx_set1_hpr(&a1_S, a[1] + shift);
66 gmx_2hpr_to_pr(a0_S, a1_S, &a0_a1_S);
68 return a0_a1_S;
71 /* Copies PBC shifted i-cell packed atom coordinates to working array */
72 static gmx_inline void
73 icell_set_x_simd_2xnn(int ci,
74 real shx, real shy, real shz,
75 int gmx_unused na_c,
76 int gmx_unused stride, const real *x,
77 nbnxn_list_work_t *work)
79 int ia;
80 nbnxn_x_ci_simd_2xnn_t *x_ci;
82 x_ci = work->x_ci_simd_2xnn;
84 ia = X_IND_CI_SIMD_2XNN(ci);
86 x_ci->ix_S0 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 0, shx);
87 x_ci->iy_S0 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 0, shy);
88 x_ci->iz_S0 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 0, shz);
89 x_ci->ix_S2 = gmx_set_2real_shift_pr(x + ia + 0*STRIDE_S + 2, shx);
90 x_ci->iy_S2 = gmx_set_2real_shift_pr(x + ia + 1*STRIDE_S + 2, shy);
91 x_ci->iz_S2 = gmx_set_2real_shift_pr(x + ia + 2*STRIDE_S + 2, shz);
94 /* SIMD code for making a pair list of cell ci vs cell cjf-cjl
95 * for coordinates in packed format.
96 * Checks bouding box distances and possibly atom pair distances.
97 * This is an accelerated version of make_cluster_list_simple.
99 static gmx_inline void
100 make_cluster_list_simd_2xnn(const nbnxn_grid_t *gridj,
101 nbnxn_pairlist_t *nbl,
102 int ci, int cjf, int cjl,
103 gmx_bool remove_sub_diag,
104 const real *x_j,
105 real rl2, float rbb2,
106 int *ndistc)
108 const nbnxn_x_ci_simd_2xnn_t *work;
109 const nbnxn_bb_t *bb_ci;
111 gmx_simd_real_t jx_S, jy_S, jz_S;
113 gmx_simd_real_t dx_S0, dy_S0, dz_S0;
114 gmx_simd_real_t dx_S2, dy_S2, dz_S2;
116 gmx_simd_real_t rsq_S0;
117 gmx_simd_real_t rsq_S2;
119 gmx_simd_bool_t wco_S0;
120 gmx_simd_bool_t wco_S2;
121 gmx_simd_bool_t wco_any_S;
123 gmx_simd_real_t rc2_S;
125 gmx_bool InRange;
126 float d2;
127 int xind_f, xind_l, cj;
129 cjf = CI_TO_CJ_SIMD_2XNN(cjf);
130 cjl = CI_TO_CJ_SIMD_2XNN(cjl+1) - 1;
132 work = nbl->work->x_ci_simd_2xnn;
134 bb_ci = nbl->work->bb_ci;
136 rc2_S = gmx_simd_set1_r(rl2);
138 InRange = FALSE;
139 while (!InRange && cjf <= cjl)
141 #ifdef NBNXN_SEARCH_BB_SIMD4
142 d2 = subc_bb_dist2_simd4(0, bb_ci, cjf, gridj->bbj);
143 #else
144 d2 = subc_bb_dist2(0, bb_ci, cjf, gridj->bbj);
145 #endif
146 *ndistc += 2;
148 /* Check if the distance is within the distance where
149 * we use only the bounding box distance rbb,
150 * or within the cut-off and there is at least one atom pair
151 * within the cut-off.
153 if (d2 < rbb2)
155 InRange = TRUE;
157 else if (d2 < rl2)
159 xind_f = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjf);
161 jx_S = gmx_load_hpr_hilo_pr(x_j+xind_f+0*STRIDE_S);
162 jy_S = gmx_load_hpr_hilo_pr(x_j+xind_f+1*STRIDE_S);
163 jz_S = gmx_load_hpr_hilo_pr(x_j+xind_f+2*STRIDE_S);
165 /* Calculate distance */
166 dx_S0 = gmx_simd_sub_r(work->ix_S0, jx_S);
167 dy_S0 = gmx_simd_sub_r(work->iy_S0, jy_S);
168 dz_S0 = gmx_simd_sub_r(work->iz_S0, jz_S);
169 dx_S2 = gmx_simd_sub_r(work->ix_S2, jx_S);
170 dy_S2 = gmx_simd_sub_r(work->iy_S2, jy_S);
171 dz_S2 = gmx_simd_sub_r(work->iz_S2, jz_S);
173 /* rsq = dx*dx+dy*dy+dz*dz */
174 rsq_S0 = gmx_simd_calc_rsq_r(dx_S0, dy_S0, dz_S0);
175 rsq_S2 = gmx_simd_calc_rsq_r(dx_S2, dy_S2, dz_S2);
177 wco_S0 = gmx_simd_cmplt_r(rsq_S0, rc2_S);
178 wco_S2 = gmx_simd_cmplt_r(rsq_S2, rc2_S);
180 wco_any_S = gmx_simd_or_b(wco_S0, wco_S2);
182 InRange = gmx_simd_anytrue_b(wco_any_S);
184 *ndistc += 2*GMX_SIMD_REAL_WIDTH;
186 if (!InRange)
188 cjf++;
191 if (!InRange)
193 return;
196 InRange = FALSE;
197 while (!InRange && cjl > cjf)
199 #ifdef NBNXN_SEARCH_BB_SIMD4
200 d2 = subc_bb_dist2_simd4(0, bb_ci, cjl, gridj->bbj);
201 #else
202 d2 = subc_bb_dist2(0, bb_ci, cjl, gridj->bbj);
203 #endif
204 *ndistc += 2;
206 /* Check if the distance is within the distance where
207 * we use only the bounding box distance rbb,
208 * or within the cut-off and there is at least one atom pair
209 * within the cut-off.
211 if (d2 < rbb2)
213 InRange = TRUE;
215 else if (d2 < rl2)
217 xind_l = X_IND_CJ_SIMD_2XNN(CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cjl);
219 jx_S = gmx_load_hpr_hilo_pr(x_j+xind_l+0*STRIDE_S);
220 jy_S = gmx_load_hpr_hilo_pr(x_j+xind_l+1*STRIDE_S);
221 jz_S = gmx_load_hpr_hilo_pr(x_j+xind_l+2*STRIDE_S);
223 /* Calculate distance */
224 dx_S0 = gmx_simd_sub_r(work->ix_S0, jx_S);
225 dy_S0 = gmx_simd_sub_r(work->iy_S0, jy_S);
226 dz_S0 = gmx_simd_sub_r(work->iz_S0, jz_S);
227 dx_S2 = gmx_simd_sub_r(work->ix_S2, jx_S);
228 dy_S2 = gmx_simd_sub_r(work->iy_S2, jy_S);
229 dz_S2 = gmx_simd_sub_r(work->iz_S2, jz_S);
231 /* rsq = dx*dx+dy*dy+dz*dz */
232 rsq_S0 = gmx_simd_calc_rsq_r(dx_S0, dy_S0, dz_S0);
233 rsq_S2 = gmx_simd_calc_rsq_r(dx_S2, dy_S2, dz_S2);
235 wco_S0 = gmx_simd_cmplt_r(rsq_S0, rc2_S);
236 wco_S2 = gmx_simd_cmplt_r(rsq_S2, rc2_S);
238 wco_any_S = gmx_simd_or_b(wco_S0, wco_S2);
240 InRange = gmx_simd_anytrue_b(wco_any_S);
242 *ndistc += 2*GMX_SIMD_REAL_WIDTH;
244 if (!InRange)
246 cjl--;
250 if (cjf <= cjl)
252 for (cj = cjf; cj <= cjl; cj++)
254 /* Store cj and the interaction mask */
255 nbl->cj[nbl->ncj].cj = CI_TO_CJ_SIMD_2XNN(gridj->cell0) + cj;
256 nbl->cj[nbl->ncj].excl = get_imask_simd_2xnn(remove_sub_diag, ci, cj);
257 nbl->ncj++;
259 /* Increase the closing index in i super-cell list */
260 nbl->ci[nbl->nci].cj_ind_end = nbl->ncj;
264 #undef STRIDE_S