From 0f0053fb631e26582e7def9fa95acd34610c7abe Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 7 Jul 2015 21:19:00 +0200 Subject: [PATCH] Convert nbnxn search,grid,atomdata to C++ Move iteration variables into loops, use cmath/algorithm, and std::max/min. Remove extern "C" from include files. Change-Id: I14e0d6e6ca92a41df243bbef4d2f8f0a16507c13 --- .../mdlib/{nbnxn_atomdata.c => nbnxn_atomdata.cpp} | 275 ++++++------- src/gromacs/mdlib/nbnxn_atomdata.h | 10 +- src/gromacs/mdlib/{nbnxn_grid.c => nbnxn_grid.cpp} | 396 +++++++++---------- src/gromacs/mdlib/nbnxn_grid.h | 6 - .../mdlib/{nbnxn_search.c => nbnxn_search.cpp} | 428 +++++++++------------ src/gromacs/mdlib/nbnxn_search.h | 6 - 6 files changed, 491 insertions(+), 630 deletions(-) rename src/gromacs/mdlib/{nbnxn_atomdata.c => nbnxn_atomdata.cpp} (89%) rename src/gromacs/mdlib/{nbnxn_grid.c => nbnxn_grid.cpp} (85%) rename src/gromacs/mdlib/{nbnxn_search.c => nbnxn_search.cpp} (93%) diff --git a/src/gromacs/mdlib/nbnxn_atomdata.c b/src/gromacs/mdlib/nbnxn_atomdata.cpp similarity index 89% rename from src/gromacs/mdlib/nbnxn_atomdata.c rename to src/gromacs/mdlib/nbnxn_atomdata.cpp index 8c05933036..20bb7b6e2c 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.c +++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp @@ -40,14 +40,16 @@ #include "config.h" #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "thread_mpi/atomic.h" #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/math/vec.h" #include "gromacs/mdlib/nb_verlet.h" #include "gromacs/mdlib/nbnxn_consts.h" @@ -61,6 +63,7 @@ #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" + /* Default nbnxn allocation routine, allocates NBNXN_MEM_ALIGN byte aligned */ void nbnxn_alloc_aligned(void **ptr, size_t nbytes) { @@ -151,8 +154,6 @@ static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out, int nenergrp, int stride, nbnxn_alloc_t *ma) { - int cj_size; - out->f = NULL; ma((void **)&out->fshift, SHIFTS*DIM*sizeof(*out->fshift)); out->nV = nenergrp*nenergrp; @@ -162,7 +163,7 @@ static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out, if (nb_kernel_type == nbnxnk4xN_SIMD_4xN || nb_kernel_type == nbnxnk4xN_SIMD_2xNN) { - cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); + int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size; ma((void **)&out->VSvdw, out->nVS*sizeof(*out->VSvdw)); ma((void **)&out->VSc, out->nVS*sizeof(*out->VSc )); @@ -192,23 +193,23 @@ static void copy_int_to_nbat_int(const int *a, int na, int na_round, static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) { - int a, d, j, c; + int j, c; switch (nbatFormat) { case nbatXYZ: - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { xnb[(a0+a)*STRIDE_XYZ+d] = 0; } } break; case nbatXYZQ: - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { xnb[(a0+a)*STRIDE_XYZQ+d] = 0; } @@ -217,7 +218,7 @@ static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) case nbatX4: j = X4_IND_A(a0); c = a0 & (PACK_X4-1); - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { xnb[j+XX*PACK_X4] = 0; xnb[j+YY*PACK_X4] = 0; @@ -234,7 +235,7 @@ static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) case nbatX8: j = X8_IND_A(a0); c = a0 & (PACK_X8-1); - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { xnb[j+XX*PACK_X8] = 0; xnb[j+YY*PACK_X8] = 0; @@ -374,10 +375,9 @@ void copy_rvec_to_nbat_real(const int *a, int na, int na_round, /* Stores the LJ parameter data in a format convenient for different kernels */ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) { - int nt, i, j; real c6, c12; - nt = nbat->ntype; + int nt = nbat->ntype; if (bSIMD) { @@ -388,9 +388,9 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) * be used, but introducing the conditional code is not * really worth it. */ nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4)); - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { - for (j = 0; j < nt; j++) + for (int j = 0; j < nt; j++) { nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0]; nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1]; @@ -409,15 +409,15 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) case ljcrGEOM: nbat->comb_rule = ljcrGEOM; - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { /* Store the sqrt of the diagonal from the nbfp matrix */ - nbat->nbfp_comb[i*2 ] = sqrt(nbat->nbfp[(i*nt+i)*2 ]); - nbat->nbfp_comb[i*2+1] = sqrt(nbat->nbfp[(i*nt+i)*2+1]); + nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]); + nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]); } break; case ljcrLB: - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { /* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */ c6 = nbat->nbfp[(i*nt+i)*2 ]; @@ -427,8 +427,8 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) /* We store 0.5*2^1/6*sigma and sqrt(4*3*eps), * so we get 6*C6 and 12*C12 after combining. */ - nbat->nbfp_comb[i*2 ] = 0.5*pow(c12/c6, 1.0/6.0); - nbat->nbfp_comb[i*2+1] = sqrt(c6*c6/c12); + nbat->nbfp_comb[i*2 ] = 0.5*std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)); + nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12); } else { @@ -450,7 +450,6 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) static void nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) { - int i, j; const int simd_width = GMX_SIMD_REAL_WIDTH; int simd_excl_size; /* Set the diagonal cluster pair exclusion mask setup data. * In the kernel we can subtract 1 to generate the subsequent mask. 
*/ int simd_4xn_diag_size; - const real simdFalse = -1, simdTrue = 1; - real *simd_interaction_array; - simd_4xn_diag_size = max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width); + simd_4xn_diag_size = std::max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width); snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_4xn_diag_size; j++) + for (int j = 0; j < simd_4xn_diag_size; j++) { nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5; } snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_width/2; j++) + for (int j = 0; j < simd_width/2; j++) { /* The j-cluster size is half the SIMD width */ nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5; @@ -493,7 +490,7 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) snew_aligned(nbat->simd_exclusion_filter1, simd_excl_size, NBNXN_MEM_ALIGN); snew_aligned(nbat->simd_exclusion_filter2, simd_excl_size*2, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_excl_size; j++) + for (int j = 0; j < simd_excl_size; j++) { /* Set the consecutive bits for masking pair exclusions */ nbat->simd_exclusion_filter1[j] = (1U << j); @@ -516,12 +513,15 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) * indices are used in the kernels. */ simd_excl_size = NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; - const int qpx_simd_width = GMX_SIMD_REAL_WIDTH; + const int qpx_simd_width = GMX_SIMD_REAL_WIDTH; + const real simdFalse = -1, simdTrue = 1; + real *simd_interaction_array; + snew_aligned(simd_interaction_array, simd_excl_size * qpx_simd_width, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_excl_size; j++) + for (int j = 0; j < simd_excl_size; j++) { int index = j * qpx_simd_width; - for (i = 0; i < qpx_simd_width; i++) + for (int i = 0; i < qpx_simd_width; i++) { simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse; } @@ -542,7 +542,7 @@ void nbnxn_atomdata_init(FILE *fp, nbnxn_alloc_t *alloc, nbnxn_free_t *free) { - int i, j, nth; + int nth; real c6, c12, tol; char *ptr; gmx_bool simple, bCombGeom, bCombLB, bSIMD; @@ -591,13 +591,13 @@ void nbnxn_atomdata_init(FILE *fp, /* Temporarily fill nbat->nbfp_comb with sigma and epsilon * to check for the LB rule. 
*/ - for (i = 0; i < ntype; i++) + for (int i = 0; i < ntype; i++) { c6 = nbfp[(i*ntype+i)*2 ]/6.0; c12 = nbfp[(i*ntype+i)*2+1]/12.0; if (c6 > 0 && c12 > 0) { - nbat->nbfp_comb[i*2 ] = pow(c12/c6, 1.0/6.0); + nbat->nbfp_comb[i*2 ] = std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)); nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12; } else if (c6 == 0 && c12 == 0) @@ -612,9 +612,9 @@ void nbnxn_atomdata_init(FILE *fp, } } - for (i = 0; i < nbat->ntype; i++) + for (int i = 0; i < nbat->ntype; i++) { - for (j = 0; j < nbat->ntype; j++) + for (int j = 0; j < nbat->ntype; j++) { if (i < ntype && j < ntype) { @@ -638,8 +638,9 @@ void nbnxn_atomdata_init(FILE *fp, ((c6 == 0 && c12 == 0 && (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) || (c6 > 0 && c12 > 0 && - gmx_within_tol(pow(c12/c6, 1.0/6.0), 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) && - gmx_within_tol(0.25*c6*c6/c12, sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol))); + gmx_within_tol(std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)), + 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) && + gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol))); } else { @@ -720,8 +721,8 @@ void nbnxn_atomdata_init(FILE *fp, if (bSIMD) { - pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE, - nbnxn_kernel_to_cluster_j_size(nb_kernel_type)); + pack_x = std::max(NBNXN_CPU_CLUSTER_I_SIZE, + nbnxn_kernel_to_cluster_j_size(nb_kernel_type)); switch (pack_x) { case 4: @@ -784,7 +785,7 @@ void nbnxn_atomdata_init(FILE *fp, nbat->nout = nout; snew(nbat->out, nbat->nout); nbat->nalloc = 0; - for (i = 0; i < nbat->nout; i++) + for (int i = 0; i < nbat->nout; i++) { nbnxn_atomdata_output_init(&nbat->out[i], nb_kernel_type, @@ -825,16 +826,14 @@ static void copy_lj_to_nbat_lj_comb_x4(const real *ljparam_type, const int *type, int na, real *ljparam_at) { - int is, k, i; - /* The LJ params follow the combination rule: * copy the params for the type array to the atom array. */ - for (is = 0; is < na; is += PACK_X4) + for (int is = 0; is < na; is += PACK_X4) { - for (k = 0; k < PACK_X4; k++) + for (int k = 0; k < PACK_X4; k++) { - i = is + k; + int i = is + k; ljparam_at[is*2 +k] = ljparam_type[type[i]*2 ]; ljparam_at[is*2+PACK_X4+k] = ljparam_type[type[i]*2+1]; } @@ -845,16 +844,14 @@ static void copy_lj_to_nbat_lj_comb_x8(const real *ljparam_type, const int *type, int na, real *ljparam_at) { - int is, k, i; - /* The LJ params follow the combination rule: * copy the params for the type array to the atom array. 
*/ - for (is = 0; is < na; is += PACK_X8) + for (int is = 0; is < na; is += PACK_X8) { - for (k = 0; k < PACK_X8; k++) + for (int k = 0; k < PACK_X8; k++) { - i = is + k; + int i = is + k; ljparam_at[is*2 +k] = ljparam_type[type[i]*2 ]; ljparam_at[is*2+PACK_X8+k] = ljparam_type[type[i]*2+1]; } @@ -867,18 +864,15 @@ static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const int *type) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; copy_int_to_nbat_int(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc, type, nbat->ntype-1, nbat->type+ash); @@ -891,20 +885,17 @@ static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat, int ngrid, const nbnxn_search_t nbs) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - if (nbat->comb_rule != ljcrNONE) { - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; if (nbat->XFormat == nbatX4) { @@ -929,20 +920,19 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const real *charge) { - int g, cxy, ncz, ash, na, na_round, i, j; + int i; real *q; - const nbnxn_grid_t *grid; - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (cxy = 0; cxy < grid->ncx*grid->ncy; cxy++) + for (int cxy = 0; cxy < grid->ncx*grid->ncy; cxy++) { - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; - na = grid->cxy_na[cxy]; - na_round = (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int na_round = (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; if (nbat->XFormat == nbatXYZQ) { @@ -989,8 +979,7 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs) { real *q; - int stride_q, g, nsubc, c_offset, c, subc, i, ind; - const nbnxn_grid_t *grid; + int stride_q, nsubc; if (nbat->XFormat == nbatXYZQ) { @@ -1003,9 +992,9 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, stride_q = 1; } - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; if (grid->bSimple) { nsubc = 1; @@ -1015,20 +1004,20 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, nsubc = GPU_NSUBCELL; } - c_offset = grid->cell0*grid->na_sc; + int c_offset = grid->cell0*grid->na_sc; /* Loop over all columns and copy and fill */ - for (c = 0; c < grid->nc*nsubc; c++) + for (int c = 0; c < grid->nc*nsubc; c++) { /* Does this cluster contain perturbed particles? 
*/ if (grid->fep[c] != 0) { - for (i = 0; i < grid->na_c; i++) + for (int i = 0; i < grid->na_c; i++) { /* Is this a perturbed particle? */ if (grid->fep[c] & (1 << i)) { - ind = c_offset + c*grid->na_c + i; + int ind = c_offset + c*grid->na_c + i; /* Set atom type and charge to non-interacting */ nbat->type[ind] = nbat->ntype - 1; q[ind*stride_q] = 0; @@ -1044,17 +1033,17 @@ static void copy_egp_to_nbat_egps(const int *a, int na, int na_round, int na_c, int bit_shift, const int *in, int *innb) { - int i, j, sa, at; + int i; int comb; - j = 0; + int j = 0; for (i = 0; i < na; i += na_c) { /* Store na_c energy group numbers into one int */ comb = 0; - for (sa = 0; sa < na_c; sa++) + for (int sa = 0; sa < na_c; sa++) { - at = a[i+sa]; + int at = a[i+sa]; if (at >= 0) { comb |= (GET_CGINFO_GID(in[at]) << (sa*bit_shift)); @@ -1075,23 +1064,20 @@ static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const int *atinfo) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - if (nbat->nenergrp == 1) { return; } - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; copy_egp_to_nbat_egps(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc, nbat->na_c, nbat->neg_2log, @@ -1183,19 +1169,17 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - int g; - - for (g = g0; g < g1; g++) + for (int g = g0; g < g1; g++) { const nbnxn_grid_t *grid; - int cxy0, cxy1, cxy; + int cxy0, cxy1; grid = &nbs->grid[g]; cxy0 = (grid->ncx*grid->ncy* th +nth-1)/nth; cxy1 = (grid->ncx*grid->ncy*(th+1)+nth-1)/nth; - for (cxy = cxy0; cxy < cxy1; cxy++) + for (int cxy = cxy0; cxy < cxy1; cxy++) { int na, ash, na_fill; @@ -1227,9 +1211,7 @@ static void nbnxn_atomdata_clear_reals(real * gmx_restrict dest, int i0, int i1) { - int i; - - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { dest[i] = 0; } @@ -1242,14 +1224,12 @@ nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, int nsrc, int i0, int i1) { - int i, s; - if (bDestSet) { /* The destination buffer contains data, add to it */ - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { dest[i] += src[s][i]; } @@ -1258,10 +1238,10 @@ nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, else { /* The destination buffer is unitialized, set it first */ - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { dest[i] = src[0][i]; - for (s = 1; s < nsrc; s++) + for (int s = 1; s < nsrc; s++) { dest[i] += src[s][i]; } @@ -1280,15 +1260,14 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, /* The SIMD width here is actually independent of that in the kernels, * but we use the same width for simplicity (usually optimal anyhow). 
*/ - int i, s; gmx_simd_real_t dest_SSE, src_SSE; if (bDestSet) { - for (i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) + for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) { dest_SSE = gmx_simd_load_r(dest+i); - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { src_SSE = gmx_simd_load_r(src[s]+i); dest_SSE = gmx_simd_add_r(dest_SSE, src_SSE); @@ -1298,10 +1277,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, } else { - for (i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) + for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) { dest_SSE = gmx_simd_load_r(src[0]+i); - for (s = 1; s < nsrc; s++) + for (int s = 1; s < nsrc; s++) { src_SSE = gmx_simd_load_r(src[s]+i); dest_SSE = gmx_simd_add_r(dest_SSE, src_SSE); @@ -1321,7 +1300,6 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, int a0, int a1, rvec *f) { - int a, i, fa; const int *cell; const real *fnb; @@ -1336,9 +1314,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = cell[a]*nbat->fstride; + int i = cell[a]*nbat->fstride; f[a][XX] += fnb[i]; f[a][YY] += fnb[i+1]; @@ -1347,11 +1325,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = cell[a]*nbat->fstride; + int i = cell[a]*nbat->fstride; - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i]; f[a][YY] += out[fa].f[i+1]; @@ -1365,9 +1343,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X4_IND_A(cell[a]); + int i = X4_IND_A(cell[a]); f[a][XX] += fnb[i+XX*PACK_X4]; f[a][YY] += fnb[i+YY*PACK_X4]; @@ -1376,11 +1354,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X4_IND_A(cell[a]); + int i = X4_IND_A(cell[a]); - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i+XX*PACK_X4]; f[a][YY] += out[fa].f[i+YY*PACK_X4]; @@ -1394,9 +1372,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X8_IND_A(cell[a]); + int i = X8_IND_A(cell[a]); f[a][XX] += fnb[i+XX*PACK_X8]; f[a][YY] += fnb[i+YY*PACK_X8]; @@ -1405,11 +1383,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X8_IND_A(cell[a]); + int i = X8_IND_A(cell[a]); - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i+XX*PACK_X8]; f[a][YY] += out[fa].f[i+YY*PACK_X8]; @@ -1555,30 +1533,26 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat, int nth) { - int th; #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) + for (int th = 0; th < nth; th++) { const nbnxn_buffer_flags_t *flags; - int b0, b1, b; - int i0, i1; int nfptr; real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; - int out; flags = &nbat->buffer_flags; /* Calculate the cell-block range for our thread */ - b0 = (flags->nflag* th )/nth; - b1 = (flags->nflag*(th+1))/nth; + int b0 = (flags->nflag* th )/nth; + int b1 = (flags->nflag*(th+1))/nth; - for (b = b0; b < b1; b++) + for (int b = b0; b < b1; b++) { - i0 = b 
*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + int i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; nfptr = 0; - for (out = 1; out < nbat->nout; out++) + for (int out = 1; out < nbat->nout; out++) { if (bitmask_is_set(flags->flag[b], out)) { @@ -1613,7 +1587,6 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, rvec *f) { int a0 = 0, na = 0; - int nth, th; nbs_cycle_start(&nbs->cc[enbsCCreducef]); @@ -1633,7 +1606,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, break; } - nth = gmx_omp_nthreads_get(emntNonbonded); + int nth = gmx_omp_nthreads_get(emntNonbonded); if (nbat->nout > 1) { @@ -1655,7 +1628,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, } } #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) + for (int th = 0; th < nth; th++) { nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat, nbat->out, @@ -1672,17 +1645,13 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, rvec *fshift) { - const nbnxn_atomdata_output_t *out; - int th; - int s; - rvec sum; - - out = nbat->out; + const nbnxn_atomdata_output_t * out = nbat->out; - for (s = 0; s < SHIFTS; s++) + for (int s = 0; s < SHIFTS; s++) { + rvec sum; clear_rvec(sum); - for (th = 0; th < nbat->nout; th++) + for (int th = 0; th < nbat->nout; th++) { sum[XX] += out[th].fshift[s*DIM+XX]; sum[YY] += out[th].fshift[s*DIM+YY]; diff --git a/src/gromacs/mdlib/nbnxn_atomdata.h b/src/gromacs/mdlib/nbnxn_atomdata.h index f3f50c6634..1b4e404029 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.h +++ b/src/gromacs/mdlib/nbnxn_atomdata.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -39,10 +39,6 @@ #include "gromacs/legacyheaders/typedefs.h" #include "gromacs/mdlib/nbnxn_pairlist.h" -#ifdef __cplusplus -extern "C" { -#endif - /* Default nbnxn allocation routine, allocates 32 byte aligned, * which works for plain C and aligned SSE and AVX loads/stores. 
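A note on the cast idiom that recurs in the converted nbnxn_atomdata.cpp code above: GROMACS builds define real as either float or double, and the std::pow overload is selected by argument type, so both arguments are cast to real to keep single-precision builds from silently promoting to double. Below is a minimal standalone sketch of the idiom; it is not code from this patch, and the typedef and function name are illustrative assumptions only.

    #include <cmath>

    typedef float real; /* assumption: a single-precision build */

    /* Hypothetical helper: with both arguments forced to type real, the
     * float overload of std::pow is chosen and the result stays in single
     * precision. Without the casts, the double literal 1.0/6.0 would
     * promote the whole call to double. */
    static real sixth_root(real c12, real c6)
    {
        return std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0));
    }

The std::sqrt calls need no such treatment, because their single argument already has type real.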
@@ -119,8 +115,4 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, rvec *fshift); -#ifdef __cplusplus -} -#endif - #endif diff --git a/src/gromacs/mdlib/nbnxn_grid.c b/src/gromacs/mdlib/nbnxn_grid.cpp similarity index 85% rename from src/gromacs/mdlib/nbnxn_grid.c rename to src/gromacs/mdlib/nbnxn_grid.cpp index f14024fb3d..b4e52f8c16 100644 --- a/src/gromacs/mdlib/nbnxn_grid.c +++ b/src/gromacs/mdlib/nbnxn_grid.cpp @@ -37,12 +37,16 @@ #include "nbnxn_grid.h" +#include "config.h" + #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/utilities.h" #include "gromacs/math/vec.h" @@ -104,8 +108,8 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, { rvec size; int na_c; - real adens, tlen, tlen_x, tlen_y, nc_max; - int t; + int nc_max; + real tlen, tlen_x, tlen_y; rvec_sub(corner1, corner0, size); @@ -119,17 +123,17 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, /* To minimize the zero interactions, we should make * the largest of the i/j cell cubic. */ - na_c = max(grid->na_c, grid->na_cj); + na_c = std::max(grid->na_c, grid->na_cj); /* Approximately cubic cells */ - tlen = pow(na_c/atom_density, 1.0/3.0); + tlen = std::pow(static_cast<real>(na_c/atom_density), static_cast<real>(1.0/3.0)); tlen_x = tlen; tlen_y = tlen; } else { /* Approximately cubic sub cells */ - tlen = pow(grid->na_c/atom_density, 1.0/3.0); + tlen = std::pow(static_cast<real>(grid->na_c/atom_density), static_cast<real>(1.0/3.0)); tlen_x = tlen*GPU_NSUBCELL_X; tlen_y = tlen*GPU_NSUBCELL_Y; } /* We round ncx and ncy down, because we get less cell pairs * in the nbsist when the fixed cell dimensions (x,y) are * larger than the variable one (z) than the other way around. */ - grid->ncx = max(1, (int)(size[XX]/tlen_x)); - grid->ncy = max(1, (int)(size[YY]/tlen_y)); + grid->ncx = std::max(1, static_cast<int>(size[XX]/tlen_x)); + grid->ncy = std::max(1, static_cast<int>(size[YY]/tlen_y)); } else { @@ -170,7 +174,7 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, srenew(grid->cxy_na, grid->cxy_nalloc); srenew(grid->cxy_ind, grid->cxy_nalloc+1); } - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { if (grid->ncx*grid->ncy+1 > nbs->work[t].cxy_na_nalloc) { @@ -273,10 +277,6 @@ static void sort_atoms(int dim, gmx_bool Backwards, real h0, real invh, int n_per_h, int *sort) { - int nsort, i, c; - int zi, zim, zi_min, zi_max; - int cp, tmp; - if (n <= 1) { /* Nothing to do */ @@ -296,20 +296,20 @@ static void sort_atoms(int dim, gmx_bool Backwards, /* Set nsort to the maximum possible number of holes used. * In worst case all n elements end up in the last bin. */ - nsort = n_per_h*SORT_GRID_OVERSIZE + n; + int nsort = n_per_h*SORT_GRID_OVERSIZE + n; /* Determine the index range used, so we can limit it for the second pass */ - zi_min = INT_MAX; - zi_max = -1; + int zi_min = INT_MAX; + int zi_max = -1; /* Sort the particles using a simple index sort */ - for (i = 0; i < n; i++) + for (int i = 0; i < n; i++) { /* The cast takes care of float-point rounding effects below zero. * This code assumes particles are less than 1/SORT_GRID_OVERSIZE * times the box height out of the box. */ - zi = (int)((x[a[i]][dim] - h0)*invh); + int zi = static_cast<int>((x[a[i]][dim] - h0)*invh); #ifndef NDEBUG /* As we can have rounding effect, we use > iso >= here */ @@ -337,8 +337,8 @@ static void sort_atoms(int dim, gmx_bool Backwards, if (sort[zi] < 0) { sort[zi] = a[i]; - zi_min = min(zi_min, zi); - zi_max = max(zi_max, zi); + zi_min = std::min(zi_min, zi); + zi_max = std::max(zi_max, zi); } else { @@ -358,27 +358,27 @@ static void sort_atoms(int dim, gmx_bool Backwards, if (sort[zi] >= 0) { /* Shift all elements by one slot until we find an empty slot */ - cp = sort[zi]; - zim = zi + 1; + int cp = sort[zi]; + int zim = zi + 1; while (sort[zim] >= 0) { - tmp = sort[zim]; + int tmp = sort[zim]; sort[zim] = cp; cp = tmp; zim++; } sort[zim] = cp; - zi_max = max(zi_max, zim); + zi_max = std::max(zi_max, zim); } sort[zi] = a[i]; - zi_max = max(zi_max, zi); + zi_max = std::max(zi_max, zi); } } - c = 0; + int c = 0; if (!Backwards) { - for (zi = 0; zi < nsort; zi++) + for (int zi = 0; zi < nsort; zi++) { if (sort[zi] >= 0) { @@ -389,7 +389,7 @@ static void sort_atoms(int dim, gmx_bool Backwards, } else { - for (zi = zi_max; zi >= zi_min; zi--) + for (int zi = zi_max; zi >= zi_min; zi--) { if (sort[zi] >= 0) { @@ -415,7 +415,7 @@ static void sort_atoms(int dim, gmx_bool Backwards, /* Coordinate order x,y,z, bb order xyz0 */ static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) { - int i, j; + int i; real xl, xh, yl, yh, zl, zh; i = 0; xl = x[i+XX]; xh = x[i+XX]; yl = x[i+YY]; yh = x[i+YY]; zl = x[i+ZZ]; zh = x[i+ZZ]; i += stride; - for (j = 1; j < na; j++) - { - xl = min(xl, x[i+XX]); - xh = max(xh, x[i+XX]); - yl = min(yl, x[i+YY]); - yh = max(yh, x[i+YY]); - zl = min(zl, x[i+ZZ]); - zh = max(zh, x[i+ZZ]); + for (int j = 1; j < na; j++) + { + xl = std::min(xl, x[i+XX]); + xh = std::max(xh, x[i+XX]); + yl = std::min(yl, x[i+YY]); + yh = std::max(yh, x[i+YY]); + zl = std::min(zl, x[i+ZZ]); + zh = std::max(zh, x[i+ZZ]); i += stride; } /* Note: possible double to float conversion here */ @@ -448,7 +448,6 @@ static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) /* Packed coordinates, bb order xyz0 */ static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) { - int j; real xl, xh, yl, yh, zl, zh; xl = x[XX*PACK_X4]; xh = x[XX*PACK_X4]; yl = x[YY*PACK_X4]; yh = x[YY*PACK_X4]; zl = x[ZZ*PACK_X4]; zh = x[ZZ*PACK_X4]; - for (j = 1; j < na; j++) + for (int j = 1; j < na; j++) { - xl = min(xl, x[j+XX*PACK_X4]); - xh = max(xh, x[j+XX*PACK_X4]); - yl = min(yl, x[j+YY*PACK_X4]); - yh = max(yh, x[j+YY*PACK_X4]); - zl = min(zl, x[j+ZZ*PACK_X4]); - zh = max(zh, x[j+ZZ*PACK_X4]); + xl = std::min(xl, x[j+XX*PACK_X4]); + xh = std::max(xh, x[j+XX*PACK_X4]); + yl = std::min(yl, x[j+YY*PACK_X4]); + yh = std::max(yh, x[j+YY*PACK_X4]); + zl = std::min(zl, x[j+ZZ*PACK_X4]); + zh = std::max(zh, x[j+ZZ*PACK_X4]); } /* Note: possible double to float conversion here */ bb->lower[BB_X] = R2F_D(xl); @@ -478,7 +477,6 @@ static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) /* Packed coordinates, bb order xyz0 */ static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) { - int j; real xl, xh, yl, yh, zl, zh; xl = x[XX*PACK_X8]; xh = x[XX*PACK_X8]; yl = x[YY*PACK_X8]; yh = x[YY*PACK_X8]; zl = x[ZZ*PACK_X8]; zh = x[ZZ*PACK_X8]; - for (j = 1; j < na; j++) + for (int 
j = 1; j < na; j++) { - xl = min(xl, x[j+XX*PACK_X8]); - xh = max(xh, x[j+XX*PACK_X8]); - yl = min(yl, x[j+YY*PACK_X8]); - yh = max(yh, x[j+YY*PACK_X8]); - zl = min(zl, x[j+ZZ*PACK_X8]); - zh = max(zh, x[j+ZZ*PACK_X8]); + xl = std::min(xl, x[j+XX*PACK_X8]); + xh = std::max(xh, x[j+XX*PACK_X8]); + yl = std::min(yl, x[j+YY*PACK_X8]); + yh = std::max(yh, x[j+YY*PACK_X8]); + zl = std::min(zl, x[j+ZZ*PACK_X8]); + zh = std::max(zh, x[j+ZZ*PACK_X8]); } /* Note: possible double to float conversion here */ bb->lower[BB_X] = R2F_D(xl); @@ -509,11 +507,11 @@ static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) static void calc_bounding_box_x_x4_halves(int na, const real *x, nbnxn_bb_t *bb, nbnxn_bb_t *bbj) { - calc_bounding_box_x_x4(min(na, 2), x, bbj); + calc_bounding_box_x_x4(std::min(na, 2), x, bbj); if (na > 2) { - calc_bounding_box_x_x4(min(na-2, 2), x+(PACK_X4>>1), bbj+1); + calc_bounding_box_x_x4(std::min(na-2, 2), x+(PACK_X4>>1), bbj+1); } else { @@ -541,8 +539,8 @@ static void calc_bounding_box_x_x4_halves(int na, const real *x, for (i = 0; i < NNBSBB_C; i++) { - bb->lower[i] = min(bbj[0].lower[i], bbj[1].lower[i]); - bb->upper[i] = max(bbj[0].upper[i], bbj[1].upper[i]); + bb->lower[i] = std::min(bbj[0].lower[i], bbj[1].lower[i]); + bb->upper[i] = std::max(bbj[0].upper[i], bbj[1].upper[i]); } } #endif @@ -553,7 +551,7 @@ static void calc_bounding_box_x_x4_halves(int na, const real *x, /* Coordinate order xyz, bb order xxxxyyyyzzzz */ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) { - int i, j; + int i; real xl, xh, yl, yh, zl, zh; i = 0; @@ -564,14 +562,14 @@ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) zl = x[i+ZZ]; zh = x[i+ZZ]; i += stride; - for (j = 1; j < na; j++) - { - xl = min(xl, x[i+XX]); - xh = max(xh, x[i+XX]); - yl = min(yl, x[i+YY]); - yh = max(yh, x[i+YY]); - zl = min(zl, x[i+ZZ]); - zh = max(zh, x[i+ZZ]); + for (int j = 1; j < na; j++) + { + xl = std::min(xl, x[i+XX]); + xh = std::max(xh, x[i+XX]); + yl = std::min(yl, x[i+YY]); + yh = std::max(yh, x[i+YY]); + zl = std::min(zl, x[i+ZZ]); + zh = std::max(zh, x[i+ZZ]); i += stride; } /* Note: possible double to float conversion here */ @@ -593,12 +591,10 @@ static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb) gmx_simd4_float_t bb_0_S, bb_1_S; gmx_simd4_float_t x_S; - int i; - bb_0_S = gmx_simd4_load_f(x); bb_1_S = bb_0_S; - for (i = 1; i < na; i++) + for (int i = 1; i < na; i++) { x_S = gmx_simd4_load_f(x+i*NNBSBB_C); bb_0_S = gmx_simd4_min_f(bb_0_S, x_S); @@ -630,14 +626,13 @@ static void calc_bounding_box_xxxx_simd4(int na, const float *x, /* Combines pairs of consecutive bounding boxes */ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) { - int i, j, sc2, nc2, c2; - - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { /* Starting bb in a column is expected to be 2-aligned */ - sc2 = grid->cxy_ind[i]>>1; + int sc2 = grid->cxy_ind[i]>>1; /* For odd numbers skip the last bb here */ - nc2 = (grid->cxy_na[i]+3)>>(2+1); + int nc2 = (grid->cxy_na[i]+3)>>(2+1); + int c2; for (c2 = sc2; c2 < sc2+nc2; c2++) { #ifdef NBNXN_SEARCH_BB_SIMD4 @@ -650,19 +645,19 @@ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) gmx_simd4_store_f(&grid->bbj[c2].lower[0], min_S); gmx_simd4_store_f(&grid->bbj[c2].upper[0], max_S); #else - for (j = 0; j < NNBSBB_C; j++) + for (int j = 0; j < NNBSBB_C; j++) { - grid->bbj[c2].lower[j] = 
min(bb[c2*2+0].lower[j], - bb[c2*2+1].lower[j]); - grid->bbj[c2].upper[j] = max(bb[c2*2+0].upper[j], - bb[c2*2+1].upper[j]); + grid->bbj[c2].lower[j] = std::min(bb[c2*2+0].lower[j], + bb[c2*2+1].lower[j]); + grid->bbj[c2].upper[j] = std::max(bb[c2*2+0].upper[j], + bb[c2*2+1].upper[j]); } #endif } if (((grid->cxy_na[i]+3)>>2) & 1) { /* The bb count in this column is odd: duplicate the last bb */ - for (j = 0; j < NNBSBB_C; j++) + for (int j = 0; j < NNBSBB_C; j++) { grid->bbj[c2].lower[j] = bb[c2*2].lower[j]; grid->bbj[c2].upper[j] = bb[c2*2].upper[j]; @@ -676,13 +671,12 @@ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) static void print_bbsizes_simple(FILE *fp, const nbnxn_grid_t *grid) { - int c, d; dvec ba; clear_dvec(ba); - for (c = 0; c < grid->nc; c++) + for (int c = 0; c < grid->nc; c++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { ba[d] += grid->bb[c].upper[d] - grid->bb[c].lower[d]; } @@ -705,22 +699,20 @@ static void print_bbsizes_simple(FILE *fp, static void print_bbsizes_supersub(FILE *fp, const nbnxn_grid_t *grid) { - int ns, c, s; + int ns; dvec ba; clear_dvec(ba); ns = 0; - for (c = 0; c < grid->nc; c++) + for (int c = 0; c < grid->nc; c++) { #ifdef NBNXN_BBXXXX - for (s = 0; s < grid->nsubc[c]; s += STRIDE_PBB) + for (int s = 0; s < grid->nsubc[c]; s += STRIDE_PBB) { - int cs_w, i, d; - - cs_w = (c*GPU_NSUBCELL + s)/STRIDE_PBB; - for (i = 0; i < STRIDE_PBB; i++) + int cs_w = (c*GPU_NSUBCELL + s)/STRIDE_PBB; + for (int i = 0; i < STRIDE_PBB; i++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { ba[d] += grid->pbb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] - @@ -729,12 +721,10 @@ static void print_bbsizes_supersub(FILE *fp, } } #else - for (s = 0; s < grid->nsubc[c]; s++) + for (int s = 0; s < grid->nsubc[c]; s++) { - int cs, d; - - cs = c*GPU_NSUBCELL + s; - for (d = 0; d < DIM; d++) + int cs = c*GPU_NSUBCELL + s; + for (int d = 0; d < DIM; d++) { ba[d] += grid->bb[cs].upper[d] - grid->bb[cs].lower[d]; } @@ -764,22 +754,21 @@ static void sort_cluster_on_flag(int na_c, int *order, int *flags) { - int subc, s, a, n1, n2, a_lj_max, i, j; + int subc; int sort1[NBNXN_NA_SC_MAX/GPU_NSUBCELL]; int sort2[NBNXN_NA_SC_MAX/GPU_NSUBCELL]; - gmx_bool haveQ, bFEP; *flags = 0; subc = 0; - for (s = a0; s < a1; s += na_c) + for (int s = a0; s < a1; s += na_c) { /* Make lists for this (sub-)cell on atoms with and without LJ */ - n1 = 0; - n2 = 0; - haveQ = FALSE; - a_lj_max = -1; - for (a = s; a < min(s+na_c, a1); a++) + int n1 = 0; + int n2 = 0; + gmx_bool haveQ = FALSE; + int a_lj_max = -1; + for (int a = s; a < std::min(s+na_c, a1); a++) { haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]); @@ -801,17 +790,17 @@ static void sort_cluster_on_flag(int na_c, if (2*n1 <= na_c) { - /* Only sort when strictly necessary. + /* Only sort when strictly necessary. * Ordering particles can lead to less accurate summation * due to rounding, both for LJ and Coulomb interactions. */ if (2*(a_lj_max - s) >= na_c) { - for (i = 0; i < n1; i++) + for (int i = 0; i < n1; i++) { order[a0+i] = sort1[i]; } - for (j = 0; j < n2; j++) + for (int j = 0; j < n2; j++) { order[a0+n1+j] = sort2[j]; } @@ -862,12 +851,12 @@ static void fill_cell(const nbnxn_search_t nbs, if (nbs->bFEP) { /* Set the fep flag for perturbed atoms in this (sub-)cell */ - int c, at; + int c; /* The grid-local cluster/(sub-)cell index */ c = (a0 >> grid->na_c_2log) - grid->cell0*(grid->bSimple ? 
1 : GPU_NSUBCELL); grid->fep[c] = 0; - for (at = a0; at < a1; at++) + for (int at = a0; at < a1; at++) { if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]])) { @@ -980,10 +969,7 @@ static void sort_columns_simple(const nbnxn_search_t nbs, int cxy_start, int cxy_end, int *sort_work) { - int cxy; - int cx, cy, cz, ncz, cfilled, c; - int na, ash, ind, a; - int na_c, ash_c; + int cfilled, c; if (debug) { @@ -992,14 +978,14 @@ static void sort_columns_simple(const nbnxn_search_t nbs, } /* Sort the atoms within each x,y column in 3 dimensions */ - for (cxy = cxy_start; cxy < cxy_end; cxy++) + for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - cx = cxy/grid->ncy; - cy = cxy - cx*grid->ncy; + int cx = cxy/grid->ncy; + int cy = cxy - cx*grid->ncy; - na = grid->cxy_na[cxy]; - ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, @@ -1010,12 +996,12 @@ static void sort_columns_simple(const nbnxn_search_t nbs, /* Fill the ncz cells in this column */ cfilled = grid->cxy_ind[cxy]; - for (cz = 0; cz < ncz; cz++) + for (int cz = 0; cz < ncz; cz++) { c = grid->cxy_ind[cxy] + cz; - ash_c = ash + cz*grid->na_sc; - na_c = min(grid->na_sc, na-(ash_c-ash)); + int ash_c = ash + cz*grid->na_sc; + int na_c = std::min(grid->na_sc, na-(ash_c-ash)); fill_cell(nbs, grid, nbat, ash_c, ash_c+na_c, atinfo, x, @@ -1037,7 +1023,7 @@ static void sort_columns_simple(const nbnxn_search_t nbs, } /* Set the unused atom indices to -1 */ - for (ind = na; ind < ncz*grid->na_sc; ind++) + for (int ind = na; ind < ncz*grid->na_sc; ind++) { nbs->a[ash+ind] = -1; } @@ -1055,16 +1041,9 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, int cxy_start, int cxy_end, int *sort_work) { - int cxy; - int cx, cy, cz = -1, c = -1, ncz; - int na, ash, na_c, ind, a; - int subdiv_z, sub_z, na_z, ash_z; - int subdiv_y, sub_y, na_y, ash_y; - int subdiv_x, sub_x, na_x, ash_x; - nbnxn_bb_t bb_work_array[2], *bb_work_aligned; - bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15))); + bb_work_aligned = reinterpret_cast<nbnxn_bb_t *>((reinterpret_cast<size_t>(bb_work_array+1)) & (~(static_cast<size_t>(15)))); if (debug) { fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n", grid->cell0, cxy_start, cxy_end, a0, a1); } - subdiv_x = grid->na_c; - subdiv_y = GPU_NSUBCELL_X*subdiv_x; - subdiv_z = GPU_NSUBCELL_Y*subdiv_y; + int subdiv_x = grid->na_c; + int subdiv_y = GPU_NSUBCELL_X*subdiv_x; + int subdiv_z = GPU_NSUBCELL_Y*subdiv_y; /* Sort the atoms within each x,y column in 3 dimensions */ - for (cxy = cxy_start; cxy < cxy_end; cxy++) + for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - cx = cxy/grid->ncy; - cy = cxy - cx*grid->ncy; + int cx = cxy/grid->ncy; + int cy = cxy - cx*grid->ncy; - na = grid->cxy_na[cxy]; - ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, @@ -1094,22 +1073,22 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, /* This loop goes over the supercells and subcells along z at once */ - for (sub_z = 0; sub_z < ncz*GPU_NSUBCELL_Z; sub_z++) + for (int sub_z = 0; sub_z < ncz*GPU_NSUBCELL_Z; sub_z++) { - ash_z = ash + sub_z*subdiv_z; - na_z = min(subdiv_z, na-(ash_z-ash)); - + int ash_z = ash + sub_z*subdiv_z; + int na_z = std::min(subdiv_z, na-(ash_z-ash)); + int cz = -1; /* We have already sorted on z */ if (sub_z % GPU_NSUBCELL_Z == 0) { cz = sub_z/GPU_NSUBCELL_Z; - c = grid->cxy_ind[cxy] + cz; + int c = grid->cxy_ind[cxy] + cz; /* The number of atoms in this supercell */ - na_c = min(grid->na_sc, na-(ash_z-ash)); + int na_c = std::min(grid->na_sc, na-(ash_z-ash)); - grid->nsubc[c] = min(GPU_NSUBCELL, (na_c+grid->na_c-1)/grid->na_c); + grid->nsubc[c] = std::min(GPU_NSUBCELL, (na_c+grid->na_c-1)/grid->na_c); /* Store the z-boundaries of the super cell */ grid->bbcz[c*NNBSBB_D ] = x[nbs->a[ash_z]][ZZ]; @@ -1125,10 +1104,10 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, sort_work); #endif - for (sub_y = 0; sub_y < GPU_NSUBCELL_Y; sub_y++) + for (int sub_y = 0; sub_y < GPU_NSUBCELL_Y; sub_y++) { - ash_y = ash_z + sub_y*subdiv_y; - na_y = min(subdiv_y, na-(ash_y-ash)); + int ash_y = ash_z + sub_y*subdiv_y; + int na_y = std::min(subdiv_y, na-(ash_y-ash)); #if GPU_NSUBCELL_X > 1 /* Sort the atoms along x */ sort_atoms(XX, ((cz*GPU_NSUBCELL_Y + sub_y) & 1), dd_zone, @@ -1139,10 +1118,10 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, sort_work); #endif - for (sub_x = 0; sub_x < GPU_NSUBCELL_X; sub_x++) + for (int sub_x = 0; sub_x < GPU_NSUBCELL_X; sub_x++) { - ash_x = ash_y + sub_x*subdiv_x; - na_x = min(subdiv_x, na-(ash_x-ash)); + int ash_x = ash_y + sub_x*subdiv_x; + int na_x = std::min(subdiv_x, na-(ash_x-ash)); fill_cell(nbs, grid, nbat, ash_x, ash_x+na_x, atinfo, x, @@ -1155,7 +1134,7 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, } /* Set the unused atom indices to -1 */ - for (ind = na; ind < ncz*grid->na_sc; ind++) + for (int ind = na; ind < ncz*grid->na_sc; ind++) { nbs->a[ash+ind] = -1; } @@ -1171,21 +1150,18 @@ static void calc_column_indices(nbnxn_grid_t *grid, int *cell, int *cxy_na) { - int n0, n1, i; - int cx, cy; - /* We add one extra cell for particles which moved during DD */ - for (i = 0; i < grid->ncx*grid->ncy+1; i++) + for (int i = 0; i < grid->ncx*grid->ncy+1; i++) { cxy_na[i] = 0; } - n0 = a0 + (int)((thread+0)*(a1 - a0))/nthread; - n1 = a0 + (int)((thread+1)*(a1 - a0))/nthread; + int n0 = a0 + static_cast<int>((thread+0)*(a1 - a0))/nthread; + int n1 = a0 + static_cast<int>((thread+1)*(a1 - a0))/nthread; if (dd_zone == 0) { /* Home zone */ - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { if (move == NULL || move[i] >= 0) { /* We need to be careful with rounding, * The int cast takes care of the lower bound, * we will explicitly take care of the upper bound. */ - cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx); - cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy); + int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->inv_sx); + int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->inv_sy); #ifndef NDEBUG if (cx < 0 || cx > grid->ncx || cy < 0 || cy > grid->ncy) { gmx_fatal(FARGS, "grid cell cx %d cy %d out of range (max %d %d)\n" "atom %f %f %f, grid->c0 %f %f", cx, cy, grid->ncx, grid->ncy, x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]); } #endif /* Take care of potential rouding issues */ cx = std::min(cx, grid->ncx - 1); cy = std::min(cy, grid->ncy - 1); /* For the moment cell will contain only the, grid local, * x and y indices, not z. 
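Both branches of calc_column_indices() above use the same mapping: scale the coordinate relative to the grid corner, truncate to an integer column index, then clamp, because rounding (or, for non-home zones, communicated particles) can land an atom marginally outside the grid. Below is a standalone sketch of that mapping with hypothetical names; the home-zone branch of the real code clamps only the upper bound and checks the lower bound in debug builds, while the non-home-zone branch clamps both, as done here.

    #include <algorithm>

    /* Illustrative only: map a coordinate to a grid column index. The int
     * cast truncates towards zero, which handles the lower edge; the
     * clamps guard the upper edge and out-of-range particles. */
    static int column_index(float coord, float origin, float inv_cell_size,
                            int ncells)
    {
        int c = static_cast<int>((coord - origin)*inv_cell_size);
        c = std::max(c, 0);
        c = std::min(c, ncells - 1);
        return c;
    }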
@@ -1231,10 +1207,10 @@ static void calc_column_indices(nbnxn_grid_t *grid, else { /* Non-home zone */ - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { - cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx); - cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy); + int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->inv_sx); + int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->inv_sy); /* For non-home zones there could be particles outside * the non-bonded cut-off range, which have been communicated * matter where these end up on the grid. For performance * we put them in an extra row at the border. */ - cx = max(cx, 0); - cx = min(cx, grid->ncx - 1); - cy = max(cy, 0); - cy = min(cy, grid->ncy - 1); + cx = std::max(cx, 0); + cx = std::min(cx, grid->ncx - 1); + cy = std::max(cy, 0); + cy = std::min(cy, grid->ncy - 1); /* For the moment cell will contain only the, grid local, * x and y indices, not z. @@ -1267,15 +1243,15 @@ static void calc_cell_indices(const nbnxn_search_t nbs, const int *move, nbnxn_atomdata_t *nbat) { - int n0, n1, i; + int n0, n1; int cx, cy, cxy, ncz_max, ncz; - int nthread, thread; - int *cxy_na, cxy_na_i; + int nthread; + int cxy_na_i; nthread = gmx_omp_nthreads_get(emntPairsearch); #pragma omp parallel for num_threads(nthread) schedule(static) - for (thread = 0; thread < nthread; thread++) + for (int thread = 0; thread < nthread; thread++) { calc_column_indices(grid, a0, a1, x, dd_zone, move, thread, nthread, nbs->cell, nbs->work[thread].cxy_na); @@ -1285,7 +1261,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, ncz_max = 0; ncz = 0; grid->cxy_ind[0] = 0; - for (i = 0; i < grid->ncx*grid->ncy+1; i++) + for (int i = 0; i < grid->ncx*grid->ncy+1; i++) { /* We set ncz_max at the beginning of the loop iso at the end * to skip i=grid->ncx*grid->ncy which are moved particles * that do not need to be ordered on the grid. */ if (ncz > ncz_max) { ncz_max = ncz; } cxy_na_i = nbs->work[0].cxy_na[i]; - for (thread = 1; thread < nthread; thread++) + for (int thread = 1; thread < nthread; thread++) { cxy_na_i += nbs->work[thread].cxy_na[i]; } @@ -1322,7 +1298,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, ncz_max); if (gmx_debug_at) { int i = 0; for (cy = 0; cy < grid->ncy; cy++) { for (cx = 0; cx < grid->ncx; cx++) { @@ -1338,14 +1314,14 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Make sure the work array for sorting is large enough */ if (ncz_max*grid->na_sc*SGSF > nbs->work[0].sort_work_nalloc) { - for (thread = 0; thread < nbs->nthread_max; thread++) + for (int thread = 0; thread < nbs->nthread_max; thread++) { nbs->work[thread].sort_work_nalloc = over_alloc_large(ncz_max*grid->na_sc*SGSF); srenew(nbs->work[thread].sort_work, nbs->work[thread].sort_work_nalloc); /* When not in use, all elements should be -1 */ - for (i = 0; i < nbs->work[thread].sort_work_nalloc; i++) + for (int i = 0; i < nbs->work[thread].sort_work_nalloc; i++) { nbs->work[thread].sort_work[i] = -1; } @@ -1355,7 +1331,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Now we know the dimensions we can fill the grid. * This is the first, unsorted fill. We sort the columns after this. 
*/ - for (i = a0; i < a1; i++) + for (int i = a0; i < a1; i++) { /* At this point nbs->cell contains the local grid x,y indices */ cxy = nbs->cell[i]; @@ -1369,7 +1345,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->ncx*grid->ncy]; if (dd_zone == 0) { - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { nbs->cell[nbs->a[i]] = i; } @@ -1378,7 +1354,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Sort the super-cell columns along z into the sub-cells. */ #pragma omp parallel for num_threads(nthread) schedule(static) - for (thread = 0; thread < nthread; thread++) + for (int thread = 0; thread < nthread; thread++) { if (grid->bSimple) { @@ -1404,7 +1380,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, if (!grid->bSimple) { grid->nsubc_tot = 0; - for (i = 0; i < grid->nc; i++) + for (int i = 0; i < grid->nc; i++) { grid->nsubc_tot += grid->nsubc[i]; } @@ -1429,15 +1405,13 @@ static void calc_cell_indices(const nbnxn_search_t nbs, static void init_buffer_flags(nbnxn_buffer_flags_t *flags, int natoms) { - int b; - flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE; if (flags->nflag > flags->flag_nalloc) { flags->flag_nalloc = over_alloc_large(flags->nflag); srenew(flags->flag, flags->flag_nalloc); } - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { bitmask_clear(&(flags->flag[b])); } @@ -1520,7 +1494,7 @@ void nbnxn_put_on_grid(nbnxn_search_t nbs, } else { - nbs->natoms_nonlocal = max(nbs->natoms_nonlocal, a1); + nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, a1); } /* We always use the home zone (grid[0]) for setting the cell size, @@ -1571,12 +1545,11 @@ void nbnxn_put_on_grid_nonlocal(nbnxn_search_t nbs, int nb_kernel_type, nbnxn_atomdata_t *nbat) { - int zone, d; rvec c0, c1; - for (zone = 1; zone < zones->n; zone++) + for (int zone = 1; zone < zones->n; zone++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { c0[d] = zones->size[zone].bb_x0[d]; c1[d] = zones->size[zone].bb_x1[d]; @@ -1602,8 +1575,7 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, nbnxn_grid_t *grid; float *bbcz; nbnxn_bb_t *bb; - int ncd, sc; - int nthreads gmx_unused; + int ncd; grid = &nbs->grid[0]; @@ -1630,17 +1602,18 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, bbcz = grid->bbcz_simple; bb = grid->bb_simple; - nthreads = gmx_omp_nthreads_get(emntPairsearch); +#if (defined GMX_OPENMP) && !(defined __clang_analyzer__) + // cppcheck-suppress unreadVariable + int nthreads = gmx_omp_nthreads_get(emntPairsearch); +#endif + #pragma omp parallel for num_threads(nthreads) schedule(static) - for (sc = 0; sc < grid->nc; sc++) + for (int sc = 0; sc < grid->nc; sc++) { - int c, tx, na; - - for (c = 0; c < ncd; c++) + for (int c = 0; c < ncd; c++) { - tx = sc*ncd + c; - - na = NBNXN_CPU_CLUSTER_I_SIZE; + int tx = sc*ncd + c; + int na = NBNXN_CPU_CLUSTER_I_SIZE; while (na > 0 && nbat->type[tx*NBNXN_CPU_CLUSTER_I_SIZE+na-1] == nbat->ntype-1) { @@ -1706,20 +1679,17 @@ void nbnxn_get_atomorder(const nbnxn_search_t nbs, const int **a, int *n) void nbnxn_set_atomorder(nbnxn_search_t nbs) { - nbnxn_grid_t *grid; - int ao, cx, cy, cxy, cz, j; - /* Set the atom order for the home cell (index 0) */ - grid = &nbs->grid[0]; + nbnxn_grid_t *grid = &nbs->grid[0]; - ao = 0; - for (cx = 0; cx < grid->ncx; cx++) + int ao = 0; + for (int cx = 0; cx < grid->ncx; cx++) { - for (cy = 0; cy < grid->ncy; cy++) + for (int cy = 0; cy < grid->ncy; cy++) { - cxy = cx*grid->ncy + cy; - j 
= grid->cxy_ind[cxy]*grid->na_sc; - for (cz = 0; cz < grid->cxy_na[cxy]; cz++) + int cxy = cx*grid->ncy + cy; + int j = grid->cxy_ind[cxy]*grid->na_sc; + for (int cz = 0; cz < grid->cxy_na[cxy]; cz++) { nbs->a[j] = ao; nbs->cell[ao] = j; diff --git a/src/gromacs/mdlib/nbnxn_grid.h b/src/gromacs/mdlib/nbnxn_grid.h index cb846a1171..1824cc5f87 100644 --- a/src/gromacs/mdlib/nbnxn_grid.h +++ b/src/gromacs/mdlib/nbnxn_grid.h @@ -40,9 +40,6 @@ #include "gromacs/mdlib/nbnxn_consts.h" #include "gromacs/mdlib/nbnxn_internal.h" -#ifdef __cplusplus -extern "C" { -#endif /* Allocate and initialize ngrid pair search grids in nbs */ void nbnxn_grids_init(nbnxn_search_t nbs, int ngrid); @@ -92,8 +89,5 @@ void nbnxn_get_atomorder(const nbnxn_search_t nbs, const int **a, int *n); /* Renumber the atom indices on the grid to consecutive order */ void nbnxn_set_atomorder(nbnxn_search_t nbs); -#ifdef __cplusplus -} -#endif #endif diff --git a/src/gromacs/mdlib/nbnxn_search.c b/src/gromacs/mdlib/nbnxn_search.cpp similarity index 93% rename from src/gromacs/mdlib/nbnxn_search.c rename to src/gromacs/mdlib/nbnxn_search.cpp index 8292581a42..7ec37ff526 100644 --- a/src/gromacs/mdlib/nbnxn_search.c +++ b/src/gromacs/mdlib/nbnxn_search.cpp @@ -37,12 +37,16 @@ #include "nbnxn_search.h" +#include "config.h" + #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/legacyheaders/nrnb.h" #include "gromacs/legacyheaders/ns.h" #include "gromacs/legacyheaders/types/commrec.h" @@ -131,9 +135,7 @@ static void nbs_cycle_clear(nbnxn_cycle_t *cc) { - int i; - - for (i = 0; i < enbsCCnr; i++) + for (int i = 0; i < enbsCCnr; i++) { cc[i].count = 0; cc[i].c = 0; @@ -147,9 +149,6 @@ static double Mcyc_av(const nbnxn_cycle_t *cc) static void nbs_cycle_print(FILE *fp, const nbnxn_search_t nbs) { - int n; - int t; - fprintf(fp, "\n"); fprintf(fp, "ns %4d grid %4.1f search %4.1f red.f %5.3f", nbs->cc[enbsCCgrid].count, @@ -165,7 +164,7 @@ static void nbs_cycle_print(FILE *fp, const nbnxn_search_t nbs) Mcyc_av(&nbs->cc[enbsCCcombine])); } fprintf(fp, " s. th"); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { fprintf(fp, " %4.1f", Mcyc_av(&nbs->work[t].cc[enbsCCsearch])); @@ -241,7 +240,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, int nthread_max) { nbnxn_search_t nbs; - int ngrid, d, t; + int ngrid; snew(nbs, 1); *nbs_ptr = nbs; @@ -256,7 +255,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, { nbs->zones = zones; - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { if ((*n_dd_cells)[d] > 1) { @@ -278,7 +277,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, /* Initialize the work data structures for each thread */ snew(nbs->work, nbs->nthread_max); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { nbs->work[t].cxy_na = NULL; nbs->work[t].cxy_na_nalloc = 0; @@ -293,7 +292,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, nbs->print_cycles = (getenv("GMX_NBNXN_CYCLE") != 0); nbs->search_count = 0; nbs_cycle_clear(nbs->cc); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { nbs_cycle_clear(nbs->work[t].cc); } @@ -302,15 +301,13 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, static void init_buffer_flags(nbnxn_buffer_flags_t *flags, int natoms) { - int b; - flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE; if (flags->nflag > flags->flag_nalloc) { flags->flag_nalloc = over_alloc_large(flags->nflag); srenew(flags->flag, flags->flag_nalloc); } - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { bitmask_clear(&(flags->flag[b])); } @@ -323,14 +320,14 @@ static void get_cell_range(real b0, real b1, int nc, real c0, real s, real invs, real d2, real r2, int *cf, int *cl) { - *cf = max((int)((b0 - c0)*invs), 0); + *cf = std::max(static_cast<int>((b0 - c0)*invs), 0); while (*cf > 0 && d2 + sqr((b0 - c0) - (*cf-1+1)*s) < r2) { (*cf)--; } - *cl = min((int)((b1 - c0)*invs), nc-1); + *cl = std::min(static_cast<int>((b1 - c0)*invs), nc-1); while (*cl < nc-1 && d2 + sqr((*cl+1)*s - (b1 - c0)) < r2) { (*cl)++; @@ -349,20 +346,20 @@ static float box_dist2(float bx0, float bx1, float by0, dl = bx0 - bb->upper[BB_X]; dh = bb->lower[BB_X] - bx1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = by0 - bb->upper[BB_Y]; dh = bb->lower[BB_Y] - by1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bz0 - bb->upper[BB_Z]; dh = bb->lower[BB_Z] - bz1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; return d2; @@ -383,20 +380,20 @@ static float subc_bb_dist2(int si, const nbnxn_bb_t *bb_i_ci, dl = bb_i->lower[BB_X] - bb_j->upper[BB_X]; dh = bb_j->lower[BB_X] - bb_i->upper[BB_X]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bb_i->lower[BB_Y] - bb_j->upper[BB_Y]; dh = bb_j->lower[BB_Y] - bb_i->upper[BB_Y]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bb_i->lower[BB_Z] - bb_j->upper[BB_Z]; dh = bb_j->lower[BB_Z] - bb_i->upper[BB_Z]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; return d2; @@ -519,19 +516,14 @@ static gmx_bool subc_in_range_x(int na_c, int csj, int stride, const real *x_j, real rl2) { - int i, j, i0, j0; - real d2; - - for (i = 0; i < na_c; i++) + for (int i = 0; i < na_c; i++) { - i0 = (si*na_c 
+ i)*DIM; - for (j = 0; j < na_c; j++) + int i0 = (si*na_c + i)*DIM; + for (int j = 0; j < na_c; j++) { - j0 = (csj*na_c + j)*stride; + int j0 = (csj*na_c + j)*stride; - d2 = sqr(x_i[i0 ] - x_j[j0 ]) + - sqr(x_i[i0+1] - x_j[j0+1]) + - sqr(x_i[i0+2] - x_j[j0+2]); + real d2 = sqr(x_i[i0 ] - x_j[j0 ]) + sqr(x_i[i0+1] - x_j[j0+1]) + sqr(x_i[i0+2] - x_j[j0+2]); if (d2 < rl2) { @@ -696,7 +688,7 @@ static void check_subcell_list_space_simple(nbnxn_pairlist_t *nbl, static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, int nsupercell) { - int ncj4_max, j4, j, w, t; + int ncj4_max, w; #define NWARP 2 #define WARP_SIZE 32 @@ -718,7 +710,7 @@ static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, if (ncj4_max > nbl->work->cj4_init) { - for (j4 = nbl->work->cj4_init; j4 < ncj4_max; j4++) + for (int j4 = nbl->work->cj4_init; j4 < ncj4_max; j4++) { /* No i-subcells and no excl's in the list initially */ for (w = 0; w < NWARP; w++) @@ -735,9 +727,7 @@ static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, /* Set all excl masks for one GPU warp no exclusions */ static void set_no_excls(nbnxn_excl_t *excl) { - int t; - - for (t = 0; t < WARP_SIZE; t++) + for (int t = 0; t < WARP_SIZE; t++) { /* Turn all interaction bits on */ excl->pair[t] = NBNXN_INTERACTION_MASK_ALL; @@ -824,8 +814,6 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list, nbnxn_alloc_t *alloc, nbnxn_free_t *free) { - int i; - nbl_list->bSimple = bSimple; nbl_list->bCombined = bCombined; @@ -842,7 +830,7 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list, snew(nbl_list->nbl_fep, nbl_list->nnbl); /* Execute in order to avoid memory interleaving between threads */ #pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static) - for (i = 0; i < nbl_list->nnbl; i++) + for (int i = 0; i < nbl_list->nnbl; i++) { /* Allocate the nblist data structure locally on each thread * to optimize memory access for NUMA architectures. 
@@ -870,7 +858,6 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl { const nbnxn_grid_t *grid; int cs[SHIFTS]; - int s, i, j; int npexcl; /* This code only produces correct statistics with domain decomposition */ @@ -884,19 +871,19 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl nbl->ncj/(double)grid->nc*grid->na_sc/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_sc/(grid->size[XX]*grid->size[YY]*grid->size[ZZ]))); fprintf(fp, "nbl average j cell list length %.1f\n", - 0.25*nbl->ncj/(double)max(nbl->nci, 1)); + 0.25*nbl->ncj/(double)std::max(nbl->nci, 1)); - for (s = 0; s < SHIFTS; s++) + for (int s = 0; s < SHIFTS; s++) { cs[s] = 0; } npexcl = 0; - for (i = 0; i < nbl->nci; i++) + for (int i = 0; i < nbl->nci; i++) { cs[nbl->ci[i].shift & NBNXN_CI_SHIFT] += nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start; - j = nbl->ci[i].cj_ind_start; + int j = nbl->ci[i].cj_ind_start; while (j < nbl->ci[i].cj_ind_end && nbl->cj[j].excl != NBNXN_INTERACTION_MASK_ALL) { @@ -905,8 +892,8 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl } } fprintf(fp, "nbl cell pairs, total: %d excl: %d %.1f%%\n", - nbl->ncj, npexcl, 100*npexcl/(double)max(nbl->ncj, 1)); - for (s = 0; s < SHIFTS; s++) + nbl->ncj, npexcl, 100*npexcl/(double)std::max(nbl->ncj, 1)); + for (int s = 0; s < SHIFTS; s++) { if (cs[s] > 0) { @@ -920,7 +907,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n const nbnxn_search_t nbs, real rl) { const nbnxn_grid_t *grid; - int i, j4, j, si, b; + int b; int c[GPU_NSUBCELL+1]; double sum_nsp, sum_nsp2; int nsp_max; @@ -938,21 +925,21 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n sum_nsp = 0; sum_nsp2 = 0; nsp_max = 0; - for (si = 0; si <= GPU_NSUBCELL; si++) + for (int si = 0; si <= GPU_NSUBCELL; si++) { c[si] = 0; } - for (i = 0; i < nbl->nsci; i++) + for (int i = 0; i < nbl->nsci; i++) { int nsp; nsp = 0; - for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) + for (int j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) { - for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) + for (int j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) { b = 0; - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si))) { @@ -965,7 +952,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n } sum_nsp += nsp; sum_nsp2 += nsp*nsp; - nsp_max = max(nsp_max, nsp); + nsp_max = std::max(nsp_max, nsp); } if (nbl->nsci > 0) { @@ -973,7 +960,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n sum_nsp2 /= nbl->nsci; } fprintf(fp, "nbl #cluster-pairs: av %.1f stddev %.1f max %d\n", - sum_nsp, sqrt(sum_nsp2 - sum_nsp*sum_nsp), nsp_max); + sum_nsp, std::sqrt(sum_nsp2 - sum_nsp*sum_nsp), nsp_max); if (nbl->ncj4 > 0) { @@ -1039,17 +1026,16 @@ static void set_self_and_newton_excls_supersub(nbnxn_pairlist_t *nbl, int si) { nbnxn_excl_t *excl[2]; - int ei, ej, w; /* Here we only set the set self and double pair exclusions */ get_nbl_exclusions_2(nbl, cj4_ind, &excl[0], &excl[1]); /* Only minor < major bits set */ - for (ej = 0; ej < nbl->na_ci; ej++) + for (int ej = 0; ej < nbl->na_ci; ej++) { - w = (ej>>2); - for (ei = ej; ei < nbl->na_ci; ei++) + int w = (ej>>2); + for (int ei = ej; ei < nbl->na_ci; ei++) { excl[w]->pair[(ej & (NBNXN_GPU_JGROUP_SIZE-1))*nbl->na_ci + ei] &= ~(1U << 
(sj_offset*GPU_NSUBCELL + si)); @@ -1112,16 +1098,12 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, real rl2, float rbb2, int *ndistc) { - const nbnxn_list_work_t *work; - const nbnxn_bb_t *bb_ci; const real *x_ci; gmx_bool InRange; real d2; - int cjf_gl, cjl_gl, cj; - - work = nbl->work; + int cjf_gl, cjl_gl; bb_ci = nbl->work->bb_ci; x_ci = nbl->work->x_ci; @@ -1143,12 +1125,10 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, } else if (d2 < rl2) { - int i, j; - cjf_gl = gridj->cell0 + cjf; - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { - for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) + for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) { InRange = InRange || (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + @@ -1185,12 +1165,10 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, } else if (d2 < rl2) { - int i, j; - cjl_gl = gridj->cell0 + cjl; - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { - for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) + for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) { InRange = InRange || (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + @@ -1208,7 +1186,7 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, if (cjf <= cjl) { - for (cj = cjf; cj <= cjl; cj++) + for (int cj = cjf; cj <= cjl; cj++) { /* Store cj and the interaction mask */ nbl->cj[nbl->ncj].cj = gridj->cell0 + cj; @@ -1241,7 +1219,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, { int na_c; int npair; - int cjo, ci1, ci, cj, cj_gl; + int ci1, cj, cj_gl; int cj4_ind, cj_offset; unsigned int imask; nbnxn_cj4_t *cj4; @@ -1269,7 +1247,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, na_c = gridj->na_c; - for (cjo = 0; cjo < gridj->nsubc[scj]; cjo++) + for (int cjo = 0; cjo < gridj->nsubc[scj]; cjo++) { cj4_ind = (nbl->work->cj_ind >> NBNXN_GPU_JGROUP_SIZE_2LOG); cj_offset = nbl->work->cj_ind - cj4_ind*NBNXN_GPU_JGROUP_SIZE; @@ -1301,7 +1279,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, npair = 0; /* We use a fixed upper-bound instead of ci1 to help optimization */ - for (ci = 0; ci < GPU_NSUBCELL; ci++) + for (int ci = 0; ci < GPU_NSUBCELL; ci++) { if (ci == ci1) { @@ -1416,12 +1394,9 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, int cj_ind_first, cj_ind_last; int cj_first, cj_last; int ndirect; - int i, ai, aj, si, eind, ge, se; + int ai, aj, si, ge, se; int found, cj_ind_0, cj_ind_1, cj_ind_m; int cj_m; - gmx_bool Found_si; - int si_ind; - nbnxn_excl_t *nbl_excl; int inner_i, inner_e; cell = nbs->cell; @@ -1465,7 +1440,7 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, #endif /* Loop over the atoms in the i super-cell */ - for (i = 0; i < nbl->na_sc; i++) + for (int i = 0; i < nbl->na_sc; i++) { ai = nbs->a[ci*nbl->na_sc+i]; if (ai >= 0) @@ -1473,7 +1448,7 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, si = (i>>na_ci_2log); /* Loop over the topology-based exclusions for this i-atom */ - for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) + for (int eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) { aj = excl->a[eind]; @@ -1581,12 +1556,12 @@ static void make_fep_list(const nbnxn_search_t nbs, const nbnxn_grid_t *gridj, t_nblist *nlist) { - int ci, cj_ind_start, cj_ind_end, cj_ind, 
cja, cjr; + int ci, cj_ind_start, cj_ind_end, cja, cjr; int nri_max; int ngid, gid_i = 0, gid_j, gid; int egp_shift, egp_mask; int gid_cj = 0; - int i, j, ind_i, ind_j, ai, aj; + int ind_i, ind_j, ai, aj; int nri; gmx_bool bFEP_i, bFEP_i_all; @@ -1614,7 +1589,7 @@ static void make_fep_list(const nbnxn_search_t nbs, ngid = nbat->nenergrp; - if (ngid*gridj->na_cj > sizeof(gid_cj)*8) + if (static_cast<size_t>(ngid*gridj->na_cj) > sizeof(gid_cj)*8) { gmx_fatal(FARGS, "The Verlet scheme with %dx%d kernels and free-energy only supports up to %d energy groups", gridi->na_c, gridj->na_cj, (sizeof(gid_cj)*8)/gridj->na_cj); @@ -1625,7 +1600,7 @@ static void make_fep_list(const nbnxn_search_t nbs, /* Loop over the atoms in the i sub-cell */ bFEP_i_all = TRUE; - for (i = 0; i < nbl->na_ci; i++) + for (int i = 0; i < nbl->na_ci; i++) { ind_i = ci*nbl->na_ci + i; ai = nbs->a[ind_i]; @@ -1654,7 +1629,7 @@ static void make_fep_list(const nbnxn_search_t nbs, gid_i = (nbat->energrp[ci] >> (egp_shift*i)) & egp_mask; } - for (cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++) + for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++) { unsigned int fep_cj; @@ -1692,7 +1667,7 @@ static void make_fep_list(const nbnxn_search_t nbs, if (bFEP_i || fep_cj != 0) { - for (j = 0; j < nbl->na_cj; j++) + for (int j = 0; j < nbl->na_cj; j++) { /* Is this interaction perturbed and not excluded? */ ind_j = cja*nbl->na_cj + j; @@ -1786,10 +1761,10 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, const nbnxn_grid_t *gridj, t_nblist *nlist) { - int sci, cj4_ind_start, cj4_ind_end, cj4_ind, gcj, cjr; + int sci, cj4_ind_start, cj4_ind_end, cjr; int nri_max; - int c, c_abs; - int i, j, ind_i, ind_j, ai, aj; + int c_abs; + int ind_i, ind_j, ai, aj; int nri; gmx_bool bFEP_i; real xi, yi, zi; @@ -1821,11 +1796,11 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, } /* Loop over the atoms in the i super-cluster */ - for (c = 0; c < GPU_NSUBCELL; c++) + for (int c = 0; c < GPU_NSUBCELL; c++) { c_abs = sci*GPU_NSUBCELL + c; - for (i = 0; i < nbl->na_ci; i++) + for (int i = 0; i < nbl->na_ci; i++) { ind_i = c_abs*nbl->na_ci + i; ai = nbs->a[ind_i]; @@ -1851,11 +1826,11 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, srenew(nlist->excl_fep, nlist->maxnrj); } - for (cj4_ind = cj4_ind_start; cj4_ind < cj4_ind_end; cj4_ind++) + for (int cj4_ind = cj4_ind_start; cj4_ind < cj4_ind_end; cj4_ind++) { cj4 = &nbl->cj4[cj4_ind]; - for (gcj = 0; gcj < NBNXN_GPU_JGROUP_SIZE; gcj++) + for (int gcj = 0; gcj < NBNXN_GPU_JGROUP_SIZE; gcj++) { unsigned int fep_cj; @@ -1871,7 +1846,7 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, if (bFEP_i || fep_cj != 0) { - for (j = 0; j < nbl->na_cj; j++) + for (int j = 0; j < nbl->na_cj; j++) { /* Is this interaction perturbed and not excluded?
*/ ind_j = (gridj->cell0*GPU_NSUBCELL + cjr)*nbl->na_cj + j; @@ -1961,11 +1936,9 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, int cj_ind_first, cj_ind_last; int cj_first, cj_last; int ndirect; - int i, ai, aj, si, eind, ge, se; + int ai, aj, si, ge, se; int found, cj_ind_0, cj_ind_1, cj_ind_m; int cj_m; - gmx_bool Found_si; - int si_ind; nbnxn_excl_t *nbl_excl; int inner_i, inner_e, w; @@ -1999,7 +1972,7 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, } /* Loop over the atoms in the i super-cell */ - for (i = 0; i < nbl->na_sc; i++) + for (int i = 0; i < nbl->na_sc; i++) { ai = nbs->a[sci*nbl->na_sc+i]; if (ai >= 0) @@ -2007,7 +1980,7 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, si = (i>>na_c_2log); /* Loop over the topology-based exclusions for this i-atom */ - for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) + for (int eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) { aj = excl->a[eind]; @@ -2138,7 +2111,7 @@ static void new_sci_entry(nbnxn_pairlist_t *nbl, int sci, int shift) static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, nbnxn_list_work_t *work) { - int jnew, j; + int jnew; if (ncj > work->cj_nalloc) { @@ -2148,7 +2121,7 @@ static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, /* Make a list of the j-cells involving exclusions */ jnew = 0; - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { if (cj[j].excl != NBNXN_INTERACTION_MASK_ALL) { @@ -2159,14 +2132,14 @@ static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, if (!((jnew == 0) || (jnew == 1 && cj[0].excl != NBNXN_INTERACTION_MASK_ALL))) { - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { if (cj[j].excl == NBNXN_INTERACTION_MASK_ALL) { work->cj[jnew++] = cj[j]; } } - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { cj[j] = work->cj[j]; } @@ -2218,10 +2191,9 @@ static void split_sci_entry(nbnxn_pairlist_t *nbl, { int nsp_est; int nsp_max; - int cj4_start, cj4_end, j4len, cj4; + int cj4_start, cj4_end, j4len; int sci; int nsp, nsp_sci, nsp_cj4, nsp_cj4_e, nsp_cj4_p; - int p; if (progBal) { @@ -2263,12 +2235,12 @@ static void split_sci_entry(nbnxn_pairlist_t *nbl, nsp_sci = 0; nsp_cj4_e = 0; nsp_cj4 = 0; - for (cj4 = cj4_start; cj4 < cj4_end; cj4++) + for (int cj4 = cj4_start; cj4 < cj4_end; cj4++) { nsp_cj4_p = nsp_cj4; /* Count the number of cluster pairs in this cj4 group */ nsp_cj4 = 0; - for (p = 0; p < GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE; p++) + for (int p = 0; p < GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE; p++) { nsp_cj4 += (nbl->cj4[cj4].imei[0].imask >> p) & 1; } @@ -2317,13 +2289,10 @@ static void close_ci_entry_supersub(nbnxn_pairlist_t *nbl, gmx_bool progBal, int nsp_tot_est, int thread, int nthread) { - int j4len, tlen; - int nb, b; - /* All content of the new ci entry have already been filled correctly, * we only need to increase the count here (for non empty lists). 
*/ - j4len = nbl->sci[nbl->nsci].cj4_ind_end - nbl->sci[nbl->nsci].cj4_ind_start; + int j4len = nbl->sci[nbl->nsci].cj4_ind_end - nbl->sci[nbl->nsci].cj4_ind_start; if (j4len > 0) { /* We can only have complete blocks of 4 j-entries in a list, @@ -2398,12 +2367,10 @@ static void set_icell_bbxxxx_supersub(const float *bb, int ci, real shx, real shy, real shz, float *bb_ci) { - int ia, m, i; - - ia = ci*(GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; - for (m = 0; m < (GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX) + int ia = ci*(GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; + for (int m = 0; m < (GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX) { - for (i = 0; i < STRIDE_PBB; i++) + for (int i = 0; i < STRIDE_PBB; i++) { bb_ci[m+0*STRIDE_PBB+i] = bb[ia+m+0*STRIDE_PBB+i] + shx; bb_ci[m+1*STRIDE_PBB+i] = bb[ia+m+1*STRIDE_PBB+i] + shy; @@ -2421,9 +2388,7 @@ static void set_icell_bb_supersub(const nbnxn_bb_t *bb, int ci, real shx, real shy, real shz, nbnxn_bb_t *bb_ci) { - int i; - - for (i = 0; i < GPU_NSUBCELL; i++) + for (int i = 0; i < GPU_NSUBCELL; i++) { set_icell_bb_simple(bb, ci*GPU_NSUBCELL+i, shx, shy, shz, @@ -2438,11 +2403,9 @@ static void icell_set_x_simple(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int ia, i; + int ia = ci*NBNXN_CPU_CLUSTER_I_SIZE; - ia = ci*NBNXN_CPU_CLUSTER_I_SIZE; - - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE; i++) { work->x_ci[i*STRIDE_XYZ+XX] = x[(ia+i)*stride+XX] + shx; work->x_ci[i*STRIDE_XYZ+YY] = x[(ia+i)*stride+YY] + shy; @@ -2457,13 +2420,10 @@ static void icell_set_x_supersub(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int ia, i; - real *x_ci; - - x_ci = work->x_ci; + real * x_ci = work->x_ci; - ia = ci*GPU_NSUBCELL*na_c; - for (i = 0; i < GPU_NSUBCELL*na_c; i++) + int ia = ci*GPU_NSUBCELL*na_c; + for (int i = 0; i < GPU_NSUBCELL*na_c; i++) { x_ci[i*DIM + XX] = x[(ia+i)*stride + XX] + shx; x_ci[i*DIM + YY] = x[(ia+i)*stride + YY] + shy; @@ -2479,18 +2439,15 @@ static void icell_set_x_supersub_simd4(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int si, io, ia, i, j; - real *x_ci; - - x_ci = work->x_ci; + real * x_ci = work->x_ci; - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { - for (i = 0; i < na_c; i += STRIDE_PBB) + for (int i = 0; i < na_c; i += STRIDE_PBB) { - io = si*na_c + i; - ia = ci*GPU_NSUBCELL*na_c + io; - for (j = 0; j < STRIDE_PBB; j++) + int io = si*na_c + i; + int ia = ci*GPU_NSUBCELL*na_c + io; + for (int j = 0; j < STRIDE_PBB; j++) { x_ci[io*DIM + j + XX*STRIDE_PBB] = x[(ia+j)*stride+XX] + shx; x_ci[io*DIM + j + YY*STRIDE_PBB] = x[(ia+j)*stride+YY] + shy; @@ -2505,11 +2462,11 @@ static real minimum_subgrid_size_xy(const nbnxn_grid_t *grid) { if (grid->bSimple) { - return min(grid->sx, grid->sy); + return std::min(grid->sx, grid->sy); } else { - return min(grid->sx/GPU_NSUBCELL_X, grid->sy/GPU_NSUBCELL_Y); + return std::min(grid->sx/GPU_NSUBCELL_X, grid->sy/GPU_NSUBCELL_Y); } } @@ -2558,13 +2515,12 @@ real nbnxn_get_rlist_effective_inc(int cluster_size_j, real atom_density) vol_inc_i = (cluster_size_i - 1)/atom_density; vol_inc_j = (cluster_size_j - 1)/atom_density; - return nbnxn_rlist_inc_outside_fac*pow(vol_inc_i + vol_inc_j, 1.0/3.0); + return nbnxn_rlist_inc_outside_fac*std::pow(static_cast<real>(vol_inc_i + vol_inc_j), static_cast<real>(1.0/3.0)); } /* Estimates the interaction volume^2 for non-local interactions */ static real nonlocal_vol2(const gmx_domdec_zones_t *zones,
rvec ls, real r) { - int z, d; real cl, ca, za; real vold_est; real vol2_est_tot; @@ -2578,14 +2534,14 @@ static real nonlocal_vol2(const gmx_domdec_zones_t *zones, rvec ls, real r) * as small parts as possible. */ - for (z = 0; z < zones->n; z++) + for (int z = 0; z < zones->n; z++) { if (zones->shift[z][XX] + zones->shift[z][YY] + zones->shift[z][ZZ] == 1) { cl = 0; ca = 1; za = 1; - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { if (zones->shift[z][d] == 0) { @@ -2624,7 +2580,6 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, const nbnxn_grid_t *grid; rvec ls; real xy_diag2, r_eff_sup, vol_est, nsp_est, nsp_est_nl; - int nsubpair_max; grid = &nbs->grid[0]; @@ -2645,7 +2600,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, xy_diag2 = ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ]; /* The formulas below are a heuristic estimate of the average nsj per si*/ - r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*sqr((grid->na_c - 1.0)/grid->na_c)*sqrt(xy_diag2/3); + r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*sqr((grid->na_c - 1.0)/grid->na_c)*std::sqrt(xy_diag2/3); if (!nbs->DomDec || nbs->zones->n == 1) { @@ -2667,7 +2622,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, /* 12/2 quarter pie slices on the edges */ vol_est += 2*(ls[XX] + ls[YY] + ls[ZZ])*0.25*M_PI*sqr(r_eff_sup); /* 4 octants of a sphere */ - vol_est += 0.5*4.0/3.0*M_PI*pow(r_eff_sup, 3); + vol_est += 0.5*4.0/3.0*M_PI*std::pow(r_eff_sup, static_cast<real>(3)); /* Estimate the number of cluster pairs as the local number of * clusters times the volume they interact with times the density. @@ -2686,7 +2641,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, * groups of atoms we'll anyhow be limited by nsubpair_target_min, * so this overestimation will not matter. */ - nsp_est = max(nsp_est, grid->nsubc_tot*14.0); + nsp_est = std::max(nsp_est, grid->nsubc_tot*static_cast<real>(14)); if (debug) { @@ -2703,9 +2658,9 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, * Since there is overhead, we shouldn't make the lists too small * (and we can't chop up j-groups) so we use a minimum target size of 36.
*/ - *nsubpair_target = max(nsubpair_target_min, - (int)(nsp_est/min_ci_balanced + 0.5)); - *nsubpair_tot_est = (int)nsp_est; + *nsubpair_target = std::max(nsubpair_target_min, + static_cast<int>(nsp_est/min_ci_balanced + 0.5)); + *nsubpair_tot_est = static_cast<int>(nsp_est); if (debug) { @@ -2717,15 +2672,13 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, /* Debug list print function */ static void print_nblist_ci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) { - int i, j; - - for (i = 0; i < nbl->nci; i++) + for (int i = 0; i < nbl->nci; i++) { fprintf(fp, "ci %4d shift %2d ncj %3d\n", nbl->ci[i].ci, nbl->ci[i].shift, nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start); - for (j = nbl->ci[i].cj_ind_start; j < nbl->ci[i].cj_ind_end; j++) + for (int j = nbl->ci[i].cj_ind_start; j < nbl->ci[i].cj_ind_end; j++) { fprintf(fp, " cj %5d imask %x\n", nbl->cj[j].cj, @@ -2737,23 +2690,21 @@ static void print_nblist_ci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) /* Debug list print function */ static void print_nblist_sci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) { - int i, j4, j, ncp, si; - - for (i = 0; i < nbl->nsci; i++) + for (int i = 0; i < nbl->nsci; i++) { fprintf(fp, "ci %4d shift %2d ncj4 %2d\n", nbl->sci[i].sci, nbl->sci[i].shift, nbl->sci[i].cj4_ind_end - nbl->sci[i].cj4_ind_start); - ncp = 0; - for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) + int ncp = 0; + for (int j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) { - for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) + for (int j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) { fprintf(fp, " sj %5d imask %x\n", nbl->cj4[j4].cj[j], nbl->cj4[j4].imei[0].imask); - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si))) { @@ -2774,8 +2725,6 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, nbnxn_pairlist_t *nblc) { int nsci, ncj4, nexcl; - int n, i; - int nthreads gmx_unused; if (nblc->bSimple) { @@ -2785,7 +2734,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, nsci = nblc->nsci; ncj4 = nblc->ncj4; nexcl = nblc->nexcl; - for (i = 0; i < nnbl; i++) + for (int i = 0; i < nnbl; i++) { nsci += nbl[i]->nsci; ncj4 += nbl[i]->ncj4; @@ -2816,54 +2765,54 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, /* Each thread should copy its own data to the combined arrays, * as otherwise data will go back and forth between different caches.
*/ - nthreads = gmx_omp_nthreads_get(emntPairsearch); +#if (defined GMX_OPENMP) && !(defined __clang_analyzer__) + // cppcheck-suppress unreadVariable + int nthreads = gmx_omp_nthreads_get(emntPairsearch); +#endif + #pragma omp parallel for num_threads(nthreads) schedule(static) - for (n = 0; n < nnbl; n++) + for (int n = 0; n < nnbl; n++) { int sci_offset; int cj4_offset; - int ci_offset; int excl_offset; - int i, j4; const nbnxn_pairlist_t *nbli; /* Determine the offset in the combined data for our thread */ sci_offset = nblc->nsci; cj4_offset = nblc->ncj4; - ci_offset = nblc->nci_tot; excl_offset = nblc->nexcl; - for (i = 0; i < n; i++) + for (int i = 0; i < n; i++) { sci_offset += nbl[i]->nsci; cj4_offset += nbl[i]->ncj4; - ci_offset += nbl[i]->nci_tot; excl_offset += nbl[i]->nexcl; } nbli = nbl[n]; - for (i = 0; i < nbli->nsci; i++) + for (int i = 0; i < nbli->nsci; i++) { nblc->sci[sci_offset+i] = nbli->sci[i]; nblc->sci[sci_offset+i].cj4_ind_start += cj4_offset; nblc->sci[sci_offset+i].cj4_ind_end += cj4_offset; } - for (j4 = 0; j4 < nbli->ncj4; j4++) + for (int j4 = 0; j4 < nbli->ncj4; j4++) { nblc->cj4[cj4_offset+j4] = nbli->cj4[j4]; nblc->cj4[cj4_offset+j4].imei[0].excl_ind += excl_offset; nblc->cj4[cj4_offset+j4].imei[1].excl_ind += excl_offset; } - for (j4 = 0; j4 < nbli->nexcl; j4++) + for (int j4 = 0; j4 < nbli->nexcl; j4++) { nblc->excl[excl_offset+j4] = nbli->excl[j4]; } } - for (n = 0; n < nnbl; n++) + for (int n = 0; n < nnbl; n++) { nblc->nsci += nbl[n]->nsci; nblc->ncj4 += nbl[n]->ncj4; @@ -2875,7 +2824,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, static void balance_fep_lists(const nbnxn_search_t nbs, nbnxn_pairlist_set_t *nbl_lists) { - int nnbl, th; + int nnbl; int nri_tot, nrj_tot, nrj_target; int th_dest; t_nblist *nbld; @@ -2891,7 +2840,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, /* Count the total i-lists and pairs */ nri_tot = 0; nrj_tot = 0; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { nri_tot += nbl_lists->nbl_fep[th]->nri; nrj_tot += nbl_lists->nbl_fep[th]->nrj; @@ -2902,7 +2851,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, assert(gmx_omp_nthreads_get(emntNonbonded) == nnbl); #pragma omp parallel for schedule(static) num_threads(nnbl) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbl; @@ -2929,14 +2878,13 @@ static void balance_fep_lists(const nbnxn_search_t nbs, /* Loop over the source lists and assign and copy i-entries */ th_dest = 0; nbld = nbs->work[th_dest].nbl_fep; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbls; - int i, j; nbls = nbl_lists->nbl_fep[th]; - for (i = 0; i < nbls->nri; i++) + for (int i = 0; i < nbls->nri; i++) { int nrj; @@ -2957,7 +2905,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, nbld->gid[nbld->nri] = nbls->gid[i]; nbld->shift[nbld->nri] = nbls->shift[i]; - for (j = nbls->jindex[i]; j < nbls->jindex[i+1]; j++) + for (int j = nbls->jindex[i]; j < nbls->jindex[i+1]; j++) { nbld->jjnr[nbld->nrj] = nbls->jjnr[j]; nbld->excl_fep[nbld->nrj] = nbls->excl_fep[j]; @@ -2969,7 +2917,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, } /* Swap the list pointers */ - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbl_tmp; @@ -3054,7 +3002,8 @@ static float boundingbox_only_distance2(const nbnxn_grid_t *gridi, bby /= GPU_NSUBCELL_Y; } - rbb2 = sqr(max(0, rlist - 0.5*sqrt(bbx*bbx + bby*bby))); + rbb2 = std::max(0.0, rlist - 0.5*std::sqrt(bbx*bbx 
+ bby*bby)); + rbb2 = rbb2 * rbb2; #ifndef GMX_DOUBLE return rbb2; @@ -3136,12 +3085,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, matrix box; real rl2, rl_fep2 = 0; float rbb2; - int d; int ci_b, ci, ci_x, ci_y, ci_xy, cj; ivec shp; - int tx, ty, tz; int shift; - gmx_bool bMakeList; real shx, shy, shz; int conv_i, cell0_i; const nbnxn_bb_t *bb_i = NULL; @@ -3154,7 +3100,6 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, real bz1_frac; real d2cx, d2z, d2z_cx, d2z_cy, d2zx, d2zxy, d2xy; int cxf, cxl, cyf, cyf_x, cyl; - int cx, cy; int c0, c1, cs, cf, cl; int ndistc; int ncpcheck; @@ -3219,11 +3164,11 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (debug) { - fprintf(debug, "nbl bounding box only distance %f\n", sqrt(rbb2)); + fprintf(debug, "nbl bounding box only distance %f\n", std::sqrt(rbb2)); } /* Set the shift range */ - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { /* Check if we need periodicity shifts. * Without PBC or with domain decomposition we don't need them. @@ -3235,7 +3180,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, else { if (d == XX && - box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2)) + box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < std::sqrt(rl2)) { shp[d] = 2; } @@ -3334,7 +3279,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, ci_xy = ci_x*gridi->ncy + ci_y; /* Loop over shift vectors in three dimensions */ - for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) + for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) { shz = tz*box[ZZ][ZZ]; @@ -3361,15 +3306,14 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, continue; } - bz1_frac = - bz1/((real)(gridi->cxy_ind[ci_xy+1] - gridi->cxy_ind[ci_xy])); + bz1_frac = bz1/(gridi->cxy_ind[ci_xy+1] - gridi->cxy_ind[ci_xy]); if (bz1_frac < 0) { bz1_frac = 0; } /* The check with bz1_frac close to or larger than 1 comes later */ - for (ty = -shp[YY]; ty <= shp[YY]; ty++) + for (int ty = -shp[YY]; ty <= shp[YY]; ty++) { shy = ty*box[YY][YY] + tz*box[ZZ][YY]; @@ -3404,7 +3348,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, d2z_cy += sqr(by0 - gridj->c1[YY]); } - for (tx = -shp[XX]; tx <= shp[XX]; tx++) + for (int tx = -shp[XX]; tx <= shp[XX]; tx++) { shift = XYZ2IS(tx, ty, tz); @@ -3480,7 +3424,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, gridi->na_c, nbat->xstride, nbat->x, nbl->work); - for (cx = cxf; cx <= cxl; cx++) + for (int cx = cxf; cx <= cxl; cx++) { d2zx = d2z; if (gridj->c0[XX] + cx*gridj->sx > bx1) @@ -3510,7 +3454,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, cyf_x = cyf; } - for (cy = cyf_x; cy <= cyl; cy++) + for (int cy = cyf_x; cy <= cyl; cy++) { c0 = gridj->cxy_ind[cx*gridj->ncy+cy]; c1 = gridj->cxy_ind[cx*gridj->ncy+cy+1]; @@ -3533,7 +3477,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, } if (c1 > c0 && d2zxy < rl2) { - cs = c0 + (int)(bz1_frac*(c1 - c0)); + cs = c0 + static_cast<int>(bz1_frac*(c1 - c0)); if (cs >= c1) { cs = c1 - 1; @@ -3568,10 +3512,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, /* Simple reference code, for debugging, * overrides the more complex code above. */ - int k; cf = c1; cl = -1; - for (k = c0; k < c1; k++) + for (int k = c0; k < c1; k++) { if (box_dist2(bx0, bx1, by0, by1, bz0, bz1, bb+k) < rl2 && k < cf) @@ -3593,11 +3536,11 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, * only use cj >= ci.
*/ #ifndef NBNXN_SHIFT_BACKWARD - cf = max(cf, ci); + cf = std::max(cf, ci); #else if (shift == CENTRAL) { - cf = max(cf, ci); + cf = std::max(cf, ci); } #endif } @@ -3659,11 +3602,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (bFBufferFlag && nbl->ncj > ncj_old_j) { - int cbf, cbl, cb; - - cbf = nbl->cj[ncj_old_j].cj >> gridj_flag_shift; - cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift; - for (cb = cbf; cb <= cbl; cb++) + int cbf = nbl->cj[ncj_old_j].cj >> gridj_flag_shift; + int cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift; + for (int cb = cbf; cb <= cbl; cb++) { bitmask_init_bit(&gridj_flag[cb], th); } @@ -3764,14 +3705,11 @@ static void reduce_buffer_flags(const nbnxn_search_t nbs, int nsrc, const nbnxn_buffer_flags_t *dest) { - int s, b; - gmx_bitmask_t *flag; - - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { - flag = nbs->work[s].buffer_flags.flag; + gmx_bitmask_t * flag = nbs->work[s].buffer_flags.flag; - for (b = 0; b < dest->nflag; b++) + for (int b = 0; b < dest->nflag; b++) { bitmask_union(&(dest->flag[b]), flag[b]); } @@ -3780,7 +3718,7 @@ static void reduce_buffer_flags(const nbnxn_search_t nbs, static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) { - int nelem, nkeep, ncopy, nred, b, c, out; + int nelem, nkeep, ncopy, nred, out; gmx_bitmask_t mask_0; nelem = 0; @@ -3788,7 +3726,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) ncopy = 0; nred = 0; bitmask_init_bit(&mask_0, 0); - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { if (bitmask_is_equal(flags->flag[b], mask_0)) { @@ -3798,7 +3736,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) } else if (!bitmask_is_zero(flags->flag[b])) { - c = 0; + int c = 0; for (out = 0; out < nout; out++) { if (bitmask_is_set(flags->flag[b], out)) @@ -3839,7 +3777,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) static void sort_sci(nbnxn_pairlist_t *nbl) { nbnxn_list_work_t *work; - int m, i, s, s0, s1; + int m, s0, s1; nbnxn_sci_t *sci_sort; if (nbl->ncj4 <= nbl->nsci) @@ -3869,19 +3807,19 @@ static void sort_sci(nbnxn_pairlist_t *nbl) } /* Count the entries of each size */ - for (i = 0; i <= m; i++) + for (int i = 0; i <= m; i++) { work->sort[i] = 0; } - for (s = 0; s < nbl->nsci; s++) + for (int s = 0; s < nbl->nsci; s++) { - i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); + int i = std::min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); work->sort[i]++; } /* Calculate the offset for each count */ s0 = work->sort[m]; work->sort[m] = 0; - for (i = m - 1; i >= 0; i--) + for (int i = m - 1; i >= 0; i--) { s1 = work->sort[i]; work->sort[i] = work->sort[i + 1] + s0; @@ -3890,9 +3828,9 @@ static void sort_sci(nbnxn_pairlist_t *nbl) /* Sort entries directly into place */ sci_sort = work->sci_sort; - for (s = 0; s < nbl->nsci; s++) + for (int s = 0; s < nbl->nsci; s++) { - i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); + int i = std::min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); sci_sort[work->sort[i]++] = nbl->sci[s]; } @@ -3914,9 +3852,8 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, { nbnxn_grid_t *gridi, *gridj; gmx_bool bGPUCPU; - int nzi, zi, zj0, zj1, zj; + int nzi, zj0, zj1; int nsubpair_target, nsubpair_tot_est; - int th; int nnbl; nbnxn_pairlist_t **nbl; int ci_block; @@ -3945,6 +3882,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, if (nbl_list->bSimple) { +#ifdef 
GMX_NBNXN_SIMD switch (nb_kernel_type) { #ifdef GMX_NBNXN_SIMD_4XN @@ -3961,6 +3899,10 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, nbs->icell_set_x = icell_set_x_simple; break; } +#else /* GMX_NBNXN_SIMD */ + /* MSVC 2013 complains about switch statements without case */ + nbs->icell_set_x = icell_set_x_simple; +#endif /* GMX_NBNXN_SIMD */ } else { @@ -3995,7 +3937,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, } /* Clear all pair-lists */ - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { clear_pairlist(nbl[th]); @@ -4005,7 +3947,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, } } - for (zi = 0; zi < nzi; zi++) + for (int zi = 0; zi < nzi; zi++) { gridi = &nbs->grid[zi]; @@ -4018,7 +3960,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, zj0++; } } - for (zj = zj0; zj < zj1; zj++) + for (int zj = zj0; zj < zj1; zj++) { gridj = &nbs->grid[zj]; @@ -4045,7 +3987,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, progBal = (LOCAL_I(iloc) || nbs->zones->n <= 2); #pragma omp parallel for num_threads(nnbl) schedule(static) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { /* Re-init the thread-local work flag data before making * the first list (not an elegant conditional). @@ -4079,7 +4021,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, np_tot = 0; np_noq = 0; np_hlj = 0; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { inc_nrnb(nrnb, eNR_NBNXN_DIST2, nbs->work[th].ndistc); @@ -4121,7 +4063,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, else { #pragma omp parallel for num_threads(nnbl) schedule(static) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { sort_sci(nbl[th]); } diff --git a/src/gromacs/mdlib/nbnxn_search.h b/src/gromacs/mdlib/nbnxn_search.h index 34963ef613..79b09788e5 100644 --- a/src/gromacs/mdlib/nbnxn_search.h +++ b/src/gromacs/mdlib/nbnxn_search.h @@ -39,9 +39,6 @@ #include "gromacs/legacyheaders/typedefs.h" #include "gromacs/mdlib/nbnxn_pairlist.h" -#ifdef __cplusplus -extern "C" { -#endif /* Tells if the pair-list corresponding to nb_kernel_type is simple. * Returns FALSE for super-sub type pair-list. @@ -83,8 +80,5 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, int nb_kernel_type, t_nrnb *nrnb); -#ifdef __cplusplus -} -#endif #endif -- 2.11.4.GIT
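
Note on the clamped bounding-box distance used in box_dist2() and subc_bb_dist2() above: for each axis the code takes the larger of the two signed gaps and clamps it to zero, so overlapping boxes contribute nothing to the squared distance. A minimal standalone sketch of that pattern; the Box struct and boxDist2 name are illustrative, not the GROMACS nbnxn_bb_t layout:

    #include <algorithm>
    #include <cstdio>

    struct Box { float lo[3], hi[3]; };

    /* Squared distance between two axis-aligned boxes; 0 if they overlap. */
    static float boxDist2(const Box &a, const Box &b)
    {
        float d2 = 0;
        for (int d = 0; d < 3; d++)
        {
            float dl  = a.lo[d] - b.hi[d];  /* gap if a lies above b on this axis */
            float dh  = b.lo[d] - a.hi[d];  /* gap if b lies above a */
            float dm  = std::max(dl, dh);   /* at most one of the gaps is positive */
            float dm0 = std::max(dm, 0.0f); /* clamp: overlap contributes nothing */
            d2       += dm0*dm0;
        }
        return d2;
    }

    int main()
    {
        Box a = { { 0, 0, 0 }, { 1, 1, 1 } };
        Box b = { { 3, 0, 0 }, { 4, 1, 1 } };
        std::printf("d2 = %g\n", boxDist2(a, b)); /* prints 4: gap of 2 along x */
        return 0;
    }

The clamp is also why the conversion above must write std::max(dm, 0.0f) rather than max(dm, 0): with the std:: template both arguments have to be the same type.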
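
nbnxn_init_pairlist_set() above deliberately allocates each thread's list inside the OpenMP parallel for, so that under a first-touch NUMA policy each list's pages are placed on the node of the thread that will use them. A minimal sketch of that idiom, assuming OpenMP is available (buffer size and names are illustrative; error handling omitted for brevity):

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    int main()
    {
        const int nthread = 4;
        std::vector<double *> buf(nthread, nullptr);

        /* Allocate and initialize inside the parallel region: the thread
         * that first writes a page determines which NUMA node backs it. */
    #pragma omp parallel for num_threads(nthread) schedule(static)
        for (int t = 0; t < nthread; t++)
        {
            buf[t] = static_cast<double *>(std::malloc(1024*sizeof(double)));
            for (int i = 0; i < 1024; i++)
            {
                buf[t][i] = 0; /* first touch by the owning thread */
            }
        }

        std::printf("allocated %d thread-local buffers\n", nthread);
        for (int t = 0; t < nthread; t++)
        {
            std::free(buf[t]);
        }
        return 0;
    }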
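
combine_nblists() above lets every thread recompute its own write offsets by summing the sizes of all lists before it, so all threads copy into disjoint ranges of the combined arrays concurrently. The same exclusive-prefix-offset pattern in miniature (names illustrative; compiles and runs serially if OpenMP is absent, since the pragma is then ignored):

    #include <cstdio>
    #include <vector>

    int main()
    {
        /* Per-thread partial lists to be concatenated. */
        std::vector<std::vector<int> > part = { { 1, 2 }, { 3 }, { 4, 5, 6 } };
        int n = static_cast<int>(part.size());

        int total = 0;
        for (int i = 0; i < n; i++)
        {
            total += static_cast<int>(part[i].size());
        }
        std::vector<int> combined(total);

        /* Each thread derives its own offset (an exclusive prefix sum),
         * so the copies below write to disjoint ranges. */
    #pragma omp parallel for schedule(static)
        for (int t = 0; t < n; t++)
        {
            int offset = 0;
            for (int i = 0; i < t; i++)
            {
                offset += static_cast<int>(part[i].size());
            }
            for (size_t j = 0; j < part[t].size(); j++)
            {
                combined[offset + static_cast<int>(j)] = part[t][j];
            }
        }

        for (int v : combined)
        {
            std::printf("%d ", v); /* prints 1 2 3 4 5 6 */
        }
        std::printf("\n");
        return 0;
    }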
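
sort_sci() above is a counting sort keyed on the cj4-list length of each sci entry, capped at m, with offsets accumulated from the largest bucket downward so the longest (most expensive) entries land first for better load balancing. The original converts counts to offsets in place in the same work->sort array; this self-contained sketch uses a separate offset array for clarity (all names illustrative):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    /* Counting sort in descending key order, mirroring the pass structure of
     * sort_sci(): count per key, turn counts into start offsets (largest key
     * first), then scatter each entry into its slot in one pass. */
    static std::vector<int> sortDescending(const std::vector<int> &len, int m)
    {
        std::vector<int> count(m + 1, 0);
        for (int l : len)
        {
            count[std::min(l, m)]++;
        }
        std::vector<int> offset(m + 1);
        offset[m] = 0;
        for (int i = m - 1; i >= 0; i--)
        {
            offset[i] = offset[i + 1] + count[i + 1];
        }
        std::vector<int> out(len.size());
        for (int l : len)
        {
            out[offset[std::min(l, m)]++] = l;
        }
        return out;
    }

    int main()
    {
        std::vector<int> sorted = sortDescending({ 2, 5, 1, 5, 3 }, 4);
        for (int l : sorted)
        {
            std::printf("%d ", l); /* prints 5 5 3 2 1 (the 5s are capped to key 4) */
        }
        std::printf("\n");
        return 0;
    }

Two linear passes plus a scatter give O(n + m) work, which is why sort_sci() guards with the early return when ncj4 <= nsci makes the sort pointless.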