From 0f0053fb631e26582e7def9fa95acd34610c7abe Mon Sep 17 00:00:00 2001 From: Erik Lindahl Date: Tue, 7 Jul 2015 21:19:00 +0200 Subject: [PATCH] Convert nbnxn search,grid,atomdata to C++ Move iteration variables into loops, use cmath/algorithm, and std::max/min. Remove extern "C" from include files. Change-Id: I14e0d6e6ca92a41df243bbef4d2f8f0a16507c13 --- .../mdlib/{nbnxn_atomdata.c => nbnxn_atomdata.cpp} | 275 ++++++------- src/gromacs/mdlib/nbnxn_atomdata.h | 10 +- src/gromacs/mdlib/{nbnxn_grid.c => nbnxn_grid.cpp} | 396 +++++++++---------- src/gromacs/mdlib/nbnxn_grid.h | 6 - .../mdlib/{nbnxn_search.c => nbnxn_search.cpp} | 428 +++++++++------------ src/gromacs/mdlib/nbnxn_search.h | 6 - 6 files changed, 491 insertions(+), 630 deletions(-) rename src/gromacs/mdlib/{nbnxn_atomdata.c => nbnxn_atomdata.cpp} (89%) rename src/gromacs/mdlib/{nbnxn_grid.c => nbnxn_grid.cpp} (85%) rename src/gromacs/mdlib/{nbnxn_search.c => nbnxn_search.cpp} (93%) diff --git a/src/gromacs/mdlib/nbnxn_atomdata.c b/src/gromacs/mdlib/nbnxn_atomdata.cpp similarity index 89% rename from src/gromacs/mdlib/nbnxn_atomdata.c rename to src/gromacs/mdlib/nbnxn_atomdata.cpp index 8c05933036..20bb7b6e2c 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.c +++ b/src/gromacs/mdlib/nbnxn_atomdata.cpp @@ -40,14 +40,16 @@ #include "config.h" #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "thread_mpi/atomic.h" #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/math/vec.h" #include "gromacs/mdlib/nb_verlet.h" #include "gromacs/mdlib/nbnxn_consts.h" @@ -61,6 +63,7 @@ #include "gromacs/utility/gmxomp.h" #include "gromacs/utility/smalloc.h" + /* Default nbnxn allocation routine, allocates NBNXN_MEM_ALIGN byte aligned */ void nbnxn_alloc_aligned(void **ptr, size_t nbytes) { @@ -151,8 +154,6 @@ static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out, int nenergrp, int stride, nbnxn_alloc_t *ma) { - int cj_size; - out->f = NULL; ma((void **)&out->fshift, SHIFTS*DIM*sizeof(*out->fshift)); out->nV = nenergrp*nenergrp; @@ -162,7 +163,7 @@ static void nbnxn_atomdata_output_init(nbnxn_atomdata_output_t *out, if (nb_kernel_type == nbnxnk4xN_SIMD_4xN || nb_kernel_type == nbnxnk4xN_SIMD_2xNN) { - cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); + int cj_size = nbnxn_kernel_to_cluster_j_size(nb_kernel_type); out->nVS = nenergrp*nenergrp*stride*(cj_size>>1)*cj_size; ma((void **)&out->VSvdw, out->nVS*sizeof(*out->VSvdw)); ma((void **)&out->VSc, out->nVS*sizeof(*out->VSc )); @@ -192,23 +193,23 @@ static void copy_int_to_nbat_int(const int *a, int na, int na_round, static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) { - int a, d, j, c; + int j, c; switch (nbatFormat) { case nbatXYZ: - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { xnb[(a0+a)*STRIDE_XYZ+d] = 0; } } break; case nbatXYZQ: - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { xnb[(a0+a)*STRIDE_XYZQ+d] = 0; } @@ -217,7 +218,7 @@ static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) case nbatX4: j = X4_IND_A(a0); c = a0 & (PACK_X4-1); - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { xnb[j+XX*PACK_X4] = 0; xnb[j+YY*PACK_X4] = 0; @@ -234,7 +235,7 @@ static void clear_nbat_real(int na, int nbatFormat, real *xnb, int a0) case nbatX8: j = X8_IND_A(a0); c = a0 & (PACK_X8-1); - for (a = 0; a < na; a++) + for (int a = 0; a < na; a++) { xnb[j+XX*PACK_X8] = 0; xnb[j+YY*PACK_X8] = 0; @@ -374,10 +375,9 @@ void copy_rvec_to_nbat_real(const int *a, int na, int na_round, /* Stores the LJ parameter data in a format convenient for different kernels */ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) { - int nt, i, j; real c6, c12; - nt = nbat->ntype; + int nt = nbat->ntype; if (bSIMD) { @@ -388,9 +388,9 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) * be used, but introducing the conditional code is not * really worth it. */ nbat->alloc((void **)&nbat->nbfp_s4, nt*nt*4*sizeof(*nbat->nbfp_s4)); - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { - for (j = 0; j < nt; j++) + for (int j = 0; j < nt; j++) { nbat->nbfp_s4[(i*nt+j)*4+0] = nbat->nbfp[(i*nt+j)*2+0]; nbat->nbfp_s4[(i*nt+j)*4+1] = nbat->nbfp[(i*nt+j)*2+1]; @@ -409,15 +409,15 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) case ljcrGEOM: nbat->comb_rule = ljcrGEOM; - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { /* Store the sqrt of the diagonal from the nbfp matrix */ - nbat->nbfp_comb[i*2 ] = sqrt(nbat->nbfp[(i*nt+i)*2 ]); - nbat->nbfp_comb[i*2+1] = sqrt(nbat->nbfp[(i*nt+i)*2+1]); + nbat->nbfp_comb[i*2 ] = std::sqrt(nbat->nbfp[(i*nt+i)*2 ]); + nbat->nbfp_comb[i*2+1] = std::sqrt(nbat->nbfp[(i*nt+i)*2+1]); } break; case ljcrLB: - for (i = 0; i < nt; i++) + for (int i = 0; i < nt; i++) { /* Get 6*C6 and 12*C12 from the diagonal of the nbfp matrix */ c6 = nbat->nbfp[(i*nt+i)*2 ]; @@ -427,8 +427,8 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) /* We store 0.5*2^1/6*sigma and sqrt(4*3*eps), * so we get 6*C6 and 12*C12 after combining. */ - nbat->nbfp_comb[i*2 ] = 0.5*pow(c12/c6, 1.0/6.0); - nbat->nbfp_comb[i*2+1] = sqrt(c6*c6/c12); + nbat->nbfp_comb[i*2 ] = 0.5*std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)); + nbat->nbfp_comb[i*2+1] = std::sqrt(c6*c6/c12); } else { @@ -450,7 +450,6 @@ static void set_lj_parameter_data(nbnxn_atomdata_t *nbat, gmx_bool bSIMD) static void nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) { - int i, j; const int simd_width = GMX_SIMD_REAL_WIDTH; int simd_excl_size; /* Set the diagonal cluster pair exclusion mask setup data. * In the kernel we can subtract 1 to generate the subsequent mask. 
*/ int simd_4xn_diag_size; - const real simdFalse = -1, simdTrue = 1; - real *simd_interaction_array; - simd_4xn_diag_size = max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width); + simd_4xn_diag_size = std::max(NBNXN_CPU_CLUSTER_I_SIZE, simd_width); snew_aligned(nbat->simd_4xn_diagonal_j_minus_i, simd_4xn_diag_size, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_4xn_diag_size; j++) + for (int j = 0; j < simd_4xn_diag_size; j++) { nbat->simd_4xn_diagonal_j_minus_i[j] = j - 0.5; } snew_aligned(nbat->simd_2xnn_diagonal_j_minus_i, simd_width, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_width/2; j++) + for (int j = 0; j < simd_width/2; j++) { /* The j-cluster size is half the SIMD width */ nbat->simd_2xnn_diagonal_j_minus_i[j] = j - 0.5; @@ -493,7 +490,7 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) snew_aligned(nbat->simd_exclusion_filter1, simd_excl_size, NBNXN_MEM_ALIGN); snew_aligned(nbat->simd_exclusion_filter2, simd_excl_size*2, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_excl_size; j++) + for (int j = 0; j < simd_excl_size; j++) { /* Set the consecutive bits for masking pair exclusions */ nbat->simd_exclusion_filter1[j] = (1U << j); @@ -516,12 +513,15 @@ nbnxn_atomdata_init_simple_exclusion_masks(nbnxn_atomdata_t *nbat) * indices are used in the kernels. */ simd_excl_size = NBNXN_CPU_CLUSTER_I_SIZE*NBNXN_CPU_CLUSTER_I_SIZE; - const int qpx_simd_width = GMX_SIMD_REAL_WIDTH; + const int qpx_simd_width = GMX_SIMD_REAL_WIDTH; + const real simdFalse = -1, simdTrue = 1; + real *simd_interaction_array; + snew_aligned(simd_interaction_array, simd_excl_size * qpx_simd_width, NBNXN_MEM_ALIGN); - for (j = 0; j < simd_excl_size; j++) + for (int j = 0; j < simd_excl_size; j++) { int index = j * qpx_simd_width; - for (i = 0; i < qpx_simd_width; i++) + for (int i = 0; i < qpx_simd_width; i++) { simd_interaction_array[index + i] = (j & (1 << i)) ? simdTrue : simdFalse; } @@ -542,7 +542,7 @@ void nbnxn_atomdata_init(FILE *fp, nbnxn_alloc_t *alloc, nbnxn_free_t *free) { - int i, j, nth; + int nth; real c6, c12, tol; char *ptr; gmx_bool simple, bCombGeom, bCombLB, bSIMD; @@ -591,13 +591,13 @@ void nbnxn_atomdata_init(FILE *fp, /* Temporarily fill nbat->nbfp_comb with sigma and epsilon * to check for the LB rule. 
*/ - for (i = 0; i < ntype; i++) + for (int i = 0; i < ntype; i++) { c6 = nbfp[(i*ntype+i)*2 ]/6.0; c12 = nbfp[(i*ntype+i)*2+1]/12.0; if (c6 > 0 && c12 > 0) { - nbat->nbfp_comb[i*2 ] = pow(c12/c6, 1.0/6.0); + nbat->nbfp_comb[i*2 ] = std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)); nbat->nbfp_comb[i*2+1] = 0.25*c6*c6/c12; } else if (c6 == 0 && c12 == 0) @@ -612,9 +612,9 @@ void nbnxn_atomdata_init(FILE *fp, } } - for (i = 0; i < nbat->ntype; i++) + for (int i = 0; i < nbat->ntype; i++) { - for (j = 0; j < nbat->ntype; j++) + for (int j = 0; j < nbat->ntype; j++) { if (i < ntype && j < ntype) { @@ -638,8 +638,9 @@ void nbnxn_atomdata_init(FILE *fp, ((c6 == 0 && c12 == 0 && (nbat->nbfp_comb[i*2+1] == 0 || nbat->nbfp_comb[j*2+1] == 0)) || (c6 > 0 && c12 > 0 && - gmx_within_tol(pow(c12/c6, 1.0/6.0), 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) && - gmx_within_tol(0.25*c6*c6/c12, sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol))); + gmx_within_tol(std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0)), + 0.5*(nbat->nbfp_comb[i*2]+nbat->nbfp_comb[j*2]), tol) && + gmx_within_tol(0.25*c6*c6/c12, std::sqrt(nbat->nbfp_comb[i*2+1]*nbat->nbfp_comb[j*2+1]), tol))); } else { @@ -720,8 +721,8 @@ void nbnxn_atomdata_init(FILE *fp, if (bSIMD) { - pack_x = max(NBNXN_CPU_CLUSTER_I_SIZE, - nbnxn_kernel_to_cluster_j_size(nb_kernel_type)); + pack_x = std::max(NBNXN_CPU_CLUSTER_I_SIZE, + nbnxn_kernel_to_cluster_j_size(nb_kernel_type)); switch (pack_x) { case 4: @@ -784,7 +785,7 @@ void nbnxn_atomdata_init(FILE *fp, nbat->nout = nout; snew(nbat->out, nbat->nout); nbat->nalloc = 0; - for (i = 0; i < nbat->nout; i++) + for (int i = 0; i < nbat->nout; i++) { nbnxn_atomdata_output_init(&nbat->out[i], nb_kernel_type, @@ -825,16 +826,14 @@ static void copy_lj_to_nbat_lj_comb_x4(const real *ljparam_type, const int *type, int na, real *ljparam_at) { - int is, k, i; - /* The LJ params follow the combination rule: * copy the params for the type array to the atom array. */ - for (is = 0; is < na; is += PACK_X4) + for (int is = 0; is < na; is += PACK_X4) { - for (k = 0; k < PACK_X4; k++) + for (int k = 0; k < PACK_X4; k++) { - i = is + k; + int i = is + k; ljparam_at[is*2 +k] = ljparam_type[type[i]*2 ]; ljparam_at[is*2+PACK_X4+k] = ljparam_type[type[i]*2+1]; } @@ -845,16 +844,14 @@ static void copy_lj_to_nbat_lj_comb_x8(const real *ljparam_type, const int *type, int na, real *ljparam_at) { - int is, k, i; - /* The LJ params follow the combination rule: * copy the params for the type array to the atom array. 
*/ - for (is = 0; is < na; is += PACK_X8) + for (int is = 0; is < na; is += PACK_X8) { - for (k = 0; k < PACK_X8; k++) + for (int k = 0; k < PACK_X8; k++) { - i = is + k; + int i = is + k; ljparam_at[is*2 +k] = ljparam_type[type[i]*2 ]; ljparam_at[is*2+PACK_X8+k] = ljparam_type[type[i]*2+1]; } @@ -867,18 +864,15 @@ static void nbnxn_atomdata_set_atomtypes(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const int *type) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; copy_int_to_nbat_int(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc, type, nbat->ntype-1, nbat->type+ash); @@ -891,20 +885,17 @@ static void nbnxn_atomdata_set_ljcombparams(nbnxn_atomdata_t *nbat, int ngrid, const nbnxn_search_t nbs) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - if (nbat->comb_rule != ljcrNONE) { - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; if (nbat->XFormat == nbatX4) { @@ -929,20 +920,19 @@ static void nbnxn_atomdata_set_charges(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const real *charge) { - int g, cxy, ncz, ash, na, na_round, i, j; + int i; real *q; - const nbnxn_grid_t *grid; - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (cxy = 0; cxy < grid->ncx*grid->ncy; cxy++) + for (int cxy = 0; cxy < grid->ncx*grid->ncy; cxy++) { - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; - na = grid->cxy_na[cxy]; - na_round = (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int na_round = (grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy])*grid->na_sc; if (nbat->XFormat == nbatXYZQ) { @@ -989,8 +979,7 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs) { real *q; - int stride_q, g, nsubc, c_offset, c, subc, i, ind; - const nbnxn_grid_t *grid; + int stride_q, nsubc; if (nbat->XFormat == nbatXYZQ) { @@ -1003,9 +992,9 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, stride_q = 1; } - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; if (grid->bSimple) { nsubc = 1; @@ -1015,20 +1004,20 @@ static void nbnxn_atomdata_mask_fep(nbnxn_atomdata_t *nbat, nsubc = GPU_NSUBCELL; } - c_offset = grid->cell0*grid->na_sc; + int c_offset = grid->cell0*grid->na_sc; /* Loop over all columns and copy and fill */ - for (c = 0; c < grid->nc*nsubc; c++) + for (int c = 0; c < grid->nc*nsubc; c++) { /* Does this cluster contain perturbed particles? 
*/ if (grid->fep[c] != 0) { - for (i = 0; i < grid->na_c; i++) + for (int i = 0; i < grid->na_c; i++) { /* Is this a perturbed particle? */ if (grid->fep[c] & (1 << i)) { - ind = c_offset + c*grid->na_c + i; + int ind = c_offset + c*grid->na_c + i; /* Set atom type and charge to non-interacting */ nbat->type[ind] = nbat->ntype - 1; q[ind*stride_q] = 0; @@ -1044,17 +1033,17 @@ static void copy_egp_to_nbat_egps(const int *a, int na, int na_round, int na_c, int bit_shift, const int *in, int *innb) { - int i, j, sa, at; + int i; int comb; - j = 0; + int j = 0; for (i = 0; i < na; i += na_c) { /* Store na_c energy group numbers into one int */ comb = 0; - for (sa = 0; sa < na_c; sa++) + for (int sa = 0; sa < na_c; sa++) { - at = a[i+sa]; + int at = a[i+sa]; if (at >= 0) { comb |= (GET_CGINFO_GID(in[at]) << (sa*bit_shift)); @@ -1075,23 +1064,20 @@ static void nbnxn_atomdata_set_energygroups(nbnxn_atomdata_t *nbat, const nbnxn_search_t nbs, const int *atinfo) { - int g, i, ncz, ash; - const nbnxn_grid_t *grid; - if (nbat->nenergrp == 1) { return; } - for (g = 0; g < ngrid; g++) + for (int g = 0; g < ngrid; g++) { - grid = &nbs->grid[g]; + const nbnxn_grid_t * grid = &nbs->grid[g]; /* Loop over all columns and copy and fill */ - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { - ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; - ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; + int ncz = grid->cxy_ind[i+1] - grid->cxy_ind[i]; + int ash = (grid->cell0 + grid->cxy_ind[i])*grid->na_sc; copy_egp_to_nbat_egps(nbs->a+ash, grid->cxy_na[i], ncz*grid->na_sc, nbat->na_c, nbat->neg_2log, @@ -1183,19 +1169,17 @@ void nbnxn_atomdata_copy_x_to_nbat_x(const nbnxn_search_t nbs, #pragma omp parallel for num_threads(nth) schedule(static) for (th = 0; th < nth; th++) { - int g; - - for (g = g0; g < g1; g++) + for (int g = g0; g < g1; g++) { const nbnxn_grid_t *grid; - int cxy0, cxy1, cxy; + int cxy0, cxy1; grid = &nbs->grid[g]; cxy0 = (grid->ncx*grid->ncy* th +nth-1)/nth; cxy1 = (grid->ncx*grid->ncy*(th+1)+nth-1)/nth; - for (cxy = cxy0; cxy < cxy1; cxy++) + for (int cxy = cxy0; cxy < cxy1; cxy++) { int na, ash, na_fill; @@ -1227,9 +1211,7 @@ static void nbnxn_atomdata_clear_reals(real * gmx_restrict dest, int i0, int i1) { - int i; - - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { dest[i] = 0; } @@ -1242,14 +1224,12 @@ nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, int nsrc, int i0, int i1) { - int i, s; - if (bDestSet) { /* The destination buffer contains data, add to it */ - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { dest[i] += src[s][i]; } @@ -1258,10 +1238,10 @@ nbnxn_atomdata_reduce_reals(real * gmx_restrict dest, else { /* The destination buffer is unitialized, set it first */ - for (i = i0; i < i1; i++) + for (int i = i0; i < i1; i++) { dest[i] = src[0][i]; - for (s = 1; s < nsrc; s++) + for (int s = 1; s < nsrc; s++) { dest[i] += src[s][i]; } @@ -1280,15 +1260,14 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, /* The SIMD width here is actually independent of that in the kernels, * but we use the same width for simplicity (usually optimal anyhow). 
*/ - int i, s; gmx_simd_real_t dest_SSE, src_SSE; if (bDestSet) { - for (i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) + for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) { dest_SSE = gmx_simd_load_r(dest+i); - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { src_SSE = gmx_simd_load_r(src[s]+i); dest_SSE = gmx_simd_add_r(dest_SSE, src_SSE); @@ -1298,10 +1277,10 @@ nbnxn_atomdata_reduce_reals_simd(real gmx_unused * gmx_restrict dest, } else { - for (i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) + for (int i = i0; i < i1; i += GMX_SIMD_REAL_WIDTH) { dest_SSE = gmx_simd_load_r(src[0]+i); - for (s = 1; s < nsrc; s++) + for (int s = 1; s < nsrc; s++) { src_SSE = gmx_simd_load_r(src[s]+i); dest_SSE = gmx_simd_add_r(dest_SSE, src_SSE); @@ -1321,7 +1300,6 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, int a0, int a1, rvec *f) { - int a, i, fa; const int *cell; const real *fnb; @@ -1336,9 +1314,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = cell[a]*nbat->fstride; + int i = cell[a]*nbat->fstride; f[a][XX] += fnb[i]; f[a][YY] += fnb[i+1]; @@ -1347,11 +1325,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = cell[a]*nbat->fstride; + int i = cell[a]*nbat->fstride; - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i]; f[a][YY] += out[fa].f[i+1]; @@ -1365,9 +1343,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X4_IND_A(cell[a]); + int i = X4_IND_A(cell[a]); f[a][XX] += fnb[i+XX*PACK_X4]; f[a][YY] += fnb[i+YY*PACK_X4]; @@ -1376,11 +1354,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X4_IND_A(cell[a]); + int i = X4_IND_A(cell[a]); - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i+XX*PACK_X4]; f[a][YY] += out[fa].f[i+YY*PACK_X4]; @@ -1394,9 +1372,9 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, { fnb = out[0].f; - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X8_IND_A(cell[a]); + int i = X8_IND_A(cell[a]); f[a][XX] += fnb[i+XX*PACK_X8]; f[a][YY] += fnb[i+YY*PACK_X8]; @@ -1405,11 +1383,11 @@ nbnxn_atomdata_add_nbat_f_to_f_part(const nbnxn_search_t nbs, } else { - for (a = a0; a < a1; a++) + for (int a = a0; a < a1; a++) { - i = X8_IND_A(cell[a]); + int i = X8_IND_A(cell[a]); - for (fa = 0; fa < nfa; fa++) + for (int fa = 0; fa < nfa; fa++) { f[a][XX] += out[fa].f[i+XX*PACK_X8]; f[a][YY] += out[fa].f[i+YY*PACK_X8]; @@ -1555,30 +1533,26 @@ static void nbnxn_atomdata_add_nbat_f_to_f_treereduce(const nbnxn_atomdata_t *nb static void nbnxn_atomdata_add_nbat_f_to_f_stdreduce(const nbnxn_atomdata_t *nbat, int nth) { - int th; #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) + for (int th = 0; th < nth; th++) { const nbnxn_buffer_flags_t *flags; - int b0, b1, b; - int i0, i1; int nfptr; real *fptr[NBNXN_BUFFERFLAG_MAX_THREADS]; - int out; flags = &nbat->buffer_flags; /* Calculate the cell-block range for our thread */ - b0 = (flags->nflag* th )/nth; - b1 = (flags->nflag*(th+1))/nth; + int b0 = (flags->nflag* th )/nth; + int b1 = (flags->nflag*(th+1))/nth; - for (b = b0; b < b1; b++) + for (int b = b0; b < b1; b++) { - i0 = b 
*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; - i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + int i0 = b *NBNXN_BUFFERFLAG_SIZE*nbat->fstride; + int i1 = (b+1)*NBNXN_BUFFERFLAG_SIZE*nbat->fstride; nfptr = 0; - for (out = 1; out < nbat->nout; out++) + for (int out = 1; out < nbat->nout; out++) { if (bitmask_is_set(flags->flag[b], out)) { @@ -1613,7 +1587,6 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, rvec *f) { int a0 = 0, na = 0; - int nth, th; nbs_cycle_start(&nbs->cc[enbsCCreducef]); @@ -1633,7 +1606,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, break; } - nth = gmx_omp_nthreads_get(emntNonbonded); + int nth = gmx_omp_nthreads_get(emntNonbonded); if (nbat->nout > 1) { @@ -1655,7 +1628,7 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, } } #pragma omp parallel for num_threads(nth) schedule(static) - for (th = 0; th < nth; th++) + for (int th = 0; th < nth; th++) { nbnxn_atomdata_add_nbat_f_to_f_part(nbs, nbat, nbat->out, @@ -1672,17 +1645,13 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, rvec *fshift) { - const nbnxn_atomdata_output_t *out; - int th; - int s; - rvec sum; - - out = nbat->out; + const nbnxn_atomdata_output_t * out = nbat->out; - for (s = 0; s < SHIFTS; s++) + for (int s = 0; s < SHIFTS; s++) { + rvec sum; clear_rvec(sum); - for (th = 0; th < nbat->nout; th++) + for (int th = 0; th < nbat->nout; th++) { sum[XX] += out[th].fshift[s*DIM+XX]; sum[YY] += out[th].fshift[s*DIM+YY]; diff --git a/src/gromacs/mdlib/nbnxn_atomdata.h b/src/gromacs/mdlib/nbnxn_atomdata.h index f3f50c6634..1b4e404029 100644 --- a/src/gromacs/mdlib/nbnxn_atomdata.h +++ b/src/gromacs/mdlib/nbnxn_atomdata.h @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -39,10 +39,6 @@ #include "gromacs/legacyheaders/typedefs.h" #include "gromacs/mdlib/nbnxn_pairlist.h" -#ifdef __cplusplus -extern "C" { -#endif - /* Default nbnxn allocation routine, allocates 32 byte aligned, * which works for plain C and aligned SSE and AVX loads/stores. 
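A note on the cast idiom that recurs in the converted nbnxn_atomdata.cpp code above: GROMACS builds define real as either float or double, and the std::pow overload is selected by argument type, so both arguments are cast to real to keep single-precision builds from silently promoting to double. Below is a minimal standalone sketch of the idiom; it is not code from this patch, and the typedef and function name are illustrative assumptions only.

    #include <cmath>

    typedef float real; /* assumption: a single-precision build */

    /* Hypothetical helper: with both arguments forced to type real, the
     * float overload of std::pow is chosen and the result stays in single
     * precision. Without the casts, the double literal 1.0/6.0 would
     * promote the whole call to double. */
    static real sixth_root(real c12, real c6)
    {
        return std::pow(static_cast<real>(c12/c6), static_cast<real>(1.0/6.0));
    }

The std::sqrt calls need no such treatment, because their single argument already has type real.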
@@ -119,8 +115,4 @@ void nbnxn_atomdata_add_nbat_f_to_f(const nbnxn_search_t nbs, void nbnxn_atomdata_add_nbat_fshift_to_fshift(const nbnxn_atomdata_t *nbat, rvec *fshift); -#ifdef __cplusplus -} -#endif - #endif diff --git a/src/gromacs/mdlib/nbnxn_grid.c b/src/gromacs/mdlib/nbnxn_grid.cpp similarity index 85% rename from src/gromacs/mdlib/nbnxn_grid.c rename to src/gromacs/mdlib/nbnxn_grid.cpp index f14024fb3d..b4e52f8c16 100644 --- a/src/gromacs/mdlib/nbnxn_grid.c +++ b/src/gromacs/mdlib/nbnxn_grid.cpp @@ -37,12 +37,16 @@ #include "nbnxn_grid.h" +#include "config.h" + #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/legacyheaders/types/commrec.h" #include "gromacs/math/utilities.h" #include "gromacs/math/vec.h" @@ -104,8 +108,8 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, { rvec size; int na_c; - real adens, tlen, tlen_x, tlen_y, nc_max; - int t; + int nc_max; + real tlen, tlen_x, tlen_y; rvec_sub(corner1, corner0, size); @@ -119,17 +123,17 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, /* To minimize the zero interactions, we should make * the largest of the i/j cell cubic. */ - na_c = max(grid->na_c, grid->na_cj); + na_c = std::max(grid->na_c, grid->na_cj); /* Approximately cubic cells */ - tlen = pow(na_c/atom_density, 1.0/3.0); + tlen = std::pow(static_cast<real>(na_c/atom_density), static_cast<real>(1.0/3.0)); tlen_x = tlen; tlen_y = tlen; } else { /* Approximately cubic sub cells */ - tlen = pow(grid->na_c/atom_density, 1.0/3.0); + tlen = std::pow(static_cast<real>(grid->na_c/atom_density), static_cast<real>(1.0/3.0)); tlen_x = tlen*GPU_NSUBCELL_X; tlen_y = tlen*GPU_NSUBCELL_Y; } /* We round ncx and ncy down, because we get less cell pairs * in the nbsist when the fixed cell dimensions (x,y) are * larger than the variable one (z) than the other way around. */ - grid->ncx = max(1, (int)(size[XX]/tlen_x)); - grid->ncy = max(1, (int)(size[YY]/tlen_y)); + grid->ncx = std::max(1, static_cast<int>(size[XX]/tlen_x)); + grid->ncy = std::max(1, static_cast<int>(size[YY]/tlen_y)); } else { @@ -170,7 +174,7 @@ static int set_grid_size_xy(const nbnxn_search_t nbs, srenew(grid->cxy_na, grid->cxy_nalloc); srenew(grid->cxy_ind, grid->cxy_nalloc+1); } - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { if (grid->ncx*grid->ncy+1 > nbs->work[t].cxy_na_nalloc) { @@ -273,10 +277,6 @@ static void sort_atoms(int dim, gmx_bool Backwards, real h0, real invh, int n_per_h, int *sort) { - int nsort, i, c; - int zi, zim, zi_min, zi_max; - int cp, tmp; - if (n <= 1) { /* Nothing to do */ @@ -296,20 +296,20 @@ static void sort_atoms(int dim, gmx_bool Backwards, /* Set nsort to the maximum possible number of holes used. * In worst case all n elements end up in the last bin. */ - nsort = n_per_h*SORT_GRID_OVERSIZE + n; + int nsort = n_per_h*SORT_GRID_OVERSIZE + n; /* Determine the index range used, so we can limit it for the second pass */ - zi_min = INT_MAX; - zi_max = -1; + int zi_min = INT_MAX; + int zi_max = -1; /* Sort the particles using a simple index sort */ - for (i = 0; i < n; i++) + for (int i = 0; i < n; i++) { /* The cast takes care of float-point rounding effects below zero. * This code assumes particles are less than 1/SORT_GRID_OVERSIZE * times the box height out of the box. */ - zi = (int)((x[a[i]][dim] - h0)*invh); + int zi = static_cast<int>((x[a[i]][dim] - h0)*invh); #ifndef NDEBUG /* As we can have rounding effect, we use > iso >= here */ @@ -337,8 +337,8 @@ static void sort_atoms(int dim, gmx_bool Backwards, if (sort[zi] < 0) { sort[zi] = a[i]; - zi_min = min(zi_min, zi); - zi_max = max(zi_max, zi); + zi_min = std::min(zi_min, zi); + zi_max = std::max(zi_max, zi); } else { @@ -358,27 +358,27 @@ static void sort_atoms(int dim, gmx_bool Backwards, if (sort[zi] >= 0) { /* Shift all elements by one slot until we find an empty slot */ - cp = sort[zi]; - zim = zi + 1; + int cp = sort[zi]; + int zim = zi + 1; while (sort[zim] >= 0) { - tmp = sort[zim]; + int tmp = sort[zim]; sort[zim] = cp; cp = tmp; zim++; } sort[zim] = cp; - zi_max = max(zi_max, zim); + zi_max = std::max(zi_max, zim); } sort[zi] = a[i]; - zi_max = max(zi_max, zi); + zi_max = std::max(zi_max, zi); } } - c = 0; + int c = 0; if (!Backwards) { - for (zi = 0; zi < nsort; zi++) + for (int zi = 0; zi < nsort; zi++) { if (sort[zi] >= 0) { @@ -389,7 +389,7 @@ static void sort_atoms(int dim, gmx_bool Backwards, } else { - for (zi = zi_max; zi >= zi_min; zi--) + for (int zi = zi_max; zi >= zi_min; zi--) { if (sort[zi] >= 0) { @@ -415,7 +415,7 @@ static void sort_atoms(int dim, gmx_bool Backwards, /* Coordinate order x,y,z, bb order xyz0 */ static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) { - int i, j; + int i; real xl, xh, yl, yh, zl, zh; i = 0; xl = x[i+XX]; xh = x[i+XX]; yl = x[i+YY]; yh = x[i+YY]; zl = x[i+ZZ]; zh = x[i+ZZ]; i += stride; - for (j = 1; j < na; j++) - { - xl = min(xl, x[i+XX]); - xh = max(xh, x[i+XX]); - yl = min(yl, x[i+YY]); - yh = max(yh, x[i+YY]); - zl = min(zl, x[i+ZZ]); - zh = max(zh, x[i+ZZ]); + for (int j = 1; j < na; j++) + { + xl = std::min(xl, x[i+XX]); + xh = std::max(xh, x[i+XX]); + yl = std::min(yl, x[i+YY]); + yh = std::max(yh, x[i+YY]); + zl = std::min(zl, x[i+ZZ]); + zh = std::max(zh, x[i+ZZ]); i += stride; } /* Note: possible double to float conversion here */ @@ -448,7 +448,6 @@ static void calc_bounding_box(int na, int stride, const real *x, nbnxn_bb_t *bb) /* Packed coordinates, bb order xyz0 */ static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) { - int j; real xl, xh, yl, yh, zl, zh; xl = x[XX*PACK_X4]; xh = x[XX*PACK_X4]; yl = x[YY*PACK_X4]; yh = x[YY*PACK_X4]; zl = x[ZZ*PACK_X4]; zh = x[ZZ*PACK_X4]; - for (j = 1; j < na; j++) + for (int j = 1; j < na; j++) { - xl = min(xl, x[j+XX*PACK_X4]); - xh = max(xh, x[j+XX*PACK_X4]); - yl = min(yl, x[j+YY*PACK_X4]); - yh = max(yh, x[j+YY*PACK_X4]); - zl = min(zl, x[j+ZZ*PACK_X4]); - zh = max(zh, x[j+ZZ*PACK_X4]); + xl = std::min(xl, x[j+XX*PACK_X4]); + xh = std::max(xh, x[j+XX*PACK_X4]); + yl = std::min(yl, x[j+YY*PACK_X4]); + yh = std::max(yh, x[j+YY*PACK_X4]); + zl = std::min(zl, x[j+ZZ*PACK_X4]); + zh = std::max(zh, x[j+ZZ*PACK_X4]); } /* Note: possible double to float conversion here */ bb->lower[BB_X] = R2F_D(xl); @@ -478,7 +477,6 @@ static void calc_bounding_box_x_x4(int na, const real *x, nbnxn_bb_t *bb) /* Packed coordinates, bb order xyz0 */ static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) { - int j; real xl, xh, yl, yh, zl, zh; xl = x[XX*PACK_X8]; xh = x[XX*PACK_X8]; yl = x[YY*PACK_X8]; yh = x[YY*PACK_X8]; zl = x[ZZ*PACK_X8]; zh = x[ZZ*PACK_X8]; - for (j = 1; j < na; j++) + for (int 
j = 1; j < na; j++) { - xl = min(xl, x[j+XX*PACK_X8]); - xh = max(xh, x[j+XX*PACK_X8]); - yl = min(yl, x[j+YY*PACK_X8]); - yh = max(yh, x[j+YY*PACK_X8]); - zl = min(zl, x[j+ZZ*PACK_X8]); - zh = max(zh, x[j+ZZ*PACK_X8]); + xl = std::min(xl, x[j+XX*PACK_X8]); + xh = std::max(xh, x[j+XX*PACK_X8]); + yl = std::min(yl, x[j+YY*PACK_X8]); + yh = std::max(yh, x[j+YY*PACK_X8]); + zl = std::min(zl, x[j+ZZ*PACK_X8]); + zh = std::max(zh, x[j+ZZ*PACK_X8]); } /* Note: possible double to float conversion here */ bb->lower[BB_X] = R2F_D(xl); @@ -509,11 +507,11 @@ static void calc_bounding_box_x_x8(int na, const real *x, nbnxn_bb_t *bb) static void calc_bounding_box_x_x4_halves(int na, const real *x, nbnxn_bb_t *bb, nbnxn_bb_t *bbj) { - calc_bounding_box_x_x4(min(na, 2), x, bbj); + calc_bounding_box_x_x4(std::min(na, 2), x, bbj); if (na > 2) { - calc_bounding_box_x_x4(min(na-2, 2), x+(PACK_X4>>1), bbj+1); + calc_bounding_box_x_x4(std::min(na-2, 2), x+(PACK_X4>>1), bbj+1); } else { @@ -541,8 +539,8 @@ static void calc_bounding_box_x_x4_halves(int na, const real *x, for (i = 0; i < NNBSBB_C; i++) { - bb->lower[i] = min(bbj[0].lower[i], bbj[1].lower[i]); - bb->upper[i] = max(bbj[0].upper[i], bbj[1].upper[i]); + bb->lower[i] = std::min(bbj[0].lower[i], bbj[1].lower[i]); + bb->upper[i] = std::max(bbj[0].upper[i], bbj[1].upper[i]); } } #endif @@ -553,7 +551,7 @@ static void calc_bounding_box_x_x4_halves(int na, const real *x, /* Coordinate order xyz, bb order xxxxyyyyzzzz */ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) { - int i, j; + int i; real xl, xh, yl, yh, zl, zh; i = 0; @@ -564,14 +562,14 @@ static void calc_bounding_box_xxxx(int na, int stride, const real *x, float *bb) zl = x[i+ZZ]; zh = x[i+ZZ]; i += stride; - for (j = 1; j < na; j++) - { - xl = min(xl, x[i+XX]); - xh = max(xh, x[i+XX]); - yl = min(yl, x[i+YY]); - yh = max(yh, x[i+YY]); - zl = min(zl, x[i+ZZ]); - zh = max(zh, x[i+ZZ]); + for (int j = 1; j < na; j++) + { + xl = std::min(xl, x[i+XX]); + xh = std::max(xh, x[i+XX]); + yl = std::min(yl, x[i+YY]); + yh = std::max(yh, x[i+YY]); + zl = std::min(zl, x[i+ZZ]); + zh = std::max(zh, x[i+ZZ]); i += stride; } /* Note: possible double to float conversion here */ @@ -593,12 +591,10 @@ static void calc_bounding_box_simd4(int na, const float *x, nbnxn_bb_t *bb) gmx_simd4_float_t bb_0_S, bb_1_S; gmx_simd4_float_t x_S; - int i; - bb_0_S = gmx_simd4_load_f(x); bb_1_S = bb_0_S; - for (i = 1; i < na; i++) + for (int i = 1; i < na; i++) { x_S = gmx_simd4_load_f(x+i*NNBSBB_C); bb_0_S = gmx_simd4_min_f(bb_0_S, x_S); @@ -630,14 +626,13 @@ static void calc_bounding_box_xxxx_simd4(int na, const float *x, /* Combines pairs of consecutive bounding boxes */ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) { - int i, j, sc2, nc2, c2; - - for (i = 0; i < grid->ncx*grid->ncy; i++) + for (int i = 0; i < grid->ncx*grid->ncy; i++) { /* Starting bb in a column is expected to be 2-aligned */ - sc2 = grid->cxy_ind[i]>>1; + int sc2 = grid->cxy_ind[i]>>1; /* For odd numbers skip the last bb here */ - nc2 = (grid->cxy_na[i]+3)>>(2+1); + int nc2 = (grid->cxy_na[i]+3)>>(2+1); + int c2; for (c2 = sc2; c2 < sc2+nc2; c2++) { #ifdef NBNXN_SEARCH_BB_SIMD4 @@ -650,19 +645,19 @@ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) gmx_simd4_store_f(&grid->bbj[c2].lower[0], min_S); gmx_simd4_store_f(&grid->bbj[c2].upper[0], max_S); #else - for (j = 0; j < NNBSBB_C; j++) + for (int j = 0; j < NNBSBB_C; j++) { - grid->bbj[c2].lower[j] = 
min(bb[c2*2+0].lower[j], - bb[c2*2+1].lower[j]); - grid->bbj[c2].upper[j] = max(bb[c2*2+0].upper[j], - bb[c2*2+1].upper[j]); + grid->bbj[c2].lower[j] = std::min(bb[c2*2+0].lower[j], + bb[c2*2+1].lower[j]); + grid->bbj[c2].upper[j] = std::max(bb[c2*2+0].upper[j], + bb[c2*2+1].upper[j]); } #endif } if (((grid->cxy_na[i]+3)>>2) & 1) { /* The bb count in this column is odd: duplicate the last bb */ - for (j = 0; j < NNBSBB_C; j++) + for (int j = 0; j < NNBSBB_C; j++) { grid->bbj[c2].lower[j] = bb[c2*2].lower[j]; grid->bbj[c2].upper[j] = bb[c2*2].upper[j]; @@ -676,13 +671,12 @@ static void combine_bounding_box_pairs(nbnxn_grid_t *grid, const nbnxn_bb_t *bb) static void print_bbsizes_simple(FILE *fp, const nbnxn_grid_t *grid) { - int c, d; dvec ba; clear_dvec(ba); - for (c = 0; c < grid->nc; c++) + for (int c = 0; c < grid->nc; c++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { ba[d] += grid->bb[c].upper[d] - grid->bb[c].lower[d]; } @@ -705,22 +699,20 @@ static void print_bbsizes_simple(FILE *fp, static void print_bbsizes_supersub(FILE *fp, const nbnxn_grid_t *grid) { - int ns, c, s; + int ns; dvec ba; clear_dvec(ba); ns = 0; - for (c = 0; c < grid->nc; c++) + for (int c = 0; c < grid->nc; c++) { #ifdef NBNXN_BBXXXX - for (s = 0; s < grid->nsubc[c]; s += STRIDE_PBB) + for (int s = 0; s < grid->nsubc[c]; s += STRIDE_PBB) { - int cs_w, i, d; - - cs_w = (c*GPU_NSUBCELL + s)/STRIDE_PBB; - for (i = 0; i < STRIDE_PBB; i++) + int cs_w = (c*GPU_NSUBCELL + s)/STRIDE_PBB; + for (int i = 0; i < STRIDE_PBB; i++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { ba[d] += grid->pbb[cs_w*NNBSBB_XXXX+(DIM+d)*STRIDE_PBB+i] - @@ -729,12 +721,10 @@ static void print_bbsizes_supersub(FILE *fp, } } #else - for (s = 0; s < grid->nsubc[c]; s++) + for (int s = 0; s < grid->nsubc[c]; s++) { - int cs, d; - - cs = c*GPU_NSUBCELL + s; - for (d = 0; d < DIM; d++) + int cs = c*GPU_NSUBCELL + s; + for (int d = 0; d < DIM; d++) { ba[d] += grid->bb[cs].upper[d] - grid->bb[cs].lower[d]; } @@ -764,22 +754,21 @@ static void sort_cluster_on_flag(int na_c, int *order, int *flags) { - int subc, s, a, n1, n2, a_lj_max, i, j; + int subc; int sort1[NBNXN_NA_SC_MAX/GPU_NSUBCELL]; int sort2[NBNXN_NA_SC_MAX/GPU_NSUBCELL]; - gmx_bool haveQ, bFEP; *flags = 0; subc = 0; - for (s = a0; s < a1; s += na_c) + for (int s = a0; s < a1; s += na_c) { /* Make lists for this (sub-)cell on atoms with and without LJ */ - n1 = 0; - n2 = 0; - haveQ = FALSE; - a_lj_max = -1; - for (a = s; a < min(s+na_c, a1); a++) + int n1 = 0; + int n2 = 0; + gmx_bool haveQ = FALSE; + int a_lj_max = -1; + for (int a = s; a < std::min(s+na_c, a1); a++) { haveQ = haveQ || GET_CGINFO_HAS_Q(atinfo[order[a]]); @@ -801,17 +790,17 @@ static void sort_cluster_on_flag(int na_c, if (2*n1 <= na_c) { - /* Only sort when strictly necessary. + /* Only sort when strictly necessary. * Ordering particles can lead to less accurate summation * due to rounding, both for LJ and Coulomb interactions. */ if (2*(a_lj_max - s) >= na_c) { - for (i = 0; i < n1; i++) + for (int i = 0; i < n1; i++) { order[a0+i] = sort1[i]; } - for (j = 0; j < n2; j++) + for (int j = 0; j < n2; j++) { order[a0+n1+j] = sort2[j]; } @@ -862,12 +851,12 @@ static void fill_cell(const nbnxn_search_t nbs, if (nbs->bFEP) { /* Set the fep flag for perturbed atoms in this (sub-)cell */ - int c, at; + int c; /* The grid-local cluster/(sub-)cell index */ c = (a0 >> grid->na_c_2log) - grid->cell0*(grid->bSimple ? 
1 : GPU_NSUBCELL); grid->fep[c] = 0; - for (at = a0; at < a1; at++) + for (int at = a0; at < a1; at++) { if (nbs->a[at] >= 0 && GET_CGINFO_FEP(atinfo[nbs->a[at]])) { @@ -980,10 +969,7 @@ static void sort_columns_simple(const nbnxn_search_t nbs, int cxy_start, int cxy_end, int *sort_work) { - int cxy; - int cx, cy, cz, ncz, cfilled, c; - int na, ash, ind, a; - int na_c, ash_c; + int cfilled, c; if (debug) { @@ -992,14 +978,14 @@ static void sort_columns_simple(const nbnxn_search_t nbs, } /* Sort the atoms within each x,y column in 3 dimensions */ - for (cxy = cxy_start; cxy < cxy_end; cxy++) + for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - cx = cxy/grid->ncy; - cy = cxy - cx*grid->ncy; + int cx = cxy/grid->ncy; + int cy = cxy - cx*grid->ncy; - na = grid->cxy_na[cxy]; - ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, @@ -1010,12 +996,12 @@ static void sort_columns_simple(const nbnxn_search_t nbs, /* Fill the ncz cells in this column */ cfilled = grid->cxy_ind[cxy]; - for (cz = 0; cz < ncz; cz++) + for (int cz = 0; cz < ncz; cz++) { c = grid->cxy_ind[cxy] + cz; - ash_c = ash + cz*grid->na_sc; - na_c = min(grid->na_sc, na-(ash_c-ash)); + int ash_c = ash + cz*grid->na_sc; + int na_c = std::min(grid->na_sc, na-(ash_c-ash)); fill_cell(nbs, grid, nbat, ash_c, ash_c+na_c, atinfo, x, @@ -1037,7 +1023,7 @@ static void sort_columns_simple(const nbnxn_search_t nbs, } /* Set the unused atom indices to -1 */ - for (ind = na; ind < ncz*grid->na_sc; ind++) + for (int ind = na; ind < ncz*grid->na_sc; ind++) { nbs->a[ash+ind] = -1; } @@ -1055,16 +1041,9 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, int cxy_start, int cxy_end, int *sort_work) { - int cxy; - int cx, cy, cz = -1, c = -1, ncz; - int na, ash, na_c, ind, a; - int subdiv_z, sub_z, na_z, ash_z; - int subdiv_y, sub_y, na_y, ash_y; - int subdiv_x, sub_x, na_x, ash_x; - nbnxn_bb_t bb_work_array[2], *bb_work_aligned; - bb_work_aligned = (nbnxn_bb_t *)(((size_t)(bb_work_array+1)) & (~((size_t)15))); + bb_work_aligned = reinterpret_cast<nbnxn_bb_t *>((reinterpret_cast<size_t>(bb_work_array+1)) & (~(static_cast<size_t>(15)))); if (debug) { fprintf(debug, "cell0 %d sorting columns %d - %d, atoms %d - %d\n", grid->cell0, cxy_start, cxy_end, a0, a1); } - subdiv_x = grid->na_c; - subdiv_y = GPU_NSUBCELL_X*subdiv_x; - subdiv_z = GPU_NSUBCELL_Y*subdiv_y; + int subdiv_x = grid->na_c; + int subdiv_y = GPU_NSUBCELL_X*subdiv_x; + int subdiv_z = GPU_NSUBCELL_Y*subdiv_y; /* Sort the atoms within each x,y column in 3 dimensions */ - for (cxy = cxy_start; cxy < cxy_end; cxy++) + for (int cxy = cxy_start; cxy < cxy_end; cxy++) { - cx = cxy/grid->ncy; - cy = cxy - cx*grid->ncy; + int cx = cxy/grid->ncy; + int cy = cxy - cx*grid->ncy; - na = grid->cxy_na[cxy]; - ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; - ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; + int na = grid->cxy_na[cxy]; + int ncz = grid->cxy_ind[cxy+1] - grid->cxy_ind[cxy]; + int ash = (grid->cell0 + grid->cxy_ind[cxy])*grid->na_sc; /* Sort the atoms within each x,y column on z coordinate */ sort_atoms(ZZ, FALSE, dd_zone, @@ -1094,22 +1073,22 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, /* This loop goes over the supercells and subcells along z at once */ - for (sub_z = 0; sub_z < ncz*GPU_NSUBCELL_Z; sub_z++) + for (int sub_z = 0; sub_z < ncz*GPU_NSUBCELL_Z; sub_z++) { - ash_z = ash + sub_z*subdiv_z; - na_z = min(subdiv_z, na-(ash_z-ash)); - + int ash_z = ash + sub_z*subdiv_z; + int na_z = std::min(subdiv_z, na-(ash_z-ash)); + int cz = -1; /* We have already sorted on z */ if (sub_z % GPU_NSUBCELL_Z == 0) { cz = sub_z/GPU_NSUBCELL_Z; - c = grid->cxy_ind[cxy] + cz; + int c = grid->cxy_ind[cxy] + cz; /* The number of atoms in this supercell */ - na_c = min(grid->na_sc, na-(ash_z-ash)); + int na_c = std::min(grid->na_sc, na-(ash_z-ash)); - grid->nsubc[c] = min(GPU_NSUBCELL, (na_c+grid->na_c-1)/grid->na_c); + grid->nsubc[c] = std::min(GPU_NSUBCELL, (na_c+grid->na_c-1)/grid->na_c); /* Store the z-boundaries of the super cell */ grid->bbcz[c*NNBSBB_D ] = x[nbs->a[ash_z]][ZZ]; @@ -1125,10 +1104,10 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, sort_work); #endif - for (sub_y = 0; sub_y < GPU_NSUBCELL_Y; sub_y++) + for (int sub_y = 0; sub_y < GPU_NSUBCELL_Y; sub_y++) { - ash_y = ash_z + sub_y*subdiv_y; - na_y = min(subdiv_y, na-(ash_y-ash)); + int ash_y = ash_z + sub_y*subdiv_y; + int na_y = std::min(subdiv_y, na-(ash_y-ash)); #if GPU_NSUBCELL_X > 1 /* Sort the atoms along x */ sort_atoms(XX, ((cz*GPU_NSUBCELL_Y + sub_y) & 1), dd_zone, @@ -1139,10 +1118,10 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, sort_work); #endif - for (sub_x = 0; sub_x < GPU_NSUBCELL_X; sub_x++) + for (int sub_x = 0; sub_x < GPU_NSUBCELL_X; sub_x++) { - ash_x = ash_y + sub_x*subdiv_x; - na_x = min(subdiv_x, na-(ash_x-ash)); + int ash_x = ash_y + sub_x*subdiv_x; + int na_x = std::min(subdiv_x, na-(ash_x-ash)); fill_cell(nbs, grid, nbat, ash_x, ash_x+na_x, atinfo, x, @@ -1155,7 +1134,7 @@ static void sort_columns_supersub(const nbnxn_search_t nbs, } /* Set the unused atom indices to -1 */ - for (ind = na; ind < ncz*grid->na_sc; ind++) + for (int ind = na; ind < ncz*grid->na_sc; ind++) { nbs->a[ash+ind] = -1; } @@ -1171,21 +1150,18 @@ static void calc_column_indices(nbnxn_grid_t *grid, int *cell, int *cxy_na) { - int n0, n1, i; - int cx, cy; - /* We add one extra cell for particles which moved during DD */ - for (i = 0; i < grid->ncx*grid->ncy+1; i++) + for (int i = 0; i < grid->ncx*grid->ncy+1; i++) { cxy_na[i] = 0; } - n0 = a0 + (int)((thread+0)*(a1 - a0))/nthread; - n1 = a0 + (int)((thread+1)*(a1 - a0))/nthread; + int n0 = a0 + static_cast<int>((thread+0)*(a1 - a0))/nthread; + int n1 = a0 + static_cast<int>((thread+1)*(a1 - a0))/nthread; if (dd_zone == 0) { /* Home zone */ - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { if (move == NULL || move[i] >= 0) { /* We need to be careful with rounding, * The int cast takes care of the lower bound, * we will explicitly take care of the upper bound. */ - cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx); - cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy); + int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->inv_sx); + int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->inv_sy); #ifndef NDEBUG if (cx < 0 || cx > grid->ncx || cy < 0 || cy > grid->ncy) { gmx_fatal(FARGS, "grid cell cx %d cy %d out of range (max %d %d)\n" "atom %f %f %f, grid->c0 %f %f", cx, cy, grid->ncx, grid->ncy, x[i][XX], x[i][YY], x[i][ZZ], grid->c0[XX], grid->c0[YY]); } #endif /* Take care of potential rouding issues */ cx = std::min(cx, grid->ncx - 1); cy = std::min(cy, grid->ncy - 1); /* For the moment cell will contain only the, grid local, * x and y indices, not z. 
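Both branches of calc_column_indices() above use the same mapping: scale the coordinate relative to the grid corner, truncate to an integer column index, then clamp, because rounding (or, for non-home zones, communicated particles) can land an atom marginally outside the grid. Below is a standalone sketch of that mapping with hypothetical names; the home-zone branch of the real code clamps only the upper bound and checks the lower bound in debug builds, while the non-home-zone branch clamps both, as done here.

    #include <algorithm>

    /* Illustrative only: map a coordinate to a grid column index. The int
     * cast truncates towards zero, which handles the lower edge; the
     * clamps guard the upper edge and out-of-range particles. */
    static int column_index(float coord, float origin, float inv_cell_size,
                            int ncells)
    {
        int c = static_cast<int>((coord - origin)*inv_cell_size);
        c = std::max(c, 0);
        c = std::min(c, ncells - 1);
        return c;
    }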
@@ -1231,10 +1207,10 @@ static void calc_column_indices(nbnxn_grid_t *grid, else { /* Non-home zone */ - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { - cx = (int)((x[i][XX] - grid->c0[XX])*grid->inv_sx); - cy = (int)((x[i][YY] - grid->c0[YY])*grid->inv_sy); + int cx = static_cast<int>((x[i][XX] - grid->c0[XX])*grid->inv_sx); + int cy = static_cast<int>((x[i][YY] - grid->c0[YY])*grid->inv_sy); /* For non-home zones there could be particles outside * the non-bonded cut-off range, which have been communicated * matter where these end up on the grid. For performance * we put them in an extra row at the border. */ - cx = max(cx, 0); - cx = min(cx, grid->ncx - 1); - cy = max(cy, 0); - cy = min(cy, grid->ncy - 1); + cx = std::max(cx, 0); + cx = std::min(cx, grid->ncx - 1); + cy = std::max(cy, 0); + cy = std::min(cy, grid->ncy - 1); /* For the moment cell will contain only the, grid local, * x and y indices, not z. @@ -1267,15 +1243,15 @@ static void calc_cell_indices(const nbnxn_search_t nbs, const int *move, nbnxn_atomdata_t *nbat) { - int n0, n1, i; + int n0, n1; int cx, cy, cxy, ncz_max, ncz; - int nthread, thread; - int *cxy_na, cxy_na_i; + int nthread; + int cxy_na_i; nthread = gmx_omp_nthreads_get(emntPairsearch); #pragma omp parallel for num_threads(nthread) schedule(static) - for (thread = 0; thread < nthread; thread++) + for (int thread = 0; thread < nthread; thread++) { calc_column_indices(grid, a0, a1, x, dd_zone, move, thread, nthread, nbs->cell, nbs->work[thread].cxy_na); @@ -1285,7 +1261,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, ncz_max = 0; ncz = 0; grid->cxy_ind[0] = 0; - for (i = 0; i < grid->ncx*grid->ncy+1; i++) + for (int i = 0; i < grid->ncx*grid->ncy+1; i++) { /* We set ncz_max at the beginning of the loop iso at the end * to skip i=grid->ncx*grid->ncy which are moved particles * that do not need to be ordered on the grid. */ if (ncz > ncz_max) { ncz_max = ncz; } cxy_na_i = nbs->work[0].cxy_na[i]; - for (thread = 1; thread < nthread; thread++) + for (int thread = 1; thread < nthread; thread++) { cxy_na_i += nbs->work[thread].cxy_na[i]; } @@ -1322,7 +1298,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, ncz_max); if (gmx_debug_at) { int i = 0; for (cy = 0; cy < grid->ncy; cy++) { for (cx = 0; cx < grid->ncx; cx++) { @@ -1338,14 +1314,14 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Make sure the work array for sorting is large enough */ if (ncz_max*grid->na_sc*SGSF > nbs->work[0].sort_work_nalloc) { - for (thread = 0; thread < nbs->nthread_max; thread++) + for (int thread = 0; thread < nbs->nthread_max; thread++) { nbs->work[thread].sort_work_nalloc = over_alloc_large(ncz_max*grid->na_sc*SGSF); srenew(nbs->work[thread].sort_work, nbs->work[thread].sort_work_nalloc); /* When not in use, all elements should be -1 */ - for (i = 0; i < nbs->work[thread].sort_work_nalloc; i++) + for (int i = 0; i < nbs->work[thread].sort_work_nalloc; i++) { nbs->work[thread].sort_work[i] = -1; } @@ -1355,7 +1331,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Now we know the dimensions we can fill the grid. * This is the first, unsorted fill. We sort the columns after this. 
*/ - for (i = a0; i < a1; i++) + for (int i = a0; i < a1; i++) { /* At this point nbs->cell contains the local grid x,y indices */ cxy = nbs->cell[i]; @@ -1369,7 +1345,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, n1 = grid->nc*grid->na_sc+grid->cxy_na[grid->ncx*grid->ncy]; if (dd_zone == 0) { - for (i = n0; i < n1; i++) + for (int i = n0; i < n1; i++) { nbs->cell[nbs->a[i]] = i; } @@ -1378,7 +1354,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, /* Sort the super-cell columns along z into the sub-cells. */ #pragma omp parallel for num_threads(nthread) schedule(static) - for (thread = 0; thread < nthread; thread++) + for (int thread = 0; thread < nthread; thread++) { if (grid->bSimple) { @@ -1404,7 +1380,7 @@ static void calc_cell_indices(const nbnxn_search_t nbs, if (!grid->bSimple) { grid->nsubc_tot = 0; - for (i = 0; i < grid->nc; i++) + for (int i = 0; i < grid->nc; i++) { grid->nsubc_tot += grid->nsubc[i]; } @@ -1429,15 +1405,13 @@ static void calc_cell_indices(const nbnxn_search_t nbs, static void init_buffer_flags(nbnxn_buffer_flags_t *flags, int natoms) { - int b; - flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE; if (flags->nflag > flags->flag_nalloc) { flags->flag_nalloc = over_alloc_large(flags->nflag); srenew(flags->flag, flags->flag_nalloc); } - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { bitmask_clear(&(flags->flag[b])); } @@ -1520,7 +1494,7 @@ void nbnxn_put_on_grid(nbnxn_search_t nbs, } else { - nbs->natoms_nonlocal = max(nbs->natoms_nonlocal, a1); + nbs->natoms_nonlocal = std::max(nbs->natoms_nonlocal, a1); } /* We always use the home zone (grid[0]) for setting the cell size, @@ -1571,12 +1545,11 @@ void nbnxn_put_on_grid_nonlocal(nbnxn_search_t nbs, int nb_kernel_type, nbnxn_atomdata_t *nbat) { - int zone, d; rvec c0, c1; - for (zone = 1; zone < zones->n; zone++) + for (int zone = 1; zone < zones->n; zone++) { - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { c0[d] = zones->size[zone].bb_x0[d]; c1[d] = zones->size[zone].bb_x1[d]; @@ -1602,8 +1575,7 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, nbnxn_grid_t *grid; float *bbcz; nbnxn_bb_t *bb; - int ncd, sc; - int nthreads gmx_unused; + int ncd; grid = &nbs->grid[0]; @@ -1630,17 +1602,18 @@ void nbnxn_grid_add_simple(nbnxn_search_t nbs, bbcz = grid->bbcz_simple; bb = grid->bb_simple; - nthreads = gmx_omp_nthreads_get(emntPairsearch); +#if (defined GMX_OPENMP) && !(defined __clang_analyzer__) + // cppcheck-suppress unreadVariable + int nthreads = gmx_omp_nthreads_get(emntPairsearch); +#endif + #pragma omp parallel for num_threads(nthreads) schedule(static) - for (sc = 0; sc < grid->nc; sc++) + for (int sc = 0; sc < grid->nc; sc++) { - int c, tx, na; - - for (c = 0; c < ncd; c++) + for (int c = 0; c < ncd; c++) { - tx = sc*ncd + c; - - na = NBNXN_CPU_CLUSTER_I_SIZE; + int tx = sc*ncd + c; + int na = NBNXN_CPU_CLUSTER_I_SIZE; while (na > 0 && nbat->type[tx*NBNXN_CPU_CLUSTER_I_SIZE+na-1] == nbat->ntype-1) { @@ -1706,20 +1679,17 @@ void nbnxn_get_atomorder(const nbnxn_search_t nbs, const int **a, int *n) void nbnxn_set_atomorder(nbnxn_search_t nbs) { - nbnxn_grid_t *grid; - int ao, cx, cy, cxy, cz, j; - /* Set the atom order for the home cell (index 0) */ - grid = &nbs->grid[0]; + nbnxn_grid_t *grid = &nbs->grid[0]; - ao = 0; - for (cx = 0; cx < grid->ncx; cx++) + int ao = 0; + for (int cx = 0; cx < grid->ncx; cx++) { - for (cy = 0; cy < grid->ncy; cy++) + for (int cy = 0; cy < grid->ncy; cy++) { - cxy = cx*grid->ncy + cy; - j 
= grid->cxy_ind[cxy]*grid->na_sc; - for (cz = 0; cz < grid->cxy_na[cxy]; cz++) + int cxy = cx*grid->ncy + cy; + int j = grid->cxy_ind[cxy]*grid->na_sc; + for (int cz = 0; cz < grid->cxy_na[cxy]; cz++) { nbs->a[j] = ao; nbs->cell[ao] = j; diff --git a/src/gromacs/mdlib/nbnxn_grid.h b/src/gromacs/mdlib/nbnxn_grid.h index cb846a1171..1824cc5f87 100644 --- a/src/gromacs/mdlib/nbnxn_grid.h +++ b/src/gromacs/mdlib/nbnxn_grid.h @@ -40,9 +40,6 @@ #include "gromacs/mdlib/nbnxn_consts.h" #include "gromacs/mdlib/nbnxn_internal.h" -#ifdef __cplusplus -extern "C" { -#endif /* Allocate and initialize ngrid pair search grids in nbs */ void nbnxn_grids_init(nbnxn_search_t nbs, int ngrid); @@ -92,8 +89,5 @@ void nbnxn_get_atomorder(const nbnxn_search_t nbs, const int **a, int *n); /* Renumber the atom indices on the grid to consecutive order */ void nbnxn_set_atomorder(nbnxn_search_t nbs); -#ifdef __cplusplus -} -#endif #endif diff --git a/src/gromacs/mdlib/nbnxn_search.c b/src/gromacs/mdlib/nbnxn_search.cpp similarity index 93% rename from src/gromacs/mdlib/nbnxn_search.c rename to src/gromacs/mdlib/nbnxn_search.cpp index 8292581a42..7ec37ff526 100644 --- a/src/gromacs/mdlib/nbnxn_search.c +++ b/src/gromacs/mdlib/nbnxn_search.cpp @@ -37,12 +37,16 @@ #include "nbnxn_search.h" +#include "config.h" + #include <assert.h> -#include <math.h> #include <string.h> +#include <cmath> + +#include <algorithm> + #include "gromacs/legacyheaders/gmx_omp_nthreads.h" -#include "gromacs/legacyheaders/macros.h" #include "gromacs/legacyheaders/nrnb.h" #include "gromacs/legacyheaders/ns.h" #include "gromacs/legacyheaders/types/commrec.h" @@ -131,9 +135,7 @@ static void nbs_cycle_clear(nbnxn_cycle_t *cc) { - int i; - - for (i = 0; i < enbsCCnr; i++) + for (int i = 0; i < enbsCCnr; i++) { cc[i].count = 0; cc[i].c = 0; @@ -147,9 +149,6 @@ static double Mcyc_av(const nbnxn_cycle_t *cc) static void nbs_cycle_print(FILE *fp, const nbnxn_search_t nbs) { - int n; - int t; - fprintf(fp, "\n"); fprintf(fp, "ns %4d grid %4.1f search %4.1f red.f %5.3f", nbs->cc[enbsCCgrid].count, @@ -165,7 +164,7 @@ static void nbs_cycle_print(FILE *fp, const nbnxn_search_t nbs) Mcyc_av(&nbs->cc[enbsCCcombine])); } fprintf(fp, " s. th"); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { fprintf(fp, " %4.1f", Mcyc_av(&nbs->work[t].cc[enbsCCsearch])); @@ -241,7 +240,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, int nthread_max) { nbnxn_search_t nbs; - int ngrid, d, t; + int ngrid; snew(nbs, 1); *nbs_ptr = nbs; @@ -256,7 +255,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, { nbs->zones = zones; - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { if ((*n_dd_cells)[d] > 1) { @@ -278,7 +277,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, /* Initialize the work data structures for each thread */ snew(nbs->work, nbs->nthread_max); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { nbs->work[t].cxy_na = NULL; nbs->work[t].cxy_na_nalloc = 0; @@ -293,7 +292,7 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, nbs->print_cycles = (getenv("GMX_NBNXN_CYCLE") != 0); nbs->search_count = 0; nbs_cycle_clear(nbs->cc); - for (t = 0; t < nbs->nthread_max; t++) + for (int t = 0; t < nbs->nthread_max; t++) { nbs_cycle_clear(nbs->work[t].cc); } @@ -302,15 +301,13 @@ void nbnxn_init_search(nbnxn_search_t * nbs_ptr, static void init_buffer_flags(nbnxn_buffer_flags_t *flags, int natoms) { - int b; - flags->nflag = (natoms + NBNXN_BUFFERFLAG_SIZE - 1)/NBNXN_BUFFERFLAG_SIZE; if (flags->nflag > flags->flag_nalloc) { flags->flag_nalloc = over_alloc_large(flags->nflag); srenew(flags->flag, flags->flag_nalloc); } - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { bitmask_clear(&(flags->flag[b])); } @@ -323,14 +320,14 @@ static void get_cell_range(real b0, real b1, int nc, real c0, real s, real invs, real d2, real r2, int *cf, int *cl) { - *cf = max((int)((b0 - c0)*invs), 0); + *cf = std::max(static_cast<int>((b0 - c0)*invs), 0); while (*cf > 0 && d2 + sqr((b0 - c0) - (*cf-1+1)*s) < r2) { (*cf)--; } - *cl = min((int)((b1 - c0)*invs), nc-1); + *cl = std::min(static_cast<int>((b1 - c0)*invs), nc-1); while (*cl < nc-1 && d2 + sqr((*cl+1)*s - (b1 - c0)) < r2) { (*cl)++; @@ -349,20 +346,20 @@ static float box_dist2(float bx0, float bx1, float by0, dl = bx0 - bb->upper[BB_X]; dh = bb->lower[BB_X] - bx1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = by0 - bb->upper[BB_Y]; dh = bb->lower[BB_Y] - by1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bz0 - bb->upper[BB_Z]; dh = bb->lower[BB_Z] - bz1; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; return d2; @@ -383,20 +380,20 @@ static float subc_bb_dist2(int si, const nbnxn_bb_t *bb_i_ci, dl = bb_i->lower[BB_X] - bb_j->upper[BB_X]; dh = bb_j->lower[BB_X] - bb_i->upper[BB_X]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bb_i->lower[BB_Y] - bb_j->upper[BB_Y]; dh = bb_j->lower[BB_Y] - bb_i->upper[BB_Y]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; dl = bb_i->lower[BB_Z] - bb_j->upper[BB_Z]; dh = bb_j->lower[BB_Z] - bb_i->upper[BB_Z]; - dm = max(dl, dh); - dm0 = max(dm, 0); + dm = std::max(dl, dh); + dm0 = std::max(dm, 0.0f); d2 += dm0*dm0; return d2; @@ -519,19 +516,14 @@ static gmx_bool subc_in_range_x(int na_c, int csj, int stride, const real *x_j, real rl2) { - int i, j, i0, j0; - real d2; - - for (i = 0; i < na_c; i++) + for (int i = 0; i < na_c; i++) { - i0 = (si*na_c 
+ i)*DIM; - for (j = 0; j < na_c; j++) + int i0 = (si*na_c + i)*DIM; + for (int j = 0; j < na_c; j++) { - j0 = (csj*na_c + j)*stride; + int j0 = (csj*na_c + j)*stride; - d2 = sqr(x_i[i0 ] - x_j[j0 ]) + - sqr(x_i[i0+1] - x_j[j0+1]) + - sqr(x_i[i0+2] - x_j[j0+2]); + real d2 = sqr(x_i[i0 ] - x_j[j0 ]) + sqr(x_i[i0+1] - x_j[j0+1]) + sqr(x_i[i0+2] - x_j[j0+2]); if (d2 < rl2) { @@ -696,7 +688,7 @@ static void check_subcell_list_space_simple(nbnxn_pairlist_t *nbl, static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, int nsupercell) { - int ncj4_max, j4, j, w, t; + int ncj4_max, w; #define NWARP 2 #define WARP_SIZE 32 @@ -718,7 +710,7 @@ static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, if (ncj4_max > nbl->work->cj4_init) { - for (j4 = nbl->work->cj4_init; j4 < ncj4_max; j4++) + for (int j4 = nbl->work->cj4_init; j4 < ncj4_max; j4++) { /* No i-subcells and no excl's in the list initially */ for (w = 0; w < NWARP; w++) @@ -735,9 +727,7 @@ static void check_subcell_list_space_supersub(nbnxn_pairlist_t *nbl, /* Set all excl masks for one GPU warp no exclusions */ static void set_no_excls(nbnxn_excl_t *excl) { - int t; - - for (t = 0; t < WARP_SIZE; t++) + for (int t = 0; t < WARP_SIZE; t++) { /* Turn all interaction bits on */ excl->pair[t] = NBNXN_INTERACTION_MASK_ALL; @@ -824,8 +814,6 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list, nbnxn_alloc_t *alloc, nbnxn_free_t *free) { - int i; - nbl_list->bSimple = bSimple; nbl_list->bCombined = bCombined; @@ -842,7 +830,7 @@ void nbnxn_init_pairlist_set(nbnxn_pairlist_set_t *nbl_list, snew(nbl_list->nbl_fep, nbl_list->nnbl); /* Execute in order to avoid memory interleaving between threads */ #pragma omp parallel for num_threads(nbl_list->nnbl) schedule(static) - for (i = 0; i < nbl_list->nnbl; i++) + for (int i = 0; i < nbl_list->nnbl; i++) { /* Allocate the nblist data structure locally on each thread * to optimize memory access for NUMA architectures. 
@@ -870,7 +858,6 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl { const nbnxn_grid_t *grid; int cs[SHIFTS]; - int s, i, j; int npexcl; /* This code only produces correct statistics with domain decomposition */ @@ -884,19 +871,19 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl nbl->ncj/(double)grid->nc*grid->na_sc/(0.5*4.0/3.0*M_PI*rl*rl*rl*grid->nc*grid->na_sc/(grid->size[XX]*grid->size[YY]*grid->size[ZZ]))); fprintf(fp, "nbl average j cell list length %.1f\n", - 0.25*nbl->ncj/(double)max(nbl->nci, 1)); + 0.25*nbl->ncj/(double)std::max(nbl->nci, 1)); - for (s = 0; s < SHIFTS; s++) + for (int s = 0; s < SHIFTS; s++) { cs[s] = 0; } npexcl = 0; - for (i = 0; i < nbl->nci; i++) + for (int i = 0; i < nbl->nci; i++) { cs[nbl->ci[i].shift & NBNXN_CI_SHIFT] += nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start; - j = nbl->ci[i].cj_ind_start; + int j = nbl->ci[i].cj_ind_start; while (j < nbl->ci[i].cj_ind_end && nbl->cj[j].excl != NBNXN_INTERACTION_MASK_ALL) { @@ -905,8 +892,8 @@ static void print_nblist_statistics_simple(FILE *fp, const nbnxn_pairlist_t *nbl } } fprintf(fp, "nbl cell pairs, total: %d excl: %d %.1f%%\n", - nbl->ncj, npexcl, 100*npexcl/(double)max(nbl->ncj, 1)); - for (s = 0; s < SHIFTS; s++) + nbl->ncj, npexcl, 100*npexcl/(double)std::max(nbl->ncj, 1)); + for (int s = 0; s < SHIFTS; s++) { if (cs[s] > 0) { @@ -920,7 +907,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n const nbnxn_search_t nbs, real rl) { const nbnxn_grid_t *grid; - int i, j4, j, si, b; + int b; int c[GPU_NSUBCELL+1]; double sum_nsp, sum_nsp2; int nsp_max; @@ -938,21 +925,21 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n sum_nsp = 0; sum_nsp2 = 0; nsp_max = 0; - for (si = 0; si <= GPU_NSUBCELL; si++) + for (int si = 0; si <= GPU_NSUBCELL; si++) { c[si] = 0; } - for (i = 0; i < nbl->nsci; i++) + for (int i = 0; i < nbl->nsci; i++) { int nsp; nsp = 0; - for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) + for (int j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) { - for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) + for (int j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) { b = 0; - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si))) { @@ -965,7 +952,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n } sum_nsp += nsp; sum_nsp2 += nsp*nsp; - nsp_max = max(nsp_max, nsp); + nsp_max = std::max(nsp_max, nsp); } if (nbl->nsci > 0) { @@ -973,7 +960,7 @@ static void print_nblist_statistics_supersub(FILE *fp, const nbnxn_pairlist_t *n sum_nsp2 /= nbl->nsci; } fprintf(fp, "nbl #cluster-pairs: av %.1f stddev %.1f max %d\n", - sum_nsp, sqrt(sum_nsp2 - sum_nsp*sum_nsp), nsp_max); + sum_nsp, std::sqrt(sum_nsp2 - sum_nsp*sum_nsp), nsp_max); if (nbl->ncj4 > 0) { @@ -1039,17 +1026,16 @@ static void set_self_and_newton_excls_supersub(nbnxn_pairlist_t *nbl, int si) { nbnxn_excl_t *excl[2]; - int ei, ej, w; /* Here we only set the set self and double pair exclusions */ get_nbl_exclusions_2(nbl, cj4_ind, &excl[0], &excl[1]); /* Only minor < major bits set */ - for (ej = 0; ej < nbl->na_ci; ej++) + for (int ej = 0; ej < nbl->na_ci; ej++) { - w = (ej>>2); - for (ei = ej; ei < nbl->na_ci; ei++) + int w = (ej>>2); + for (int ei = ej; ei < nbl->na_ci; ei++) { excl[w]->pair[(ej & (NBNXN_GPU_JGROUP_SIZE-1))*nbl->na_ci + ei] &= ~(1U << 
(sj_offset*GPU_NSUBCELL + si)); @@ -1112,16 +1098,12 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, real rl2, float rbb2, int *ndistc) { - const nbnxn_list_work_t *work; - const nbnxn_bb_t *bb_ci; const real *x_ci; gmx_bool InRange; real d2; - int cjf_gl, cjl_gl, cj; - - work = nbl->work; + int cjf_gl, cjl_gl; bb_ci = nbl->work->bb_ci; x_ci = nbl->work->x_ci; @@ -1143,12 +1125,10 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, } else if (d2 < rl2) { - int i, j; - cjf_gl = gridj->cell0 + cjf; - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { - for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) + for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) { InRange = InRange || (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjf_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + @@ -1185,12 +1165,10 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, } else if (d2 < rl2) { - int i, j; - cjl_gl = gridj->cell0 + cjl; - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE && !InRange; i++) { - for (j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) + for (int j = 0; j < NBNXN_CPU_CLUSTER_I_SIZE; j++) { InRange = InRange || (sqr(x_ci[i*STRIDE_XYZ+XX] - x_j[(cjl_gl*NBNXN_CPU_CLUSTER_I_SIZE+j)*STRIDE_XYZ+XX]) + @@ -1208,7 +1186,7 @@ static void make_cluster_list_simple(const nbnxn_grid_t *gridj, if (cjf <= cjl) { - for (cj = cjf; cj <= cjl; cj++) + for (int cj = cjf; cj <= cjl; cj++) { /* Store cj and the interaction mask */ nbl->cj[nbl->ncj].cj = gridj->cell0 + cj; @@ -1241,7 +1219,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, { int na_c; int npair; - int cjo, ci1, ci, cj, cj_gl; + int ci1, cj, cj_gl; int cj4_ind, cj_offset; unsigned int imask; nbnxn_cj4_t *cj4; @@ -1269,7 +1247,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, na_c = gridj->na_c; - for (cjo = 0; cjo < gridj->nsubc[scj]; cjo++) + for (int cjo = 0; cjo < gridj->nsubc[scj]; cjo++) { cj4_ind = (nbl->work->cj_ind >> NBNXN_GPU_JGROUP_SIZE_2LOG); cj_offset = nbl->work->cj_ind - cj4_ind*NBNXN_GPU_JGROUP_SIZE; @@ -1301,7 +1279,7 @@ static void make_cluster_list_supersub(const nbnxn_grid_t *gridi, npair = 0; /* We use a fixed upper-bound instead of ci1 to help optimization */ - for (ci = 0; ci < GPU_NSUBCELL; ci++) + for (int ci = 0; ci < GPU_NSUBCELL; ci++) { if (ci == ci1) { @@ -1416,12 +1394,9 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, int cj_ind_first, cj_ind_last; int cj_first, cj_last; int ndirect; - int i, ai, aj, si, eind, ge, se; + int ai, aj, si, ge, se; int found, cj_ind_0, cj_ind_1, cj_ind_m; int cj_m; - gmx_bool Found_si; - int si_ind; - nbnxn_excl_t *nbl_excl; int inner_i, inner_e; cell = nbs->cell; @@ -1465,7 +1440,7 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, #endif /* Loop over the atoms in the i super-cell */ - for (i = 0; i < nbl->na_sc; i++) + for (int i = 0; i < nbl->na_sc; i++) { ai = nbs->a[ci*nbl->na_sc+i]; if (ai >= 0) @@ -1473,7 +1448,7 @@ static void set_ci_top_excls(const nbnxn_search_t nbs, si = (i>>na_ci_2log); /* Loop over the topology-based exclusions for this i-atom */ - for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) + for (int eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) { aj = excl->a[eind]; @@ -1581,12 +1556,12 @@ static void make_fep_list(const nbnxn_search_t nbs, const nbnxn_grid_t *gridj, t_nblist *nlist) { - int ci, cj_ind_start, cj_ind_end, cj_ind, 
cja, cjr; + int ci, cj_ind_start, cj_ind_end, cja, cjr; int nri_max; int ngid, gid_i = 0, gid_j, gid; int egp_shift, egp_mask; int gid_cj = 0; - int i, j, ind_i, ind_j, ai, aj; + int ind_i, ind_j, ai, aj; int nri; gmx_bool bFEP_i, bFEP_i_all; @@ -1614,7 +1589,7 @@ static void make_fep_list(const nbnxn_search_t nbs, ngid = nbat->nenergrp; - if (ngid*gridj->na_cj > sizeof(gid_cj)*8) + if (static_cast<size_t>(ngid*gridj->na_cj) > sizeof(gid_cj)*8) { gmx_fatal(FARGS, "The Verlet scheme with %dx%d kernels and free-energy only supports up to %d energy groups", gridi->na_c, gridj->na_cj, (sizeof(gid_cj)*8)/gridj->na_cj); @@ -1625,7 +1600,7 @@ static void make_fep_list(const nbnxn_search_t nbs, /* Loop over the atoms in the i sub-cell */ bFEP_i_all = TRUE; - for (i = 0; i < nbl->na_ci; i++) + for (int i = 0; i < nbl->na_ci; i++) { ind_i = ci*nbl->na_ci + i; ai = nbs->a[ind_i]; @@ -1654,7 +1629,7 @@ static void make_fep_list(const nbnxn_search_t nbs, gid_i = (nbat->energrp[ci] >> (egp_shift*i)) & egp_mask; } - for (cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++) + for (int cj_ind = cj_ind_start; cj_ind < cj_ind_end; cj_ind++) { unsigned int fep_cj; @@ -1692,7 +1667,7 @@ static void make_fep_list(const nbnxn_search_t nbs, if (bFEP_i || fep_cj != 0) { - for (j = 0; j < nbl->na_cj; j++) + for (int j = 0; j < nbl->na_cj; j++) { /* Is this interaction perturbed and not excluded? */ ind_j = cja*nbl->na_cj + j; @@ -1786,10 +1761,10 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, const nbnxn_grid_t *gridj, t_nblist *nlist) { - int sci, cj4_ind_start, cj4_ind_end, cj4_ind, gcj, cjr; + int sci, cj4_ind_start, cj4_ind_end, cjr; int nri_max; - int c, c_abs; - int i, j, ind_i, ind_j, ai, aj; + int c_abs; + int ind_i, ind_j, ai, aj; int nri; gmx_bool bFEP_i; real xi, yi, zi; @@ -1821,11 +1796,11 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, } /* Loop over the atoms in the i super-cluster */ - for (c = 0; c < GPU_NSUBCELL; c++) + for (int c = 0; c < GPU_NSUBCELL; c++) { c_abs = sci*GPU_NSUBCELL + c; - for (i = 0; i < nbl->na_ci; i++) + for (int i = 0; i < nbl->na_ci; i++) { ind_i = c_abs*nbl->na_ci + i; ai = nbs->a[ind_i]; @@ -1851,11 +1826,11 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, srenew(nlist->excl_fep, nlist->maxnrj); } - for (cj4_ind = cj4_ind_start; cj4_ind < cj4_ind_end; cj4_ind++) + for (int cj4_ind = cj4_ind_start; cj4_ind < cj4_ind_end; cj4_ind++) { cj4 = &nbl->cj4[cj4_ind]; - for (gcj = 0; gcj < NBNXN_GPU_JGROUP_SIZE; gcj++) + for (int gcj = 0; gcj < NBNXN_GPU_JGROUP_SIZE; gcj++) { unsigned int fep_cj; @@ -1871,7 +1846,7 @@ static void make_fep_list_supersub(const nbnxn_search_t nbs, if (bFEP_i || fep_cj != 0) { - for (j = 0; j < nbl->na_cj; j++) + for (int j = 0; j < nbl->na_cj; j++) { /* Is this interaction perturbed and not excluded?
*/ ind_j = (gridj->cell0*GPU_NSUBCELL + cjr)*nbl->na_cj + j; @@ -1961,11 +1936,9 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, int cj_ind_first, cj_ind_last; int cj_first, cj_last; int ndirect; - int i, ai, aj, si, eind, ge, se; + int ai, aj, si, ge, se; int found, cj_ind_0, cj_ind_1, cj_ind_m; int cj_m; - gmx_bool Found_si; - int si_ind; nbnxn_excl_t *nbl_excl; int inner_i, inner_e, w; @@ -1999,7 +1972,7 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, } /* Loop over the atoms in the i super-cell */ - for (i = 0; i < nbl->na_sc; i++) + for (int i = 0; i < nbl->na_sc; i++) { ai = nbs->a[sci*nbl->na_sc+i]; if (ai >= 0) @@ -2007,7 +1980,7 @@ static void set_sci_top_excls(const nbnxn_search_t nbs, si = (i>>na_c_2log); /* Loop over the topology-based exclusions for this i-atom */ - for (eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) + for (int eind = excl->index[ai]; eind < excl->index[ai+1]; eind++) { aj = excl->a[eind]; @@ -2138,7 +2111,7 @@ static void new_sci_entry(nbnxn_pairlist_t *nbl, int sci, int shift) static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, nbnxn_list_work_t *work) { - int jnew, j; + int jnew; if (ncj > work->cj_nalloc) { @@ -2148,7 +2121,7 @@ static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, /* Make a list of the j-cells involving exclusions */ jnew = 0; - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { if (cj[j].excl != NBNXN_INTERACTION_MASK_ALL) { @@ -2159,14 +2132,14 @@ static void sort_cj_excl(nbnxn_cj_t *cj, int ncj, if (!((jnew == 0) || (jnew == 1 && cj[0].excl != NBNXN_INTERACTION_MASK_ALL))) { - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { if (cj[j].excl == NBNXN_INTERACTION_MASK_ALL) { work->cj[jnew++] = cj[j]; } } - for (j = 0; j < ncj; j++) + for (int j = 0; j < ncj; j++) { cj[j] = work->cj[j]; } @@ -2218,10 +2191,9 @@ static void split_sci_entry(nbnxn_pairlist_t *nbl, { int nsp_est; int nsp_max; - int cj4_start, cj4_end, j4len, cj4; + int cj4_start, cj4_end, j4len; int sci; int nsp, nsp_sci, nsp_cj4, nsp_cj4_e, nsp_cj4_p; - int p; if (progBal) { @@ -2263,12 +2235,12 @@ static void split_sci_entry(nbnxn_pairlist_t *nbl, nsp_sci = 0; nsp_cj4_e = 0; nsp_cj4 = 0; - for (cj4 = cj4_start; cj4 < cj4_end; cj4++) + for (int cj4 = cj4_start; cj4 < cj4_end; cj4++) { nsp_cj4_p = nsp_cj4; /* Count the number of cluster pairs in this cj4 group */ nsp_cj4 = 0; - for (p = 0; p < GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE; p++) + for (int p = 0; p < GPU_NSUBCELL*NBNXN_GPU_JGROUP_SIZE; p++) { nsp_cj4 += (nbl->cj4[cj4].imei[0].imask >> p) & 1; } @@ -2317,13 +2289,10 @@ static void close_ci_entry_supersub(nbnxn_pairlist_t *nbl, gmx_bool progBal, int nsp_tot_est, int thread, int nthread) { - int j4len, tlen; - int nb, b; - /* All content of the new ci entry have already been filled correctly, * we only need to increase the count here (for non empty lists). 
*/ - j4len = nbl->sci[nbl->nsci].cj4_ind_end - nbl->sci[nbl->nsci].cj4_ind_start; + int j4len = nbl->sci[nbl->nsci].cj4_ind_end - nbl->sci[nbl->nsci].cj4_ind_start; if (j4len > 0) { /* We can only have complete blocks of 4 j-entries in a list, @@ -2398,12 +2367,10 @@ static void set_icell_bbxxxx_supersub(const float *bb, int ci, real shx, real shy, real shz, float *bb_ci) { - int ia, m, i; - - ia = ci*(GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; - for (m = 0; m < (GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX) + int ia = ci*(GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; + for (int m = 0; m < (GPU_NSUBCELL>>STRIDE_PBB_2LOG)*NNBSBB_XXXX; m += NNBSBB_XXXX) { - for (i = 0; i < STRIDE_PBB; i++) + for (int i = 0; i < STRIDE_PBB; i++) { bb_ci[m+0*STRIDE_PBB+i] = bb[ia+m+0*STRIDE_PBB+i] + shx; bb_ci[m+1*STRIDE_PBB+i] = bb[ia+m+1*STRIDE_PBB+i] + shy; @@ -2421,9 +2388,7 @@ static void set_icell_bb_supersub(const nbnxn_bb_t *bb, int ci, real shx, real shy, real shz, nbnxn_bb_t *bb_ci) { - int i; - - for (i = 0; i < GPU_NSUBCELL; i++) + for (int i = 0; i < GPU_NSUBCELL; i++) { set_icell_bb_simple(bb, ci*GPU_NSUBCELL+i, shx, shy, shz, @@ -2438,11 +2403,9 @@ static void icell_set_x_simple(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int ia, i; + int ia = ci*NBNXN_CPU_CLUSTER_I_SIZE; - ia = ci*NBNXN_CPU_CLUSTER_I_SIZE; - - for (i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE; i++) + for (int i = 0; i < NBNXN_CPU_CLUSTER_I_SIZE; i++) { work->x_ci[i*STRIDE_XYZ+XX] = x[(ia+i)*stride+XX] + shx; work->x_ci[i*STRIDE_XYZ+YY] = x[(ia+i)*stride+YY] + shy; @@ -2457,13 +2420,10 @@ static void icell_set_x_supersub(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int ia, i; - real *x_ci; - - x_ci = work->x_ci; + real * x_ci = work->x_ci; - ia = ci*GPU_NSUBCELL*na_c; - for (i = 0; i < GPU_NSUBCELL*na_c; i++) + int ia = ci*GPU_NSUBCELL*na_c; + for (int i = 0; i < GPU_NSUBCELL*na_c; i++) { x_ci[i*DIM + XX] = x[(ia+i)*stride + XX] + shx; x_ci[i*DIM + YY] = x[(ia+i)*stride + YY] + shy; @@ -2479,18 +2439,15 @@ static void icell_set_x_supersub_simd4(int ci, int stride, const real *x, nbnxn_list_work_t *work) { - int si, io, ia, i, j; - real *x_ci; - - x_ci = work->x_ci; + real * x_ci = work->x_ci; - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { - for (i = 0; i < na_c; i += STRIDE_PBB) + for (int i = 0; i < na_c; i += STRIDE_PBB) { - io = si*na_c + i; - ia = ci*GPU_NSUBCELL*na_c + io; - for (j = 0; j < STRIDE_PBB; j++) + int io = si*na_c + i; + int ia = ci*GPU_NSUBCELL*na_c + io; + for (int j = 0; j < STRIDE_PBB; j++) { x_ci[io*DIM + j + XX*STRIDE_PBB] = x[(ia+j)*stride+XX] + shx; x_ci[io*DIM + j + YY*STRIDE_PBB] = x[(ia+j)*stride+YY] + shy; @@ -2505,11 +2462,11 @@ static real minimum_subgrid_size_xy(const nbnxn_grid_t *grid) { if (grid->bSimple) { - return min(grid->sx, grid->sy); + return std::min(grid->sx, grid->sy); } else { - return min(grid->sx/GPU_NSUBCELL_X, grid->sy/GPU_NSUBCELL_Y); + return std::min(grid->sx/GPU_NSUBCELL_X, grid->sy/GPU_NSUBCELL_Y); } } @@ -2558,13 +2515,12 @@ real nbnxn_get_rlist_effective_inc(int cluster_size_j, real atom_density) vol_inc_i = (cluster_size_i - 1)/atom_density; vol_inc_j = (cluster_size_j - 1)/atom_density; - return nbnxn_rlist_inc_outside_fac*pow(vol_inc_i + vol_inc_j, 1.0/3.0); + return nbnxn_rlist_inc_outside_fac*std::pow(static_cast<real>(vol_inc_i + vol_inc_j), static_cast<real>(1.0/3.0)); } /* Estimates the interaction volume^2 for non-local interactions */ static real nonlocal_vol2(const gmx_domdec_zones_t *zones,
rvec ls, real r) { - int z, d; real cl, ca, za; real vold_est; real vol2_est_tot; @@ -2578,14 +2534,14 @@ static real nonlocal_vol2(const gmx_domdec_zones_t *zones, rvec ls, real r) * as small parts as possible. */ - for (z = 0; z < zones->n; z++) + for (int z = 0; z < zones->n; z++) { if (zones->shift[z][XX] + zones->shift[z][YY] + zones->shift[z][ZZ] == 1) { cl = 0; ca = 1; za = 1; - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { if (zones->shift[z][d] == 0) { @@ -2624,7 +2580,6 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, const nbnxn_grid_t *grid; rvec ls; real xy_diag2, r_eff_sup, vol_est, nsp_est, nsp_est_nl; - int nsubpair_max; grid = &nbs->grid[0]; @@ -2645,7 +2600,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, xy_diag2 = ls[XX]*ls[XX] + ls[YY]*ls[YY] + ls[ZZ]*ls[ZZ]; /* The formulas below are a heuristic estimate of the average nsj per si*/ - r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*sqr((grid->na_c - 1.0)/grid->na_c)*sqrt(xy_diag2/3); + r_eff_sup = rlist + nbnxn_rlist_inc_outside_fac*sqr((grid->na_c - 1.0)/grid->na_c)*std::sqrt(xy_diag2/3); if (!nbs->DomDec || nbs->zones->n == 1) { @@ -2667,7 +2622,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, /* 12/2 quarter pie slices on the edges */ vol_est += 2*(ls[XX] + ls[YY] + ls[ZZ])*0.25*M_PI*sqr(r_eff_sup); /* 4 octants of a sphere */ - vol_est += 0.5*4.0/3.0*M_PI*pow(r_eff_sup, 3); + vol_est += 0.5*4.0/3.0*M_PI*std::pow(r_eff_sup, static_cast<real>(3)); /* Estimate the number of cluster pairs as the local number of * clusters times the volume they interact with times the density. @@ -2686,7 +2641,7 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, * groups of atoms we'll anyhow be limited by nsubpair_target_min, * so this overestimation will not matter. */ - nsp_est = max(nsp_est, grid->nsubc_tot*14.0); + nsp_est = std::max(nsp_est, grid->nsubc_tot*static_cast<real>(14)); if (debug) { @@ -2703,9 +2658,9 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, * Since there is overhead, we shouldn't make the lists too small * (and we can't chop up j-groups) so we use a minimum target size of 36.
*/ - *nsubpair_target = max(nsubpair_target_min, - (int)(nsp_est/min_ci_balanced + 0.5)); - *nsubpair_tot_est = (int)nsp_est; + *nsubpair_target = std::max(nsubpair_target_min, + static_cast<int>(nsp_est/min_ci_balanced + 0.5)); + *nsubpair_tot_est = static_cast<int>(nsp_est); if (debug) { @@ -2717,15 +2672,13 @@ static void get_nsubpair_target(const nbnxn_search_t nbs, /* Debug list print function */ static void print_nblist_ci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) { - int i, j; - - for (i = 0; i < nbl->nci; i++) + for (int i = 0; i < nbl->nci; i++) { fprintf(fp, "ci %4d shift %2d ncj %3d\n", nbl->ci[i].ci, nbl->ci[i].shift, nbl->ci[i].cj_ind_end - nbl->ci[i].cj_ind_start); - for (j = nbl->ci[i].cj_ind_start; j < nbl->ci[i].cj_ind_end; j++) + for (int j = nbl->ci[i].cj_ind_start; j < nbl->ci[i].cj_ind_end; j++) { fprintf(fp, " cj %5d imask %x\n", nbl->cj[j].cj, @@ -2737,23 +2690,21 @@ static void print_nblist_ci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) /* Debug list print function */ static void print_nblist_sci_cj(FILE *fp, const nbnxn_pairlist_t *nbl) { - int i, j4, j, ncp, si; - - for (i = 0; i < nbl->nsci; i++) + for (int i = 0; i < nbl->nsci; i++) { fprintf(fp, "ci %4d shift %2d ncj4 %2d\n", nbl->sci[i].sci, nbl->sci[i].shift, nbl->sci[i].cj4_ind_end - nbl->sci[i].cj4_ind_start); - ncp = 0; - for (j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) + int ncp = 0; + for (int j4 = nbl->sci[i].cj4_ind_start; j4 < nbl->sci[i].cj4_ind_end; j4++) { - for (j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) + for (int j = 0; j < NBNXN_GPU_JGROUP_SIZE; j++) { fprintf(fp, " sj %5d imask %x\n", nbl->cj4[j4].cj[j], nbl->cj4[j4].imei[0].imask); - for (si = 0; si < GPU_NSUBCELL; si++) + for (int si = 0; si < GPU_NSUBCELL; si++) { if (nbl->cj4[j4].imei[0].imask & (1U << (j*GPU_NSUBCELL + si))) { @@ -2774,8 +2725,6 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, nbnxn_pairlist_t *nblc) { int nsci, ncj4, nexcl; - int n, i; - int nthreads gmx_unused; if (nblc->bSimple) { @@ -2785,7 +2734,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, nsci = nblc->nsci; ncj4 = nblc->ncj4; nexcl = nblc->nexcl; - for (i = 0; i < nnbl; i++) + for (int i = 0; i < nnbl; i++) { nsci += nbl[i]->nsci; ncj4 += nbl[i]->ncj4; @@ -2816,54 +2765,54 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, /* Each thread should copy its own data to the combined arrays, * as otherwise data will go back and forth between different caches.
*/ - nthreads = gmx_omp_nthreads_get(emntPairsearch); +#if (defined GMX_OPENMP) && !(defined __clang_analyzer__) + // cppcheck-suppress unreadVariable + int nthreads = gmx_omp_nthreads_get(emntPairsearch); +#endif + #pragma omp parallel for num_threads(nthreads) schedule(static) - for (n = 0; n < nnbl; n++) + for (int n = 0; n < nnbl; n++) { int sci_offset; int cj4_offset; - int ci_offset; int excl_offset; - int i, j4; const nbnxn_pairlist_t *nbli; /* Determine the offset in the combined data for our thread */ sci_offset = nblc->nsci; cj4_offset = nblc->ncj4; - ci_offset = nblc->nci_tot; excl_offset = nblc->nexcl; - for (i = 0; i < n; i++) + for (int i = 0; i < n; i++) { sci_offset += nbl[i]->nsci; cj4_offset += nbl[i]->ncj4; - ci_offset += nbl[i]->nci_tot; excl_offset += nbl[i]->nexcl; } nbli = nbl[n]; - for (i = 0; i < nbli->nsci; i++) + for (int i = 0; i < nbli->nsci; i++) { nblc->sci[sci_offset+i] = nbli->sci[i]; nblc->sci[sci_offset+i].cj4_ind_start += cj4_offset; nblc->sci[sci_offset+i].cj4_ind_end += cj4_offset; } - for (j4 = 0; j4 < nbli->ncj4; j4++) + for (int j4 = 0; j4 < nbli->ncj4; j4++) { nblc->cj4[cj4_offset+j4] = nbli->cj4[j4]; nblc->cj4[cj4_offset+j4].imei[0].excl_ind += excl_offset; nblc->cj4[cj4_offset+j4].imei[1].excl_ind += excl_offset; } - for (j4 = 0; j4 < nbli->nexcl; j4++) + for (int j4 = 0; j4 < nbli->nexcl; j4++) { nblc->excl[excl_offset+j4] = nbli->excl[j4]; } } - for (n = 0; n < nnbl; n++) + for (int n = 0; n < nnbl; n++) { nblc->nsci += nbl[n]->nsci; nblc->ncj4 += nbl[n]->ncj4; @@ -2875,7 +2824,7 @@ static void combine_nblists(int nnbl, nbnxn_pairlist_t **nbl, static void balance_fep_lists(const nbnxn_search_t nbs, nbnxn_pairlist_set_t *nbl_lists) { - int nnbl, th; + int nnbl; int nri_tot, nrj_tot, nrj_target; int th_dest; t_nblist *nbld; @@ -2891,7 +2840,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, /* Count the total i-lists and pairs */ nri_tot = 0; nrj_tot = 0; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { nri_tot += nbl_lists->nbl_fep[th]->nri; nrj_tot += nbl_lists->nbl_fep[th]->nrj; @@ -2902,7 +2851,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, assert(gmx_omp_nthreads_get(emntNonbonded) == nnbl); #pragma omp parallel for schedule(static) num_threads(nnbl) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbl; @@ -2929,14 +2878,13 @@ static void balance_fep_lists(const nbnxn_search_t nbs, /* Loop over the source lists and assign and copy i-entries */ th_dest = 0; nbld = nbs->work[th_dest].nbl_fep; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbls; - int i, j; nbls = nbl_lists->nbl_fep[th]; - for (i = 0; i < nbls->nri; i++) + for (int i = 0; i < nbls->nri; i++) { int nrj; @@ -2957,7 +2905,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, nbld->gid[nbld->nri] = nbls->gid[i]; nbld->shift[nbld->nri] = nbls->shift[i]; - for (j = nbls->jindex[i]; j < nbls->jindex[i+1]; j++) + for (int j = nbls->jindex[i]; j < nbls->jindex[i+1]; j++) { nbld->jjnr[nbld->nrj] = nbls->jjnr[j]; nbld->excl_fep[nbld->nrj] = nbls->excl_fep[j]; @@ -2969,7 +2917,7 @@ static void balance_fep_lists(const nbnxn_search_t nbs, } /* Swap the list pointers */ - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { t_nblist *nbl_tmp; @@ -3054,7 +3002,8 @@ static float boundingbox_only_distance2(const nbnxn_grid_t *gridi, bby /= GPU_NSUBCELL_Y; } - rbb2 = sqr(max(0, rlist - 0.5*sqrt(bbx*bbx + bby*bby))); + rbb2 = std::max(0.0, rlist - 0.5*std::sqrt(bbx*bbx 
+ bby*bby)); + rbb2 = rbb2 * rbb2; #ifndef GMX_DOUBLE return rbb2; @@ -3136,12 +3085,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, matrix box; real rl2, rl_fep2 = 0; float rbb2; - int d; int ci_b, ci, ci_x, ci_y, ci_xy, cj; ivec shp; - int tx, ty, tz; int shift; - gmx_bool bMakeList; real shx, shy, shz; int conv_i, cell0_i; const nbnxn_bb_t *bb_i = NULL; @@ -3154,7 +3100,6 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, real bz1_frac; real d2cx, d2z, d2z_cx, d2z_cy, d2zx, d2zxy, d2xy; int cxf, cxl, cyf, cyf_x, cyl; - int cx, cy; int c0, c1, cs, cf, cl; int ndistc; int ncpcheck; @@ -3219,11 +3164,11 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (debug) { - fprintf(debug, "nbl bounding box only distance %f\n", sqrt(rbb2)); + fprintf(debug, "nbl bounding box only distance %f\n", std::sqrt(rbb2)); } /* Set the shift range */ - for (d = 0; d < DIM; d++) + for (int d = 0; d < DIM; d++) { /* Check if we need periodicity shifts. * Without PBC or with domain decomposition we don't need them. @@ -3235,7 +3180,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, else { if (d == XX && - box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < sqrt(rl2)) + box[XX][XX] - fabs(box[YY][XX]) - fabs(box[ZZ][XX]) < std::sqrt(rl2)) { shp[d] = 2; } @@ -3334,7 +3279,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, ci_xy = ci_x*gridi->ncy + ci_y; /* Loop over shift vectors in three dimensions */ - for (tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) + for (int tz = -shp[ZZ]; tz <= shp[ZZ]; tz++) { shz = tz*box[ZZ][ZZ]; @@ -3361,15 +3306,14 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, continue; } - bz1_frac = - bz1/((real)(gridi->cxy_ind[ci_xy+1] - gridi->cxy_ind[ci_xy])); + bz1_frac = bz1/(gridi->cxy_ind[ci_xy+1] - gridi->cxy_ind[ci_xy]); if (bz1_frac < 0) { bz1_frac = 0; } /* The check with bz1_frac close to or larger than 1 comes later */ - for (ty = -shp[YY]; ty <= shp[YY]; ty++) + for (int ty = -shp[YY]; ty <= shp[YY]; ty++) { shy = ty*box[YY][YY] + tz*box[ZZ][YY]; @@ -3404,7 +3348,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, d2z_cy += sqr(by0 - gridj->c1[YY]); } - for (tx = -shp[XX]; tx <= shp[XX]; tx++) + for (int tx = -shp[XX]; tx <= shp[XX]; tx++) { shift = XYZ2IS(tx, ty, tz); @@ -3480,7 +3424,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, gridi->na_c, nbat->xstride, nbat->x, nbl->work); - for (cx = cxf; cx <= cxl; cx++) + for (int cx = cxf; cx <= cxl; cx++) { d2zx = d2z; if (gridj->c0[XX] + cx*gridj->sx > bx1) @@ -3510,7 +3454,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, cyf_x = cyf; } - for (cy = cyf_x; cy <= cyl; cy++) + for (int cy = cyf_x; cy <= cyl; cy++) { c0 = gridj->cxy_ind[cx*gridj->ncy+cy]; c1 = gridj->cxy_ind[cx*gridj->ncy+cy+1]; @@ -3533,7 +3477,7 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, } if (c1 > c0 && d2zxy < rl2) { - cs = c0 + (int)(bz1_frac*(c1 - c0)); + cs = c0 + static_cast<int>(bz1_frac*(c1 - c0)); if (cs >= c1) { cs = c1 - 1; @@ -3568,10 +3512,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, /* Simple reference code, for debugging, * overrides the more complex code above. */ - int k; cf = c1; cl = -1; - for (k = c0; k < c1; k++) + for (int k = c0; k < c1; k++) { if (box_dist2(bx0, bx1, by0, by1, bz0, bz1, bb+k) < rl2 && k < cf) @@ -3593,11 +3536,11 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, * only use cj >= ci.
*/ #ifndef NBNXN_SHIFT_BACKWARD - cf = max(cf, ci); + cf = std::max(cf, ci); #else if (shift == CENTRAL) { - cf = max(cf, ci); + cf = std::max(cf, ci); } #endif } @@ -3659,11 +3602,9 @@ static void nbnxn_make_pairlist_part(const nbnxn_search_t nbs, if (bFBufferFlag && nbl->ncj > ncj_old_j) { - int cbf, cbl, cb; - - cbf = nbl->cj[ncj_old_j].cj >> gridj_flag_shift; - cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift; - for (cb = cbf; cb <= cbl; cb++) + int cbf = nbl->cj[ncj_old_j].cj >> gridj_flag_shift; + int cbl = nbl->cj[nbl->ncj-1].cj >> gridj_flag_shift; + for (int cb = cbf; cb <= cbl; cb++) { bitmask_init_bit(&gridj_flag[cb], th); } @@ -3764,14 +3705,11 @@ static void reduce_buffer_flags(const nbnxn_search_t nbs, int nsrc, const nbnxn_buffer_flags_t *dest) { - int s, b; - gmx_bitmask_t *flag; - - for (s = 0; s < nsrc; s++) + for (int s = 0; s < nsrc; s++) { - flag = nbs->work[s].buffer_flags.flag; + gmx_bitmask_t * flag = nbs->work[s].buffer_flags.flag; - for (b = 0; b < dest->nflag; b++) + for (int b = 0; b < dest->nflag; b++) { bitmask_union(&(dest->flag[b]), flag[b]); } @@ -3780,7 +3718,7 @@ static void reduce_buffer_flags(const nbnxn_search_t nbs, static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) { - int nelem, nkeep, ncopy, nred, b, c, out; + int nelem, nkeep, ncopy, nred, out; gmx_bitmask_t mask_0; nelem = 0; @@ -3788,7 +3726,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) ncopy = 0; nred = 0; bitmask_init_bit(&mask_0, 0); - for (b = 0; b < flags->nflag; b++) + for (int b = 0; b < flags->nflag; b++) { if (bitmask_is_equal(flags->flag[b], mask_0)) { @@ -3798,7 +3736,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) } else if (!bitmask_is_zero(flags->flag[b])) { - c = 0; + int c = 0; for (out = 0; out < nout; out++) { if (bitmask_is_set(flags->flag[b], out)) @@ -3839,7 +3777,7 @@ static void print_reduction_cost(const nbnxn_buffer_flags_t *flags, int nout) static void sort_sci(nbnxn_pairlist_t *nbl) { nbnxn_list_work_t *work; - int m, i, s, s0, s1; + int m, s0, s1; nbnxn_sci_t *sci_sort; if (nbl->ncj4 <= nbl->nsci) @@ -3869,19 +3807,19 @@ static void sort_sci(nbnxn_pairlist_t *nbl) } /* Count the entries of each size */ - for (i = 0; i <= m; i++) + for (int i = 0; i <= m; i++) { work->sort[i] = 0; } - for (s = 0; s < nbl->nsci; s++) + for (int s = 0; s < nbl->nsci; s++) { - i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); + int i = std::min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); work->sort[i]++; } /* Calculate the offset for each count */ s0 = work->sort[m]; work->sort[m] = 0; - for (i = m - 1; i >= 0; i--) + for (int i = m - 1; i >= 0; i--) { s1 = work->sort[i]; work->sort[i] = work->sort[i + 1] + s0; @@ -3890,9 +3828,9 @@ static void sort_sci(nbnxn_pairlist_t *nbl) /* Sort entries directly into place */ sci_sort = work->sci_sort; - for (s = 0; s < nbl->nsci; s++) + for (int s = 0; s < nbl->nsci; s++) { - i = min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); + int i = std::min(m, nbl->sci[s].cj4_ind_end - nbl->sci[s].cj4_ind_start); sci_sort[work->sort[i]++] = nbl->sci[s]; } @@ -3914,9 +3852,8 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, { nbnxn_grid_t *gridi, *gridj; gmx_bool bGPUCPU; - int nzi, zi, zj0, zj1, zj; + int nzi, zj0, zj1; int nsubpair_target, nsubpair_tot_est; - int th; int nnbl; nbnxn_pairlist_t **nbl; int ci_block; @@ -3945,6 +3882,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, if (nbl_list->bSimple) { +#ifdef 
GMX_NBNXN_SIMD switch (nb_kernel_type) { #ifdef GMX_NBNXN_SIMD_4XN @@ -3961,6 +3899,10 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, nbs->icell_set_x = icell_set_x_simple; break; } +#else /* GMX_NBNXN_SIMD */ + /* MSVC 2013 complains about switch statements without case */ + nbs->icell_set_x = icell_set_x_simple; +#endif /* GMX_NBNXN_SIMD */ } else { @@ -3995,7 +3937,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, } /* Clear all pair-lists */ - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { clear_pairlist(nbl[th]); @@ -4005,7 +3947,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, } } - for (zi = 0; zi < nzi; zi++) + for (int zi = 0; zi < nzi; zi++) { gridi = &nbs->grid[zi]; @@ -4018,7 +3960,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, zj0++; } } - for (zj = zj0; zj < zj1; zj++) + for (int zj = zj0; zj < zj1; zj++) { gridj = &nbs->grid[zj]; @@ -4045,7 +3987,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, progBal = (LOCAL_I(iloc) || nbs->zones->n <= 2); #pragma omp parallel for num_threads(nnbl) schedule(static) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { /* Re-init the thread-local work flag data before making * the first list (not an elegant conditional). @@ -4079,7 +4021,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, np_tot = 0; np_noq = 0; np_hlj = 0; - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { inc_nrnb(nrnb, eNR_NBNXN_DIST2, nbs->work[th].ndistc); @@ -4121,7 +4063,7 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, else { #pragma omp parallel for num_threads(nnbl) schedule(static) - for (th = 0; th < nnbl; th++) + for (int th = 0; th < nnbl; th++) { sort_sci(nbl[th]); } diff --git a/src/gromacs/mdlib/nbnxn_search.h b/src/gromacs/mdlib/nbnxn_search.h index 34963ef613..79b09788e5 100644 --- a/src/gromacs/mdlib/nbnxn_search.h +++ b/src/gromacs/mdlib/nbnxn_search.h @@ -39,9 +39,6 @@ #include "gromacs/legacyheaders/typedefs.h" #include "gromacs/mdlib/nbnxn_pairlist.h" -#ifdef __cplusplus -extern "C" { -#endif /* Tells if the pair-list corresponding to nb_kernel_type is simple. * Returns FALSE for super-sub type pair-list. @@ -83,8 +80,5 @@ void nbnxn_make_pairlist(const nbnxn_search_t nbs, int nb_kernel_type, t_nrnb *nrnb); -#ifdef __cplusplus -} -#endif #endif -- 2.11.4.GIT
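
Note on the clamped bounding-box distance used in box_dist2() and subc_bb_dist2() above: for each axis the code takes the larger of the two signed gaps and clamps it to zero, so overlapping boxes contribute nothing to the squared distance. A minimal standalone sketch of that pattern; the Box struct and boxDist2 name are illustrative, not the GROMACS nbnxn_bb_t layout:

    #include <algorithm>
    #include <cstdio>

    struct Box { float lo[3], hi[3]; };

    /* Squared distance between two axis-aligned boxes; 0 if they overlap. */
    static float boxDist2(const Box &a, const Box &b)
    {
        float d2 = 0;
        for (int d = 0; d < 3; d++)
        {
            float dl  = a.lo[d] - b.hi[d];  /* gap if a lies above b on this axis */
            float dh  = b.lo[d] - a.hi[d];  /* gap if b lies above a */
            float dm  = std::max(dl, dh);   /* at most one of the gaps is positive */
            float dm0 = std::max(dm, 0.0f); /* clamp: overlap contributes nothing */
            d2       += dm0*dm0;
        }
        return d2;
    }

    int main()
    {
        Box a = { { 0, 0, 0 }, { 1, 1, 1 } };
        Box b = { { 3, 0, 0 }, { 4, 1, 1 } };
        std::printf("d2 = %g\n", boxDist2(a, b)); /* prints 4: gap of 2 along x */
        return 0;
    }

The clamp is also why the conversion above must write std::max(dm, 0.0f) rather than max(dm, 0): with the std:: template both arguments have to be the same type.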
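
nbnxn_init_pairlist_set() above deliberately allocates each thread's list inside the OpenMP parallel for, so that under a first-touch NUMA policy each list's pages are placed on the node of the thread that will use them. A minimal sketch of that idiom, assuming OpenMP is available (buffer size and names are illustrative; error handling omitted for brevity):

    #include <cstdio>
    #include <cstdlib>
    #include <vector>

    int main()
    {
        const int nthread = 4;
        std::vector<double *> buf(nthread, nullptr);

        /* Allocate and initialize inside the parallel region: the thread
         * that first writes a page determines which NUMA node backs it. */
    #pragma omp parallel for num_threads(nthread) schedule(static)
        for (int t = 0; t < nthread; t++)
        {
            buf[t] = static_cast<double *>(std::malloc(1024*sizeof(double)));
            for (int i = 0; i < 1024; i++)
            {
                buf[t][i] = 0; /* first touch by the owning thread */
            }
        }

        std::printf("allocated %d thread-local buffers\n", nthread);
        for (int t = 0; t < nthread; t++)
        {
            std::free(buf[t]);
        }
        return 0;
    }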
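
combine_nblists() above lets every thread recompute its own write offsets by summing the sizes of all lists before it, so all threads copy into disjoint ranges of the combined arrays concurrently. The same exclusive-prefix-offset pattern in miniature (names illustrative; compiles and runs serially if OpenMP is absent, since the pragma is then ignored):

    #include <cstdio>
    #include <vector>

    int main()
    {
        /* Per-thread partial lists to be concatenated. */
        std::vector<std::vector<int> > part = { { 1, 2 }, { 3 }, { 4, 5, 6 } };
        int n = static_cast<int>(part.size());

        int total = 0;
        for (int i = 0; i < n; i++)
        {
            total += static_cast<int>(part[i].size());
        }
        std::vector<int> combined(total);

        /* Each thread derives its own offset (an exclusive prefix sum),
         * so the copies below write to disjoint ranges. */
    #pragma omp parallel for schedule(static)
        for (int t = 0; t < n; t++)
        {
            int offset = 0;
            for (int i = 0; i < t; i++)
            {
                offset += static_cast<int>(part[i].size());
            }
            for (size_t j = 0; j < part[t].size(); j++)
            {
                combined[offset + static_cast<int>(j)] = part[t][j];
            }
        }

        for (int v : combined)
        {
            std::printf("%d ", v); /* prints 1 2 3 4 5 6 */
        }
        std::printf("\n");
        return 0;
    }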
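
sort_sci() above is a counting sort keyed on the cj4-list length of each sci entry, capped at m, with offsets accumulated from the largest bucket downward so the longest (most expensive) entries land first for better load balancing. The original converts counts to offsets in place in the same work->sort array; this self-contained sketch uses a separate offset array for clarity (all names illustrative):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    /* Counting sort in descending key order, mirroring the pass structure of
     * sort_sci(): count per key, turn counts into start offsets (largest key
     * first), then scatter each entry into its slot in one pass. */
    static std::vector<int> sortDescending(const std::vector<int> &len, int m)
    {
        std::vector<int> count(m + 1, 0);
        for (int l : len)
        {
            count[std::min(l, m)]++;
        }
        std::vector<int> offset(m + 1);
        offset[m] = 0;
        for (int i = m - 1; i >= 0; i--)
        {
            offset[i] = offset[i + 1] + count[i + 1];
        }
        std::vector<int> out(len.size());
        for (int l : len)
        {
            out[offset[std::min(l, m)]++] = l;
        }
        return out;
    }

    int main()
    {
        std::vector<int> sorted = sortDescending({ 2, 5, 1, 5, 3 }, 4);
        for (int l : sorted)
        {
            std::printf("%d ", l); /* prints 5 5 3 2 1 (the 5s are capped to key 4) */
        }
        std::printf("\n");
        return 0;
    }

Two linear passes plus a scatter give O(n + m) work, which is why sort_sci() guards with the early return when ncj4 <= nsci makes the sort pointless.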