From bd48f518c6bae0f561493553d2d14e3f52b7602f Mon Sep 17 00:00:00 2001 From: Aleksei Iupinov Date: Fri, 27 Oct 2017 13:01:19 +0200 Subject: [PATCH] Bring PME GPU/CUDA internal structure names to CamelCase This only does mechanical renaming (e.g. pme_gpu_settings_t to PmeGpuSettings). Any meaningful renames will be done separately. Change-Id: I7ea2af94fd0212ff6edcf433ff21842c5bbb67b0 --- src/gromacs/ewald/pme-3dfft.cu | 6 +- src/gromacs/ewald/pme-3dfft.cuh | 4 +- src/gromacs/ewald/pme-gather.cu | 4 +- src/gromacs/ewald/pme-gpu-internal.cpp | 30 ++++---- src/gromacs/ewald/pme-gpu-internal.h | 128 ++++++++++++++++----------------- src/gromacs/ewald/pme-gpu-types.h | 58 +++++++-------- src/gromacs/ewald/pme-gpu.cpp | 6 +- src/gromacs/ewald/pme-internal.h | 4 +- src/gromacs/ewald/pme-solve.cu | 4 +- src/gromacs/ewald/pme-spread.cu | 10 +-- src/gromacs/ewald/pme-timings.cu | 14 ++-- src/gromacs/ewald/pme-timings.cuh | 6 +- src/gromacs/ewald/pme.cpp | 2 +- src/gromacs/ewald/pme.cu | 86 +++++++++++----------- src/gromacs/ewald/pme.cuh | 6 +- src/gromacs/ewald/pme.h | 4 +- 16 files changed, 186 insertions(+), 186 deletions(-) diff --git a/src/gromacs/ewald/pme-3dfft.cu b/src/gromacs/ewald/pme-3dfft.cu index d1a8799668..ad722763bb 100644 --- a/src/gromacs/ewald/pme-3dfft.cu +++ b/src/gromacs/ewald/pme-3dfft.cu @@ -57,9 +57,9 @@ static void handleCufftError(cufftResult_t status, const char *msg) } } -GpuParallel3dFft::GpuParallel3dFft(const pme_gpu_t *pmeGPU) +GpuParallel3dFft::GpuParallel3dFft(const PmeGpu *pmeGPU) { - const pme_gpu_cuda_kernel_params_t *kernelParamsPtr = pmeGPU->kernelParams.get(); + const PmeGpuCudaKernelParams *kernelParamsPtr = pmeGPU->kernelParams.get(); ivec realGridSize, realGridSizePadded, complexGridSizePadded; for (int i = 0; i < DIM; i++) { @@ -137,7 +137,7 @@ void GpuParallel3dFft::perform3dFft(gmx_fft_direction dir) } } -void pme_gpu_3dfft(const pme_gpu_t *pmeGPU, gmx_fft_direction dir, int grid_index) +void pme_gpu_3dfft(const PmeGpu *pmeGPU, gmx_fft_direction dir, int grid_index) { int timerId = (dir == GMX_FFT_REAL_TO_COMPLEX) ? gtPME_FFT_R2C : gtPME_FFT_C2R; pme_gpu_start_timing(pmeGPU, timerId); diff --git a/src/gromacs/ewald/pme-3dfft.cuh b/src/gromacs/ewald/pme-3dfft.cuh index 775c3241d3..ef6cba466b 100644 --- a/src/gromacs/ewald/pme-3dfft.cuh +++ b/src/gromacs/ewald/pme-3dfft.cuh @@ -46,7 +46,7 @@ #include "gromacs/fft/fft.h" // for the enum gmx_fft_direction -struct pme_gpu_t; +struct PmeGpu; /*! \brief \internal A 3D FFT class for performing R2C/C2R transforms * \todo Make this class actually parallel over multiple GPUs @@ -64,7 +64,7 @@ class GpuParallel3dFft * * \param[in] pmeGPU The PME GPU structure. */ - GpuParallel3dFft(const pme_gpu_t *pmeGPU); + GpuParallel3dFft(const PmeGpu *pmeGPU); /*! \brief Destroys CUDA FFT plans. */ ~GpuParallel3dFft(); /*! 
\brief Performs the FFT transform in given direction */ diff --git a/src/gromacs/ewald/pme-gather.cu b/src/gromacs/ewald/pme-gather.cu index 8a3179aad3..40db9452c7 100644 --- a/src/gromacs/ewald/pme-gather.cu +++ b/src/gromacs/ewald/pme-gather.cu @@ -235,7 +235,7 @@ template < const bool wrapY > __launch_bounds__(c_gatherMaxThreadsPerBlock, c_gatherMinBlocksPerMP) -__global__ void pme_gather_kernel(const pme_gpu_cuda_kernel_params_t kernelParams) +__global__ void pme_gather_kernel(const PmeGpuCudaKernelParams kernelParams) { /* Global memory pointers */ const float * __restrict__ gm_coefficients = kernelParams.atoms.d_coefficients; @@ -410,7 +410,7 @@ __global__ void pme_gather_kernel(const pme_gpu_cuda_kernel_params_t kernelPa } } -void pme_gpu_gather(const pme_gpu_t *pmeGpu, +void pme_gpu_gather(const PmeGpu *pmeGpu, float *h_forces, PmeForceOutputHandling forceTreatment, const float *h_grid diff --git a/src/gromacs/ewald/pme-gpu-internal.cpp b/src/gromacs/ewald/pme-gpu-internal.cpp index 0443017ca1..ab7cec033c 100644 --- a/src/gromacs/ewald/pme-gpu-internal.cpp +++ b/src/gromacs/ewald/pme-gpu-internal.cpp @@ -70,14 +70,14 @@ * \param[in] pmeGPU The PME GPU structure. * \returns The pointer to the kernel parameters. */ -static pme_gpu_kernel_params_base_t *pme_gpu_get_kernel_params_base_ptr(const pme_gpu_t *pmeGPU) +static PmeGpuKernelParamsBase *pme_gpu_get_kernel_params_base_ptr(const PmeGpu *pmeGPU) { // reinterpret_cast is needed because the derived CUDA structure is not known in this file - auto *kernelParamsPtr = reinterpret_cast<pme_gpu_kernel_params_base_t *>(pmeGPU->kernelParams.get()); + auto *kernelParamsPtr = reinterpret_cast<PmeGpuKernelParamsBase *>(pmeGPU->kernelParams.get()); return kernelParamsPtr; } -void pme_gpu_get_energy_virial(const pme_gpu_t *pmeGPU, real *energy, matrix virial) +void pme_gpu_get_energy_virial(const PmeGpu *pmeGPU, real *energy, matrix virial) { GMX_ASSERT(energy, "Invalid energy output pointer in PME GPU"); unsigned int j = 0; @@ -90,7 +90,7 @@ void pme_gpu_get_energy_virial(const pme_gpu_t *pmeGPU, real *energy, matrix vir *energy = 0.5f * pmeGPU->staging.h_virialAndEnergy[j++]; } -void pme_gpu_update_input_box(pme_gpu_t *pmeGPU, const matrix box) +void pme_gpu_update_input_box(PmeGpu *pmeGPU, const matrix box) { auto *kernelParamsPtr = pme_gpu_get_kernel_params_base_ptr(pmeGPU); kernelParamsPtr->step.boxVolume = box[XX][XX] * box[YY][YY] * box[ZZ][ZZ]; @@ -122,13 +122,13 @@ void pme_gpu_update_input_box(pme_gpu_t *pmeGPU, const matrix box) * * \param[in] pmeGPU The PME GPU structure. */ -static void pme_gpu_reinit_step(const pme_gpu_t *pmeGPU) +static void pme_gpu_reinit_step(const PmeGpu *pmeGPU) { pme_gpu_clear_grids(pmeGPU); pme_gpu_clear_energy_virial(pmeGPU); } -void pme_gpu_finish_step(const pme_gpu_t *pmeGPU, const bool bCalcF, const bool bCalcEnerVir) +void pme_gpu_finish_step(const PmeGpu *pmeGPU, const bool bCalcF, const bool bCalcEnerVir) { if (bCalcF && pme_gpu_performs_gather(pmeGPU)) { @@ -147,7 +147,7 @@ void pme_gpu_finish_step(const pme_gpu_t *pmeGPU, const bool bCalcF, const bool * * \param[in] pmeGPU The PME GPU structure. 
*/ -static void pme_gpu_reinit_grids(pme_gpu_t *pmeGPU) +static void pme_gpu_reinit_grids(PmeGpu *pmeGPU) { auto *kernelParamsPtr = pme_gpu_get_kernel_params_base_ptr(pmeGPU); kernelParamsPtr->grid.ewaldFactor = (M_PI * M_PI) / (pmeGPU->common->ewaldcoeff_q * pmeGPU->common->ewaldcoeff_q); @@ -197,7 +197,7 @@ static void pme_gpu_copy_common_data_from(const gmx_pme_t *pme) { /* TODO: Consider refactoring the CPU PME code to use the same structure, * so that this function becomes 2 lines */ - pme_gpu_t *pmeGPU = pme->gpu; + PmeGpu *pmeGPU = pme->gpu; pmeGPU->common->ngrids = pme->ngrids; pmeGPU->common->epsilon_r = pme->epsilon_r; pmeGPU->common->ewaldcoeff_q = pme->ewaldcoeff_q; @@ -290,9 +290,9 @@ static void pme_gpu_init(gmx_pme_t *pme, gmx_device_info_t *gpuInfo, const gmx:: GMX_THROW(gmx::NotImplementedError(errorString)); } - pme->gpu = new pme_gpu_t(); - pme_gpu_t *pmeGPU = pme->gpu; - pmeGPU->common = std::shared_ptr<pme_shared_t>(new pme_shared_t()); + pme->gpu = new PmeGpu(); + PmeGpu *pmeGPU = pme->gpu; + pmeGPU->common = std::shared_ptr<PmeShared>(new PmeShared()); /* These settings are set here for the whole run; dynamic ones are set in pme_gpu_reinit() */ /* A convenience variable. */ @@ -318,7 +318,7 @@ static void pme_gpu_init(gmx_pme_t *pme, gmx_device_info_t *gpuInfo, const gmx:: kernelParamsPtr->constants.elFactor = ONE_4PI_EPS0 / pmeGPU->common->epsilon_r; } -void pme_gpu_transform_spline_atom_data(const pme_gpu_t *pmeGPU, const pme_atomcomm_t *atc, +void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGPU, const pme_atomcomm_t *atc, PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform) { // The GPU atom spline data is laid out in a different way currently than the CPU one. @@ -376,7 +376,7 @@ void pme_gpu_transform_spline_atom_data(const pme_gpu_t *pmeGPU, const pme_atomc } } -void pme_gpu_get_real_grid_sizes(const pme_gpu_t *pmeGPU, gmx::IVec *gridSize, gmx::IVec *paddedGridSize) +void pme_gpu_get_real_grid_sizes(const PmeGpu *pmeGPU, gmx::IVec *gridSize, gmx::IVec *paddedGridSize) { GMX_ASSERT(gridSize != nullptr, ""); GMX_ASSERT(paddedGridSize != nullptr, ""); @@ -417,7 +417,7 @@ void pme_gpu_reinit(gmx_pme_t *pme, gmx_device_info_t *gpuInfo, const gmx::MDLog pme_gpu_reinit_step(pme->gpu); } -void pme_gpu_destroy(pme_gpu_t *pmeGPU) +void pme_gpu_destroy(PmeGpu *pmeGPU) { /* Free lots of data */ pme_gpu_free_energy_virial(pmeGPU); @@ -439,7 +439,7 @@ void pme_gpu_destroy(pme_gpu_t *pmeGPU) delete pmeGPU; } -void pme_gpu_reinit_atoms(pme_gpu_t *pmeGPU, const int nAtoms, const real *charges) +void pme_gpu_reinit_atoms(PmeGpu *pmeGPU, const int nAtoms, const real *charges) { auto *kernelParamsPtr = pme_gpu_get_kernel_params_base_ptr(pmeGPU); kernelParamsPtr->atoms.nAtoms = nAtoms; diff --git a/src/gromacs/ewald/pme-gpu-internal.h b/src/gromacs/ewald/pme-gpu-internal.h index ee53312184..91ab374d97 100644 --- a/src/gromacs/ewald/pme-gpu-internal.h +++ b/src/gromacs/ewald/pme-gpu-internal.h @@ -49,7 +49,7 @@ #include "gromacs/fft/fft.h" // for the gmx_fft_direction enum #include "gromacs/gpu_utils/gpu_macros.h" // for the CUDA_FUNC_ macros -#include "pme-gpu-types.h" // for the inline functions accessing pme_gpu_t members +#include "pme-gpu-types.h" // for the inline functions accessing PmeGpu members struct gmx_hw_info_t; struct gmx_gpu_opt_t; @@ -115,7 +115,7 @@ const int c_virialAndEnergyCount = 7; * \param[in] pmeGPU The PME GPU structure. * \returns Number of atoms in a single GPU atom data chunk. 
*/ -CUDA_FUNC_QUALIFIER int pme_gpu_get_atom_data_alignment(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1) +CUDA_FUNC_QUALIFIER int pme_gpu_get_atom_data_alignment(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1) /*! \libinternal \brief * Returns the number of atoms per chunk in the atom spline theta/dtheta data layout. @@ -123,28 +123,28 @@ CUDA_FUNC_QUALIFIER int pme_gpu_get_atom_data_alignment(const pme_gpu_t *CUDA_FU * \param[in] pmeGPU The PME GPU structure. * \returns Number of atoms in a single GPU atom spline data chunk. */ -CUDA_FUNC_QUALIFIER int pme_gpu_get_atoms_per_warp(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1) +CUDA_FUNC_QUALIFIER int pme_gpu_get_atoms_per_warp(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM_WITH_RETURN(1) /*! \libinternal \brief * Synchronizes the current step, waiting for the GPU kernels/transfers to finish. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_synchronize(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_synchronize(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Allocates the fixed size energy and virial buffer both on GPU and CPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_alloc_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_alloc_energy_virial(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the energy and virial memory both on GPU and CPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_energy_virial(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_energy_virial(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Clears the energy and virial memory on GPU with 0. @@ -152,35 +152,35 @@ CUDA_FUNC_QUALIFIER void pme_gpu_free_energy_virial(pme_gpu_t *CUDA_FUNC_ARGUMEN * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_clear_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_clear_energy_virial(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates and copies the pre-computed B-spline values to the GPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_bspline_values(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the pre-computed B-spline values on the GPU (and the transfer CPU buffers). * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_bspline_values(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_bspline_values(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the GPU buffer for the PME forces. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_forces(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the GPU buffer for the PME forces. 
* * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_forces(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Copies the forces from the CPU buffer to the GPU (to reduce them with the PME GPU gathered forces). @@ -189,7 +189,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_free_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT * \param[in] pmeGPU The PME GPU structure. * \param[in] h_forces The input forces rvec buffer. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_forces(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), const float *CUDA_FUNC_ARGUMENT(h_forces)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -198,7 +198,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_forces(const pme_gpu_t *CUDA_FUNC_AR * \param[in] pmeGPU The PME GPU structure. * \param[out] h_forces The output forces rvec buffer. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_forces(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), float *CUDA_FUNC_ARGUMENT(h_forces)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -206,7 +206,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_forces(const pme_gpu_t *CUDA_FUNC_A * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_forces(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_forces(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the input coordinates buffer on the GPU (and clears the padded part if needed). @@ -215,7 +215,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_forces(const pme_gpu_t *CUDA_FUNC_A * * Needs to be called on every DD step/in the beginning. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_coordinates(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Copies the input coordinates from the CPU buffer onto the GPU. @@ -225,7 +225,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_realloc_coordinates(const pme_gpu_t *CUDA_FUNC_ * * Needs to be called every MD step. The coordinates are then used in the spline calculation. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), const rvec *CUDA_FUNC_ARGUMENT(h_coordinates)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -233,7 +233,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_coordinates(const pme_gpu_t *CUDA_FU * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_coordinates(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_coordinates(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the buffer on the GPU and copies the charges/coefficients from the CPU buffer. @@ -245,7 +245,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_free_coordinates(const pme_gpu_t *CUDA_FUNC_ARG * Does not need to be done every MD step, only whenever the local charges change. * (So, in the beginning of the run, or on DD step). 
*/ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_input_coefficients(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), const float *CUDA_FUNC_ARGUMENT(h_coefficients)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -253,49 +253,49 @@ CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_input_coefficients(const pme_g * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_coefficients(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_coefficients(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the buffers on the GPU and the host for the atoms spline data. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_spline_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_spline_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the buffers on the GPU for the atoms spline data. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_spline_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_spline_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the buffers on the GPU and the host for the particle gridline indices. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grid_indices(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grid_indices(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the buffer on the GPU for the particle gridline indices. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_grid_indices(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_grid_indices(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates the real space grid and the complex reciprocal grid (if needed) on the GPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grids(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_grids(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the real space grid and the complex reciprocal grid (if needed) on the GPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_grids(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_grids(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Clears the real space grid on the GPU. @@ -303,28 +303,28 @@ CUDA_FUNC_QUALIFIER void pme_gpu_free_grids(const pme_gpu_t *CUDA_FUNC_ARGUMENT( * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_clear_grids(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_clear_grids(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Reallocates and copies the pre-computed fractional coordinates' shifts to the GPU. * * \param[in] pmeGPU The PME GPU structure. 
*/ -CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_fract_shifts(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Frees the pre-computed fractional coordinates' shifts on the GPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_free_fract_shifts(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_free_fract_shifts(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Waits for the output virial/energy copying to the intermediate CPU buffer to finish. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_energy_virial(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_energy_virial(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Copies the input real-space grid from the host to the GPU. @@ -332,7 +332,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_sync_output_energy_virial(const pme_gpu_t *CUDA * \param[in] pmeGPU The PME GPU structure. * \param[in] h_grid The host-side grid buffer. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_grid(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), float *CUDA_FUNC_ARGUMENT(h_grid)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -341,7 +341,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_grid(const pme_gpu_t *CUDA_FU * \param[in] pmeGPU The PME GPU structure. * \param[out] h_grid The host-side grid buffer. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_grid(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), float *CUDA_FUNC_ARGUMENT(h_grid)) CUDA_FUNC_TERM /*! \libinternal \brief @@ -349,35 +349,35 @@ CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_grid(const pme_gpu_t *CUDA_F * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_atom_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_copy_output_spread_atom_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Copies the gather input spline data and gridline indices from the host to the GPU. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_atom_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_copy_input_gather_atom_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Waits for the grid copying to the host-side buffer after spreading to finish. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_sync_spread_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_sync_spread_grid(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Waits for the atom data copying to the intermediate host-side buffer after spline computation to finish. * * \param[in] pmeGPU The PME GPU structure. 
*/ -CUDA_FUNC_QUALIFIER void pme_gpu_sync_spline_atom_data(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_sync_spline_atom_data(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Waits for the grid copying to the host-side buffer after solving to finish. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_sync_solve_grid(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_sync_solve_grid(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Does the one-time GPU-framework specific PME initialization. @@ -385,7 +385,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_sync_solve_grid(const pme_gpu_t *CUDA_FUNC_ARGU * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_init_internal(pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_init_internal(PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Destroys the PME GPU-framework specific data. @@ -393,35 +393,35 @@ CUDA_FUNC_QUALIFIER void pme_gpu_init_internal(pme_gpu_t *CUDA_FUNC_ARGUMENT(pme * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_destroy_specific(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_destroy_specific(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Initializes the PME GPU synchronization events. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_init_sync_events(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_init_sync_events(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Destroys the PME GPU synchronization events. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_destroy_sync_events(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_destroy_sync_events(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Initializes the CUDA FFT structures. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_reinit_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_reinit_3dfft(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Destroys the CUDA FFT structures. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_destroy_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_destroy_3dfft(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /* Several CUDA event-based timing functions that live in pme-timings.cu */ @@ -430,21 +430,21 @@ CUDA_FUNC_QUALIFIER void pme_gpu_destroy_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUME * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_update_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_update_timings(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Updates the internal list of active PME GPU stages (if timings are enabled). * * \param[in] pmeGPU The PME GPU data structure. 
*/ -CUDA_FUNC_QUALIFIER void pme_gpu_reinit_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_reinit_timings(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \brief * Resets the PME GPU timings. To be called at the reset step. * * \param[in] pmeGPU The PME GPU structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_reset_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM +CUDA_FUNC_QUALIFIER void pme_gpu_reset_timings(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU)) CUDA_FUNC_TERM /*! \libinternal \brief * Copies the PME GPU timings to the gmx_wallclock_gpu_t structure (for log output). To be called at the run end. @@ -452,7 +452,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_reset_timings(const pme_gpu_t *CUDA_FUNC_ARGUME * \param[in] pmeGPU The PME GPU structure. * \param[in] timings The gmx_wallclock_gpu_pme_t structure. */ -CUDA_FUNC_QUALIFIER void pme_gpu_get_timings(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGPU), +CUDA_FUNC_QUALIFIER void pme_gpu_get_timings(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGPU), gmx_wallclock_gpu_pme_t *CUDA_FUNC_ARGUMENT(timings)) CUDA_FUNC_TERM /* The PME stages themselves */ @@ -467,7 +467,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_get_timings(const pme_gpu_t *CUDA_FUNC_ * \param[in] computeSplines Should the computation of spline parameters and gridline indices be performed. * \param[in] spreadCharges Should the charges/coefficients be spread on the grid. */ -CUDA_FUNC_QUALIFIER void pme_gpu_spread(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGpu), +CUDA_FUNC_QUALIFIER void pme_gpu_spread(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu), int CUDA_FUNC_ARGUMENT(gridIndex), real *CUDA_FUNC_ARGUMENT(h_grid), bool CUDA_FUNC_ARGUMENT(computeSplines), @@ -480,7 +480,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_spread(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeG * \param[in] direction Transform direction (real-to-complex or complex-to-real) * \param[in] gridIndex Index of the PME grid - unused, assumed to be 0. */ -CUDA_FUNC_QUALIFIER void pme_gpu_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGpu), +CUDA_FUNC_QUALIFIER void pme_gpu_3dfft(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu), enum gmx_fft_direction CUDA_FUNC_ARGUMENT(direction), const int CUDA_FUNC_ARGUMENT(gridIndex)) CUDA_FUNC_TERM @@ -492,7 +492,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_3dfft(const pme_gpu_t *CUDA_FUNC_ARGUMENT * \param[in] gridOrdering Specifies the dimenion ordering of the complex grid. TODO: store this information? * \param[in] computeEnergyAndVirial Tells if the energy and virial computation should also be performed. */ -CUDA_FUNC_QUALIFIER void pme_gpu_solve(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGpu), +CUDA_FUNC_QUALIFIER void pme_gpu_solve(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu), t_complex *CUDA_FUNC_ARGUMENT(h_grid), GridOrdering CUDA_FUNC_ARGUMENT(gridOrdering), bool CUDA_FUNC_ARGUMENT(computeEnergyAndVirial)) CUDA_FUNC_TERM @@ -506,7 +506,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_solve(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGp * TODO: determine efficiency/balance of host/device-side reductions. 
* \param[in] h_grid The host-side grid buffer (used only in testing mode) */ -CUDA_FUNC_QUALIFIER void pme_gpu_gather(const pme_gpu_t *CUDA_FUNC_ARGUMENT(pmeGpu), +CUDA_FUNC_QUALIFIER void pme_gpu_gather(const PmeGpu *CUDA_FUNC_ARGUMENT(pmeGpu), float *CUDA_FUNC_ARGUMENT(h_forces), PmeForceOutputHandling CUDA_FUNC_ARGUMENT(forceTreatment), const float *CUDA_FUNC_ARGUMENT(h_grid) @@ -521,7 +521,7 @@ CUDA_FUNC_QUALIFIER void pme_gpu_gather(const pme_gpu_t *CUDA_FUNC_ARGUMEN * \param[in] pmeGPU The PME GPU structure. * \returns True if PME runs on multiple GPUs, false otherwise. */ -gmx_inline bool pme_gpu_uses_dd(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_uses_dd(const PmeGpu *pmeGPU) { return !pmeGPU->settings.useDecomposition; } @@ -532,7 +532,7 @@ gmx_inline bool pme_gpu_uses_dd(const pme_gpu_t *pmeGPU) * \param[in] pmeGPU The PME GPU structure. * \returns True if the gathering is performed on GPU, false otherwise. */ -gmx_inline bool pme_gpu_performs_gather(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_performs_gather(const PmeGpu *pmeGPU) { return pmeGPU->settings.performGPUGather; } @@ -543,7 +543,7 @@ gmx_inline bool pme_gpu_performs_gather(const pme_gpu_t *pmeGPU) * \param[in] pmeGPU The PME GPU structure. * \returns True if FFT is performed on GPU, false otherwise. */ -gmx_inline bool pme_gpu_performs_FFT(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_performs_FFT(const PmeGpu *pmeGPU) { return pmeGPU->settings.performGPUFFT; } @@ -554,7 +554,7 @@ gmx_inline bool pme_gpu_performs_FFT(const pme_gpu_t *pmeGPU) * \param[in] pmeGPU The PME GPU structure. * \returns True if (un-)wrapping is performed on GPU, false otherwise. */ -gmx_inline bool pme_gpu_performs_wrapping(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_performs_wrapping(const PmeGpu *pmeGPU) { return pmeGPU->settings.useDecomposition; } @@ -565,7 +565,7 @@ gmx_inline bool pme_gpu_performs_wrapping(const pme_gpu_t *pmeGPU) * \param[in] pmeGPU The PME GPU structure. * \returns True if solving is performed on GPU, false otherwise. */ -gmx_inline bool pme_gpu_performs_solve(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_performs_solve(const PmeGpu *pmeGPU) { return pmeGPU->settings.performGPUSolve; } @@ -577,7 +577,7 @@ gmx_inline bool pme_gpu_performs_solve(const pme_gpu_t *pmeGPU) * \param[in] pmeGPU The PME GPU structure. * \param[in] testing Should the testing mode be enabled, or disabled. */ -gmx_inline void pme_gpu_set_testing(pme_gpu_t *pmeGPU, bool testing) +gmx_inline void pme_gpu_set_testing(PmeGpu *pmeGPU, bool testing) { pmeGPU->settings.copyAllOutputs = testing; } @@ -588,7 +588,7 @@ gmx_inline void pme_gpu_set_testing(pme_gpu_t *pmeGPU, bool testing) * \param[in] pmeGPU The PME GPU structure. * \returns true if testing mode is enabled, false otherwise. */ -gmx_inline bool pme_gpu_is_testing(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_is_testing(const PmeGpu *pmeGPU) { return pmeGPU->settings.copyAllOutputs; } @@ -603,7 +603,7 @@ gmx_inline bool pme_gpu_is_testing(const pme_gpu_t *pmeGPU) * \param[out] energy The output energy. * \param[out] virial The output virial matrix. */ -void pme_gpu_get_energy_virial(const pme_gpu_t *pmeGPU, real *energy, matrix virial); +void pme_gpu_get_energy_virial(const PmeGpu *pmeGPU, real *energy, matrix virial); /*! \libinternal \brief * Updates the unit cell parameters. Does not check if update is necessary - that is done in pme_gpu_prepare_step(). 
@@ -611,7 +611,7 @@ void pme_gpu_get_energy_virial(const pme_gpu_t *pmeGPU, real *energy, matrix vir * \param[in] pmeGPU The PME GPU structure. * \param[in] box The unit cell box. */ -void pme_gpu_update_input_box(pme_gpu_t *pmeGPU, const matrix box); +void pme_gpu_update_input_box(PmeGpu *pmeGPU, const matrix box); /*! \libinternal \brief * Finishes the PME GPU step, waiting for the output forces and/or energy/virial to be copied to the host. @@ -620,7 +620,7 @@ void pme_gpu_update_input_box(pme_gpu_t *pmeGPU, const matrix box); * \param[in] bCalcForces The left-over flag from the CPU code which tells the function to copy the forces to the CPU side. Should be passed to the launch call instead. FIXME * \param[in] bCalcEnerVir The left-over flag from the CPU code which tells the function to copy the energy/virial to the CPU side. Should be passed to the launch call instead. */ -void pme_gpu_finish_step(const pme_gpu_t *pmeGPU, const bool bCalcForces, +void pme_gpu_finish_step(const PmeGpu *pmeGPU, const bool bCalcForces, const bool bCalcEnerVir); //! A binary enum for spline data layout transformation @@ -640,7 +640,7 @@ enum class PmeLayoutTransform * \param[in] dimIndex Dimension index. * \param[in] transform Layout transform type */ -void pme_gpu_transform_spline_atom_data(const pme_gpu_t *pmeGPU, const pme_atomcomm_t *atc, +void pme_gpu_transform_spline_atom_data(const PmeGpu *pmeGPU, const pme_atomcomm_t *atc, PmeSplineDataType type, int dimIndex, PmeLayoutTransform transform); /*! \libinternal \brief @@ -650,7 +650,7 @@ void pme_gpu_transform_spline_atom_data(const pme_gpu_t *pmeGPU, const pme_atomc * \param[out] gridSize Pointer to the grid dimensions to fill in. * \param[out] paddedGridSize Pointer to the padded grid dimensions to fill in. */ -void pme_gpu_get_real_grid_sizes(const pme_gpu_t *pmeGPU, gmx::IVec *gridSize, gmx::IVec *paddedGridSize); +void pme_gpu_get_real_grid_sizes(const PmeGpu *pmeGPU, gmx::IVec *gridSize, gmx::IVec *paddedGridSize); /*! \libinternal \brief * (Re-)initializes the PME GPU data at the beginning of the run or on DLB. @@ -668,7 +668,7 @@ void pme_gpu_reinit(gmx_pme_t *pme, gmx_device_info_t *gpuInfo, const gmx::MDLog * * \param[in] pmeGPU The PME GPU structure. */ -void pme_gpu_destroy(pme_gpu_t *pmeGPU); +void pme_gpu_destroy(PmeGpu *pmeGPU); /*! \libinternal \brief * Reallocates the local atoms data (charges, coordinates, etc.). Copies the charges to the GPU. @@ -680,7 +680,7 @@ void pme_gpu_destroy(pme_gpu_t *pmeGPU); * This is a function that should only be called in the beginning of the run and on domain decomposition. * Should be called before the pme_gpu_set_io_ranges. */ -void pme_gpu_reinit_atoms(pme_gpu_t *pmeGPU, +void pme_gpu_reinit_atoms(PmeGpu *pmeGPU, const int nAtoms, const real *charges); diff --git a/src/gromacs/ewald/pme-gpu-types.h b/src/gromacs/ewald/pme-gpu-types.h index 04e5481466..ced974c087 100644 --- a/src/gromacs/ewald/pme-gpu-types.h +++ b/src/gromacs/ewald/pme-gpu-types.h @@ -41,8 +41,8 @@ * most of the initial PME CUDA implementation is merged * into the master branch (likely, after release 2017). * This should include: - * -- bringing the structure/function names up to guidelines - * ---- pme_gpu_settings_t -> PmeGpuTasks + * -- bringing the function names up to guidelines + * -- PmeGpuSettings -> PmeGpuTasks * -- refining GPU notation application (#2053) * -- renaming coefficients to charges (?) 
* @@ -84,20 +84,20 @@ enum class PmeForceOutputHandling #if GMX_GPU == GMX_GPU_CUDA -struct pme_gpu_cuda_t; +struct PmeGpuCuda; /*! \brief A typedef for including the GPU host data by pointer */ -typedef pme_gpu_cuda_t pme_gpu_specific_t; +typedef PmeGpuCuda PmeGpuSpecific; -struct pme_gpu_cuda_kernel_params_t; +struct PmeGpuCudaKernelParams; /*! \brief A typedef for including the GPU kernel arguments data by pointer */ -typedef pme_gpu_cuda_kernel_params_t pme_gpu_kernel_params_t; +typedef PmeGpuCudaKernelParams PmeGpuKernelParams; #else /*! \brief A dummy typedef for the GPU host data placeholder on non-GPU builds */ -typedef int pme_gpu_specific_t; +typedef int PmeGpuSpecific; /*! \brief A dummy typedef for the GPU kernel arguments data placeholder on non-GPU builds */ -typedef int pme_gpu_kernel_params_t; +typedef int PmeGpuKernelParams; #endif @@ -105,14 +105,14 @@ typedef int pme_gpu_kernel_params_t; * sorted into several device-side structures depending on the update rate. * This is GPU agnostic (float3 replaced by float[3], etc.). * The GPU-framework specifics (e.g. cudaTextureObject_t handles) are described - * in the larger structure pme_gpu_cuda_kernel_params_t in the pme.cuh. + * in the larger structure PmeGpuCudaKernelParams in the pme.cuh. */ /*! \internal \brief * A GPU data structure for storing the constant PME data. * This only has to be initialized once. */ -struct pme_gpu_const_params_t +struct PmeGpuConstParams { /*! \brief Electrostatics coefficient = ONE_4PI_EPS0 / pme->epsilon_r */ float elFactor; @@ -125,7 +125,7 @@ struct pme_gpu_const_params_t * A GPU data structure for storing the PME data related to the grid sizes and cut-off. * This only has to be updated at every DD step. */ -struct pme_gpu_grid_params_t +struct PmeGpuGridParams { /* Grid sizes */ /*! \brief Real-space grid data dimensions. */ @@ -168,7 +168,7 @@ struct pme_gpu_grid_params_t * A GPU data structure for storing the PME data of the atoms, local to this process' domain partition. * This only has to be updated every DD step. */ -struct pme_gpu_atom_params_t +struct PmeGpuAtomParams { /*! \brief Number of local atoms */ int nAtoms; @@ -203,7 +203,7 @@ struct pme_gpu_atom_params_t /*! \internal \brief * A GPU data structure for storing the PME data which might change every MD step. */ -struct pme_gpu_step_params_t +struct PmeGpuStepParams { /* The box parameters. The box only changes size each step with pressure coupling enabled. */ /*! \brief @@ -221,20 +221,20 @@ struct pme_gpu_step_params_t /*! \internal \brief * A single structure encompassing almost all the PME data used in GPU kernels on device. * This is inherited by the GPU framework-specific structure - * (pme_gpu_cuda_kernel_params_t in pme.cuh). + * (PmeGpuCudaKernelParams in pme.cuh). * This way, most code preparing the kernel parameters can be GPU-agnostic by casting - * the kernel parameter data pointer to pme_gpu_kernel_params_base_t. + * the kernel parameter data pointer to PmeGpuKernelParamsBase. */ -struct pme_gpu_kernel_params_base_t +struct PmeGpuKernelParamsBase { /*! \brief Constant data that is set once. */ - pme_gpu_const_params_t constants; + PmeGpuConstParams constants; /*! \brief Data dependent on the grid size/cutoff. */ - pme_gpu_grid_params_t grid; + PmeGpuGridParams grid; /*! \brief Data dependent on the DD and local atoms. */ - pme_gpu_atom_params_t atoms; + PmeGpuAtomParams atoms; /*! \brief Data that possibly changes on every MD step. 
*/ - pme_gpu_step_params_t step; + PmeGpuStepParams step; }; /* Here are the host-side structures */ @@ -242,7 +242,7 @@ struct pme_gpu_kernel_params_base_t /*! \internal \brief * The PME GPU settings structure, included in the main PME GPU structure by value. */ -struct pme_gpu_settings_t +struct PmeGpuSettings { /* Permanent settings set on initialization */ /*! \brief A boolean which tells if the solving is performed on GPU. Currently always true */ @@ -265,7 +265,7 @@ struct pme_gpu_settings_t * The PME GPU intermediate buffers structure, included in the main PME GPU structure by value. * Buffers are managed by the PME GPU module. */ -struct pme_gpu_staging_t +struct PmeGpuStaging { /*! \brief Virial and energy intermediate host-side buffer. Size is PME_GPU_VIRIAL_AND_ENERGY_COUNT. */ float *h_virialAndEnergy; @@ -289,7 +289,7 @@ struct pme_gpu_staging_t * TODO: use the shared data with the PME CPU. * Included in the main PME GPU structure by value. */ -struct pme_shared_t +struct PmeShared { /*! \brief Grid count - currently always 1 on GPU */ int ngrids; @@ -325,18 +325,18 @@ struct pme_shared_t /*! \internal \brief * The main PME GPU host structure, included in the PME CPU structure by pointer. */ -struct pme_gpu_t +struct PmeGpu { /*! \brief The information copied once per reinit from the CPU structure. */ - std::shared_ptr<pme_shared_t> common; // TODO: make the CPU structure use the same type + std::shared_ptr<PmeShared> common; // TODO: make the CPU structure use the same type /*! \brief The settings. */ - pme_gpu_settings_t settings; + PmeGpuSettings settings; /*! \brief The host-side buffers. * The device-side buffers are buried in kernelParams, but that will have to change. */ - pme_gpu_staging_t staging; + PmeGpuStaging staging; /*! \brief Number of local atoms, padded to be divisible by PME_ATOM_DATA_ALIGNMENT. * Used for kernel scheduling. @@ -362,10 +362,10 @@ struct pme_gpu_t * \todo Test whether this should be copied to the constant GPU memory once per MD step * (or even less often with no box updates) instead of being an argument. */ - std::shared_ptr<pme_gpu_kernel_params_t> kernelParams; + std::shared_ptr<PmeGpuKernelParams> kernelParams; /*! \brief The pointer to GPU-framework specific host-side data, such as CUDA streams and events. 
*/ - std::shared_ptr<pme_gpu_specific_t> archSpecific; /* FIXME: make it an unique_ptr */ + std::shared_ptr<PmeGpuSpecific> archSpecific; /* FIXME: make it an unique_ptr */ }; #endif diff --git a/src/gromacs/ewald/pme-gpu.cpp b/src/gromacs/ewald/pme-gpu.cpp index fd97065b06..0ba9603b11 100644 --- a/src/gromacs/ewald/pme-gpu.cpp +++ b/src/gromacs/ewald/pme-gpu.cpp @@ -176,7 +176,7 @@ void pme_gpu_prepare_step(gmx_pme_t *pme, GMX_ASSERT(pme->nnodes > 0, ""); GMX_ASSERT(pme->nnodes == 1 || pme->ndecompdim > 0, ""); - pme_gpu_t *pmeGpu = pme->gpu; + PmeGpu *pmeGpu = pme->gpu; pmeGpu->settings.stepFlags = flags; // TODO these flags are only here to honor the CPU PME code, and probably should be removed @@ -216,7 +216,7 @@ void pme_gpu_launch_spread(gmx_pme_t *pme, { GMX_ASSERT(pme_gpu_active(pme), "This should be a GPU run of PME but it is not enabled."); - pme_gpu_t *pmeGpu = pme->gpu; + PmeGpu *pmeGpu = pme->gpu; // The only spot of PME GPU where LAUNCH_GPU (sub)counter increases call-count wallcycle_start(wcycle, ewcLAUNCH_GPU); @@ -243,7 +243,7 @@ void pme_gpu_launch_spread(gmx_pme_t *pme, void pme_gpu_launch_complex_transforms(gmx_pme_t *pme, gmx_wallcycle_t wcycle) { - pme_gpu_t *pmeGpu = pme->gpu; + PmeGpu *pmeGpu = pme->gpu; const bool computeEnergyAndVirial = pmeGpu->settings.stepFlags & GMX_PME_CALC_ENER_VIR; const bool performBackFFT = pmeGpu->settings.stepFlags & (GMX_PME_CALC_F | GMX_PME_CALC_POT); const unsigned int gridIndex = 0; diff --git a/src/gromacs/ewald/pme-internal.h b/src/gromacs/ewald/pme-internal.h index 4530acde70..b7eac93368 100644 --- a/src/gromacs/ewald/pme-internal.h +++ b/src/gromacs/ewald/pme-internal.h @@ -66,7 +66,7 @@ typedef struct gmx_parallel_3dfft *gmx_parallel_3dfft_t; struct t_commrec; struct t_inputrec; -struct pme_gpu_t; +struct PmeGpu; //@{ //! Grid indices for A state for charge and Lennard-Jones C6 @@ -263,7 +263,7 @@ struct gmx_pme_t { * and ideally not be duplicated here. */ - pme_gpu_t *gpu; /* A pointer to the GPU data. + PmeGpu *gpu; /* A pointer to the GPU data. * TODO: this should be unique or a shared pointer. * Currently in practice there is a single gmx_pme_t instance while a code * is partially set up for many of them. 
The PME tuning calls gmx_pme_reinit() diff --git a/src/gromacs/ewald/pme-solve.cu b/src/gromacs/ewald/pme-solve.cu index aa75eb8401..6f68453a5a 100644 --- a/src/gromacs/ewald/pme-solve.cu +++ b/src/gromacs/ewald/pme-solve.cu @@ -81,7 +81,7 @@ template< bool computeEnergyAndVirial > __launch_bounds__(c_solveMaxThreadsPerBlock) -__global__ void pme_solve_kernel(const struct pme_gpu_cuda_kernel_params_t kernelParams) +__global__ void pme_solve_kernel(const struct PmeGpuCudaKernelParams kernelParams) { /* This kernel supports 2 different grid dimension orderings: YZX and XYZ */ int majorDim, middleDim, minorDim; @@ -417,7 +417,7 @@ __global__ void pme_solve_kernel(const struct pme_gpu_cuda_kernel_params_t kerne } } -void pme_gpu_solve(const pme_gpu_t *pmeGpu, t_complex *h_grid, +void pme_gpu_solve(const PmeGpu *pmeGpu, t_complex *h_grid, GridOrdering gridOrdering, bool computeEnergyAndVirial) { const bool copyInputAndOutputGrid = pme_gpu_is_testing(pmeGpu) || !pme_gpu_performs_FFT(pmeGpu); diff --git a/src/gromacs/ewald/pme-spread.cu b/src/gromacs/ewald/pme-spread.cu index eb52077dcb..3e969d572b 100644 --- a/src/gromacs/ewald/pme-spread.cu +++ b/src/gromacs/ewald/pme-spread.cu @@ -107,7 +107,7 @@ template __device__ __forceinline__ -void pme_gpu_stage_atom_data(const pme_gpu_cuda_kernel_params_t kernelParams, +void pme_gpu_stage_atom_data(const PmeGpuCudaKernelParams kernelParams, T * __restrict__ sm_destination, const T * __restrict__ gm_source) { @@ -141,7 +141,7 @@ void pme_gpu_stage_atom_data(const pme_gpu_cuda_kernel_params_t kernelParams, */ template -__device__ __forceinline__ void calculate_splines(const pme_gpu_cuda_kernel_params_t kernelParams, +__device__ __forceinline__ void calculate_splines(const PmeGpuCudaKernelParams kernelParams, const int atomIndexOffset, const float3 * __restrict__ sm_coordinates, const float * __restrict__ sm_coefficients, @@ -354,7 +354,7 @@ __device__ __forceinline__ void calculate_splines(const pme_gpu_cuda_kernel_para */ template < const int order, const bool wrapX, const bool wrapY> -__device__ __forceinline__ void spread_charges(const pme_gpu_cuda_kernel_params_t kernelParams, +__device__ __forceinline__ void spread_charges(const PmeGpuCudaKernelParams kernelParams, int atomIndexOffset, const float * __restrict__ sm_coefficients, const int * __restrict__ sm_gridlineIndices, @@ -444,7 +444,7 @@ template < const bool wrapY > __launch_bounds__(c_spreadMaxThreadsPerBlock) -__global__ void pme_spline_and_spread_kernel(const pme_gpu_cuda_kernel_params_t kernelParams) +__global__ void pme_spline_and_spread_kernel(const PmeGpuCudaKernelParams kernelParams) { const int atomsPerBlock = c_spreadMaxThreadsPerBlock / PME_SPREADGATHER_THREADS_PER_ATOM; // Gridline indices, ivec @@ -491,7 +491,7 @@ __global__ void pme_spline_and_spread_kernel(const pme_gpu_cuda_kernel_params_t } } -void pme_gpu_spread(const pme_gpu_t *pmeGpu, +void pme_gpu_spread(const PmeGpu *pmeGpu, int gmx_unused gridIndex, real *h_grid, bool computeSplines, diff --git a/src/gromacs/ewald/pme-timings.cu b/src/gromacs/ewald/pme-timings.cu index 09d6d18337..ee47c27ec7 100644 --- a/src/gromacs/ewald/pme-timings.cu +++ b/src/gromacs/ewald/pme-timings.cu @@ -53,12 +53,12 @@ * \param[in] pme The PME data structure. * \returns True if timings are enabled, false otherwise. 
*/ -gmx_inline bool pme_gpu_timings_enabled(const pme_gpu_t *pmeGPU) +gmx_inline bool pme_gpu_timings_enabled(const PmeGpu *pmeGPU) { return pmeGPU->archSpecific->useTiming; } -void pme_gpu_start_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId) +void pme_gpu_start_timing(const PmeGpu *pmeGPU, size_t PMEStageId) { if (pme_gpu_timings_enabled(pmeGPU)) { @@ -67,7 +67,7 @@ void pme_gpu_start_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId) } } -void pme_gpu_stop_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId) +void pme_gpu_stop_timing(const PmeGpu *pmeGPU, size_t PMEStageId) { if (pme_gpu_timings_enabled(pmeGPU)) { @@ -76,7 +76,7 @@ void pme_gpu_stop_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId) } } -void pme_gpu_get_timings(const pme_gpu_t *pmeGPU, gmx_wallclock_gpu_pme_t *timings) +void pme_gpu_get_timings(const PmeGpu *pmeGPU, gmx_wallclock_gpu_pme_t *timings) { if (pme_gpu_timings_enabled(pmeGPU)) { @@ -89,7 +89,7 @@ void pme_gpu_get_timings(const pme_gpu_t *pmeGPU, gmx_wallclock_gpu_pme_t *timin } } -void pme_gpu_update_timings(const pme_gpu_t *pmeGPU) +void pme_gpu_update_timings(const PmeGpu *pmeGPU) { if (pme_gpu_timings_enabled(pmeGPU)) { @@ -102,7 +102,7 @@ void pme_gpu_update_timings(const pme_gpu_t *pmeGPU) } } -void pme_gpu_reinit_timings(const pme_gpu_t *pmeGPU) +void pme_gpu_reinit_timings(const PmeGpu *pmeGPU) { if (pme_gpu_timings_enabled(pmeGPU)) { @@ -125,7 +125,7 @@ void pme_gpu_reinit_timings(const pme_gpu_t *pmeGPU) } } -void pme_gpu_reset_timings(const pme_gpu_t *pmeGPU) +void pme_gpu_reset_timings(const PmeGpu *pmeGPU) { if (pme_gpu_timings_enabled(pmeGPU)) { diff --git a/src/gromacs/ewald/pme-timings.cuh b/src/gromacs/ewald/pme-timings.cuh index 8c94e7f817..72a70b87e7 100644 --- a/src/gromacs/ewald/pme-timings.cuh +++ b/src/gromacs/ewald/pme-timings.cuh @@ -45,7 +45,7 @@ #include "gromacs/gpu_utils/gpuregiontimer.cuh" #include "gromacs/timing/gpu_timing.h" // TODO: move include to the source files -struct pme_gpu_t; +struct PmeGpu; /*! \libinternal \brief * Starts timing the certain PME GPU stage during a single step (if timings are enabled). @@ -53,7 +53,7 @@ struct pme_gpu_t; * \param[in] pmeGPU The PME GPU data structure. * \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -void pme_gpu_start_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId); +void pme_gpu_start_timing(const PmeGpu *pmeGPU, size_t PMEStageId); /*! \libinternal \brief * Stops timing the certain PME GPU stage during a single step (if timings are enabled). @@ -61,6 +61,6 @@ void pme_gpu_start_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId); * \param[in] pmeGPU The PME GPU data structure. 
* \param[in] PMEStageId The PME GPU stage gtPME_ index from the enum in src/gromacs/timing/gpu_timing.h */ -void pme_gpu_stop_timing(const pme_gpu_t *pmeGPU, size_t PMEStageId); +void pme_gpu_stop_timing(const PmeGpu *pmeGPU, size_t PMEStageId); #endif diff --git a/src/gromacs/ewald/pme.cpp b/src/gromacs/ewald/pme.cpp index ef2a5cf6fc..812754f24b 100644 --- a/src/gromacs/ewald/pme.cpp +++ b/src/gromacs/ewald/pme.cpp @@ -517,7 +517,7 @@ gmx_pme_t *gmx_pme_init(const t_commrec *cr, real ewaldcoeff_lj, int nthread, PmeRunMode runMode, - pme_gpu_t *pmeGPU, + PmeGpu *pmeGPU, gmx_device_info_t *gpuInfo, const gmx::MDLogger &mdlog) { diff --git a/src/gromacs/ewald/pme.cu b/src/gromacs/ewald/pme.cu index 14bef5ce5d..1a59b01e4d 100644 --- a/src/gromacs/ewald/pme.cu +++ b/src/gromacs/ewald/pme.cu @@ -56,27 +56,27 @@ #include "pme-3dfft.cuh" #include "pme-grid.h" -int pme_gpu_get_atom_data_alignment(const pme_gpu_t *pmeGPU) +int pme_gpu_get_atom_data_alignment(const PmeGpu *pmeGPU) { const int order = pmeGPU->common->pme_order; GMX_ASSERT(order > 0, "Invalid PME order"); return PME_ATOM_DATA_ALIGNMENT; } -int pme_gpu_get_atoms_per_warp(const pme_gpu_t *pmeGPU) +int pme_gpu_get_atoms_per_warp(const PmeGpu *pmeGPU) { const int order = pmeGPU->common->pme_order; GMX_ASSERT(order > 0, "Invalid PME order"); return PME_SPREADGATHER_ATOMS_PER_WARP; } -void pme_gpu_synchronize(const pme_gpu_t *pmeGPU) +void pme_gpu_synchronize(const PmeGpu *pmeGPU) { cudaError_t stat = cudaStreamSynchronize(pmeGPU->archSpecific->pmeStream); CU_RET_ERR(stat, "Failed to synchronize the PME GPU stream!"); } -void pme_gpu_alloc_energy_virial(const pme_gpu_t *pmeGPU) +void pme_gpu_alloc_energy_virial(const PmeGpu *pmeGPU) { const size_t energyAndVirialSize = c_virialAndEnergyCount * sizeof(float); cudaError_t stat = cudaMalloc((void **)&pmeGPU->kernelParams->constants.d_virialAndEnergy, energyAndVirialSize); @@ -84,7 +84,7 @@ void pme_gpu_alloc_energy_virial(const pme_gpu_t *pmeGPU) pmalloc((void **)&pmeGPU->staging.h_virialAndEnergy, energyAndVirialSize); } -void pme_gpu_free_energy_virial(pme_gpu_t *pmeGPU) +void pme_gpu_free_energy_virial(PmeGpu *pmeGPU) { cudaError_t stat = cudaFree(pmeGPU->kernelParams->constants.d_virialAndEnergy); CU_RET_ERR(stat, "cudaFree failed on PME energy and virial"); @@ -93,14 +93,14 @@ void pme_gpu_free_energy_virial(pme_gpu_t *pmeGPU) pmeGPU->staging.h_virialAndEnergy = nullptr; } -void pme_gpu_clear_energy_virial(const pme_gpu_t *pmeGPU) +void pme_gpu_clear_energy_virial(const PmeGpu *pmeGPU) { cudaError_t stat = cudaMemsetAsync(pmeGPU->kernelParams->constants.d_virialAndEnergy, 0, c_virialAndEnergyCount * sizeof(float), pmeGPU->archSpecific->pmeStream); CU_RET_ERR(stat, "PME energy/virial cudaMemsetAsync error"); } -void pme_gpu_realloc_and_copy_bspline_values(const pme_gpu_t *pmeGPU) +void pme_gpu_realloc_and_copy_bspline_values(const PmeGpu *pmeGPU) { const int splineValuesOffset[DIM] = { 0, @@ -130,14 +130,14 @@ void pme_gpu_realloc_and_copy_bspline_values(const pme_gpu_t *pmeGPU) newSplineValuesSize * sizeof(float), pmeGPU->archSpecific->pmeStream); } -void pme_gpu_free_bspline_values(const pme_gpu_t *pmeGPU) +void pme_gpu_free_bspline_values(const PmeGpu *pmeGPU) { pfree(pmeGPU->staging.h_splineModuli); cu_free_buffered(pmeGPU->kernelParams->grid.d_splineModuli, &pmeGPU->archSpecific->splineValuesSize, &pmeGPU->archSpecific->splineValuesSizeAlloc); } -void pme_gpu_realloc_forces(const pme_gpu_t *pmeGPU) +void pme_gpu_realloc_forces(const PmeGpu *pmeGPU) { const size_t newForcesSize = 
pmeGPU->nAtomsAlloc * DIM; GMX_ASSERT(newForcesSize > 0, "Bad number of atoms in PME GPU"); @@ -145,12 +145,12 @@ void pme_gpu_realloc_forces(const pme_gpu_t *pmeGPU) &pmeGPU->archSpecific->forcesSize, &pmeGPU->archSpecific->forcesSizeAlloc, newForcesSize, pmeGPU->archSpecific->pmeStream, true); } -void pme_gpu_free_forces(const pme_gpu_t *pmeGPU) +void pme_gpu_free_forces(const PmeGpu *pmeGPU) { cu_free_buffered(pmeGPU->kernelParams->atoms.d_forces, &pmeGPU->archSpecific->forcesSize, &pmeGPU->archSpecific->forcesSizeAlloc); } -void pme_gpu_copy_input_forces(const pme_gpu_t *pmeGPU, const float *h_forces) +void pme_gpu_copy_input_forces(const PmeGpu *pmeGPU, const float *h_forces) { GMX_ASSERT(h_forces, "nullptr host forces pointer in PME GPU"); const size_t forcesSize = DIM * pmeGPU->kernelParams->atoms.nAtoms * sizeof(float); @@ -158,7 +158,7 @@ void pme_gpu_copy_input_forces(const pme_gpu_t *pmeGPU, const float *h_forces) cu_copy_H2D_async(pmeGPU->kernelParams->atoms.d_forces, const_cast<float *>(h_forces), forcesSize, pmeGPU->archSpecific->pmeStream); } -void pme_gpu_copy_output_forces(const pme_gpu_t *pmeGPU, float *h_forces) +void pme_gpu_copy_output_forces(const PmeGpu *pmeGPU, float *h_forces) { GMX_ASSERT(h_forces, "nullptr host forces pointer in PME GPU"); const size_t forcesSize = DIM * pmeGPU->kernelParams->atoms.nAtoms * sizeof(float); @@ -168,13 +168,13 @@ void pme_gpu_copy_output_forces(const pme_gpu_t *pmeGPU, float *h_forces) CU_RET_ERR(stat, "PME gather forces synchronization failure"); } -void pme_gpu_sync_output_forces(const pme_gpu_t *pmeGPU) +void pme_gpu_sync_output_forces(const PmeGpu *pmeGPU) { cudaError_t stat = cudaEventSynchronize(pmeGPU->archSpecific->syncForcesD2H); CU_RET_ERR(stat, "Error while waiting for the PME GPU forces"); } -void pme_gpu_realloc_coordinates(const pme_gpu_t *pmeGPU) +void pme_gpu_realloc_coordinates(const PmeGpu *pmeGPU) { const size_t newCoordinatesSize = pmeGPU->nAtomsAlloc * DIM; GMX_ASSERT(newCoordinatesSize > 0, "Bad number of atoms in PME GPU"); @@ -192,7 +192,7 @@ void pme_gpu_realloc_coordinates(const pme_gpu_t *pmeGPU) } } -void pme_gpu_copy_input_coordinates(const pme_gpu_t *pmeGPU, const rvec *h_coordinates) +void pme_gpu_copy_input_coordinates(const PmeGpu *pmeGPU, const rvec *h_coordinates) { GMX_ASSERT(h_coordinates, "Bad host-side coordinate buffer in PME GPU"); #if GMX_DOUBLE @@ -204,12 +204,12 @@ void pme_gpu_copy_input_coordinates(const pme_gpu_t *pmeGPU, const rvec *h_coord #endif } -void pme_gpu_free_coordinates(const pme_gpu_t *pmeGPU) +void pme_gpu_free_coordinates(const PmeGpu *pmeGPU) { cu_free_buffered(pmeGPU->kernelParams->atoms.d_coordinates, &pmeGPU->archSpecific->coordinatesSize, &pmeGPU->archSpecific->coordinatesSizeAlloc); } -void pme_gpu_realloc_and_copy_input_coefficients(const pme_gpu_t *pmeGPU, const float *h_coefficients) +void pme_gpu_realloc_and_copy_input_coefficients(const PmeGpu *pmeGPU, const float *h_coefficients) { GMX_ASSERT(h_coefficients, "Bad host-side charge buffer in PME GPU"); const size_t newCoefficientsSize = pmeGPU->nAtomsAlloc; @@ -231,12 +231,12 @@ void pme_gpu_realloc_and_copy_input_coefficients(const pme_gpu_t *pmeGPU, const } } -void pme_gpu_free_coefficients(const pme_gpu_t *pmeGPU) +void pme_gpu_free_coefficients(const PmeGpu *pmeGPU) { cu_free_buffered(pmeGPU->kernelParams->atoms.d_coefficients, &pmeGPU->archSpecific->coefficientsSize, &pmeGPU->archSpecific->coefficientsSizeAlloc); } -void pme_gpu_realloc_spline_data(const pme_gpu_t *pmeGPU) +void pme_gpu_realloc_spline_data(const 
PmeGpu *pmeGPU) { const int order = pmeGPU->common->pme_order; const int alignment = pme_gpu_get_atoms_per_warp(pmeGPU); @@ -261,7 +261,7 @@ void pme_gpu_realloc_spline_data(const pme_gpu_t *pmeGPU) } } -void pme_gpu_free_spline_data(const pme_gpu_t *pmeGPU) +void pme_gpu_free_spline_data(const PmeGpu *pmeGPU) { /* Two arrays of the same size */ cu_free_buffered(pmeGPU->kernelParams->atoms.d_theta); @@ -270,7 +270,7 @@ void pme_gpu_free_spline_data(const pme_gpu_t *pmeGPU) pfree(pmeGPU->staging.h_dtheta); } -void pme_gpu_realloc_grid_indices(const pme_gpu_t *pmeGPU) +void pme_gpu_realloc_grid_indices(const PmeGpu *pmeGPU) { const size_t newIndicesSize = DIM * pmeGPU->nAtomsAlloc; GMX_ASSERT(newIndicesSize > 0, "Bad number of atoms in PME GPU"); @@ -280,13 +280,13 @@ void pme_gpu_realloc_grid_indices(const pme_gpu_t *pmeGPU) pmalloc((void **)&pmeGPU->staging.h_gridlineIndices, newIndicesSize * sizeof(int)); } -void pme_gpu_free_grid_indices(const pme_gpu_t *pmeGPU) +void pme_gpu_free_grid_indices(const PmeGpu *pmeGPU) { cu_free_buffered(pmeGPU->kernelParams->atoms.d_gridlineIndices, &pmeGPU->archSpecific->gridlineIndicesSize, &pmeGPU->archSpecific->gridlineIndicesSizeAlloc); pfree(pmeGPU->staging.h_gridlineIndices); } -void pme_gpu_realloc_grids(pme_gpu_t *pmeGPU) +void pme_gpu_realloc_grids(PmeGpu *pmeGPU) { auto *kernelParamsPtr = pmeGPU->kernelParams.get(); const int newRealGridSize = kernelParamsPtr->grid.realGridSizePadded[XX] * @@ -319,7 +319,7 @@ void pme_gpu_realloc_grids(pme_gpu_t *pmeGPU) } } -void pme_gpu_free_grids(const pme_gpu_t *pmeGPU) +void pme_gpu_free_grids(const PmeGpu *pmeGPU) { if (pmeGPU->archSpecific->performOutOfPlaceFFT) { @@ -329,7 +329,7 @@ void pme_gpu_free_grids(const pme_gpu_t *pmeGPU) &pmeGPU->archSpecific->realGridSize, &pmeGPU->archSpecific->realGridSizeAlloc); } -void pme_gpu_clear_grids(const pme_gpu_t *pmeGPU) +void pme_gpu_clear_grids(const PmeGpu *pmeGPU) { cudaError_t stat = cudaMemsetAsync(pmeGPU->kernelParams->grid.d_realGrid, 0, pmeGPU->archSpecific->realGridSize * sizeof(float), pmeGPU->archSpecific->pmeStream); @@ -337,7 +337,7 @@ void pme_gpu_clear_grids(const pme_gpu_t *pmeGPU) CU_RET_ERR(stat, "cudaMemsetAsync on the PME grid error"); } -void pme_gpu_realloc_and_copy_fract_shifts(pme_gpu_t *pmeGPU) +void pme_gpu_realloc_and_copy_fract_shifts(PmeGpu *pmeGPU) { pme_gpu_free_fract_shifts(pmeGPU); @@ -368,7 +368,7 @@ void pme_gpu_realloc_and_copy_fract_shifts(pme_gpu_t *pmeGPU) pmeGPU->deviceInfo); } -void pme_gpu_free_fract_shifts(const pme_gpu_t *pmeGPU) +void pme_gpu_free_fract_shifts(const PmeGpu *pmeGPU) { auto *kernelParamsPtr = pmeGPU->kernelParams.get(); destroyParamLookupTable(kernelParamsPtr->grid.d_fractShiftsTable, @@ -381,7 +381,7 @@ void pme_gpu_free_fract_shifts(const pme_gpu_t *pmeGPU) pmeGPU->deviceInfo); } -void pme_gpu_sync_output_energy_virial(const pme_gpu_t *pmeGPU) +void pme_gpu_sync_output_energy_virial(const PmeGpu *pmeGPU) { cudaError_t stat = cudaEventSynchronize(pmeGPU->archSpecific->syncEnerVirD2H); CU_RET_ERR(stat, "Error while waiting for PME solve output"); @@ -392,13 +392,13 @@ void pme_gpu_sync_output_energy_virial(const pme_gpu_t *pmeGPU) } } -void pme_gpu_copy_input_gather_grid(const pme_gpu_t *pmeGpu, float *h_grid) +void pme_gpu_copy_input_gather_grid(const PmeGpu *pmeGpu, float *h_grid) { const size_t gridSize = pmeGpu->archSpecific->realGridSize * sizeof(float); cu_copy_H2D_async(pmeGpu->kernelParams->grid.d_realGrid, h_grid, gridSize, pmeGpu->archSpecific->pmeStream); } -void 
pme_gpu_copy_output_spread_grid(const pme_gpu_t *pmeGpu, float *h_grid) +void pme_gpu_copy_output_spread_grid(const PmeGpu *pmeGpu, float *h_grid) { const size_t gridSize = pmeGpu->archSpecific->realGridSize * sizeof(float); cu_copy_D2H_async(h_grid, pmeGpu->kernelParams->grid.d_realGrid, gridSize, pmeGpu->archSpecific->pmeStream); @@ -406,7 +406,7 @@ void pme_gpu_copy_output_spread_grid(const pme_gpu_t *pmeGpu, float *h_grid) CU_RET_ERR(stat, "PME spread grid sync event record failure"); } -void pme_gpu_copy_output_spread_atom_data(const pme_gpu_t *pmeGpu) +void pme_gpu_copy_output_spread_atom_data(const PmeGpu *pmeGpu) { const int alignment = pme_gpu_get_atoms_per_warp(pmeGpu); const size_t nAtomsPadded = ((pmeGpu->nAtomsAlloc + alignment - 1) / alignment) * alignment; @@ -420,7 +420,7 @@ void pme_gpu_copy_output_spread_atom_data(const pme_gpu_t *pmeGpu) CU_RET_ERR(stat, "PME spread atom data sync event record failure"); } -void pme_gpu_copy_input_gather_atom_data(const pme_gpu_t *pmeGpu) +void pme_gpu_copy_input_gather_atom_data(const PmeGpu *pmeGpu) { const int alignment = pme_gpu_get_atoms_per_warp(pmeGpu); const size_t nAtomsPadded = ((pmeGpu->nAtomsAlloc + alignment - 1) / alignment) * alignment; @@ -444,30 +444,30 @@ void pme_gpu_copy_input_gather_atom_data(const pme_gpu_t *pmeGpu) kernelParamsPtr->atoms.nAtoms * DIM * sizeof(int), pmeGpu->archSpecific->pmeStream); } -void pme_gpu_sync_spread_grid(const pme_gpu_t *pmeGPU) +void pme_gpu_sync_spread_grid(const PmeGpu *pmeGPU) { cudaError_t stat = cudaEventSynchronize(pmeGPU->archSpecific->syncSpreadGridD2H); CU_RET_ERR(stat, "Error while waiting for the PME GPU spread grid to be copied to the host"); } -void pme_gpu_sync_spline_atom_data(const pme_gpu_t *pmeGPU) +void pme_gpu_sync_spline_atom_data(const PmeGpu *pmeGPU) { cudaError_t stat = cudaEventSynchronize(pmeGPU->archSpecific->syncSplineAtomDataD2H); CU_RET_ERR(stat, "Error while waiting for the PME GPU atom data to be copied to the host"); } -void pme_gpu_sync_solve_grid(const pme_gpu_t *pmeGPU) +void pme_gpu_sync_solve_grid(const PmeGpu *pmeGPU) { cudaError_t stat = cudaEventSynchronize(pmeGPU->archSpecific->syncSolveGridD2H); CU_RET_ERR(stat, "Error while waiting for the PME GPU solve grid to be copied to the host"); //should check for pme_gpu_performs_solve(pmeGPU) } -void pme_gpu_init_internal(pme_gpu_t *pmeGPU) +void pme_gpu_init_internal(PmeGpu *pmeGPU) { /* Allocate the target-specific structures */ - pmeGPU->archSpecific.reset(new pme_gpu_specific_t()); - pmeGPU->kernelParams.reset(new pme_gpu_kernel_params_t()); + pmeGPU->archSpecific.reset(new PmeGpuSpecific()); + pmeGPU->kernelParams.reset(new PmeGpuKernelParams()); pmeGPU->archSpecific->performOutOfPlaceFFT = true; /* This should give better performance, according to the cuFFT documentation. 
@@ -492,14 +492,14 @@ void pme_gpu_init_internal(pme_gpu_t *pmeGPU) CU_RET_ERR(stat, "cudaStreamCreateWithPriority on the PME stream failed"); } -void pme_gpu_destroy_specific(const pme_gpu_t *pmeGPU) +void pme_gpu_destroy_specific(const PmeGpu *pmeGPU) { /* Destroy the CUDA stream */ cudaError_t stat = cudaStreamDestroy(pmeGPU->archSpecific->pmeStream); CU_RET_ERR(stat, "PME cudaStreamDestroy error"); } -void pme_gpu_init_sync_events(const pme_gpu_t *pmeGPU) +void pme_gpu_init_sync_events(const PmeGpu *pmeGPU) { cudaError_t stat; const auto eventFlags = cudaEventDisableTiming; @@ -515,7 +515,7 @@ void pme_gpu_init_sync_events(const pme_gpu_t *pmeGPU) CU_RET_ERR(stat, "cudaEventCreate on syncSolveGridD2H failed"); } -void pme_gpu_destroy_sync_events(const pme_gpu_t *pmeGPU) +void pme_gpu_destroy_sync_events(const PmeGpu *pmeGPU) { cudaError_t stat; stat = cudaEventDestroy(pmeGPU->archSpecific->syncEnerVirD2H); @@ -530,7 +530,7 @@ void pme_gpu_destroy_sync_events(const pme_gpu_t *pmeGPU) CU_RET_ERR(stat, "cudaEventDestroy failed on syncSolveGridD2H"); } -void pme_gpu_reinit_3dfft(const pme_gpu_t *pmeGPU) +void pme_gpu_reinit_3dfft(const PmeGpu *pmeGPU) { if (pme_gpu_performs_FFT(pmeGPU)) { @@ -542,7 +542,7 @@ void pme_gpu_reinit_3dfft(const pme_gpu_t *pmeGPU) } } -void pme_gpu_destroy_3dfft(const pme_gpu_t *pmeGPU) +void pme_gpu_destroy_3dfft(const PmeGpu *pmeGPU) { pmeGPU->archSpecific->fftSetup.resize(0); } diff --git a/src/gromacs/ewald/pme.cuh b/src/gromacs/ewald/pme.cuh index 71fb758a1c..001caf7686 100644 --- a/src/gromacs/ewald/pme.cuh +++ b/src/gromacs/ewald/pme.cuh @@ -140,7 +140,7 @@ int __device__ __forceinline__ pme_gpu_check_atom_charge(const float coefficient /*! \brief \internal * The main PME CUDA-specific host data structure, included in the PME GPU structure by the archSpecific pointer. */ -struct pme_gpu_cuda_t +struct PmeGpuCuda { /*! \brief The CUDA stream where everything related to the PME happens. */ cudaStream_t pmeStream; @@ -220,10 +220,10 @@ struct pme_gpu_cuda_t /*! \brief \internal * A single structure encompassing all the PME data used in CUDA kernels. - * This inherits from pme_gpu_kernel_params_base_t and adds a couple cudaTextureObject_t handles, + * This inherits from PmeGpuKernelParamsBase and adds a couple cudaTextureObject_t handles, * which we would like to avoid in plain C++. */ -struct pme_gpu_cuda_kernel_params_t : pme_gpu_kernel_params_base_t +struct PmeGpuCudaKernelParams : PmeGpuKernelParamsBase { /* These are CUDA texture objects, related to the grid size. */ /*! \brief CUDA texture object for accessing grid.d_fractShiftsTable */ diff --git a/src/gromacs/ewald/pme.h b/src/gromacs/ewald/pme.h index af9fd7c77b..65dcd22bc5 100644 --- a/src/gromacs/ewald/pme.h +++ b/src/gromacs/ewald/pme.h @@ -58,7 +58,7 @@ struct interaction_const_t; struct t_commrec; struct t_inputrec; struct t_nrnb; -struct pme_gpu_t; +struct PmeGpu; struct gmx_wallclock_gpu_pme_t; struct gmx_device_info_t; struct gmx_pme_t; @@ -106,7 +106,7 @@ gmx_pme_t *gmx_pme_init(const t_commrec *cr, real ewaldcoeff_q, real ewaldcoeff_lj, int nthread, PmeRunMode runMode, - pme_gpu_t *pmeGPU, + PmeGpu *pmeGPU, gmx_device_info_t *gpuInfo, const gmx::MDLogger &mdlog); -- 2.11.4.GIT