From 360ddad45f0244048491c994acab120f8fc41353 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Szil=C3=A1rd=20P=C3=A1ll?= Date: Mon, 27 Mar 2017 19:19:19 +0200 Subject: [PATCH] Allow disabling the explicit use of CUDA textures This change implements fallback for the explicit CUDA texture loads in the non-bonded kernels. This can be done by defining DISABLE_CUDA_TEXTURES. When disabled texture objects/references are not initialized either. Also removed unnecessary extern declarations of texture references in nbnxn_cuda_kernel_utils.cuh; this was only needed because texture reference accesses were previously compiled unconditionally (and were also generated in the nvcc host pass). Change-Id: Id7cdd6f80da0abe6be5639e80bed6530c3ce25c0 --- src/gromacs/gpu_utils/cuda_arch_utils.cuh | 14 ++- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu | 15 ++- .../mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu | 104 ++++++++++++--------- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh | 8 +- .../mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh | 4 + .../mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh | 70 +++++++++----- .../mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh | 5 +- src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h | 7 +- 8 files changed, 156 insertions(+), 71 deletions(-) diff --git a/src/gromacs/gpu_utils/cuda_arch_utils.cuh b/src/gromacs/gpu_utils/cuda_arch_utils.cuh index 8be983365c..8940e927c5 100644 --- a/src/gromacs/gpu_utils/cuda_arch_utils.cuh +++ b/src/gromacs/gpu_utils/cuda_arch_utils.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -58,4 +58,16 @@ static const int warp_size = 32; static const int warp_size_log2 = 5; +/*! \brief Allow disabling CUDA textures using the GMX_DISABLE_CUDA_TEXTURES macro. + * + * This option will not influence functionality. All features using textures ought + * to have fallback for texture-less reads (direct/LDG loads), all new code needs + * to provide fallback code. + */ +#if defined GMX_DISABLE_CUDA_TEXTURES +#define DISABLE_CUDA_TEXTURES 1 +#else +#define DISABLE_CUDA_TEXTURES 0 +#endif + #endif /* CUDA_ARCH_UTILS_CUH_ */ diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu index 3f62e86de4..35774bec6f 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda.cu @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -62,6 +62,16 @@ #include "nbnxn_cuda_types.h" +/* + * Texture references are created at compile-time and need to be declared + * at file scope as global variables (see http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#texture-reference-api). + * The texture references below are used in two translation units; + * we declare them here along the kernels that use them (when compiling legacy Fermi kernels), + * and provide getters (see below) used by the data_mgmt module where the + * textures are bound/unbound. + * (In principle we could do it the other way arond, but that would likely require + * device linking and we'd rather avoid technical hurdles.) + */ /*! Texture reference for LJ C6/C12 parameters; bound to cu_nbparam_t.nbfp */ texture nbfp_texref; @@ -695,18 +705,21 @@ void nbnxn_gpu_wait_for_gpu(gmx_nbnxn_cuda_t *nb, /*! Return the reference to the nbfp texture. */ const struct texture &nbnxn_cuda_get_nbfp_texref() { + assert(!c_disableCudaTextures); return nbfp_texref; } /*! Return the reference to the nbfp_comb texture. */ const struct texture &nbnxn_cuda_get_nbfp_comb_texref() { + assert(!c_disableCudaTextures); return nbfp_comb_texref; } /*! Return the reference to the coulomb_tab. */ const struct texture &nbnxn_cuda_get_coulomb_tab_texref() { + assert(!c_disableCudaTextures); return coulomb_tab_texref; } diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu index 0d60b620e2..1215452128 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_data_mgmt.cu @@ -100,6 +100,7 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t *nbparam, */ static bool use_texobj(const gmx_device_info_t *dev_info) { + assert(!c_disableCudaTextures); /* Only device CC >= 3.0 (Kepler and later) support texture objects */ return (dev_info->prop.major >= 3); } @@ -128,6 +129,8 @@ static void setup1DFloatTexture(cudaTextureObject_t &texObj, void *devPtr, size_t sizeInBytes) { + assert(!c_disableCudaTextures); + cudaError_t stat; cudaResourceDesc rd; cudaTextureDesc td; @@ -158,6 +161,8 @@ static void setup1DFloatTexture(const struct texturecoulomb_tab_size = ic->tabq_size; nbp->coulomb_tab_scale = ic->tabq_scale; - if (use_texobj(dev_info)) - { - setup1DFloatTexture(nbp->coulomb_tab_texobj, nbp->coulomb_tab, - nbp->coulomb_tab_size*sizeof(*nbp->coulomb_tab)); - } - else + if (!c_disableCudaTextures) { - setup1DFloatTexture(&nbnxn_cuda_get_coulomb_tab_texref(), nbp->coulomb_tab, - nbp->coulomb_tab_size*sizeof(*nbp->coulomb_tab)); + if (use_texobj(dev_info)) + { + setup1DFloatTexture(nbp->coulomb_tab_texobj, nbp->coulomb_tab, + nbp->coulomb_tab_size*sizeof(*nbp->coulomb_tab)); + } + else + { + setup1DFloatTexture(&nbnxn_cuda_get_coulomb_tab_texref(), nbp->coulomb_tab, + nbp->coulomb_tab_size*sizeof(*nbp->coulomb_tab)); + } } } @@ -339,13 +347,16 @@ static void initParamLookupTable(float * &devPtr, CU_RET_ERR(stat, "cudaMalloc failed in initParamLookupTable"); cu_copy_H2D(devPtr, (void *)hostPtr, sizeInBytes); - if (use_texobj(devInfo)) + if (!c_disableCudaTextures) { - setup1DFloatTexture(texObj, devPtr, sizeInBytes); - } - else - { - setup1DFloatTexture(texRef, devPtr, sizeInBytes); + if (use_texobj(devInfo)) + { + setup1DFloatTexture(texObj, devPtr, sizeInBytes); + } + else + { + setup1DFloatTexture(texRef, devPtr, sizeInBytes); + } } } @@ -867,17 +878,20 @@ static void nbnxn_cuda_free_nbparam_table(cu_nbparam_t *nbparam, if (nbparam->eeltype == eelCuEWALD_TAB || nbparam->eeltype == eelCuEWALD_TAB_TWIN) { - /* Only device CC >= 3.0 (Kepler and later) support texture objects */ - if (use_texobj(dev_info)) - { - stat = cudaDestroyTextureObject(nbparam->coulomb_tab_texobj); - CU_RET_ERR(stat, "cudaDestroyTextureObject on coulomb_tab_texobj failed"); - } - else + if (!c_disableCudaTextures) { - GMX_UNUSED_VALUE(dev_info); - stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref()); - CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab_texref failed"); + /* Only device CC >= 3.0 (Kepler and later) support texture objects */ + if (use_texobj(dev_info)) + { + stat = cudaDestroyTextureObject(nbparam->coulomb_tab_texobj); + CU_RET_ERR(stat, "cudaDestroyTextureObject on coulomb_tab_texobj failed"); + } + else + { + GMX_UNUSED_VALUE(dev_info); + stat = cudaUnbindTexture(nbnxn_cuda_get_coulomb_tab_texref()); + CU_RET_ERR(stat, "cudaUnbindTexture on coulomb_tab_texref failed"); + } } cu_free_buffered(nbparam->coulomb_tab, &nbparam->coulomb_tab_size); } @@ -946,32 +960,38 @@ void nbnxn_gpu_free(gmx_nbnxn_cuda_t *nb) if (!useLjCombRule(nb->nbparam)) { - /* Only device CC >= 3.0 (Kepler and later) support texture objects */ - if (use_texobj(nb->dev_info)) - { - stat = cudaDestroyTextureObject(nbparam->nbfp_texobj); - CU_RET_ERR(stat, "cudaDestroyTextureObject on nbfp_texobj failed"); - } - else + if (!c_disableCudaTextures) { - stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_texref()); - CU_RET_ERR(stat, "cudaUnbindTexture on nbfp_texref failed"); + /* Only device CC >= 3.0 (Kepler and later) support texture objects */ + if (use_texobj(nb->dev_info)) + { + stat = cudaDestroyTextureObject(nbparam->nbfp_texobj); + CU_RET_ERR(stat, "cudaDestroyTextureObject on nbfp_texobj failed"); + } + else + { + stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_texref()); + CU_RET_ERR(stat, "cudaUnbindTexture on nbfp_texref failed"); + } } cu_free_buffered(nbparam->nbfp); } if (nbparam->vdwtype == evdwCuEWALDGEOM || nbparam->vdwtype == evdwCuEWALDLB) { - /* Only device CC >= 3.0 (Kepler and later) support texture objects */ - if (use_texobj(nb->dev_info)) - { - stat = cudaDestroyTextureObject(nbparam->nbfp_comb_texobj); - CU_RET_ERR(stat, "cudaDestroyTextureObject on nbfp_comb_texobj failed"); - } - else + if (!c_disableCudaTextures) { - stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_comb_texref()); - CU_RET_ERR(stat, "cudaUnbindTexture on nbfp_comb_texref failed"); + /* Only device CC >= 3.0 (Kepler and later) support texture objects */ + if (use_texobj(nb->dev_info)) + { + stat = cudaDestroyTextureObject(nbparam->nbfp_comb_texobj); + CU_RET_ERR(stat, "cudaDestroyTextureObject on nbfp_comb_texobj failed"); + } + else + { + stat = cudaUnbindTexture(nbnxn_cuda_get_nbfp_comb_texref()); + CU_RET_ERR(stat, "cudaUnbindTexture on nbfp_comb_texref failed"); + } } cu_free_buffered(nbparam->nbfp_comb); } diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh index 38ca79ee82..415ead4c03 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh @@ -54,8 +54,8 @@ * code that is in double precision. */ -#if GMX_PTX_ARCH < 300 -#error "nbnxn_cuda_kernel.cuh included with GMX_PTX_ARCH < 300" +#if GMX_PTX_ARCH < 300 && GMX_PTX_ARCH != 0 +#error "nbnxn_cuda_kernel.cuh included with GMX_PTX_ARCH < 300 or host pass" #endif #if defined EL_EWALD_ANA || defined EL_EWALD_TAB @@ -317,7 +317,11 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda) #endif #ifdef LJ_EWALD + #if DISABLE_CUDA_TEXTURES + E_lj += LDG(&nbparam.nbfp[atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2]); + #else E_lj += tex1Dfetch(nbparam.nbfp_texobj, atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2); + #endif #endif } diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh index f615978ab2..98ba1d0916 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh @@ -258,7 +258,11 @@ __global__ void NB_KERNEL_FUNC_NAME(nbnxn_kernel, _F_cuda) #endif #ifdef LJ_EWALD + #if DISABLE_CUDA_TEXTURES + E_lj += LDG(&nbparam.nbfp[atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2]); + #else E_lj += tex1Dfetch(nbfp_texref, atom_types[(sci*c_numClPerSupercl + i)*c_clSize + tidxi]*(ntypes + 1)*2); + #endif #endif } diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh index c7af49b1b7..a0d8979381 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_utils.cuh @@ -42,8 +42,6 @@ * \author Szilárd Páll * \ingroup module_mdlib */ -#include "config.h" - #include /* Note that floating-point constants in CUDA code should be suffixed @@ -60,8 +58,8 @@ #ifndef NBNXN_CUDA_KERNEL_UTILS_CUH #define NBNXN_CUDA_KERNEL_UTILS_CUH -/* Use texture objects if supported by the target hardware. */ -#if GMX_PTX_ARCH >= 300 +/* Use texture objects if supported by the target hardware (and in host pass). */ +#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0 /* Note: convenience macro, needs to be undef-ed at the end of the file. */ #define USE_TEXOBJ #endif @@ -79,19 +77,6 @@ static const int c_fbufStride = c_clSizeSq; static const float c_oneSixth = 0.16666667f; static const float c_oneTwelveth = 0.08333333f; -/* With multiple compilation units this ensures that texture refs are available - in the the kernels' compilation units. */ -#if !GMX_CUDA_NB_SINGLE_COMPILATION_UNIT -/*! Texture reference for LJ C6/C12 parameters; bound to cu_nbparam_t.nbfp */ -extern texture nbfp_texref; - -/*! Texture reference for LJ-PME parameters; bound to cu_nbparam_t.nbfp_comb */ -extern texture nbfp_comb_texref; - -/*! Texture reference for Ewald coulomb force table; bound to cu_nbparam_t.coulomb_tab */ -extern texture coulomb_tab_texref; -#endif /* GMX_CUDA_NB_SINGLE_COMPILATION_UNIT */ - /*! Convert LJ sigma,epsilon parameters to C6,C12. */ static __forceinline__ __device__ @@ -233,17 +218,25 @@ void calculate_potential_switch_F_E(const cu_nbparam_t nbparam, } -/*! \brief Fetch C6 grid contribution coefficients and return the product of these. */ +/*! \brief Fetch C6 grid contribution coefficients and return the product of these. + * + * Depending on what is supported, it fetches parameters either + * using direct load, texture objects, or texrefs. + */ static __forceinline__ __device__ float calculate_lj_ewald_c6grid(const cu_nbparam_t nbparam, int typei, int typej) { +#if DISABLE_CUDA_TEXTURES + return LDG(&nbparam.nbfp_comb[2*typei]) * LDG(&nbparam.nbfp_comb[2*typej]); +#else #ifdef USE_TEXOBJ return tex1Dfetch(nbparam.nbfp_comb_texobj, 2*typei) * tex1Dfetch(nbparam.nbfp_comb_texobj, 2*typej); #else return tex1Dfetch(nbfp_comb_texref, 2*typei) * tex1Dfetch(nbfp_comb_texref, 2*typej); #endif /* USE_TEXOBJ */ +#endif /* DISABLE_CUDA_TEXTURES */ } @@ -308,12 +301,23 @@ void calculate_lj_ewald_comb_geom_F_E(const cu_nbparam_t nbparam, *E_lj += c_oneSixth*c6grid*(inv_r6_nm*(1.0f - expmcr2*poly) + sh_mask); } -/*! Fetch per-type LJ parameters. */ +/*! Fetch per-type LJ parameters. + * + * Depending on what is supported, it fetches parameters either + * using direct load, texture objects, or texrefs. + */ static __forceinline__ __device__ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam, int type) { float2 c6c12; +#if DISABLE_CUDA_TEXTURES + /* Force an 8-byte fetch to save a memory instruction. */ + float2 *nbfp_comb = (float2 *)nbparam.nbfp_comb; + c6c12 = LDG(&nbfp_comb[type]); +#else + /* NOTE: as we always do 8-byte aligned loads, we could + fetch float2 here too just as above. */ #ifdef USE_TEXOBJ c6c12.x = tex1Dfetch(nbparam.nbfp_comb_texobj, 2*type); c6c12.y = tex1Dfetch(nbparam.nbfp_comb_texobj, 2*type + 1); @@ -321,6 +325,7 @@ float2 fetch_nbfp_comb_c6_c12(const cu_nbparam_t nbparam, c6c12.x = tex1Dfetch(nbfp_comb_texref, 2*type); c6c12.y = tex1Dfetch(nbfp_comb_texref, 2*type + 1); #endif /* USE_TEXOBJ */ +#endif /* DISABLE_CUDA_TEXTURES */ return c6c12; } @@ -378,6 +383,8 @@ void calculate_lj_ewald_comb_LB_F_E(const cu_nbparam_t nbparam, /*! Fetch two consecutive values from the Ewald correction F*r table. * + * Depending on what is supported, it fetches parameters either + * using direct load, texture objects, or texrefs. */ static __forceinline__ __device__ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, @@ -385,6 +392,11 @@ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, { float2 d; +#if DISABLE_CUDA_TEXTURES + /* Can't do 8-byte fetch because some of the addresses will be misaligned. */ + d.x = LDG(&nbparam.coulomb_tab[index]); + d.y = LDG(&nbparam.coulomb_tab[index + 1]); +#else #ifdef USE_TEXOBJ d.x = tex1Dfetch(nbparam.coulomb_tab_texobj, index); d.y = tex1Dfetch(nbparam.coulomb_tab_texobj, index + 1); @@ -392,6 +404,7 @@ float2 fetch_coulomb_force_r(const cu_nbparam_t nbparam, d.x = tex1Dfetch(coulomb_tab_texref, index); d.y = tex1Dfetch(coulomb_tab_texref, index + 1); #endif // USE_TEXOBJ +#endif // DISABLE_CUDA_TEXTURES return d; } @@ -427,6 +440,8 @@ float interpolate_coulomb_force_r(const cu_nbparam_t nbparam, /*! Fetch C6 and C12 from the parameter table. * + * Depending on what is supported, it fetches parameters either + * using direct load, texture objects, or texrefs. */ static __forceinline__ __device__ void fetch_nbfp_c6_c12(float &c6, @@ -434,6 +449,16 @@ void fetch_nbfp_c6_c12(float &c6, const cu_nbparam_t nbparam, int baseIndex) { +#if DISABLE_CUDA_TEXTURES + /* Force an 8-byte fetch to save a memory instruction. */ + float2 *nbfp = (float2 *)nbparam.nbfp; + float2 c6c12; + c6c12 = LDG(&nbfp[baseIndex]); + c6 = c6c12.x; + c12 = c6c12.y; +#else + /* NOTE: as we always do 8-byte aligned loads, we could + fetch float2 here too just as above. */ #ifdef USE_TEXOBJ c6 = tex1Dfetch(nbparam.nbfp_texobj, 2*baseIndex); c12 = tex1Dfetch(nbparam.nbfp_texobj, 2*baseIndex + 1); @@ -441,6 +466,7 @@ void fetch_nbfp_c6_c12(float &c6, c6 = tex1Dfetch(nbfp_texref, 2*baseIndex); c12 = tex1Dfetch(nbfp_texref, 2*baseIndex + 1); #endif +#endif // DISABLE_CUDA_TEXTURES } @@ -506,7 +532,7 @@ void reduce_force_j_generic(float *f_buf, float3 *fout, /*! Final j-force reduction; this implementation only with power of two * array sizes and with sm >= 3.0 */ -#if GMX_PTX_ARCH >= 300 +#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0 static __forceinline__ __device__ void reduce_force_j_warp_shfl(float3 f, float3 *fout, int tidxi, int aidx) @@ -632,7 +658,7 @@ void reduce_force_i(float *f_buf, float3 *f, /*! Final i-force reduction; this implementation works only with power of two * array sizes and with sm >= 3.0 */ -#if GMX_PTX_ARCH >= 300 +#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0 static __forceinline__ __device__ void reduce_force_i_warp_shfl(float3 fin, float3 *fout, float *fshift_buf, bool bCalcFshift, @@ -709,7 +735,7 @@ void reduce_energy_pow2(volatile float *buf, /*! Energy reduction; this implementation works only with power of two * array sizes and with sm >= 3.0 */ -#if GMX_PTX_ARCH >= 300 +#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0 static __forceinline__ __device__ void reduce_energy_warp_shfl(float E_lj, float E_el, float *e_lj, float *e_el, diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh index e56ce65411..a9be593e84 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernels.cuh @@ -1,7 +1,7 @@ /* * This file is part of the GROMACS molecular simulation package. * - * Copyright (c) 2012,2013,2014,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -52,7 +52,8 @@ * \ingroup module_mdlib */ -#if GMX_PTX_ARCH >= 300 +/* Use the standard non-Fermi kernel in host pass too (to avoid texref API calls). */ +#if GMX_PTX_ARCH >= 300 || GMX_PTX_ARCH == 0 #define FLAVOR_LEVEL_GENERATOR "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel.cuh" #else #define FLAVOR_LEVEL_GENERATOR "gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_kernel_fermi.cuh" diff --git a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h index 8519b5598e..09eccc34c0 100644 --- a/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h +++ b/src/gromacs/mdlib/nbnxn_cuda/nbnxn_cuda_types.h @@ -3,7 +3,7 @@ * * Copyright (c) 1991-2000, University of Groningen, The Netherlands. * Copyright (c) 2001-2012, The GROMACS development team. - * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by + * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, * and including many others, as listed in the AUTHORS file in the * top-level source directory and at http://www.gromacs.org. @@ -46,6 +46,7 @@ #ifndef NBNXN_CUDA_TYPES_H #define NBNXN_CUDA_TYPES_H +#include "gromacs/gpu_utils/cuda_arch_utils.cuh" #include "gromacs/gpu_utils/cudautils.cuh" #include "gromacs/mdlib/nbnxn_consts.h" #include "gromacs/mdlib/nbnxn_pairlist.h" @@ -60,6 +61,10 @@ static const int c_numClPerSupercl = c_nbnxnGpuNumClusterPerSupercluster; /*! \brief cluster size = number of atoms per cluster. */ static const int c_clSize = c_nbnxnGpuClusterSize; +/*! \brief True if the use of texture fetch in the CUDA kernels is disabled. */ +static const bool c_disableCudaTextures = DISABLE_CUDA_TEXTURES; + + #ifdef __cplusplus extern "C" { #endif -- 2.11.4.GIT