From 8997703741f18f7f2f2021f6ab62586bce50049d Mon Sep 17 00:00:00 2001
From: Szilard Pall <pall.szilard@gmail.com>
Date: Thu, 8 Oct 2015 17:20:29 +0200
Subject: [PATCH] Avoid using incompatible NVML version

This change introduces a check on the NVML API version in order to avoid
compilation issues when old driver/NVML versions are picked up.

Moved the macro that handles NVML errors to the only file in which it is used.

Made the requirement for including nvml.h in gpu_utils.cu explicit.

Refactored the handling of application clock support so that the code does
not have to be compiled in three different configurations merely to detect
syntax errors.

Fixes #1838

Change-Id: Ibb7a9f7cc80aa9e9476a455d0b6192451a038088
---
 src/gromacs/gmxlib/cuda_tools/cudautils.cuh | 12 +----
 src/gromacs/gmxlib/gpu_utils/gpu_utils.cu   | 69 ++++++++++++++++++++++-------
 2 files changed, 54 insertions(+), 27 deletions(-)

diff --git a/src/gromacs/gmxlib/cuda_tools/cudautils.cuh b/src/gromacs/gmxlib/cuda_tools/cudautils.cuh
index cdd9dbc052..3ab9588e38 100644
--- a/src/gromacs/gmxlib/cuda_tools/cudautils.cuh
+++ b/src/gromacs/gmxlib/cuda_tools/cudautils.cuh
@@ -104,17 +104,7 @@
         } \
     } while (0)
 
-/*! Check for NVML error on the return status of a NVML API call. */
-#ifdef HAVE_NVML
-#define HANDLE_NVML_RET_ERR(status, msg) \
-    do { \
-        if (status != NVML_SUCCESS) \
-        { \
-            gmx_warning("%s: %s\n", msg, nvmlErrorString(status)); \
-        } \
-    } while (0)
-#endif /* HAVE_NVML */
-#else
+#else /* CHECK_CUDA_ERRORS */
 
 #define CU_RET_ERR(status, msg) do { } while (0)
 #define CU_CHECK_PREV_ERR()     do { } while (0)
diff --git a/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu b/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
index 00260f9fe2..53ffba1a05 100644
--- a/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
+++ b/src/gromacs/gmxlib/gpu_utils/gpu_utils.cu
@@ -55,6 +55,32 @@
 #include "gromacs/utility/cstringutil.h"
 #include "gromacs/utility/smalloc.h"
 
+#ifdef HAVE_NVML
+#include <nvml.h>
+#define HAVE_NVML_APPLICATION_CLOCKS (NVML_API_VERSION >= 6)
+#else  /* HAVE_NVML */
+#define HAVE_NVML_APPLICATION_CLOCKS 0
+#endif /* HAVE_NVML */
+
+#if defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS
+/*! Check for NVML error on the return status of a NVML API call. */
+#  define HANDLE_NVML_RET_ERR(status, msg) \
+    do { \
+        if (status != NVML_SUCCESS) \
+        { \
+            gmx_warning("%s: %s\n", msg, nvmlErrorString(status)); \
+        } \
+    } while (0)
+#else  /* defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS */
+#  define HANDLE_NVML_RET_ERR(status, msg) do { } while (0)
+#endif /* defined(CHECK_CUDA_ERRORS) && HAVE_NVML_APPLICATION_CLOCKS */
+
+#if HAVE_NVML_APPLICATION_CLOCKS
+static const gmx_bool            bCompiledWithApplicationClockSupport = true;
+#else
+static const gmx_bool gmx_unused bCompiledWithApplicationClockSupport = false;
+#endif
+
 /*! \internal \brief
  * Max number of devices supported by CUDA (for consistency checking).
  *
@@ -229,7 +255,7 @@ static void md_print_warn(FILE       *fplog,
     }
 }
 
-#ifdef HAVE_NVML
+#if HAVE_NVML_APPLICATION_CLOCKS
 /*! \brief Determines and adds the NVML device ID to the passed \cuda_dev.
  *
  * Determines and adds the NVML device ID to the passed \cuda_dev. This is done by
@@ -240,11 +266,10 @@ static void md_print_warn(FILE       *fplog,
  */
 static bool addNVMLDeviceId(gmx_device_info_t* cuda_dev)
 {
-    nvmlReturn_t nvml_stat = NVML_SUCCESS;
     nvmlDevice_t nvml_device_id;
     unsigned int nvml_device_count = 0;
+    nvmlReturn_t nvml_stat         = nvmlDeviceGetCount ( &nvml_device_count );
     cuda_dev->nvml_initialized = false;
-    nvml_stat                  = nvmlDeviceGetCount ( &nvml_device_count );
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlDeviceGetCount failed" );
     for (unsigned int nvml_device_idx = 0; nvml_stat == NVML_SUCCESS && nvml_device_idx < nvml_device_count; ++nvml_device_idx)
     {
@@ -273,7 +298,7 @@ static bool addNVMLDeviceId(gmx_device_info_t* cuda_dev)
     }
     return cuda_dev->nvml_initialized;
 }
-#endif /*HAVE_NVML*/
+#endif /* HAVE_NVML_APPLICATION_CLOCKS */
 
 /*! \brief Tries to set application clocks for the GPU with the given index.
  *
@@ -295,27 +320,39 @@ static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unus
     gmx_bool              bGpuCanUseApplicationClocks =
         ((0 == gmx_wcmatch("*Tesla*", prop->name) && cuda_version_number >= 35 ) ||
          (0 == gmx_wcmatch("*Quadro*", prop->name) && cuda_version_number >= 52 ));
+    if (!bGpuCanUseApplicationClocks)
+    {
+        return true;
+    }
 #ifndef HAVE_NVML
-    if (bGpuCanUseApplicationClocks)
+    int cuda_driver  = 0;
+    int cuda_runtime = 0;
+    cudaDriverGetVersion(&cuda_driver);
+    cudaRuntimeGetVersion(&cuda_runtime);
+    md_print_warn( fplog, "Note: NVML support was not found (CUDA runtime %d.%d, driver %d.%d), so your\n"
+                   "      %s GPU cannot use application clock support to improve performance.\n",
+                   cuda_runtime/1000, cuda_runtime%100,
+                   cuda_driver/1000, cuda_driver%100,
+                   prop->name );
+    return true;
+#else
+    if (!bCompiledWithApplicationClockSupport)
     {
         int cuda_driver  = 0;
         int cuda_runtime = 0;
         cudaDriverGetVersion(&cuda_driver);
         cudaRuntimeGetVersion(&cuda_runtime);
-        md_print_warn( fplog, "Note: NVML support was not found (CUDA runtime %d.%d, driver %d.%d), so your\n"
+        md_print_warn( fplog, "Note: The NVML support in use is too old (CUDA runtime %d.%d, driver %d.%d), so your\n"
                        "      %s GPU cannot use application clock support to improve performance.\n",
                        cuda_runtime/1000, cuda_runtime%100,
                        cuda_driver/1000, cuda_driver%100,
                        prop->name );
+        return true;
     }
-    return true;
-#else /* HAVE_NVML defined */
+
+    /* We've compiled with NVML application clocks support, and have a GPU that can use it */
     nvmlReturn_t nvml_stat = NVML_SUCCESS;
     char        *env;
-    if (!bGpuCanUseApplicationClocks)
-    {
-        return true;
-    }
     //TODO: GMX_GPU_APPLICATION_CLOCKS is currently only used to enable/disable setting of application clocks
     //      this variable can be later used to give a user more fine grained control.
     env = getenv("GMX_GPU_APPLICATION_CLOCKS");
@@ -386,7 +423,7 @@ static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unus
         md_print_warn( fplog,  "Not possible to change GPU clocks to optimal value because application clocks handling failed with NVML error (%d): %s.\n", nvml_stat, nvmlErrorString(nvml_stat));
     }
     return (nvml_stat == NVML_SUCCESS);
-#endif /*HAVE_NVML*/
+#endif /* HAVE_NVML */
 }
 
 /*! \brief Resets application clocks if changed and cleans up NVML for the passed \gpu_dev.
@@ -395,10 +432,10 @@ static gmx_bool init_gpu_application_clocks(FILE gmx_unused *fplog, int gmx_unus
  */
 static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused * cuda_dev)
 {
-#ifndef HAVE_NVML
+#if !HAVE_NVML_APPLICATION_CLOCKS
     GMX_UNUSED_VALUE(cuda_dev);
     return true;
-#else
+#else /* HAVE_NVML_APPLICATION_CLOCKS */
     nvmlReturn_t nvml_stat = NVML_SUCCESS;
     if (cuda_dev &&
         cuda_dev->nvml_is_restricted == NVML_FEATURE_DISABLED &&
@@ -410,7 +447,7 @@ static gmx_bool reset_gpu_application_clocks(const gmx_device_info_t gmx_unused
     nvml_stat = nvmlShutdown();
     HANDLE_NVML_RET_ERR( nvml_stat, "nvmlShutdown failed" );
     return (nvml_stat == NVML_SUCCESS);
-#endif /*HAVE_NVML*/
+#endif /* HAVE_NVML_APPLICATION_CLOCKS */
 }
 
 gmx_bool init_gpu(FILE gmx_unused *fplog, int mygpu, char *result_str,
-- 
2.11.4.GIT