From 0123f06c3566150113d63a9ee64147e6cb872ae8 Mon Sep 17 00:00:00 2001
From: Mark Abraham <mark.j.abraham@gmail.com>
Date: Wed, 31 May 2017 18:52:10 +0100
Subject: [PATCH] Move gmx_parse_gpu_ids out of checking function

Parsing user input should happen separately from running checks for
consistency or sanity. Some of the content of gmx_parse_gpu_ids is
more like consistency checking, so that part stays in the checking
function, at the point where the parsing used to be called, and uses
the output of the parsing to run the checks. Doing so means that the
first such check now works when GMX_GPU_ID is used, when previously
it did not.

Made minor improvement to error message text for that check.

Noted TODO to remove helper functionality for GPU sharing checks,
since GPU sharing is now supported everywhere.

Change-Id: I2ea5841f0fdf461f3024b442a2fe641ea1435f49
---
 src/gromacs/hardware/detecthardware.cpp  | 38 +++------------------------
 src/programs/mdrun/resource-division.cpp | 45 +++++++++++++++++++++++++++++---
 src/programs/mdrun/runner.cpp            |  5 ++++
 3 files changed, 49 insertions(+), 39 deletions(-)

diff --git a/src/gromacs/hardware/detecthardware.cpp b/src/gromacs/hardware/detecthardware.cpp
index 0b627a36a6..2d5788717f 100644
--- a/src/gromacs/hardware/detecthardware.cpp
+++ b/src/gromacs/hardware/detecthardware.cpp
@@ -108,6 +108,9 @@ static const bool bGPUBinary     = GMX_GPU != GMX_GPU_NONE;
  * enumeration" in src/config.h.cmakein, so that GMX_GPU looks up an
  * array entry. */
 
+/* TODO GPU sharing is now always supported, so we can simplify things
+ * and remove these constants, functions, and comments about sharing
+ * below. */
 /* Both CUDA and OpenCL (on the supported/tested platforms) supports
  * GPU device sharing.
  */
@@ -1232,37 +1235,10 @@ void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
     check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);
 }
 
-//! \brief Return if any GPU ID (e.g in a user-supplied string) is repeated
-static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
-{
-    /* Loop over IDs in the string */
-    for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
-    {
-        /* Look for the ID in location i in the following part of the
-           string */
-        for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
-        {
-            if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
-            {
-                /* Same ID found in locations i and j */
-                return TRUE;
-            }
-        }
-    }
-
-    return FALSE;
-}
-
 void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
 {
     char *env;
 
-    if (gpu_opt->gpu_id != nullptr && !bGPUBinary)
-    {
-        gmx_fatal(FARGS, "GPU ID string set, but %s was compiled without GPU support!",
-                  gmx::getProgramContext().displayName());
-    }
-
     env = getenv("GMX_GPU_ID");
     if (env != nullptr && gpu_opt->gpu_id != nullptr)
     {
@@ -1281,14 +1257,6 @@ void gmx_parse_gpu_ids(gmx_gpu_opt_t *gpu_opt)
          * indicate the process/tMPI thread - GPU assignment. */
         parse_digits_from_string(env, &gpu_opt->n_dev_use, &gpu_opt->dev_use);
 
-        if (!gmx_multiple_gpu_per_node_supported() && 1 < gpu_opt->n_dev_use)
-        {
-            gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", getGpuImplementationString());
-        }
-        if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(gpu_opt))
-        {
-            gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", getGpuImplementationString());
-        }
         if (gpu_opt->n_dev_use == 0)
         {
             gmx_fatal(FARGS, "Empty GPU ID string encountered.\n%s\n",
diff --git a/src/programs/mdrun/resource-division.cpp b/src/programs/mdrun/resource-division.cpp
index 295ad05cd3..1920341e47 100644
--- a/src/programs/mdrun/resource-division.cpp
+++ b/src/programs/mdrun/resource-division.cpp
@@ -54,9 +54,11 @@
 #include "gromacs/mdtypes/inputrec.h"
 #include "gromacs/mdtypes/md_enums.h"
 #include "gromacs/topology/topology.h"
+#include "gromacs/utility/baseversion.h"
 #include "gromacs/utility/fatalerror.h"
 #include "gromacs/utility/gmxassert.h"
 #include "gromacs/utility/logger.h"
+#include "gromacs/utility/programcontext.h"
 #include "gromacs/utility/stringutil.h"
 
 
@@ -70,8 +72,12 @@
  * and after a switch point doesn't change too much.
  */
 
+//! Constant used to help minimize preprocessed code
 static const bool bHasOmpSupport = GMX_OPENMP;
 
+//! Constant used to help minimize preprocessed code
+static const bool bGPUBinary     = GMX_GPU != GMX_GPU_NONE;
+
 #if GMX_THREAD_MPI
 /* The minimum number of atoms per tMPI thread. With fewer atoms than this,
  * the number of threads will get lowered.
@@ -679,6 +685,27 @@ static void print_hw_opt(FILE *fp, const gmx_hw_opt_t *hw_opt)
             hw_opt->gpu_opt.gpu_id != nullptr ? hw_opt->gpu_opt.gpu_id : "");
 }
 
+//! \brief Return whether any GPU ID (e.g. in a user-supplied string) is repeated
+static gmx_bool anyGpuIdIsRepeated(const gmx_gpu_opt_t *gpu_opt)
+{
+    /* Loop over every ID except the last, which has no IDs after it */
+    for (int i = 0; i < gpu_opt->n_dev_use - 1; ++i)
+    {
+        /* Look for the ID in location i among the IDs that follow it,
+           so each pair of locations is compared exactly once */
+        for (int j = i + 1; j < gpu_opt->n_dev_use; ++j)
+        {
+            if (gpu_opt->dev_use[i] == gpu_opt->dev_use[j])
+            {
+                /* Same ID found in locations i and j */
+                return TRUE;
+            }
+        }
+    }
+
+    return FALSE;
+}
+
 /* Checks we can do when we don't (yet) know the cut-off scheme */
 void check_and_update_hw_opt_1(gmx_hw_opt_t    *hw_opt,
                                const t_commrec *cr,
@@ -784,10 +811,20 @@ void check_and_update_hw_opt_1(gmx_hw_opt_t    *hw_opt,
         hw_opt->nthreads_omp_pme = 1;
     }
 
-    /* Parse GPU IDs, if provided.
-     * We check consistency with the tMPI thread count later.
-     */
-    gmx_parse_gpu_ids(&hw_opt->gpu_opt);
+    if (hw_opt->gpu_opt.n_dev_use > 0 && !bGPUBinary)
+    {
+        gmx_fatal(FARGS, "GPU IDs have been selected, but %s was compiled without GPU support!",
+                  gmx::getProgramContext().displayName());
+    }
+
+    if (!gmx_multiple_gpu_per_node_supported() && 1 < hw_opt->gpu_opt.n_dev_use)
+    {
+        gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per node", getGpuImplementationString());
+    }
+    if (!gmx_gpu_sharing_supported() && anyGpuIdIsRepeated(&hw_opt->gpu_opt))
+    {
+        gmx_fatal(FARGS, "The %s implementation only supports using exactly one PP rank per GPU", getGpuImplementationString());
+    }
 
 #if GMX_THREAD_MPI
     if (hw_opt->gpu_opt.n_dev_use > 0 && hw_opt->nthreads_tmpi == 0)
diff --git a/src/programs/mdrun/runner.cpp b/src/programs/mdrun/runner.cpp
index bc4f70ba2c..fd709444c9 100644
--- a/src/programs/mdrun/runner.cpp
+++ b/src/programs/mdrun/runner.cpp
@@ -745,8 +745,13 @@ int mdrunner(gmx_hw_opt_t *hw_opt,
 
     bool doMembed = opt2bSet("-membed", nfile, fnm);
     bRerunMD     = (Flags & MD_RERUN);
+
+    /* Handle GPU-related user options. Later, we check consistency
+     * with things like whether support is compiled, or tMPI thread
+     * count. */
     bForceUseGPU = (strncmp(nbpu_opt, "gpu", 3) == 0);
     bTryUseGPU   = (strncmp(nbpu_opt, "auto", 4) == 0) || bForceUseGPU;
+    gmx_parse_gpu_ids(&hw_opt->gpu_opt);
 
     // Here we assume that SIMMASTER(cr) does not change even after the
     // threads are started.
-- 
2.11.4.GIT