2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines functionality for deciding whether tasks will run on GPUs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
44 #include "decidegpuusage.h"
54 #include "gromacs/ewald/pme.h"
55 #include "gromacs/hardware/cpuinfo.h"
56 #include "gromacs/hardware/detecthardware.h"
57 #include "gromacs/hardware/hardwaretopology.h"
58 #include "gromacs/hardware/hw_info.h"
59 #include "gromacs/mdlib/gmx_omp_nthreads.h"
60 #include "gromacs/mdtypes/commrec.h"
61 #include "gromacs/mdtypes/inputrec.h"
62 #include "gromacs/mdtypes/md_enums.h"
63 #include "gromacs/taskassignment/taskassignment.h"
64 #include "gromacs/topology/topology.h"
65 #include "gromacs/utility/baseversion.h"
66 #include "gromacs/utility/exceptions.h"
67 #include "gromacs/utility/fatalerror.h"
68 #include "gromacs/utility/gmxassert.h"
69 #include "gromacs/utility/logger.h"
70 #include "gromacs/utility/stringutil.h"
79 //! Helper variable to localise the text of an often repeated message.
80 const char * g_specifyEverythingFormatString
=
81 "When you use mdrun -gputasks, %s must be set to non-default "
82 "values, so that the device IDs can be interpreted correctly."
83 #if GMX_GPU != GMX_GPU_NONE
84 " If you simply want to restrict which GPUs are used, then it is "
85 "better to use mdrun -gpu_id. Otherwise, setting the "
86 # if GMX_GPU == GMX_GPU_CUDA
87 "CUDA_VISIBLE_DEVICES"
88 # elif GMX_GPU == GMX_GPU_OPENCL
89 // Technically there is no portable way to do this offered by the
90 // OpenCL standard, but the only current relevant case for GROMACS
91 // is AMD OpenCL, which offers this variable.
94 # error "Unreachable branch"
96 " environment variable in your bash profile or job "
97 "script may be more convenient."
104 decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget
,
105 const std::vector
<int> &gpuIdsToUse
,
106 const std::vector
<int> &userGpuTaskAssignment
,
107 const EmulateGpuNonbonded emulateGpuNonbonded
,
108 const bool buildSupportsNonbondedOnGpu
,
109 const bool nonbondedOnGpuIsUseful
,
110 const int numRanksPerSimulation
)
112 // First, exclude all cases where we can't run NB on GPUs.
113 if (nonbondedTarget
== TaskTarget::Cpu
||
114 emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
||
115 !nonbondedOnGpuIsUseful
||
116 !buildSupportsNonbondedOnGpu
)
118 // If the user required NB on GPUs, we issue an error later.
122 // We now know that NB on GPUs makes sense, if we have any.
124 if (!userGpuTaskAssignment
.empty())
126 // Specifying -gputasks requires specifying everything.
127 if (nonbondedTarget
== TaskTarget::Auto
||
128 numRanksPerSimulation
< 1)
130 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
135 if (nonbondedTarget
== TaskTarget::Gpu
)
140 // Because this is thread-MPI, we already know about the GPUs that
141 // all potential ranks can use, and can use that in a global
142 // decision that will later be consistent.
143 auto haveGpus
= !gpuIdsToUse
.empty();
145 // If we get here, then the user permitted or required GPUs.
150 decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded
,
151 const TaskTarget pmeTarget
,
152 const std::vector
<int> &gpuIdsToUse
,
153 const std::vector
<int> &userGpuTaskAssignment
,
154 const gmx_hw_info_t
&hardwareInfo
,
155 const t_inputrec
&inputrec
,
156 const gmx_mtop_t
&mtop
,
157 const int numRanksPerSimulation
,
158 const int numPmeRanksPerSimulation
)
160 // First, exclude all cases where we can't run PME on GPUs.
161 if ((pmeTarget
== TaskTarget::Cpu
) ||
162 !useGpuForNonbonded
||
163 !pme_gpu_supports_build(nullptr) ||
164 !pme_gpu_supports_hardware(hardwareInfo
, nullptr) ||
165 !pme_gpu_supports_input(inputrec
, mtop
, nullptr))
167 // PME can't run on a GPU. If the user required that, we issue
172 // We now know that PME on GPUs might make sense, if we have any.
174 if (!userGpuTaskAssignment
.empty())
176 // Follow the user's choice of GPU task assignment, if we
177 // can. Checking that their IDs are for compatible GPUs comes
180 // Specifying -gputasks requires specifying everything.
181 if (pmeTarget
== TaskTarget::Auto
||
182 numRanksPerSimulation
< 1)
184 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi")));
187 // PME on GPUs is only supported in a single case
188 if (pmeTarget
== TaskTarget::Gpu
)
190 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0)) ||
191 (numPmeRanksPerSimulation
> 1))
193 GMX_THROW(InconsistentInputError
194 ("When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr file and use a single PME rank."));
199 // pmeTarget == TaskTarget::Auto
200 return numRanksPerSimulation
== 1;
203 // Because this is thread-MPI, we already know about the GPUs that
204 // all potential ranks can use, and can use that in a global
205 // decision that will later be consistent.
207 if (pmeTarget
== TaskTarget::Gpu
)
209 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0)) ||
210 (numPmeRanksPerSimulation
> 1))
212 GMX_THROW(NotImplementedError
213 ("PME tasks were required to run on GPUs, but that is not implemented with "
214 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
215 "or permit PME tasks to be assigned to the CPU."));
220 if (numRanksPerSimulation
== 1)
222 // PME can run well on a GPU shared with NB, and we permit
223 // mdrun to default to try that.
224 return !gpuIdsToUse
.empty();
227 if (numRanksPerSimulation
< 1)
229 // Full automated mode for thread-MPI (the default). PME can
230 // run well on a GPU shared with NB, and we permit mdrun to
231 // default to it if there is only one GPU available.
232 return (gpuIdsToUse
.size() == 1);
235 // Not enough support for PME on GPUs for anything else
239 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget
,
240 const std::vector
<int> &userGpuTaskAssignment
,
241 const EmulateGpuNonbonded emulateGpuNonbonded
,
242 const bool buildSupportsNonbondedOnGpu
,
243 const bool nonbondedOnGpuIsUseful
,
244 const bool gpusWereDetected
)
246 if (nonbondedTarget
== TaskTarget::Cpu
)
248 if (!userGpuTaskAssignment
.empty())
250 GMX_THROW(InconsistentInputError
251 ("A GPU task assignment was specified, but nonbonded interactions were "
252 "assigned to the CPU. Make no more than one of these choices."));
258 if (!buildSupportsNonbondedOnGpu
&& nonbondedTarget
== TaskTarget::Gpu
)
260 GMX_THROW(InconsistentInputError
261 ("Nonbonded interactions on the GPU were requested with -nb gpu, "
262 "but the GROMACS binary has been built without GPU support. "
263 "Either run without selecting GPU options, or recompile GROMACS "
264 "with GPU support enabled"));
267 // TODO refactor all these TaskTarget::Gpu checks into one place?
268 // e.g. use a subfunction that handles only the cases where
269 // TaskTargets are not Cpu?
270 if (emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
)
272 if (nonbondedTarget
== TaskTarget::Gpu
)
274 GMX_THROW(InconsistentInputError
275 ("Nonbonded interactions on the GPU were required, which is inconsistent "
276 "with choosing emulation. Make no more than one of these choices."));
278 if (!userGpuTaskAssignment
.empty())
280 GMX_THROW(InconsistentInputError
281 ("GPU ID usage was specified, as was GPU emulation. Make no more than one of these choices."));
287 if (!nonbondedOnGpuIsUseful
)
289 if (nonbondedTarget
== TaskTarget::Gpu
)
291 GMX_THROW(InconsistentInputError
292 ("Nonbonded interactions on the GPU were required, but not supported for these "
293 "simulation settings. Change your settings, or do not require using GPUs."));
299 if (!userGpuTaskAssignment
.empty())
301 // Specifying -gputasks requires specifying everything.
302 if (nonbondedTarget
== TaskTarget::Auto
)
304 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
310 if (nonbondedTarget
== TaskTarget::Gpu
)
312 // We still don't know whether it is an error if no GPUs are found
313 // because we don't know the duty of this rank, yet. For example,
314 // a node with only PME ranks and -pme cpu is OK if there are not
319 // If we get here, then the user permitted GPUs, which we should
320 // use for nonbonded interactions.
321 return gpusWereDetected
;
324 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded
,
325 const TaskTarget pmeTarget
,
326 const std::vector
<int> &userGpuTaskAssignment
,
327 const gmx_hw_info_t
&hardwareInfo
,
328 const t_inputrec
&inputrec
,
329 const gmx_mtop_t
&mtop
,
330 const int numRanksPerSimulation
,
331 const int numPmeRanksPerSimulation
,
332 const bool gpusWereDetected
)
334 if (pmeTarget
== TaskTarget::Cpu
)
339 if (!useGpuForNonbonded
)
341 if (pmeTarget
== TaskTarget::Gpu
)
343 GMX_THROW(NotImplementedError
344 ("PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
350 if (!pme_gpu_supports_build(&message
))
352 if (pmeTarget
== TaskTarget::Gpu
)
354 GMX_THROW(NotImplementedError
355 ("Cannot compute PME interactions on a GPU, because " + message
));
359 if (!pme_gpu_supports_hardware(hardwareInfo
, &message
))
361 if (pmeTarget
== TaskTarget::Gpu
)
363 GMX_THROW(NotImplementedError
364 ("Cannot compute PME interactions on a GPU, because " + message
));
368 if (!pme_gpu_supports_input(inputrec
, mtop
, &message
))
370 if (pmeTarget
== TaskTarget::Gpu
)
372 GMX_THROW(NotImplementedError
373 ("Cannot compute PME interactions on a GPU, because " + message
));
378 if (pmeTarget
== TaskTarget::Cpu
)
380 if (!userGpuTaskAssignment
.empty())
382 GMX_THROW(InconsistentInputError
383 ("A GPU task assignment was specified, but PME interactions were "
384 "assigned to the CPU. Make no more than one of these choices."));
390 if (!userGpuTaskAssignment
.empty())
392 // Specifying -gputasks requires specifying everything.
393 if (pmeTarget
== TaskTarget::Auto
)
395 GMX_THROW(InconsistentInputError(formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
401 // We still don't know whether it is an error if no GPUs are found
402 // because we don't know the duty of this rank, yet. For example,
403 // a node with only PME ranks and -pme cpu is OK if there are not
406 if (pmeTarget
== TaskTarget::Gpu
)
408 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0)) ||
409 (numPmeRanksPerSimulation
> 1))
411 GMX_THROW(NotImplementedError
412 ("PME tasks were required to run on GPUs, but that is not implemented with "
413 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
414 "or permit PME tasks to be assigned to the CPU."));
419 // If we get here, then the user permitted GPUs.
420 if (numRanksPerSimulation
== 1)
422 // PME can run well on a single GPU shared with NB when there
423 // is one rank, so we permit mdrun to try that if we have
425 return gpusWereDetected
;
428 // Not enough support for PME on GPUs for anything else
432 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded
,
433 const bool useGpuForPme
,
434 const TaskTarget bondedTarget
,
435 const bool canUseGpuForBonded
,
436 const bool usingLJPme
,
437 const bool usingElecPmeOrEwald
,
438 const int numPmeRanksPerSimulation
,
439 const bool gpusWereDetected
)
441 if (bondedTarget
== TaskTarget::Cpu
)
446 if (!canUseGpuForBonded
)
448 if (bondedTarget
== TaskTarget::Gpu
)
450 GMX_THROW(InconsistentInputError
451 ("Bonded interactions on the GPU were required, but not supported for these "
452 "simulation settings. Change your settings, or do not require using GPUs."));
458 if (!useGpuForNonbonded
)
460 if (bondedTarget
== TaskTarget::Gpu
)
462 GMX_THROW(InconsistentInputError
463 ("Bonded interactions on the GPU were required, but this requires that "
464 "short-ranged non-bonded interactions are also run on the GPU. Change "
465 "your settings, or do not require using GPUs."));
471 // TODO If the bonded kernels do not get fused, then performance
472 // overheads might suggest alternative choices here.
474 if (bondedTarget
== TaskTarget::Gpu
)
476 // We still don't know whether it is an error if no GPUs are
481 // If we get here, then the user permitted GPUs, which we should
482 // use for bonded interactions if any were detected and the CPU
483 // is busy, for which we currently only check PME or Ewald.
484 // (It would be better to dynamically assign bondeds based on timings)
485 // Note that here we assume that the auto setting of PME ranks will not
486 // choose seperate PME ranks when nonBonded are assigned to the GPU.
487 bool usingOurCpuForPmeOrEwald
= (usingLJPme
|| (usingElecPmeOrEwald
&& !useGpuForPme
&& numPmeRanksPerSimulation
<= 0));
489 return gpusWereDetected
&& usingOurCpuForPmeOrEwald
;