2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2015,2016,2017,2018,2019 by the GROMACS development team.
5 * Copyright (c) 2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines functionality for deciding whether tasks will run on GPUs.
39 * \author Mark Abraham <mark.j.abraham@gmail.com>
40 * \ingroup module_taskassignment
45 #include "decidegpuusage.h"
55 #include "gromacs/ewald/pme.h"
56 #include "gromacs/hardware/cpuinfo.h"
57 #include "gromacs/hardware/detecthardware.h"
58 #include "gromacs/hardware/hardwaretopology.h"
59 #include "gromacs/hardware/hw_info.h"
60 #include "gromacs/mdlib/gmx_omp_nthreads.h"
61 #include "gromacs/mdlib/update_constrain_gpu.h"
62 #include "gromacs/mdtypes/commrec.h"
63 #include "gromacs/mdtypes/inputrec.h"
64 #include "gromacs/mdtypes/md_enums.h"
65 #include "gromacs/mdtypes/mdrunoptions.h"
66 #include "gromacs/pulling/pull.h"
67 #include "gromacs/taskassignment/taskassignment.h"
68 #include "gromacs/topology/mtop_util.h"
69 #include "gromacs/topology/topology.h"
70 #include "gromacs/utility/baseversion.h"
71 #include "gromacs/utility/exceptions.h"
72 #include "gromacs/utility/fatalerror.h"
73 #include "gromacs/utility/gmxassert.h"
74 #include "gromacs/utility/logger.h"
75 #include "gromacs/utility/stringutil.h"
84 //! Helper variable to localise the text of an often repeated message.
85 const char* g_specifyEverythingFormatString
=
86 "When you use mdrun -gputasks, %s must be set to non-default "
87 "values, so that the device IDs can be interpreted correctly."
89 " If you simply want to restrict which GPUs are used, then it is "
90 "better to use mdrun -gpu_id. Otherwise, setting the "
92 "CUDA_VISIBLE_DEVICES"
94 // Technically there is no portable way to do this offered by the
95 // OpenCL standard, but the only current relevant case for GROMACS
96 // is AMD OpenCL, which offers this variable.
100 "How to restrict visible devices in SYCL?"
102 # error "Unreachable branch"
104 " environment variable in your bash profile or job "
105 "script may be more convenient."
111 bool decideWhetherToUseGpusForNonbondedWithThreadMpi(const TaskTarget nonbondedTarget
,
112 const std::vector
<int>& gpuIdsToUse
,
113 const std::vector
<int>& userGpuTaskAssignment
,
114 const EmulateGpuNonbonded emulateGpuNonbonded
,
115 const bool buildSupportsNonbondedOnGpu
,
116 const bool nonbondedOnGpuIsUseful
,
117 const int numRanksPerSimulation
)
119 // First, exclude all cases where we can't run NB on GPUs.
120 if (nonbondedTarget
== TaskTarget::Cpu
|| emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
121 || !nonbondedOnGpuIsUseful
|| !buildSupportsNonbondedOnGpu
)
123 // If the user required NB on GPUs, we issue an error later.
127 // We now know that NB on GPUs makes sense, if we have any.
129 if (!userGpuTaskAssignment
.empty())
131 // Specifying -gputasks requires specifying everything.
132 if (nonbondedTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
134 GMX_THROW(InconsistentInputError(
135 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
140 if (nonbondedTarget
== TaskTarget::Gpu
)
145 // Because this is thread-MPI, we already know about the GPUs that
146 // all potential ranks can use, and can use that in a global
147 // decision that will later be consistent.
148 auto haveGpus
= !gpuIdsToUse
.empty();
150 // If we get here, then the user permitted or required GPUs.
154 bool decideWhetherToUseGpusForPmeWithThreadMpi(const bool useGpuForNonbonded
,
155 const TaskTarget pmeTarget
,
156 const std::vector
<int>& gpuIdsToUse
,
157 const std::vector
<int>& userGpuTaskAssignment
,
158 const gmx_hw_info_t
& hardwareInfo
,
159 const t_inputrec
& inputrec
,
160 const int numRanksPerSimulation
,
161 const int numPmeRanksPerSimulation
)
163 // First, exclude all cases where we can't run PME on GPUs.
164 if ((pmeTarget
== TaskTarget::Cpu
) || !useGpuForNonbonded
|| !pme_gpu_supports_build(nullptr)
165 || !pme_gpu_supports_hardware(hardwareInfo
, nullptr) || !pme_gpu_supports_input(inputrec
, nullptr))
167 // PME can't run on a GPU. If the user required that, we issue
172 // We now know that PME on GPUs might make sense, if we have any.
174 if (!userGpuTaskAssignment
.empty())
176 // Follow the user's choice of GPU task assignment, if we
177 // can. Checking that their IDs are for compatible GPUs comes
180 // Specifying -gputasks requires specifying everything.
181 if (pmeTarget
== TaskTarget::Auto
|| numRanksPerSimulation
< 1)
183 GMX_THROW(InconsistentInputError(
184 formatString(g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi")));
187 // PME on GPUs is only supported in a single case
188 if (pmeTarget
== TaskTarget::Gpu
)
190 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
191 || (numPmeRanksPerSimulation
> 1))
193 GMX_THROW(InconsistentInputError(
194 "When you run mdrun -pme gpu -gputasks, you must supply a PME-enabled .tpr "
195 "file and use a single PME rank."));
200 // pmeTarget == TaskTarget::Auto
201 return numRanksPerSimulation
== 1;
204 // Because this is thread-MPI, we already know about the GPUs that
205 // all potential ranks can use, and can use that in a global
206 // decision that will later be consistent.
208 if (pmeTarget
== TaskTarget::Gpu
)
210 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
211 || (numPmeRanksPerSimulation
> 1))
213 GMX_THROW(NotImplementedError(
214 "PME tasks were required to run on GPUs, but that is not implemented with "
215 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
216 "or permit PME tasks to be assigned to the CPU."));
221 if (numRanksPerSimulation
== 1)
223 // PME can run well on a GPU shared with NB, and we permit
224 // mdrun to default to try that.
225 return !gpuIdsToUse
.empty();
228 if (numRanksPerSimulation
< 1)
230 // Full automated mode for thread-MPI (the default). PME can
231 // run well on a GPU shared with NB, and we permit mdrun to
232 // default to it if there is only one GPU available.
233 return (gpuIdsToUse
.size() == 1);
236 // Not enough support for PME on GPUs for anything else
240 bool decideWhetherToUseGpusForNonbonded(const TaskTarget nonbondedTarget
,
241 const std::vector
<int>& userGpuTaskAssignment
,
242 const EmulateGpuNonbonded emulateGpuNonbonded
,
243 const bool buildSupportsNonbondedOnGpu
,
244 const bool nonbondedOnGpuIsUseful
,
245 const bool gpusWereDetected
)
247 if (nonbondedTarget
== TaskTarget::Cpu
)
249 if (!userGpuTaskAssignment
.empty())
251 GMX_THROW(InconsistentInputError(
252 "A GPU task assignment was specified, but nonbonded interactions were "
253 "assigned to the CPU. Make no more than one of these choices."));
259 if (!buildSupportsNonbondedOnGpu
&& nonbondedTarget
== TaskTarget::Gpu
)
261 GMX_THROW(InconsistentInputError(
262 "Nonbonded interactions on the GPU were requested with -nb gpu, "
263 "but the GROMACS binary has been built without GPU support. "
264 "Either run without selecting GPU options, or recompile GROMACS "
265 "with GPU support enabled"));
268 // TODO refactor all these TaskTarget::Gpu checks into one place?
269 // e.g. use a subfunction that handles only the cases where
270 // TaskTargets are not Cpu?
271 if (emulateGpuNonbonded
== EmulateGpuNonbonded::Yes
)
273 if (nonbondedTarget
== TaskTarget::Gpu
)
275 GMX_THROW(InconsistentInputError(
276 "Nonbonded interactions on the GPU were required, which is inconsistent "
277 "with choosing emulation. Make no more than one of these choices."));
279 if (!userGpuTaskAssignment
.empty())
282 InconsistentInputError("GPU ID usage was specified, as was GPU emulation. Make "
283 "no more than one of these choices."));
289 if (!nonbondedOnGpuIsUseful
)
291 if (nonbondedTarget
== TaskTarget::Gpu
)
293 GMX_THROW(InconsistentInputError(
294 "Nonbonded interactions on the GPU were required, but not supported for these "
295 "simulation settings. Change your settings, or do not require using GPUs."));
301 if (!userGpuTaskAssignment
.empty())
303 // Specifying -gputasks requires specifying everything.
304 if (nonbondedTarget
== TaskTarget::Auto
)
306 GMX_THROW(InconsistentInputError(
307 formatString(g_specifyEverythingFormatString
, "-nb and -ntmpi")));
313 if (nonbondedTarget
== TaskTarget::Gpu
)
315 // We still don't know whether it is an error if no GPUs are found
316 // because we don't know the duty of this rank, yet. For example,
317 // a node with only PME ranks and -pme cpu is OK if there are not
322 // If we get here, then the user permitted GPUs, which we should
323 // use for nonbonded interactions.
324 return gpusWereDetected
;
327 bool decideWhetherToUseGpusForPme(const bool useGpuForNonbonded
,
328 const TaskTarget pmeTarget
,
329 const std::vector
<int>& userGpuTaskAssignment
,
330 const gmx_hw_info_t
& hardwareInfo
,
331 const t_inputrec
& inputrec
,
332 const int numRanksPerSimulation
,
333 const int numPmeRanksPerSimulation
,
334 const bool gpusWereDetected
)
336 if (pmeTarget
== TaskTarget::Cpu
)
341 if (!useGpuForNonbonded
)
343 if (pmeTarget
== TaskTarget::Gpu
)
345 GMX_THROW(NotImplementedError(
346 "PME on GPUs is only supported when nonbonded interactions run on GPUs also."));
352 if (!pme_gpu_supports_build(&message
))
354 if (pmeTarget
== TaskTarget::Gpu
)
356 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
360 if (!pme_gpu_supports_hardware(hardwareInfo
, &message
))
362 if (pmeTarget
== TaskTarget::Gpu
)
364 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
368 if (!pme_gpu_supports_input(inputrec
, &message
))
370 if (pmeTarget
== TaskTarget::Gpu
)
372 GMX_THROW(NotImplementedError("Cannot compute PME interactions on a GPU, because " + message
));
377 if (pmeTarget
== TaskTarget::Cpu
)
379 if (!userGpuTaskAssignment
.empty())
381 GMX_THROW(InconsistentInputError(
382 "A GPU task assignment was specified, but PME interactions were "
383 "assigned to the CPU. Make no more than one of these choices."));
389 if (!userGpuTaskAssignment
.empty())
391 // Specifying -gputasks requires specifying everything.
392 if (pmeTarget
== TaskTarget::Auto
)
394 GMX_THROW(InconsistentInputError(formatString(
395 g_specifyEverythingFormatString
, "all of -nb, -pme, and -ntmpi"))); // TODO ntmpi?
401 // We still don't know whether it is an error if no GPUs are found
402 // because we don't know the duty of this rank, yet. For example,
403 // a node with only PME ranks and -pme cpu is OK if there are not
406 if (pmeTarget
== TaskTarget::Gpu
)
408 if (((numRanksPerSimulation
> 1) && (numPmeRanksPerSimulation
== 0))
409 || (numPmeRanksPerSimulation
> 1))
411 GMX_THROW(NotImplementedError(
412 "PME tasks were required to run on GPUs, but that is not implemented with "
413 "more than one PME rank. Use a single rank simulation, or a separate PME rank, "
414 "or permit PME tasks to be assigned to the CPU."));
419 // If we get here, then the user permitted GPUs.
420 if (numRanksPerSimulation
== 1)
422 // PME can run well on a single GPU shared with NB when there
423 // is one rank, so we permit mdrun to try that if we have
425 return gpusWereDetected
;
428 // Not enough support for PME on GPUs for anything else
433 PmeRunMode
determinePmeRunMode(const bool useGpuForPme
, const TaskTarget
& pmeFftTarget
, const t_inputrec
& inputrec
)
435 if (!EEL_PME(inputrec
.coulombtype
))
437 return PmeRunMode::None
;
442 if (pmeFftTarget
== TaskTarget::Cpu
)
444 return PmeRunMode::Mixed
;
448 return PmeRunMode::GPU
;
453 if (pmeFftTarget
== TaskTarget::Gpu
)
456 "Assigning FFTs to GPU requires PME to be assigned to GPU as well. With PME "
457 "on CPU you should not be using -pmefft.");
459 return PmeRunMode::CPU
;
463 bool decideWhetherToUseGpusForBonded(const bool useGpuForNonbonded
,
464 const bool useGpuForPme
,
465 const TaskTarget bondedTarget
,
466 const bool canUseGpuForBonded
,
467 const bool usingLJPme
,
468 const bool usingElecPmeOrEwald
,
469 const int numPmeRanksPerSimulation
,
470 const bool gpusWereDetected
)
472 if (bondedTarget
== TaskTarget::Cpu
)
477 if (!canUseGpuForBonded
)
479 if (bondedTarget
== TaskTarget::Gpu
)
481 GMX_THROW(InconsistentInputError(
482 "Bonded interactions on the GPU were required, but not supported for these "
483 "simulation settings. Change your settings, or do not require using GPUs."));
489 if (!useGpuForNonbonded
)
491 if (bondedTarget
== TaskTarget::Gpu
)
493 GMX_THROW(InconsistentInputError(
494 "Bonded interactions on the GPU were required, but this requires that "
495 "short-ranged non-bonded interactions are also run on the GPU. Change "
496 "your settings, or do not require using GPUs."));
502 // TODO If the bonded kernels do not get fused, then performance
503 // overheads might suggest alternative choices here.
505 if (bondedTarget
== TaskTarget::Gpu
)
507 // We still don't know whether it is an error if no GPUs are
512 // If we get here, then the user permitted GPUs, which we should
513 // use for bonded interactions if any were detected and the CPU
514 // is busy, for which we currently only check PME or Ewald.
515 // (It would be better to dynamically assign bondeds based on timings)
516 // Note that here we assume that the auto setting of PME ranks will not
517 // choose seperate PME ranks when nonBonded are assigned to the GPU.
518 bool usingOurCpuForPmeOrEwald
=
519 (usingLJPme
|| (usingElecPmeOrEwald
&& !useGpuForPme
&& numPmeRanksPerSimulation
<= 0));
521 return gpusWereDetected
&& usingOurCpuForPmeOrEwald
;
524 bool decideWhetherToUseGpuForUpdate(const bool isDomainDecomposition
,
525 const bool useUpdateGroups
,
526 const PmeRunMode pmeRunMode
,
527 const bool havePmeOnlyRank
,
528 const bool useGpuForNonbonded
,
529 const TaskTarget updateTarget
,
530 const bool gpusWereDetected
,
531 const t_inputrec
& inputrec
,
532 const gmx_mtop_t
& mtop
,
533 const bool useEssentialDynamics
,
534 const bool doOrientationRestraints
,
535 const bool useReplicaExchange
,
537 const DevelopmentFeatureFlags
& devFlags
,
538 const gmx::MDLogger
& mdlog
)
541 // '-update cpu' overrides the environment variable, '-update auto' does not
542 if (updateTarget
== TaskTarget::Cpu
543 || (updateTarget
== TaskTarget::Auto
&& !devFlags
.forceGpuUpdateDefault
))
548 const bool hasAnyConstraints
= gmx_mtop_interaction_count(mtop
, IF_CONSTRAINT
) > 0;
549 const bool pmeUsesCpu
= (pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::Mixed
);
551 std::string errorMessage
;
553 if (isDomainDecomposition
)
555 if (!devFlags
.enableGpuHaloExchange
)
557 errorMessage
+= "Domain decomposition without GPU halo exchange is not supported.\n ";
561 if (hasAnyConstraints
&& !useUpdateGroups
)
564 "Domain decomposition is only supported with constraints when update "
566 "are used. This means constraining all bonds is not supported, except for "
567 "small molecules, and box sizes close to half the pair-list cutoff are not "
573 errorMessage
+= "With domain decomposition, PME must run fully on the GPU.\n";
582 errorMessage
+= "With separate PME rank(s), PME must run fully on the GPU.\n";
585 if (!devFlags
.enableGpuPmePPComm
)
587 errorMessage
+= "With separate PME rank(s), PME must use direct communication.\n";
593 errorMessage
+= "Multiple time stepping is not supported.\n";
596 if (inputrec
.eConstrAlg
== econtSHAKE
&& hasAnyConstraints
&& gmx_mtop_ftype_count(mtop
, F_CONSTR
) > 0)
598 errorMessage
+= "SHAKE constraints are not supported.\n";
600 // Using the GPU-version of update if:
601 // 1. PME is on the GPU (there should be a copy of coordinates on GPU for PME spread) or inactive, or
602 // 2. Non-bonded interactions are on the GPU.
603 if ((pmeRunMode
== PmeRunMode::CPU
|| pmeRunMode
== PmeRunMode::None
) && !useGpuForNonbonded
)
606 "Either PME or short-ranged non-bonded interaction tasks must run on the GPU.\n";
609 if (!gpusWereDetected
)
611 errorMessage
+= "Compatible GPUs must have been found.\n";
615 errorMessage
+= "Only a CUDA build is supported.\n";
617 if (inputrec
.eI
!= eiMD
)
619 errorMessage
+= "Only the md integrator is supported.\n";
621 if (inputrec
.etc
== etcNOSEHOOVER
)
623 errorMessage
+= "Nose-Hoover temperature coupling is not supported.\n";
625 if (!(inputrec
.epc
== epcNO
|| inputrec
.epc
== epcPARRINELLORAHMAN
626 || inputrec
.epc
== epcBERENDSEN
|| inputrec
.epc
== epcCRESCALE
))
629 "Only Parrinello-Rahman, Berendsen, and C-rescale pressure coupling are "
632 if (EEL_PME_EWALD(inputrec
.coulombtype
) && inputrec
.epsilon_surface
!= 0)
634 // The graph is needed, but not supported
635 errorMessage
+= "Ewald surface correction is not supported.\n";
637 if (gmx_mtop_interaction_count(mtop
, IF_VSITE
) > 0)
639 errorMessage
+= "Virtual sites are not supported.\n";
641 if (useEssentialDynamics
)
643 errorMessage
+= "Essential dynamics is not supported.\n";
645 if (inputrec
.bPull
&& pull_have_constraint(inputrec
.pull
))
647 errorMessage
+= "Constraints pulling is not supported.\n";
649 if (doOrientationRestraints
)
651 // The graph is needed, but not supported
652 errorMessage
+= "Orientation restraints are not supported.\n";
654 if (inputrec
.efep
!= efepNO
655 && (haveFreeEnergyType(inputrec
, efptBONDED
) || haveFreeEnergyType(inputrec
, efptMASS
)))
657 errorMessage
+= "Free energy perturbation for mass and constraints are not supported.\n";
659 const auto particleTypes
= gmx_mtop_particletype_count(mtop
);
660 if (particleTypes
[eptShell
] > 0)
662 errorMessage
+= "Shells are not supported.\n";
664 if (useReplicaExchange
)
666 errorMessage
+= "Replica exchange simulations are not supported.\n";
668 if (inputrec
.eSwapCoords
!= eswapNO
)
670 errorMessage
+= "Swapping the coordinates is not supported.\n";
674 errorMessage
+= "Re-run is not supported.\n";
677 // TODO: F_CONSTRNC is only unsupported, because isNumCoupledConstraintsSupported()
678 // does not support it, the actual CUDA LINCS code does support it
679 if (gmx_mtop_ftype_count(mtop
, F_CONSTRNC
) > 0)
681 errorMessage
+= "Non-connecting constraints are not supported\n";
683 if (!UpdateConstrainGpu::isNumCoupledConstraintsSupported(mtop
))
686 "The number of coupled constraints is higher than supported in the GPU LINCS "
690 if (!errorMessage
.empty())
692 if (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
)
694 GMX_LOG(mdlog
.warning
)
697 "Update task on the GPU was required, by the "
698 "GMX_FORCE_UPDATE_DEFAULT_GPU environment variable, but the following "
699 "condition(s) were not satisfied:");
700 GMX_LOG(mdlog
.warning
).asParagraph().appendText(errorMessage
.c_str());
701 GMX_LOG(mdlog
.warning
).asParagraph().appendText("Will use CPU version of update.");
703 else if (updateTarget
== TaskTarget::Gpu
)
705 std::string prefix
= gmx::formatString(
706 "Update task on the GPU was required,\n"
707 "but the following condition(s) were not satisfied:\n");
708 GMX_THROW(InconsistentInputError((prefix
+ errorMessage
).c_str()));
713 return (updateTarget
== TaskTarget::Gpu
714 || (updateTarget
== TaskTarget::Auto
&& devFlags
.forceGpuUpdateDefault
));