2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Defines routines for handling user-specified GPU IDs.
38 * \author Mark Abraham <mark.j.abraham@gmail.com>
39 * \ingroup module_taskassignment
43 #include "usergpuids.h"
52 #include "gromacs/hardware/device_information.h"
53 #include "gromacs/hardware/device_management.h"
54 #include "gromacs/hardware/hw_info.h"
55 #include "gromacs/utility/exceptions.h"
56 #include "gromacs/utility/stringutil.h"
61 /*! \brief Parse a GPU ID specifier string into a container.
63 * \param[in] gpuIdString String like "013" or "0,1,3" typically
64 * supplied by the user.
65 * Must contain only unique decimal digits, or only decimal
66 * digits separated by comma delimiters. A terminal
67 * comma is accceptable (and required to specify a
68 * single ID that is larger than 9).
70 * \returns A vector of numeric IDs extracted from \c gpuIdString.
72 * \throws std::bad_alloc If out of memory.
73 * InvalidInputError If an invalid character is found (ie not a digit or ',').
75 static std::vector
<int> parseGpuDeviceIdentifierList(const std::string
& gpuIdString
)
77 std::vector
<int> digits
;
78 auto foundCommaDelimiters
= gpuIdString
.find(',') != std::string::npos
;
79 if (!foundCommaDelimiters
)
81 for (const auto& c
: gpuIdString
)
83 if (std::isdigit(c
) == 0)
85 GMX_THROW(InvalidInputError(
86 formatString("Invalid character in GPU ID string: \"%c\"\n", c
)));
88 // Convert each character in the token to an integer
89 digits
.push_back(c
- '0');
94 if (gpuIdString
[0] == ',')
96 GMX_THROW(InvalidInputError("Invalid use of leading comma in GPU ID string"));
98 std::istringstream
ss(gpuIdString
);
100 digits
.reserve(gpuIdString
.length());
101 token
.reserve(gpuIdString
.length());
102 while (std::getline(ss
, token
, ','))
104 // Convert the whole token to an integer
107 GMX_THROW(InvalidInputError("Invalid use of comma in GPU ID string"));
109 digits
.push_back(std::stoi(token
));
115 std::vector
<int> parseUserGpuIdString(const std::string
& gpuIdString
)
117 // An optional comma is used to separate GPU IDs assigned to the
118 // same type of task, which will be useful for any nodes that have
119 // more than ten GPUs.
121 auto digits
= parseGpuDeviceIdentifierList(gpuIdString
);
123 // Check and enforce that no duplicate IDs are allowed
124 for (size_t i
= 0; i
!= digits
.size(); ++i
)
126 for (size_t j
= i
+ 1; j
!= digits
.size(); ++j
)
128 if (digits
[i
] == digits
[j
])
131 InvalidInputError(formatString("The string of available GPU device IDs "
132 "'%s' may not contain duplicate device IDs",
133 gpuIdString
.c_str())));
140 std::vector
<int> makeGpuIdsToUse(const std::vector
<std::unique_ptr
<DeviceInformation
>>& deviceInfoList
,
141 const std::string
& gpuIdsAvailableString
)
143 auto compatibleDeviceInfoList
= getCompatibleDevices(deviceInfoList
);
144 std::vector
<int> gpuIdsAvailable
= parseUserGpuIdString(gpuIdsAvailableString
);
146 std::vector
<int> gpuIdsToUse
;
147 if (gpuIdsAvailable
.empty())
149 for (const auto& compatibleDeviceInfo
: compatibleDeviceInfoList
)
151 gpuIdsToUse
.emplace_back(compatibleDeviceInfo
.get().id
);
156 gpuIdsToUse
.reserve(gpuIdsAvailable
.size());
157 std::vector
<int> availableGpuIdsThatAreIncompatible
;
158 for (const auto& availableGpuId
: gpuIdsAvailable
)
160 bool availableGpuIsCompatible
= false;
161 for (const auto& compatibleDeviceInfo
: compatibleDeviceInfoList
)
163 if (availableGpuId
== compatibleDeviceInfo
.get().id
)
165 availableGpuIsCompatible
= true;
169 if (availableGpuIsCompatible
)
171 gpuIdsToUse
.push_back(availableGpuId
);
175 // Prepare data for an error message about all incompatible available GPU IDs.
176 availableGpuIdsThatAreIncompatible
.push_back(availableGpuId
);
179 if (!availableGpuIdsThatAreIncompatible
.empty())
181 auto message
= "You requested mdrun to use GPUs with IDs " + gpuIdsAvailableString
182 + ", but that includes the following incompatible GPUs: "
183 + formatAndJoin(availableGpuIdsThatAreIncompatible
, ",", StringFormatter("%d"))
184 + ". Request only compatible GPUs.";
185 GMX_THROW(InvalidInputError(message
));
190 std::vector
<int> parseUserTaskAssignmentString(const std::string
& gpuIdString
)
192 // Implement any additional constraints here that need to be imposed
194 return parseGpuDeviceIdentifierList(gpuIdString
);
197 std::vector
<int> makeGpuIds(ArrayRef
<const int> compatibleGpus
, size_t numGpuTasks
)
199 std::vector
<int> gpuIdsToUse
;
201 gpuIdsToUse
.reserve(numGpuTasks
);
203 auto currentGpuId
= compatibleGpus
.begin();
204 for (size_t i
= 0; i
!= numGpuTasks
; ++i
)
206 GMX_ASSERT(!compatibleGpus
.empty(),
207 "Must have compatible GPUs from which to build a list of GPU IDs to use");
208 gpuIdsToUse
.push_back(*currentGpuId
);
210 if (currentGpuId
== compatibleGpus
.end())
212 // Wrap around and assign tasks again.
213 currentGpuId
= compatibleGpus
.begin();
216 std::sort(gpuIdsToUse
.begin(), gpuIdsToUse
.end());
220 std::string
makeGpuIdString(const std::vector
<int>& gpuIds
, int totalNumberOfTasks
)
222 auto resultGpuIds
= makeGpuIds(gpuIds
, totalNumberOfTasks
);
223 return formatAndJoin(resultGpuIds
, ",", StringFormatter("%d"));
226 void checkUserGpuIds(const std::vector
<std::unique_ptr
<DeviceInformation
>>& deviceInfoList
,
227 const std::vector
<int>& compatibleGpus
,
228 const std::vector
<int>& gpuIds
)
230 bool foundIncompatibleGpuIds
= false;
231 std::string message
=
232 "Some of the requested GPUs do not exist, behave strangely, or are not compatible:\n";
234 for (const auto& gpuId
: gpuIds
)
236 if (std::find(compatibleGpus
.begin(), compatibleGpus
.end(), gpuId
) == compatibleGpus
.end())
238 foundIncompatibleGpuIds
= true;
239 message
+= gmx::formatString(" GPU #%d: %s\n", gpuId
,
240 getDeviceCompatibilityDescription(deviceInfoList
, gpuId
).c_str());
243 if (foundIncompatibleGpuIds
)
245 GMX_THROW(InconsistentInputError(message
));