2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team.
5 * Copyright (c) 2017,2018,2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Defines the OpenCL implementations of the device management.
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
41 * \author Teemu Virolainen <teemu@streamcomputing.eu>
42 * \author Mark Abraham <mark.j.abraham@gmail.com>
43 * \author Szilárd Páll <pall.szilard@gmail.com>
44 * \author Artem Zhmurov <zhmurov@gmail.com>
46 * \ingroup module_hardware
52 #include "gromacs/gpu_utils/oclraii.h"
53 #include "gromacs/gpu_utils/oclutils.h"
54 #include "gromacs/hardware/device_management.h"
55 #include "gromacs/utility/fatalerror.h"
56 #include "gromacs/utility/smalloc.h"
57 #include "gromacs/utility/stringutil.h"
59 #include "device_information.h"
64 /*! \brief Return true if executing on compatible OS for AMD OpenCL.
66 * This is assumed to be true for OS X version of at least 10.10.4 and
67 * all other OS flavors.
69 * \return true if version is 14.4 or later (= OS X version 10.10.4),
70 * or OS is not Darwin.
72 static bool runningOnCompatibleOSForAmd()
76 char kernelVersion
[256];
77 size_t len
= sizeof(kernelVersion
);
81 int major
= strtod(kernelVersion
, NULL
);
82 int minor
= strtod(strchr(kernelVersion
, '.') + 1, NULL
);
84 // Kernel 14.4 corresponds to OS X 10.10.4
85 return (major
> 14 || (major
== 14 && minor
>= 4));
91 /*! \brief Return true if executing on compatible GPU for NVIDIA OpenCL.
93 * There are known issues with OpenCL when running on NVIDIA Volta or newer (CC 7+).
94 * As a workaround, we recommend using CUDA on such hardware.
96 * This function relies on cl_nv_device_attribute_query. In case it's not functioning properly,
97 * we trust the user and mark the device as compatible.
99 * \return true if running on Pascal (CC 6.x) or older, or if we can not determine device generation.
101 static bool runningOnCompatibleHWForNvidia(const DeviceInformation
& deviceInfo
)
103 // The macro is defined in Intel's and AMD's headers, but it's not strictly required to be there.
104 #ifndef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
107 static const unsigned int ccMajorBad
= 7; // Volta and Turing
108 unsigned int ccMajor
;
109 cl_device_id devId
= deviceInfo
.oclDeviceId
;
110 const cl_int err
= clGetDeviceInfo(devId
, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
,
111 sizeof(ccMajor
), &ccMajor
, nullptr);
112 if (err
!= CL_SUCCESS
)
114 return true; // Err on a side of trusting the user to know what they are doing.
116 return ccMajor
< ccMajorBad
;
121 * \brief Checks that device \c deviceInfo is compatible with GROMACS.
123 * Vendor and OpenCL version support checks are executed an the result
126 * \param[in] deviceInfo The device info pointer.
127 * \returns The status enumeration value for the checked device:
129 static DeviceStatus
isDeviceFunctional(const DeviceInformation
& deviceInfo
)
131 if (getenv("GMX_GPU_DISABLE_COMPATIBILITY_CHECK") != nullptr)
133 // Assume the device is compatible because checking has been disabled.
134 return DeviceStatus::Compatible
;
136 if (getenv("GMX_OCL_DISABLE_COMPATIBILITY_CHECK") != nullptr)
139 "Environment variable GMX_OCL_DISABLE_COMPATIBILITY_CHECK is deprecated and will "
140 "be removed in release 2022. Please use GMX_GPU_DISABLE_COMPATIBILITY_CHECK "
142 return DeviceStatus::Compatible
;
145 // OpenCL device version check, ensure >= REQUIRED_OPENCL_MIN_VERSION
146 constexpr unsigned int minVersionMajor
= REQUIRED_OPENCL_MIN_VERSION_MAJOR
;
147 constexpr unsigned int minVersionMinor
= REQUIRED_OPENCL_MIN_VERSION_MINOR
;
149 // Based on the OpenCL spec we're checking the version supported by
150 // the device which has the following format:
151 // OpenCL<space><major_version.minor_version><space><vendor-specific information>
152 unsigned int deviceVersionMinor
, deviceVersionMajor
;
153 const int valuesScanned
= std::sscanf(deviceInfo
.device_version
, "OpenCL %u.%u",
154 &deviceVersionMajor
, &deviceVersionMinor
);
155 const bool versionLargeEnough
=
156 ((valuesScanned
== 2)
157 && ((deviceVersionMajor
> minVersionMajor
)
158 || (deviceVersionMajor
== minVersionMajor
&& deviceVersionMinor
>= minVersionMinor
)));
159 if (!versionLargeEnough
)
161 return DeviceStatus::Incompatible
;
164 /* Only AMD, Intel, and NVIDIA GPUs are supported for now */
165 switch (deviceInfo
.deviceVendor
)
167 case DeviceVendor::Nvidia
:
168 return runningOnCompatibleHWForNvidia(deviceInfo
) ? DeviceStatus::Compatible
169 : DeviceStatus::IncompatibleNvidiaVolta
;
170 case DeviceVendor::Amd
:
171 return runningOnCompatibleOSForAmd() ? DeviceStatus::Compatible
: DeviceStatus::Incompatible
;
172 case DeviceVendor::Intel
:
173 return GMX_OPENCL_NB_CLUSTER_SIZE
== 4 ? DeviceStatus::Compatible
174 : DeviceStatus::IncompatibleClusterSize
;
175 default: return DeviceStatus::Incompatible
;
179 /*! \brief Make an error string following an OpenCL API call.
181 * It is meant to be called with \p status != CL_SUCCESS, but it will
182 * work correctly even if it is called with no OpenCL failure.
184 * \todo Make use of this function more.
186 * \param[in] message Supplies context, e.g. the name of the API call that returned the error.
187 * \param[in] status OpenCL API status code
188 * \returns A string describing the OpenCL error.
190 inline std::string
makeOpenClInternalErrorString(const char* message
, cl_int status
)
192 if (message
!= nullptr)
194 return gmx::formatString("%s did %ssucceed %d: %s", message
,
195 ((status
!= CL_SUCCESS
) ? "not " : ""), status
,
196 ocl_get_error_string(status
).c_str());
200 return gmx::formatString("%sOpenCL error encountered %d: %s",
201 ((status
!= CL_SUCCESS
) ? "" : "No "), status
,
202 ocl_get_error_string(status
).c_str());
207 * \brief Checks that device \c deviceInfo is sane (ie can run a kernel).
209 * Compiles and runs a dummy kernel to determine whether the given
210 * OpenCL device functions properly.
213 * \param[in] deviceInfo The device info pointer.
214 * \param[out] errorMessage An error message related to a failing OpenCL API call.
215 * \throws std::bad_alloc When out of memory.
216 * \returns Whether the device passed sanity checks
218 static bool isDeviceFunctional(const DeviceInformation
& deviceInfo
, std::string
* errorMessage
)
220 cl_context_properties properties
[] = {
221 CL_CONTEXT_PLATFORM
, reinterpret_cast<cl_context_properties
>(deviceInfo
.oclPlatformId
), 0
223 // uncrustify spacing
226 auto deviceId
= deviceInfo
.oclDeviceId
;
227 ClContext
context(clCreateContext(properties
, 1, &deviceId
, nullptr, nullptr, &status
));
228 if (status
!= CL_SUCCESS
)
230 errorMessage
->assign(makeOpenClInternalErrorString("clCreateContext", status
));
233 ClCommandQueue
commandQueue(clCreateCommandQueue(context
, deviceId
, 0, &status
));
234 if (status
!= CL_SUCCESS
)
236 errorMessage
->assign(makeOpenClInternalErrorString("clCreateCommandQueue", status
));
240 // Some compilers such as Apple's require kernel functions to have at least one argument
241 const char* lines
[] = { "__kernel void dummyKernel(__global void* input){}" };
242 ClProgram
program(clCreateProgramWithSource(context
, 1, lines
, nullptr, &status
));
243 if (status
!= CL_SUCCESS
)
245 errorMessage
->assign(makeOpenClInternalErrorString("clCreateProgramWithSource", status
));
249 if ((status
= clBuildProgram(program
, 0, nullptr, nullptr, nullptr, nullptr)) != CL_SUCCESS
)
251 errorMessage
->assign(makeOpenClInternalErrorString("clBuildProgram", status
));
255 ClKernel
kernel(clCreateKernel(program
, "dummyKernel", &status
));
256 if (status
!= CL_SUCCESS
)
258 errorMessage
->assign(makeOpenClInternalErrorString("clCreateKernel", status
));
262 clSetKernelArg(kernel
, 0, sizeof(void*), nullptr);
264 const size_t localWorkSize
= 1, globalWorkSize
= 1;
265 if ((status
= clEnqueueNDRangeKernel(commandQueue
, kernel
, 1, nullptr, &globalWorkSize
,
266 &localWorkSize
, 0, nullptr, nullptr))
269 errorMessage
->assign(makeOpenClInternalErrorString("clEnqueueNDRangeKernel", status
));
275 /*! \brief Check whether the \c ocl_gpu_device is suitable for use by mdrun
277 * Runs sanity checks: checking that the runtime can compile a dummy kernel
278 * and this can be executed;
279 * Runs compatibility checks verifying the device OpenCL version requirement
280 * and vendor/OS support.
282 * \param[in] deviceId The runtime-reported numeric ID of the device.
283 * \param[in] deviceInfo The device info pointer.
284 * \returns A DeviceStatus to indicate if the GPU device is supported and if it was able to run
285 * basic functionality checks.
287 static DeviceStatus
checkGpu(size_t deviceId
, const DeviceInformation
& deviceInfo
)
290 DeviceStatus supportStatus
= isDeviceFunctional(deviceInfo
);
291 if (supportStatus
!= DeviceStatus::Compatible
)
293 return supportStatus
;
296 std::string errorMessage
;
297 if (!isDeviceFunctional(deviceInfo
, &errorMessage
))
299 gmx_warning("While sanity checking device #%zu, %s", deviceId
, errorMessage
.c_str());
300 return DeviceStatus::NonFunctional
;
303 return DeviceStatus::Compatible
;
308 bool isDeviceDetectionFunctional(std::string
* errorMessage
)
310 cl_uint numPlatforms
;
311 cl_int status
= clGetPlatformIDs(0, nullptr, &numPlatforms
);
312 GMX_ASSERT(status
!= CL_INVALID_VALUE
, "Incorrect call of clGetPlatformIDs detected");
314 if (status
== CL_PLATFORM_NOT_FOUND_KHR
)
316 // No valid ICDs found
317 if (errorMessage
!= nullptr)
319 errorMessage
->assign("No valid OpenCL driver found");
325 status
== CL_SUCCESS
,
326 gmx::formatString("An unexpected value was returned from clGetPlatformIDs %d: %s",
327 status
, ocl_get_error_string(status
).c_str())
329 bool foundPlatform
= (numPlatforms
> 0);
330 if (!foundPlatform
&& errorMessage
!= nullptr)
332 errorMessage
->assign("No OpenCL platforms found even though the driver was valid");
334 return foundPlatform
;
337 std::vector
<std::unique_ptr
<DeviceInformation
>> findDevices()
339 cl_uint ocl_platform_count
;
340 cl_platform_id
* ocl_platform_ids
;
341 cl_device_type req_dev_type
= CL_DEVICE_TYPE_GPU
;
343 ocl_platform_ids
= nullptr;
345 if (getenv("GMX_OCL_FORCE_CPU") != nullptr)
347 req_dev_type
= CL_DEVICE_TYPE_CPU
;
351 std::vector
<std::unique_ptr
<DeviceInformation
>> deviceInfoList(0);
355 cl_int status
= clGetPlatformIDs(0, nullptr, &ocl_platform_count
);
356 if (CL_SUCCESS
!= status
)
358 GMX_THROW(gmx::InternalError(
359 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status
)
360 + ocl_get_error_string(status
)));
363 if (1 > ocl_platform_count
)
365 // TODO this should have a descriptive error message that we only support one OpenCL platform
369 snew(ocl_platform_ids
, ocl_platform_count
);
371 status
= clGetPlatformIDs(ocl_platform_count
, ocl_platform_ids
, nullptr);
372 if (CL_SUCCESS
!= status
)
374 GMX_THROW(gmx::InternalError(
375 gmx::formatString("An unexpected value %d was returned from clGetPlatformIDs: ", status
)
376 + ocl_get_error_string(status
)));
379 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
381 cl_uint ocl_device_count
;
383 /* If requesting req_dev_type devices fails, just go to the next platform */
384 if (CL_SUCCESS
!= clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, 0, nullptr, &ocl_device_count
))
389 if (1 <= ocl_device_count
)
391 numDevices
+= ocl_device_count
;
400 deviceInfoList
.resize(numDevices
);
404 cl_device_id
* ocl_device_ids
;
406 snew(ocl_device_ids
, numDevices
);
409 for (unsigned int i
= 0; i
< ocl_platform_count
; i
++)
411 cl_uint ocl_device_count
;
413 /* If requesting req_dev_type devices fails, just go to the next platform */
415 != clGetDeviceIDs(ocl_platform_ids
[i
], req_dev_type
, numDevices
, ocl_device_ids
,
421 if (1 > ocl_device_count
)
426 for (unsigned int j
= 0; j
< ocl_device_count
; j
++)
428 deviceInfoList
[device_index
] = std::make_unique
<DeviceInformation
>();
430 deviceInfoList
[device_index
]->id
= device_index
;
432 deviceInfoList
[device_index
]->oclPlatformId
= ocl_platform_ids
[i
];
433 deviceInfoList
[device_index
]->oclDeviceId
= ocl_device_ids
[j
];
435 deviceInfoList
[device_index
]->device_name
[0] = 0;
436 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_NAME
,
437 sizeof(deviceInfoList
[device_index
]->device_name
),
438 deviceInfoList
[device_index
]->device_name
, nullptr);
440 deviceInfoList
[device_index
]->device_version
[0] = 0;
441 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VERSION
,
442 sizeof(deviceInfoList
[device_index
]->device_version
),
443 deviceInfoList
[device_index
]->device_version
, nullptr);
445 deviceInfoList
[device_index
]->vendorName
[0] = 0;
446 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_VENDOR
,
447 sizeof(deviceInfoList
[device_index
]->vendorName
),
448 deviceInfoList
[device_index
]->vendorName
, nullptr);
450 deviceInfoList
[device_index
]->compute_units
= 0;
451 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_COMPUTE_UNITS
,
452 sizeof(deviceInfoList
[device_index
]->compute_units
),
453 &(deviceInfoList
[device_index
]->compute_units
), nullptr);
455 deviceInfoList
[device_index
]->adress_bits
= 0;
456 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_ADDRESS_BITS
,
457 sizeof(deviceInfoList
[device_index
]->adress_bits
),
458 &(deviceInfoList
[device_index
]->adress_bits
), nullptr);
460 deviceInfoList
[device_index
]->deviceVendor
=
461 getDeviceVendor(deviceInfoList
[device_index
]->vendorName
);
463 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_WORK_ITEM_SIZES
, 3 * sizeof(size_t),
464 &deviceInfoList
[device_index
]->maxWorkItemSizes
, nullptr);
466 clGetDeviceInfo(ocl_device_ids
[j
], CL_DEVICE_MAX_WORK_GROUP_SIZE
, sizeof(size_t),
467 &deviceInfoList
[device_index
]->maxWorkGroupSize
, nullptr);
469 deviceInfoList
[device_index
]->status
=
470 gmx::checkGpu(device_index
, *deviceInfoList
[device_index
]);
476 numDevices
= device_index
;
478 /* Dummy sort of devices - AMD first, then NVIDIA, then Intel */
479 // TODO: Sort devices based on performance.
483 for (int i
= 0; i
< numDevices
; i
++)
485 if (deviceInfoList
[i
]->deviceVendor
== DeviceVendor::Amd
)
491 std::swap(deviceInfoList
[i
], deviceInfoList
[last
]);
496 /* if more than 1 device left to be sorted */
497 if ((numDevices
- 1 - last
) > 1)
499 for (int i
= 0; i
< numDevices
; i
++)
501 if (deviceInfoList
[i
]->deviceVendor
== DeviceVendor::Nvidia
)
507 std::swap(deviceInfoList
[i
], deviceInfoList
[last
]);
514 sfree(ocl_device_ids
);
520 sfree(ocl_platform_ids
);
521 return deviceInfoList
;
524 void setActiveDevice(const DeviceInformation
& deviceInfo
)
526 // If the device is NVIDIA, for safety reasons we disable the JIT
527 // caching as this is known to be broken at least until driver 364.19;
528 // the cache does not always get regenerated when the source code changes,
529 // e.g. if the path to the kernel sources remains the same
531 if (deviceInfo
.deviceVendor
== DeviceVendor::Nvidia
)
533 // Ignore return values, failing to set the variable does not mean
534 // that something will go wrong later.
536 _putenv("CUDA_CACHE_DISABLE=1");
538 // Don't override, maybe a dev is testing.
539 setenv("CUDA_CACHE_DISABLE", "1", 0);
544 void releaseDevice(DeviceInformation
* /* deviceInfo */) {}
546 std::string
getDeviceInformationString(const DeviceInformation
& deviceInfo
)
548 bool gpuExists
= (deviceInfo
.status
!= DeviceStatus::Nonexistent
549 && deviceInfo
.status
!= DeviceStatus::NonFunctional
);
553 return gmx::formatString("#%d: %s, status: %s", deviceInfo
.id
, "N/A",
554 c_deviceStateString
[deviceInfo
.status
]);
558 return gmx::formatString("#%d: name: %s, vendor: %s, device version: %s, status: %s",
559 deviceInfo
.id
, deviceInfo
.device_name
, deviceInfo
.vendorName
,
560 deviceInfo
.device_version
, c_deviceStateString
[deviceInfo
.status
]);