2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2014,2015,2016,2017,2018 by the GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 * \brief Define utility routines for OpenCL
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
52 #include "gromacs/gpu_utils/gpu_utils.h"
53 #include "gromacs/utility/fatalerror.h"
54 #include "gromacs/utility/smalloc.h"
56 /*! \brief \brief Allocates nbytes of host memory. Use ocl_free to free memory allocated with this function.
59 * This function should allocate page-locked memory to help reduce D2H and H2D
60 * transfer times, similar with pmalloc from pmalloc_cuda.cu.
62 * \param[in,out] h_ptr Pointer where to store the address of the newly allocated buffer.
63 * \param[in] nbytes Size in bytes of the buffer to be allocated.
65 void pmalloc(void** h_ptr
, size_t nbytes
)
67 /* Need a temporary type whose size is 1 byte, so that the
68 * implementation of snew_aligned can cope without issuing
70 char** temporary
= reinterpret_cast<char**>(h_ptr
);
72 /* 16-byte alignment is required by the neighbour-searching code,
73 * because it uses four-wide SIMD for bounding-box calculation.
74 * However, when we organize using page-locked memory for
75 * device-host transfers, it will probably need to be aligned to a
76 * 4kb page, like CUDA does. */
77 snew_aligned(*temporary
, nbytes
, 16);
80 /*! \brief Frees memory allocated with pmalloc.
82 * \param[in] h_ptr Buffer allocated with pmalloc that needs to be freed.
84 void pfree(void* h_ptr
)
93 /*! \brief Convert error code to diagnostic string */
94 std::string
ocl_get_error_string(cl_int error
)
98 // run-time and JIT compiler errors
99 case 0: return "CL_SUCCESS";
100 case -1: return "CL_DEVICE_NOT_FOUND";
101 case -2: return "CL_DEVICE_NOT_AVAILABLE";
102 case -3: return "CL_COMPILER_NOT_AVAILABLE";
103 case -4: return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
104 case -5: return "CL_OUT_OF_RESOURCES";
105 case -6: return "CL_OUT_OF_HOST_MEMORY";
106 case -7: return "CL_PROFILING_INFO_NOT_AVAILABLE";
107 case -8: return "CL_MEM_COPY_OVERLAP";
108 case -9: return "CL_IMAGE_FORMAT_MISMATCH";
109 case -10: return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
110 case -11: return "CL_BUILD_PROGRAM_FAILURE";
111 case -12: return "CL_MAP_FAILURE";
112 case -13: return "CL_MISALIGNED_SUB_BUFFER_OFFSET";
113 case -14: return "CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST";
114 case -15: return "CL_COMPILE_PROGRAM_FAILURE";
115 case -16: return "CL_LINKER_NOT_AVAILABLE";
116 case -17: return "CL_LINK_PROGRAM_FAILURE";
117 case -18: return "CL_DEVICE_PARTITION_FAILED";
118 case -19: return "CL_KERNEL_ARG_INFO_NOT_AVAILABLE";
120 // compile-time errors
121 case -30: return "CL_INVALID_VALUE";
122 case -31: return "CL_INVALID_DEVICE_TYPE";
123 case -32: return "CL_INVALID_PLATFORM";
124 case -33: return "CL_INVALID_DEVICE";
125 case -34: return "CL_INVALID_CONTEXT";
126 case -35: return "CL_INVALID_QUEUE_PROPERTIES";
127 case -36: return "CL_INVALID_COMMAND_QUEUE";
128 case -37: return "CL_INVALID_HOST_PTR";
129 case -38: return "CL_INVALID_MEM_OBJECT";
130 case -39: return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
131 case -40: return "CL_INVALID_IMAGE_SIZE";
132 case -41: return "CL_INVALID_SAMPLER";
133 case -42: return "CL_INVALID_BINARY";
134 case -43: return "CL_INVALID_BUILD_OPTIONS";
135 case -44: return "CL_INVALID_PROGRAM";
136 case -45: return "CL_INVALID_PROGRAM_EXECUTABLE";
137 case -46: return "CL_INVALID_KERNEL_NAME";
138 case -47: return "CL_INVALID_KERNEL_DEFINITION";
139 case -48: return "CL_INVALID_KERNEL";
140 case -49: return "CL_INVALID_ARG_INDEX";
141 case -50: return "CL_INVALID_ARG_VALUE";
142 case -51: return "CL_INVALID_ARG_SIZE";
143 case -52: return "CL_INVALID_KERNEL_ARGS";
144 case -53: return "CL_INVALID_WORK_DIMENSION";
145 case -54: return "CL_INVALID_WORK_GROUP_SIZE";
146 case -55: return "CL_INVALID_WORK_ITEM_SIZE";
147 case -56: return "CL_INVALID_GLOBAL_OFFSET";
148 case -57: return "CL_INVALID_EVENT_WAIT_LIST";
149 case -58: return "CL_INVALID_EVENT";
150 case -59: return "CL_INVALID_OPERATION";
151 case -60: return "CL_INVALID_GL_OBJECT";
152 case -61: return "CL_INVALID_BUFFER_SIZE";
153 case -62: return "CL_INVALID_MIP_LEVEL";
154 case -63: return "CL_INVALID_GLOBAL_WORK_SIZE";
155 case -64: return "CL_INVALID_PROPERTY";
156 case -65: return "CL_INVALID_IMAGE_DESCRIPTOR";
157 case -66: return "CL_INVALID_COMPILER_OPTIONS";
158 case -67: return "CL_INVALID_LINKER_OPTIONS";
159 case -68: return "CL_INVALID_DEVICE_PARTITION_COUNT";
162 case -1000: return "CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR";
163 case -1001: return "CL_PLATFORM_NOT_FOUND_KHR";
164 case -1002: return "CL_INVALID_D3D10_DEVICE_KHR";
165 case -1003: return "CL_INVALID_D3D10_RESOURCE_KHR";
166 case -1004: return "CL_D3D10_RESOURCE_ALREADY_ACQUIRED_KHR";
167 case -1005: return "CL_D3D10_RESOURCE_NOT_ACQUIRED_KHR";
168 default: return "Unknown OpenCL error: " + std::to_string(static_cast<int32_t>(error
));