2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 * \brief Define infrastructure for OpenCL JIT compilation for Gromacs
38 * \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
39 * \author Anca Hamuraru <anca@streamcomputing.eu>
40 * \author Teemu Virolainen <teemu@streamcomputing.eu>
41 * \author Mark Abraham <mark.j.abraham@gmail.com>
46 #include "ocl_compiler.h"
56 #include "gromacs/gpu_utils/oclutils.h"
57 #include "gromacs/utility/cstringutil.h"
58 #include "gromacs/utility/exceptions.h"
59 #include "gromacs/utility/gmxassert.h"
60 #include "gromacs/utility/path.h"
61 #include "gromacs/utility/programcontext.h"
62 #include "gromacs/utility/smalloc.h"
63 #include "gromacs/utility/stringutil.h"
64 #include "gromacs/utility/textreader.h"
65 #include "gromacs/utility/unique_cptr.h"
67 #include "ocl_caching.h"
74 /*! \brief True if OpenCL binary caching is enabled.
76 * Currently caching is disabled by default unless the env var override
77 * is used until we resolve concurrency issues. */
78 static bool useBuildCache
= getenv("GMX_OCL_GENCACHE") != nullptr;
80 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
82 * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
83 * variable is set or the compilation failed, then the OpenCL
84 * compilation log is written.
86 * \param fplog Open file pointer to log file
87 * \param program OpenCL program that was compiled
88 * \param deviceId Id of the device for which compilation took place
89 * \param kernelFilename File name containing the kernel
90 * \param preprocessorOptions String containing the preprocessor command-line options used for the build
91 * \param buildFailed Whether the OpenCL build succeeded
93 * \throws std::bad_alloc if out of memory */
95 writeOclBuildLog(FILE *fplog
,
97 cl_device_id deviceId
,
98 const std::string
&kernelFilename
,
99 const std::string
&preprocessorOptions
,
102 bool writeOutput
= ((fplog
!= nullptr) &&
103 (buildFailed
|| (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
110 // Get build log string size
112 cl_int cl_error
= clGetProgramBuildInfo(program
,
114 CL_PROGRAM_BUILD_LOG
,
118 if (cl_error
!= CL_SUCCESS
)
120 GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error
)));
123 char *buildLog
= nullptr;
124 unique_cptr
<char> buildLogGuard
;
125 if (buildLogSize
!= 0)
127 /* Allocate memory to fit the build log,
128 it can be very large in case of errors */
129 snew(buildLog
, buildLogSize
);
130 buildLogGuard
.reset(buildLog
);
132 /* Get the actual compilation log */
133 cl_error
= clGetProgramBuildInfo(program
,
135 CL_PROGRAM_BUILD_LOG
,
139 if (cl_error
!= CL_SUCCESS
)
141 GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error
)));
148 message
+= "Compilation of source file " + kernelFilename
+ " failed!\n";
152 message
+= "Compilation of source file " + kernelFilename
+ " was successful!\n";
154 message
+= "-- Used build options: " + preprocessorOptions
+ "\n";
155 message
+= "--------------LOG START---------------\n";
157 message
+= "---------------LOG END----------------\n";;
159 fputs(message
.c_str(), fplog
);
162 /*! \brief Construct compiler options string
164 * \param deviceVendorId Device vendor id. Used to
165 * automatically enable some vendor-specific options
166 * \return The string with the compiler options
169 selectCompilerOptions(ocl_vendor_id_t deviceVendorId
)
171 std::string compilerOptions
;
173 if (getenv("GMX_OCL_NOOPT") )
175 compilerOptions
+= " -cl-opt-disable";
178 /* Fastmath imprves performance on all supported arch */
179 if (getenv("GMX_OCL_DISABLE_FASTMATH") == nullptr)
181 compilerOptions
+= " -cl-fast-relaxed-math";
183 // Hint to the compiler that it can flush denorms to zero.
184 // In CUDA this is triggered by the -use_fast_math flag, equivalent with
185 // -cl-fast-relaxed-math, hence the inclusion on the conditional block.
186 compilerOptions
+= " -cl-denorms-are-zero";
189 if ((deviceVendorId
== OCL_VENDOR_NVIDIA
) && getenv("GMX_OCL_VERBOSE"))
191 compilerOptions
+= " -cl-nv-verbose";
194 if ((deviceVendorId
== OCL_VENDOR_AMD
) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
196 /* To dump OpenCL build intermediate files, caching must be off */
199 compilerOptions
+= " -save-temps";
203 if (getenv("GMX_OCL_DEBUG"))
205 compilerOptions
+= " -g";
208 return compilerOptions
;
211 /*! \brief Get the path to the folder storing an OpenCL source file.
213 * By default, this function constructs the full path to the OpenCL from
214 * the known location of the binary that is running, so that we handle
215 * both in-source and installed builds. The user can override this
216 * behavior by defining GMX_OCL_FILE_PATH environment variable.
218 * \param[in] sourceRelativePath Relative path to the kernel or other file in the source tree,
219 * from src, e.g. "gromacs/mdlib/nbnxn_ocl" for NB kernels.
220 * \return OS-normalized path string to the folder storing OpenCL source file
222 * \throws std::bad_alloc if out of memory.
223 * FileIOError if GMX_OCL_FILE_PATH does not specify a readable path
226 getSourceRootPath(const std::string
&sourceRelativePath
)
228 std::string sourceRootPath
;
229 /* Use GMX_OCL_FILE_PATH if the user has defined it */
230 const char *gmxOclFilePath
= getenv("GMX_OCL_FILE_PATH");
232 if (gmxOclFilePath
== nullptr)
234 /* Normal way of getting ocl_root_dir. First get the right
235 root path from the path to the binary that is running. */
236 InstallationPrefixInfo info
= getProgramContext().installationPrefix();
237 std::string dataPathSuffix
= (info
.bSourceLayout
?
240 sourceRootPath
= Path::join(info
.path
, dataPathSuffix
, sourceRelativePath
);
244 if (!Directory::exists(gmxOclFilePath
))
246 GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
247 "kernels are found, but '%s' does not exist", gmxOclFilePath
)));
249 sourceRootPath
= Path::join(gmxOclFilePath
, sourceRelativePath
);
252 // Make sure we return an OS-correct path format
253 return Path::normalize(sourceRootPath
);
256 size_t getKernelWarpSize(cl_kernel kernel
, cl_device_id deviceId
)
259 cl_int cl_error
= clGetKernelWorkGroupInfo(kernel
, deviceId
, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE
,
260 sizeof(warpSize
), &warpSize
, nullptr);
261 if (cl_error
!= CL_SUCCESS
)
263 GMX_THROW(InternalError("Could not query OpenCL preferred workgroup size, error was " + ocl_get_error_string(cl_error
)));
267 GMX_THROW(InternalError(formatString("Invalid OpenCL warp size encountered")));
272 size_t getDeviceWarpSize(cl_context context
, cl_device_id deviceId
)
275 const char *warpSizeKernel
= "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
276 cl_program program
= clCreateProgramWithSource(context
, 1, &warpSizeKernel
, nullptr, &cl_error
);
277 if (cl_error
!= CL_SUCCESS
)
279 GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error
)));
282 cl_error
= clBuildProgram(program
, 0, nullptr, nullptr, nullptr, nullptr);
283 if (cl_error
!= CL_SUCCESS
)
285 GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error
)));
288 cl_kernel kernel
= clCreateKernel(program
, "test", &cl_error
);
289 if (cl_error
!= CL_SUCCESS
)
291 GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error
)));
294 size_t warpSize
= getKernelWarpSize(kernel
, deviceId
);
296 cl_error
= clReleaseKernel(kernel
);
297 if (cl_error
!= CL_SUCCESS
)
299 GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error
)));
301 cl_error
= clReleaseProgram(program
);
302 if (cl_error
!= CL_SUCCESS
)
304 GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error
)));
310 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
312 * \param[in] vendorId Vendor id enumerator
314 * \return The appropriate compilation-line define
317 makeVendorFlavorChoice(ocl_vendor_id_t vendorId
)
323 choice
= "-D_AMD_SOURCE_";
325 case OCL_VENDOR_NVIDIA
:
326 choice
= "-D_NVIDIA_SOURCE_";
328 case OCL_VENDOR_INTEL
:
329 choice
= "-D_INTEL_SOURCE_";
338 /*! \brief Create include paths for kernel sources.
340 * All OpenCL kernel files are expected to be stored in one single folder.
342 * \throws std::bad_alloc if out of memory.
344 static std::string
makeKernelIncludePathOption(const std::string
&unescapedKernelRootPath
)
346 std::string includePathOption
;
348 /* Apple does not seem to accept the quoted include paths other
349 * OpenCL implementations are happy with. Since the standard still says
350 * it should be quoted, we handle Apple as a special case.
353 includePathOption
+= "-I";
355 // Prepend all the spaces with a backslash
356 for (std::string::size_type i
= 0; i
< unescapedKernelRootPath
.length(); i
++)
358 if (unescapedKernelRootPath
[i
] == ' ')
360 includePathOption
.push_back('\\');
362 includePathOption
.push_back(unescapedKernelRootPath
[i
]);
365 includePathOption
+= "-I\"" + unescapedKernelRootPath
+ "\"";
368 return includePathOption
;
371 /*! \brief Replace duplicated spaces with a single one in string
373 * Only the first character will be kept for multiple adjacent characters that
374 * are both identical and where the first one returns true for isspace().
376 * \param str String that will be modified.
379 removeExtraSpaces(std::string
*str
)
381 GMX_RELEASE_ASSERT(str
!= nullptr, "A pointer to an actual string must be provided");
382 std::string::iterator newEnd
=
383 std::unique( str
->begin(), str
->end(), [ = ](char a
, char b
){ return isspace(a
) != 0 && (a
== b
); } );
384 str
->erase(newEnd
, str
->end());
387 /*! \brief Builds a string with build options for the OpenCL kernels
389 * \throws std::bad_alloc if out of memory. */
391 makePreprocessorOptions(const std::string
&kernelRootPath
,
392 const std::string
&includeRootPath
,
394 ocl_vendor_id_t deviceVendorId
,
395 const std::string
&extraDefines
)
397 std::string preprocessorOptions
;
399 /* Compose the complete build options */
400 preprocessorOptions
= formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize
));
401 preprocessorOptions
+= ' ';
402 preprocessorOptions
+= makeVendorFlavorChoice(deviceVendorId
);
403 preprocessorOptions
+= ' ';
404 preprocessorOptions
+= extraDefines
;
405 preprocessorOptions
+= ' ';
406 preprocessorOptions
+= selectCompilerOptions(deviceVendorId
);
407 preprocessorOptions
+= ' ';
408 preprocessorOptions
+= makeKernelIncludePathOption(kernelRootPath
);
409 preprocessorOptions
+= ' ';
410 preprocessorOptions
+= makeKernelIncludePathOption(includeRootPath
);
412 // Mac OS (and maybe some other implementations) does not accept double spaces in options
413 removeExtraSpaces(&preprocessorOptions
);
415 return preprocessorOptions
;
419 compileProgram(FILE *fplog
,
420 const std::string
&kernelRelativePath
,
421 const std::string
&kernelBaseFilename
,
422 const std::string
&extraDefines
,
424 cl_device_id deviceId
,
425 ocl_vendor_id_t deviceVendorId
)
428 // Let the kernel find include files from its module.
429 std::string kernelRootPath
= getSourceRootPath(kernelRelativePath
);
430 // Let the kernel find include files from other modules.
431 std::string rootPath
= getSourceRootPath("");
433 GMX_RELEASE_ASSERT(fplog
!= nullptr, "Need a valid log file for building OpenCL programs");
435 /* Load OpenCL source files */
436 std::string kernelFilename
= Path::join(kernelRootPath
,
439 /* Make the build options */
440 std::string preprocessorOptions
= makePreprocessorOptions(kernelRootPath
,
442 getDeviceWarpSize(context
, deviceId
),
446 bool buildCacheWasRead
= false;
448 std::string cacheFilename
;
451 cacheFilename
= makeBinaryCacheFilename(kernelBaseFilename
, deviceId
);
454 /* Create OpenCL program */
455 cl_program program
= nullptr;
458 if (File::exists(cacheFilename
, File::returnFalseOnError
))
460 /* Check if there's a valid cache available */
463 program
= makeProgramFromCache(cacheFilename
, context
, deviceId
);
464 buildCacheWasRead
= true;
466 catch (FileIOError
&e
)
468 // Failing to read from the cache is not a critical error
469 formatExceptionMessageToFile(fplog
, e
);
474 fprintf(fplog
, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
477 if (program
== nullptr)
479 // Compile OpenCL program from source
480 std::string kernelSource
= TextReader::readFileToString(kernelFilename
);
481 if (kernelSource
.empty())
483 GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename
));
485 const char *kernelSourcePtr
= kernelSource
.c_str();
486 size_t kernelSourceSize
= kernelSource
.size();
487 /* Create program from source code */
488 program
= clCreateProgramWithSource(context
,
493 if (cl_error
!= CL_SUCCESS
)
495 GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error
)));
499 /* Build the OpenCL program, keeping the status to potentially
500 write to the simulation log file. */
501 cl_int buildStatus
= clBuildProgram(program
, 0, nullptr, preprocessorOptions
.c_str(), nullptr, nullptr);
503 /* Write log first, and then throw exception that the user know what is
504 the issue even if the build fails. */
505 writeOclBuildLog(fplog
,
510 buildStatus
!= CL_SUCCESS
);
512 if (buildStatus
!= CL_SUCCESS
)
514 GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus
)));
519 if (!buildCacheWasRead
)
521 /* If OpenCL caching is ON, but the current cache is not
522 valid => update it */
525 writeBinaryToCache(program
, cacheFilename
);
527 catch (GromacsException
&e
)
529 // Failing to write the cache is not a critical error
530 formatExceptionMessageToFile(fplog
, e
);
534 if ((OCL_VENDOR_NVIDIA
== deviceVendorId
) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
536 /* If dumping intermediate files has been requested and this is an NVIDIA card
537 => write PTX to file */
540 cl_error
= clGetDeviceInfo(deviceId
, CL_DEVICE_NAME
, sizeof(buffer
), buffer
, nullptr);
541 if (cl_error
!= CL_SUCCESS
)
543 GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error
)));
545 std::string ptxFilename
= buffer
;
546 ptxFilename
+= ".ptx";
550 writeBinaryToCache(program
, ptxFilename
);
552 catch (GromacsException
&e
)
554 // Failing to write the cache is not a critical error
555 formatExceptionMessageToFile(fplog
, e
);