src/gromacs/gpu_utils/ocl_compiler.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35 /*! \internal \file
  36  *  \brief Define infrastructure for OpenCL JIT compilation for Gromacs
  37  *
  38  *  \author Dimitrios Karkoulis <dimitris.karkoulis@gmail.com>
  39  *  \author Anca Hamuraru <anca@streamcomputing.eu>
  40  *  \author Teemu Virolainen <teemu@streamcomputing.eu>
  41  *  \author Mark Abraham <mark.j.abraham@gmail.com>
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "ocl_compiler.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <string>
  53 #include <vector>
  54
  55 #include "gromacs/gpu_utils/oclutils.h"
  56 #include "gromacs/utility/cstringutil.h"
  57 #include "gromacs/utility/exceptions.h"
  58 #include "gromacs/utility/gmxassert.h"
  59 #include "gromacs/utility/path.h"
  60 #include "gromacs/utility/programcontext.h"
  61 #include "gromacs/utility/scoped_cptr.h"
  62 #include "gromacs/utility/smalloc.h"
  63 #include "gromacs/utility/stringutil.h"
  64 #include "gromacs/utility/textreader.h"
  65
  66 #include "ocl_caching.h"
  67
  68 namespace gmx
  69 {
  70 namespace ocl
  71 {
  72
  73 /*! \brief True if OpenCL binary caching is enabled.
  74  *
  75  *  Currently caching is disabled by default unless the env var override
  76  *  is used until we resolve concurrency issues. */
  77 static bool useBuildCache = getenv("GMX_OCL_GENCACHE"); // (NULL == getenv("GMX_OCL_NOGENCACHE"));
  78
  79 /*! \brief Handles writing the OpenCL JIT compilation log to \c fplog.
  80  *
  81  * If \c fplog is non-null and either the GMX_OCL_DUMP_LOG environment
  82  * variable is set or the compilation failed, then the OpenCL
  83  * compilation log is written.
  84  *
  85  * \param fplog               Open file pointer to log file
  86  * \param program             OpenCL program that was compiled
  87  * \param deviceId            Id of the device for which compilation took place
  88  * \param kernelFilename      File name containing the kernel
  89  * \param preprocessorOptions String containing the preprocessor command-line options used for the build
  90  * \param buildFailed         Whether the OpenCL build succeeded
  91  *
  92  * \throws std::bad_alloc if out of memory */
  93 static void
  94 writeOclBuildLog(FILE              *fplog,
  95                  cl_program         program,
  96                  cl_device_id       deviceId,
  97                  const std::string &kernelFilename,
  98                  const std::string &preprocessorOptions,
  99                  bool               buildFailed)
 100 {
 101     bool writeOutput = ((fplog != nullptr) &&
 102                         (buildFailed || (getenv("GMX_OCL_DUMP_LOG") != nullptr)));
 103
 104     if (!writeOutput)
 105     {
 106         return;
 107     }
 108
 109     // Get build log string size
 110     size_t buildLogSize;
 111     cl_int cl_error = clGetProgramBuildInfo(program,
 112                                             deviceId,
 113                                             CL_PROGRAM_BUILD_LOG,
 114                                             0,
 115                                             NULL,
 116                                             &buildLogSize);
 117     if (cl_error != CL_SUCCESS)
 118     {
 119         GMX_THROW(InternalError("Could not get OpenCL program build log size, error was " + ocl_get_error_string(cl_error)));
 120     }
 121
 122     char             *buildLog = nullptr;
 123     scoped_cptr<char> buildLogGuard;
 124     if (buildLogSize != 0)
 125     {
 126         /* Allocate memory to fit the build log,
 127            it can be very large in case of errors */
 128         snew(buildLog, buildLogSize);
 129         buildLogGuard.reset(buildLog);
 130
 131         /* Get the actual compilation log */
 132         cl_error = clGetProgramBuildInfo(program,
 133                                          deviceId,
 134                                          CL_PROGRAM_BUILD_LOG,
 135                                          buildLogSize,
 136                                          buildLog,
 137                                          NULL);
 138         if (cl_error != CL_SUCCESS)
 139         {
 140             GMX_THROW(InternalError("Could not get OpenCL program build log, error was " + ocl_get_error_string(cl_error)));
 141         }
 142     }
 143
 144     std::string message;
 145     if (buildFailed)
 146     {
 147         message += "Compilation of source file " + kernelFilename + " failed!\n";
 148     }
 149     else
 150     {
 151         message += "Compilation of source file " + kernelFilename + " was successful!\n";
 152     }
 153     message += "-- Used build options: " + preprocessorOptions + "\n";
 154     message += "--------------LOG START---------------\n";
 155     message += buildLog;
 156     message += "---------------LOG END----------------\n";;
 157
 158     fputs(message.c_str(), fplog);
 159 }
 160
 161 /*! \brief Construct compiler options string
 162  *
 163  * \param deviceVendorId  Device vendor id. Used to
 164  *          automatically enable some vendor-specific options
 165  * \return The string with the compiler options
 166  */
 167 static std::string
 168 selectCompilerOptions(ocl_vendor_id_t deviceVendorId)
 169 {
 170     std::string compilerOptions;
 171
 172     if (getenv("GMX_OCL_NOOPT") )
 173     {
 174         compilerOptions += " -cl-opt-disable";
 175     }
 176
 177     if (getenv("GMX_OCL_FASTMATH") )
 178     {
 179         compilerOptions += " -cl-fast-relaxed-math";
 180     }
 181
 182     if ((deviceVendorId == OCL_VENDOR_NVIDIA) && getenv("GMX_OCL_VERBOSE"))
 183     {
 184         compilerOptions += " -cl-nv-verbose";
 185     }
 186
 187     if ((deviceVendorId == OCL_VENDOR_AMD) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 188     {
 189         /* To dump OpenCL build intermediate files, caching must be off */
 190         if (!useBuildCache)
 191         {
 192             compilerOptions += " -save-temps";
 193         }
 194     }
 195
 196     if ( ( deviceVendorId == OCL_VENDOR_AMD ) && getenv("GMX_OCL_DEBUG"))
 197     {
 198         compilerOptions += " -g";
 199     }
 200
 201     return compilerOptions;
 202 }
 203
 204 /*! \brief Get the path to the main folder storing OpenCL kernels.
 205  *
 206  * By default, this function constructs the full path to the OpenCL from
 207  * the known location of the binary that is running, so that we handle
 208  * both in-source and installed builds. The user can override this
 209  * behavior by defining GMX_OCL_FILE_PATH environment variable.
 210  *
 211  * \return OS-normalized path string to the main folder storing OpenCL kernels
 212  *
 213  * \throws std::bad_alloc    if out of memory.
 214  *         FileIOError  if GMX_OCL_FILE_PATH does not specify a readable path
 215  */
 216 static std::string
 217 getKernelRootPath()
 218 {
 219     std::string kernelRootPath;
 220     /* Use GMX_OCL_FILE_PATH if the user has defined it */
 221     const char *gmxOclFilePath = getenv("GMX_OCL_FILE_PATH");
 222
 223     if (gmxOclFilePath == nullptr)
 224     {
 225         /* Normal way of getting ocl_root_dir. First get the right
 226            root path from the path to the binary that is running. */
 227         InstallationPrefixInfo      info           = getProgramContext().installationPrefix();
 228         std::string                 dataPathSuffix = (info.bSourceLayout ?
 229                                                       "src/gromacs/mdlib/nbnxn_ocl" :
 230                                                       OCL_INSTALL_DIR);
 231         kernelRootPath = Path::join(info.path, dataPathSuffix);
 232     }
 233     else
 234     {
 235         if (!Directory::exists(gmxOclFilePath))
 236         {
 237             GMX_THROW(FileIOError(formatString("GMX_OCL_FILE_PATH must point to the directory where OpenCL"
 238                                                "kernels are found, but '%s' does not exist", gmxOclFilePath)));
 239         }
 240         kernelRootPath = gmxOclFilePath;
 241     }
 242
 243     // Make sure we return an OS-correct path format
 244     return Path::normalize(kernelRootPath);
 245 }
 246
 247 /*!  \brief Get the warp size reported by device
 248  *
 249  *  This is platform implementation dependant and seems to only work on the Nvidia and AMD platforms!
 250  *  Nvidia reports 32, AMD for GPU 64. Ignore the rest
 251  *
 252  *  \param  context   Current OpenCL context
 253  *  \param  deviceId OpenCL device with the context
 254  *  \return cl_int value of the warp size
 255  *
 256  * \throws InternalError if an OpenCL error was encountered
 257  */
 258 static size_t
 259 getWarpSize(cl_context context, cl_device_id deviceId)
 260 {
 261     cl_int      cl_error;
 262     const char *warpSizeKernel = "__kernel void test(__global int* test){test[get_local_id(0)] = 0;}";
 263     cl_program  program        = clCreateProgramWithSource(context, 1, (const char**)&warpSizeKernel, NULL, &cl_error);
 264     if (cl_error != CL_SUCCESS)
 265     {
 266         GMX_THROW(InternalError("Could not create OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 267     }
 268
 269     cl_error = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
 270     if (cl_error != CL_SUCCESS)
 271     {
 272         GMX_THROW(InternalError("Could not build OpenCL program to determine warp size, error was " + ocl_get_error_string(cl_error)));
 273     }
 274
 275     cl_kernel kernel = clCreateKernel(program, "test", &cl_error);
 276     if (cl_error != CL_SUCCESS)
 277     {
 278         GMX_THROW(InternalError("Could not create OpenCL kernel to determine warp size, error was " + ocl_get_error_string(cl_error)));
 279     }
 280
 281     size_t warpSize = 0;
 282     cl_error = clGetKernelWorkGroupInfo(kernel, deviceId, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
 283                                         sizeof(warpSize), &warpSize, NULL);
 284     if (cl_error != CL_SUCCESS)
 285     {
 286         GMX_THROW(InternalError("Could not measure OpenCL warp size, error was " + ocl_get_error_string(cl_error)));
 287     }
 288     if (warpSize == 0)
 289     {
 290         GMX_THROW(InternalError(formatString("Did not measure a valid OpenCL warp size")));
 291     }
 292
 293     cl_error = clReleaseKernel(kernel);
 294     if (cl_error != CL_SUCCESS)
 295     {
 296         GMX_THROW(InternalError("Could not release OpenCL warp-size kernel, error was " + ocl_get_error_string(cl_error)));
 297     }
 298     cl_error = clReleaseProgram(program);
 299     if (cl_error != CL_SUCCESS)
 300     {
 301         GMX_THROW(InternalError("Could not release OpenCL warp-size program, error was " + ocl_get_error_string(cl_error)));
 302     }
 303
 304     return warpSize;
 305 }
 306
 307 /*! \brief Select a compilation-line define for a vendor-specific kernel choice from vendor id
 308  *
 309  * \param[in] vendorId Vendor id enumerator
 310  *
 311  * \return The appropriate compilation-line define
 312  */
 313 static const char *
 314 makeVendorFlavorChoice(ocl_vendor_id_t vendorId)
 315 {
 316     const char *choice;
 317     switch (vendorId)
 318     {
 319         case OCL_VENDOR_AMD:
 320             choice = "-D_AMD_SOURCE_";
 321             break;
 322         case OCL_VENDOR_NVIDIA:
 323             choice = "-D_NVIDIA_SOURCE_";
 324             break;
 325         default:
 326             choice = "-D_WARPLESS_SOURCE_";
 327             break;
 328     }
 329     return choice;
 330 }
 331
 332 /*! \brief Create include paths for kernel sources.
 333  *
 334  * All OpenCL kernel files are expected to be stored in one single folder.
 335  *
 336  * \throws std::bad_alloc  if out of memory.
 337  */
 338 static std::string makeKernelIncludePathOption(const std::string &unescapedKernelRootPath)
 339 {
 340     std::string includePathOption;
 341
 342     /* Apple does not seem to accept the quoted include paths other
 343      * OpenCL implementations are happy with. Since the standard still says
 344      * it should be quoted, we handle Apple as a special case.
 345      */
 346 #ifdef __APPLE__
 347     includePathOption += "-I";
 348
 349     // Prepend all the spaces with a backslash
 350     for (std::string::size_type i = 0; i < unescapedKernelRootPath.length(); i++)
 351     {
 352         if (unescapedKernelRootPath[i] == ' ')
 353         {
 354             includePathOption.push_back('\\');
 355         }
 356         includePathOption.push_back(unescapedKernelRootPath[i]);
 357     }
 358 #else
 359     includePathOption += "-I\"" + unescapedKernelRootPath + "\"";
 360 #endif
 361
 362     return includePathOption;
 363 }
 364
 365 /*! \brief Builds a string with build options for the OpenCL kernels
 366  *
 367  * \throws std::bad_alloc  if out of memory. */
 368 std::string
 369 makePreprocessorOptions(const std::string   &kernelRootPath,
 370                         size_t               warpSize,
 371                         ocl_vendor_id_t      deviceVendorId,
 372                         const std::string   &extraDefines)
 373 {
 374     std::string preprocessorOptions;
 375
 376     /* Compose the complete build options */
 377     preprocessorOptions  = formatString("-DWARP_SIZE_TEST=%d", static_cast<int>(warpSize));
 378     preprocessorOptions += ' ';
 379     preprocessorOptions += makeVendorFlavorChoice(deviceVendorId);
 380     preprocessorOptions += ' ';
 381     preprocessorOptions += extraDefines;
 382     preprocessorOptions += ' ';
 383     preprocessorOptions += selectCompilerOptions(deviceVendorId);
 384     preprocessorOptions += ' ';
 385     preprocessorOptions += makeKernelIncludePathOption(kernelRootPath);
 386
 387     return preprocessorOptions;
 388 }
 389
 390 cl_program
 391 compileProgram(FILE              *fplog,
 392                const std::string &kernelBaseFilename,
 393                const std::string &extraDefines,
 394                cl_context         context,
 395                cl_device_id       deviceId,
 396                ocl_vendor_id_t    deviceVendorId)
 397 {
 398     cl_int      cl_error;
 399     std::string kernelRootPath = getKernelRootPath();
 400
 401     GMX_RELEASE_ASSERT(fplog != nullptr, "Need a valid log file for building OpenCL programs");
 402
 403     /* Load OpenCL source files */
 404     std::string kernelFilename = Path::join(kernelRootPath,
 405                                             kernelBaseFilename);
 406
 407     /* Make the build options */
 408     std::string preprocessorOptions = makePreprocessorOptions(kernelRootPath,
 409                                                               getWarpSize(context, deviceId),
 410                                                               deviceVendorId,
 411                                                               extraDefines);
 412
 413     bool        buildCacheWasRead = false;
 414
 415     std::string cacheFilename;
 416     if (useBuildCache)
 417     {
 418         cacheFilename = makeBinaryCacheFilename(kernelBaseFilename, deviceId);
 419     }
 420
 421     /* Create OpenCL program */
 422     cl_program program = nullptr;
 423     if (useBuildCache)
 424     {
 425         if (File::exists(cacheFilename, File::returnFalseOnError))
 426         {
 427             /* Check if there's a valid cache available */
 428             try
 429             {
 430                 program           = makeProgramFromCache(cacheFilename, context, deviceId);
 431                 buildCacheWasRead = true;
 432             }
 433             catch (FileIOError &e)
 434             {
 435                 // Failing to read from the cache is not a critical error
 436                 formatExceptionMessageToFile(fplog, e);
 437             }
 438         }
 439         else
 440         {
 441             fprintf(fplog, "No OpenCL binary cache file was present, so will compile kernels normally.\n");
 442         }
 443     }
 444     if (program == nullptr)
 445     {
 446         // Compile OpenCL program from source
 447         std::string kernelSource = TextReader::readFileToString(kernelFilename);
 448         if (kernelSource.empty())
 449         {
 450             GMX_THROW(FileIOError("Error loading OpenCL code " + kernelFilename));
 451         }
 452         const char *kernelSourcePtr  = kernelSource.c_str();
 453         size_t      kernelSourceSize = kernelSource.size();
 454         /* Create program from source code */
 455         program = clCreateProgramWithSource(context,
 456                                             1,
 457                                             &kernelSourcePtr,
 458                                             &kernelSourceSize,
 459                                             &cl_error);
 460         if (cl_error != CL_SUCCESS)
 461         {
 462             GMX_THROW(InternalError("Could not create OpenCL program, error was " + ocl_get_error_string(cl_error)));
 463         }
 464     }
 465
 466     /* Build the OpenCL program, keeping the status to potentially
 467        write to the simulation log file. */
 468     cl_int buildStatus = clBuildProgram(program, 0, NULL, preprocessorOptions.c_str(), NULL, NULL);
 469     if (buildStatus != CL_SUCCESS)
 470     {
 471         GMX_THROW(InternalError("Could not build OpenCL program, error was " + ocl_get_error_string(buildStatus)));
 472     }
 473
 474     if (useBuildCache)
 475     {
 476         if (!buildCacheWasRead)
 477         {
 478             /* If OpenCL caching is ON, but the current cache is not
 479                valid => update it */
 480             try
 481             {
 482                 writeBinaryToCache(program, cacheFilename);
 483             }
 484             catch (GromacsException &e)
 485             {
 486                 // Failing to write the cache is not a critical error
 487                 formatExceptionMessageToFile(fplog, e);
 488             }
 489         }
 490     }
 491     if ((OCL_VENDOR_NVIDIA == deviceVendorId) && getenv("GMX_OCL_DUMP_INTERM_FILES"))
 492     {
 493         /* If dumping intermediate files has been requested and this is an NVIDIA card
 494            => write PTX to file */
 495         char buffer[STRLEN];
 496
 497         cl_error = clGetDeviceInfo(deviceId, CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
 498         if (cl_error != CL_SUCCESS)
 499         {
 500             GMX_THROW(InternalError("Could not get OpenCL device info, error was " + ocl_get_error_string(cl_error)));
 501         }
 502         std::string ptxFilename = buffer;
 503         ptxFilename += ".ptx";
 504
 505         try
 506         {
 507             writeBinaryToCache(program, ptxFilename);
 508         }
 509         catch (GromacsException &e)
 510         {
 511             // Failing to write the cache is not a critical error
 512             formatExceptionMessageToFile(fplog, e);
 513         }
 514     }
 515
 516     writeOclBuildLog(fplog,
 517                      program,
 518                      deviceId,
 519                      kernelFilename,
 520                      preprocessorOptions,
 521                      buildStatus != CL_SUCCESS);
 522
 523     return program;
 524 }
 525
 526 } // namespace
 527 } // namespace