[OpenACC] Implement 'collapse' for combined constructs.
[llvm-project.git] / mlir / lib / Target / LLVM / NVVM / Target.cpp
blobbca26e3a0e84a931efda9e9130a76f101ecbdee1
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/NVVM/Target.h"
16 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
18 #include "mlir/Target/LLVM/NVVM/Utils.h"
19 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
20 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
21 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Export.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/FileUtilities.h"
27 #include "llvm/Support/FormatVariadic.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/TargetSelect.h"
33 #include "llvm/Support/raw_ostream.h"
35 #include <cstdlib>
37 using namespace mlir;
38 using namespace mlir::NVVM;
40 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
41 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
42 #endif
44 namespace {
45 // Implementation of the `TargetAttrInterface` model.
46 class NVVMTargetAttrImpl
47 : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
48 public:
49 std::optional<SmallVector<char, 0>>
50 serializeToObject(Attribute attribute, Operation *module,
51 const gpu::TargetOptions &options) const;
53 Attribute createObject(Attribute attribute, Operation *module,
54 const SmallVector<char, 0> &object,
55 const gpu::TargetOptions &options) const;
57 } // namespace
59 // Register the NVVM dialect, the NVVM translation & the target interface.
60 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
61 DialectRegistry &registry) {
62 registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
63 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
64 });
67 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
68 MLIRContext &context) {
69 DialectRegistry registry;
70 registerNVVMTargetInterfaceExternalModels(registry);
71 context.appendDialectRegistry(registry);
74 // Search for the CUDA toolkit path.
75 StringRef mlir::NVVM::getCUDAToolkitPath() {
76 if (const char *var = std::getenv("CUDA_ROOT"))
77 return var;
78 if (const char *var = std::getenv("CUDA_HOME"))
79 return var;
80 if (const char *var = std::getenv("CUDA_PATH"))
81 return var;
82 return __DEFAULT_CUDATOOLKIT_PATH__;
85 SerializeGPUModuleBase::SerializeGPUModuleBase(
86 Operation &module, NVVMTargetAttr target,
87 const gpu::TargetOptions &targetOptions)
88 : ModuleToObject(module, target.getTriple(), target.getChip(),
89 target.getFeatures(), target.getO(),
90 targetOptions.getInitialLlvmIRCallback(),
91 targetOptions.getLinkedLlvmIRCallback(),
92 targetOptions.getOptimizedLlvmIRCallback(),
93 targetOptions.getISACallback()),
94 target(target), toolkitPath(targetOptions.getToolkitPath()),
95 fileList(targetOptions.getLinkFiles()) {
97 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
98 if (toolkitPath.empty())
99 toolkitPath = getCUDAToolkitPath();
101 // Append the files in the target attribute.
102 if (ArrayAttr files = target.getLink())
103 for (Attribute attr : files.getValue())
104 if (auto file = dyn_cast<StringAttr>(attr))
105 fileList.push_back(file.str());
107 // Append libdevice to the files to be loaded.
108 (void)appendStandardLibs();
111 void SerializeGPUModuleBase::init() {
112 static llvm::once_flag initializeBackendOnce;
113 llvm::call_once(initializeBackendOnce, []() {
114 // If the `NVPTX` LLVM target was built, initialize it.
115 #if LLVM_HAS_NVPTX_TARGET
116 LLVMInitializeNVPTXTarget();
117 LLVMInitializeNVPTXTargetInfo();
118 LLVMInitializeNVPTXTargetMC();
119 LLVMInitializeNVPTXAsmPrinter();
120 #endif
124 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
126 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
128 ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
129 return fileList;
132 // Try to append `libdevice` from a CUDA toolkit installation.
133 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
134 StringRef pathRef = getToolkitPath();
135 if (!pathRef.empty()) {
136 SmallVector<char, 256> path;
137 path.insert(path.begin(), pathRef.begin(), pathRef.end());
138 pathRef = StringRef(path.data(), path.size());
139 if (!llvm::sys::fs::is_directory(pathRef)) {
140 getOperation().emitError() << "CUDA path: " << pathRef
141 << " does not exist or is not a directory.\n";
142 return failure();
144 llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
145 pathRef = StringRef(path.data(), path.size());
146 if (!llvm::sys::fs::is_regular_file(pathRef)) {
147 getOperation().emitError() << "LibDevice path: " << pathRef
148 << " does not exist or is not a file.\n";
149 return failure();
151 fileList.push_back(pathRef.str());
153 return success();
156 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
157 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
158 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
159 if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
160 true)))
161 return std::nullopt;
162 return std::move(bcFiles);
165 namespace {
166 class NVPTXSerializer : public SerializeGPUModuleBase {
167 public:
168 NVPTXSerializer(Operation &module, NVVMTargetAttr target,
169 const gpu::TargetOptions &targetOptions);
171 /// Returns the GPU module op being serialized.
172 gpu::GPUModuleOp getOperation();
174 /// Compiles PTX to cubin using `ptxas`.
175 std::optional<SmallVector<char, 0>>
176 compileToBinary(const std::string &ptxCode);
178 /// Compiles PTX to cubin using the `nvptxcompiler` library.
179 std::optional<SmallVector<char, 0>>
180 compileToBinaryNVPTX(const std::string &ptxCode);
182 /// Serializes the LLVM module to an object format, depending on the
183 /// compilation target selected in target options.
184 std::optional<SmallVector<char, 0>>
185 moduleToObject(llvm::Module &llvmModule) override;
187 private:
188 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
190 /// Creates a temp file.
191 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
193 /// Finds the `tool` path, where `tool` is the name of the binary to search,
194 /// i.e. `ptxas` or `fatbinary`. The search order is:
195 /// 1. The toolkit path in `targetOptions`.
196 /// 2. In the system PATH.
197 /// 3. The path from `getCUDAToolkitPath()`.
198 std::optional<std::string> findTool(StringRef tool);
200 /// Target options.
201 gpu::TargetOptions targetOptions;
203 } // namespace
205 NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
206 const gpu::TargetOptions &targetOptions)
207 : SerializeGPUModuleBase(module, target, targetOptions),
208 targetOptions(targetOptions) {}
210 std::optional<NVPTXSerializer::TmpFile>
211 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
212 llvm::SmallString<128> filename;
213 std::error_code ec =
214 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
215 if (ec) {
216 getOperation().emitError() << "Couldn't create the temp file: `" << filename
217 << "`, error message: " << ec.message();
218 return std::nullopt;
220 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
223 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
224 return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
227 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
228 // Find the `tool` path.
229 // 1. Check the toolkit path given in the command line.
230 StringRef pathRef = targetOptions.getToolkitPath();
231 SmallVector<char, 256> path;
232 if (!pathRef.empty()) {
233 path.insert(path.begin(), pathRef.begin(), pathRef.end());
234 llvm::sys::path::append(path, "bin", tool);
235 if (llvm::sys::fs::can_execute(path))
236 return StringRef(path.data(), path.size()).str();
239 // 2. Check PATH.
240 if (std::optional<std::string> toolPath =
241 llvm::sys::Process::FindInEnvPath("PATH", tool))
242 return *toolPath;
244 // 3. Check `getCUDAToolkitPath()`.
245 pathRef = getCUDAToolkitPath();
246 path.clear();
247 if (!pathRef.empty()) {
248 path.insert(path.begin(), pathRef.begin(), pathRef.end());
249 llvm::sys::path::append(path, "bin", tool);
250 if (llvm::sys::fs::can_execute(path))
251 return StringRef(path.data(), path.size()).str();
253 getOperation().emitError()
254 << "Couldn't find the `" << tool
255 << "` binary. Please specify the toolkit "
256 "path, add the compiler to $PATH, or set one of the environment "
257 "variables in `NVVM::getCUDAToolkitPath()`.";
258 return std::nullopt;
261 // TODO: clean this method & have a generic tool driver or never emit binaries
262 // with this mechanism and let another stage take care of it.
263 std::optional<SmallVector<char, 0>>
264 NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
265 // Determine if the serializer should create a fatbinary with the PTX embeded
266 // or a simple CUBIN binary.
267 const bool createFatbin =
268 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
270 // Find the `ptxas` & `fatbinary` tools.
271 std::optional<std::string> ptxasCompiler = findTool("ptxas");
272 if (!ptxasCompiler)
273 return std::nullopt;
274 std::optional<std::string> fatbinaryTool;
275 if (createFatbin) {
276 fatbinaryTool = findTool("fatbinary");
277 if (!fatbinaryTool)
278 return std::nullopt;
280 Location loc = getOperation().getLoc();
282 // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
283 std::string basename =
284 llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
285 getTarget().getTriple(), getTarget().getChip());
287 // Create temp files:
288 std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
289 if (!ptxFile)
290 return std::nullopt;
291 std::optional<TmpFile> logFile = createTemp(basename, "log");
292 if (!logFile)
293 return std::nullopt;
294 std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
295 if (!binaryFile)
296 return std::nullopt;
297 TmpFile cubinFile;
298 if (createFatbin) {
299 Twine cubinFilename = ptxFile->first + ".cubin";
300 cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
301 } else {
302 cubinFile.first = binaryFile->first;
305 std::error_code ec;
306 // Dump the PTX to a temp file.
308 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
309 if (ec) {
310 emitError(loc) << "Couldn't open the file: `" << ptxFile->first
311 << "`, error message: " << ec.message();
312 return std::nullopt;
314 ptxStream << ptxCode;
315 if (ptxStream.has_error()) {
316 emitError(loc) << "An error occurred while writing the PTX to: `"
317 << ptxFile->first << "`.";
318 return std::nullopt;
320 ptxStream.flush();
323 // Command redirects.
324 std::optional<StringRef> redirects[] = {
325 std::nullopt,
326 logFile->first,
327 logFile->first,
330 // Get any extra args passed in `targetOptions`.
331 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
332 targetOptions.tokenizeCmdOptions();
334 // Create ptxas args.
335 std::string optLevel = std::to_string(this->optLevel);
336 SmallVector<StringRef, 12> ptxasArgs(
337 {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
338 StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
339 "--opt-level", optLevel});
341 bool useFatbin32 = false;
342 for (const auto *cArg : cmdOpts.second) {
343 // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
344 // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
345 // assumed.
346 if (StringRef arg(cArg); arg != "-32")
347 ptxasArgs.push_back(arg);
348 else
349 useFatbin32 = true;
352 // Create the `fatbinary` args.
353 StringRef chip = getTarget().getChip();
354 // Remove the arch prefix to obtain the compute capability.
355 chip.consume_front("sm_"), chip.consume_front("compute_");
356 // Embed the cubin object.
357 std::string cubinArg =
358 llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
359 .str();
360 // Embed the PTX file so the driver can JIT if needed.
361 std::string ptxArg =
362 llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
363 .str();
364 SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
365 useFatbin32 ? "-32" : "-64", cubinArg,
366 ptxArg, "--create", binaryFile->first});
368 // Dump tool invocation commands.
369 #define DEBUG_TYPE "serialize-to-binary"
370 LLVM_DEBUG({
371 llvm::dbgs() << "Tool invocation for module: "
372 << getOperation().getNameAttr() << "\n";
373 llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
374 llvm::dbgs() << "\n";
375 if (createFatbin) {
376 llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
377 llvm::dbgs() << "\n";
380 #undef DEBUG_TYPE
382 // Helper function for printing tool error logs.
383 std::string message;
384 auto emitLogError =
385 [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
386 if (message.empty()) {
387 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
388 llvm::MemoryBuffer::getFile(logFile->first);
389 if (toolStderr)
390 emitError(loc) << toolName << " invocation failed. Log:\n"
391 << toolStderr->get()->getBuffer();
392 else
393 emitError(loc) << toolName << " invocation failed.";
394 return std::nullopt;
396 emitError(loc) << toolName
397 << " invocation failed, error message: " << message;
398 return std::nullopt;
401 // Invoke PTXAS.
402 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
403 /*Env=*/std::nullopt,
404 /*Redirects=*/redirects,
405 /*SecondsToWait=*/0,
406 /*MemoryLimit=*/0,
407 /*ErrMsg=*/&message))
408 return emitLogError("`ptxas`");
409 #define DEBUG_TYPE "dump-sass"
410 LLVM_DEBUG({
411 std::optional<std::string> nvdisasm = findTool("nvdisasm");
412 SmallVector<StringRef> nvdisasmArgs(
413 {StringRef("nvdisasm"), StringRef(cubinFile.first)});
414 if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
415 /*Env=*/std::nullopt,
416 /*Redirects=*/redirects,
417 /*SecondsToWait=*/0,
418 /*MemoryLimit=*/0,
419 /*ErrMsg=*/&message))
420 return emitLogError("`nvdisasm`");
421 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
422 llvm::MemoryBuffer::getFile(logFile->first);
423 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
424 llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
425 llvm::dbgs().flush();
428 #undef DEBUG_TYPE
430 // Invoke `fatbin`.
431 message.clear();
432 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
433 /*Env=*/std::nullopt,
434 /*Redirects=*/redirects,
435 /*SecondsToWait=*/0,
436 /*MemoryLimit=*/0,
437 /*ErrMsg=*/&message))
438 return emitLogError("`fatbinary`");
440 // Dump the output of the tools, helpful if the verbose flag was passed.
441 #define DEBUG_TYPE "serialize-to-binary"
442 LLVM_DEBUG({
443 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
444 llvm::MemoryBuffer::getFile(logFile->first);
445 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
446 llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
447 llvm::dbgs().flush();
450 #undef DEBUG_TYPE
452 // Read the fatbin.
453 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
454 llvm::MemoryBuffer::getFile(binaryFile->first);
455 if (!binaryBuffer) {
456 emitError(loc) << "Couldn't open the file: `" << binaryFile->first
457 << "`, error message: " << binaryBuffer.getError().message();
458 return std::nullopt;
460 StringRef fatbin = (*binaryBuffer)->getBuffer();
461 return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

// Evaluates `expr` (an nvPTXCompiler API call); on a non-zero status, emits an
// error at `loc` (which must be in scope) and returns std::nullopt.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

/// Compiles `ptxCode` in-process with the nvPTXCompiler library, avoiding the
/// external `ptxas` invocation. Returns the ELF binary or std::nullopt.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      // +1 for the null terminator the C API does not write.
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

  // Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
545 std::optional<SmallVector<char, 0>>
546 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
547 // Return LLVM IR if the compilation target is `offload`.
548 #define DEBUG_TYPE "serialize-to-llvm"
549 LLVM_DEBUG({
550 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
551 << "\n";
552 llvm::dbgs() << llvmModule << "\n";
553 llvm::dbgs().flush();
555 #undef DEBUG_TYPE
556 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
557 return SerializeGPUModuleBase::moduleToObject(llvmModule);
559 #if !LLVM_HAS_NVPTX_TARGET
560 getOperation()->emitError(
561 "The `NVPTX` target was not built. Please enable it when building LLVM.");
562 return std::nullopt;
563 #endif // LLVM_HAS_NVPTX_TARGET
565 // Emit PTX code.
566 std::optional<llvm::TargetMachine *> targetMachine =
567 getOrCreateTargetMachine();
568 if (!targetMachine) {
569 getOperation().emitError() << "Target Machine unavailable for triple "
570 << triple << ", can't optimize with LLVM\n";
571 return std::nullopt;
573 std::optional<std::string> serializedISA =
574 translateToISA(llvmModule, **targetMachine);
575 if (!serializedISA) {
576 getOperation().emitError() << "Failed translating the module to ISA.";
577 return std::nullopt;
579 if (isaCallback)
580 isaCallback(serializedISA.value());
582 #define DEBUG_TYPE "serialize-to-isa"
583 LLVM_DEBUG({
584 llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
585 llvm::dbgs() << *serializedISA << "\n";
586 llvm::dbgs().flush();
588 #undef DEBUG_TYPE
590 // Return PTX if the compilation target is `assembly`.
591 if (targetOptions.getCompilationTarget() ==
592 gpu::CompilationTarget::Assembly) {
593 // Make sure to include the null terminator.
594 StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
595 return SmallVector<char, 0>(bin.begin(), bin.end());
598 // Compile to binary.
599 #if MLIR_ENABLE_NVPTXCOMPILER
600 return compileToBinaryNVPTX(*serializedISA);
601 #else
602 return compileToBinary(*serializedISA);
603 #endif // MLIR_ENABLE_NVPTXCOMPILER
606 std::optional<SmallVector<char, 0>>
607 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
608 const gpu::TargetOptions &options) const {
609 assert(module && "The module must be non null.");
610 if (!module)
611 return std::nullopt;
612 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
613 module->emitError("Module must be a GPU module.");
614 return std::nullopt;
616 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
617 serializer.init();
618 return serializer.run();
621 Attribute
622 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
623 const SmallVector<char, 0> &object,
624 const gpu::TargetOptions &options) const {
625 auto target = cast<NVVMTargetAttr>(attribute);
626 gpu::CompilationTarget format = options.getCompilationTarget();
627 DictionaryAttr objectProps;
628 Builder builder(attribute.getContext());
629 if (format == gpu::CompilationTarget::Assembly)
630 objectProps = builder.getDictionaryAttr(
631 {builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO()))});
632 return builder.getAttr<gpu::ObjectAttr>(
633 attribute, format,
634 builder.getStringAttr(StringRef(object.data(), object.size())),
635 objectProps, /*kernels=*/nullptr);