1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/NVVM/Target.h"
16 #include "mlir/Config/mlir-config.h"
17 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
18 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
19 #include "mlir/Target/LLVM/NVVM/Utils.h"
20 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
23 #include "mlir/Target/LLVMIR/Export.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FileUtilities.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/Process.h"
32 #include "llvm/Support/Program.h"
33 #include "llvm/Support/TargetSelect.h"
38 using namespace mlir::NVVM
;
// Fallback CUDA toolkit path baked in at configure time; empty when the build
// system did not provide one. The conditional block was missing its `#endif`.
#ifndef __DEFAULT_CUDATOOLKIT_PATH__
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
45 // Implementation of the `TargetAttrInterface` model.
46 class NVVMTargetAttrImpl
47 : public gpu::TargetAttrInterface::FallbackModel
<NVVMTargetAttrImpl
> {
49 std::optional
<SmallVector
<char, 0>>
50 serializeToObject(Attribute attribute
, Operation
*module
,
51 const gpu::TargetOptions
&options
) const;
53 Attribute
createObject(Attribute attribute
,
54 const SmallVector
<char, 0> &object
,
55 const gpu::TargetOptions
&options
) const;
59 // Register the NVVM dialect, the NVVM translation & the target interface.
60 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
61 DialectRegistry
®istry
) {
62 registry
.addExtension(+[](MLIRContext
*ctx
, NVVM::NVVMDialect
*dialect
) {
63 NVVMTargetAttr::attachInterface
<NVVMTargetAttrImpl
>(*ctx
);
67 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
68 MLIRContext
&context
) {
69 DialectRegistry registry
;
70 registerNVVMTargetInterfaceExternalModels(registry
);
71 context
.appendDialectRegistry(registry
);
74 // Search for the CUDA toolkit path.
75 StringRef
mlir::NVVM::getCUDAToolkitPath() {
76 if (const char *var
= std::getenv("CUDA_ROOT"))
78 if (const char *var
= std::getenv("CUDA_HOME"))
80 if (const char *var
= std::getenv("CUDA_PATH"))
82 return __DEFAULT_CUDATOOLKIT_PATH__
;
85 SerializeGPUModuleBase::SerializeGPUModuleBase(
86 Operation
&module
, NVVMTargetAttr target
,
87 const gpu::TargetOptions
&targetOptions
)
88 : ModuleToObject(module
, target
.getTriple(), target
.getChip(),
89 target
.getFeatures(), target
.getO()),
90 target(target
), toolkitPath(targetOptions
.getToolkitPath()),
91 fileList(targetOptions
.getLinkFiles()) {
93 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
94 if (toolkitPath
.empty())
95 toolkitPath
= getCUDAToolkitPath();
97 // Append the files in the target attribute.
98 if (ArrayAttr files
= target
.getLink())
99 for (Attribute attr
: files
.getValue())
100 if (auto file
= dyn_cast
<StringAttr
>(attr
))
101 fileList
.push_back(file
.str());
103 // Append libdevice to the files to be loaded.
104 (void)appendStandardLibs();
107 void SerializeGPUModuleBase::init() {
108 static llvm::once_flag initializeBackendOnce
;
109 llvm::call_once(initializeBackendOnce
, []() {
110 // If the `NVPTX` LLVM target was built, initialize it.
111 #if LLVM_HAS_NVPTX_TARGET
112 LLVMInitializeNVPTXTarget();
113 LLVMInitializeNVPTXTargetInfo();
114 LLVMInitializeNVPTXTargetMC();
115 LLVMInitializeNVPTXAsmPrinter();
120 NVVMTargetAttr
SerializeGPUModuleBase::getTarget() const { return target
; }
122 StringRef
SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath
; }
124 ArrayRef
<std::string
> SerializeGPUModuleBase::getFileList() const {
128 // Try to append `libdevice` from a CUDA toolkit installation.
129 LogicalResult
SerializeGPUModuleBase::appendStandardLibs() {
130 StringRef pathRef
= getToolkitPath();
131 if (!pathRef
.empty()) {
132 SmallVector
<char, 256> path
;
133 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
134 pathRef
= StringRef(path
.data(), path
.size());
135 if (!llvm::sys::fs::is_directory(pathRef
)) {
136 getOperation().emitError() << "CUDA path: " << pathRef
137 << " does not exist or is not a directory.\n";
140 llvm::sys::path::append(path
, "nvvm", "libdevice", "libdevice.10.bc");
141 pathRef
= StringRef(path
.data(), path
.size());
142 if (!llvm::sys::fs::is_regular_file(pathRef
)) {
143 getOperation().emitError() << "LibDevice path: " << pathRef
144 << " does not exist or is not a file.\n";
147 fileList
.push_back(pathRef
.str());
152 std::optional
<SmallVector
<std::unique_ptr
<llvm::Module
>>>
153 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module
&module
) {
154 SmallVector
<std::unique_ptr
<llvm::Module
>> bcFiles
;
155 if (failed(loadBitcodeFilesFromList(module
.getContext(), fileList
, bcFiles
,
158 return std::move(bcFiles
);
161 #if MLIR_ENABLE_CUDA_CONVERSIONS
163 class NVPTXSerializer
: public SerializeGPUModuleBase
{
165 NVPTXSerializer(Operation
&module
, NVVMTargetAttr target
,
166 const gpu::TargetOptions
&targetOptions
);
168 gpu::GPUModuleOp
getOperation();
170 // Compile PTX to cubin using `ptxas`.
171 std::optional
<SmallVector
<char, 0>>
172 compileToBinary(const std::string
&ptxCode
);
174 // Compile PTX to cubin using the `nvptxcompiler` library.
175 std::optional
<SmallVector
<char, 0>>
176 compileToBinaryNVPTX(const std::string
&ptxCode
);
178 std::optional
<SmallVector
<char, 0>>
179 moduleToObject(llvm::Module
&llvmModule
) override
;
182 using TmpFile
= std::pair
<llvm::SmallString
<128>, llvm::FileRemover
>;
184 // Create a temp file.
185 std::optional
<TmpFile
> createTemp(StringRef name
, StringRef suffix
);
187 // Find the `tool` path, where `tool` is the name of the binary to search,
188 // i.e. `ptxas` or `fatbinary`. The search order is:
189 // 1. The toolkit path in `targetOptions`.
190 // 2. In the system PATH.
191 // 3. The path from `getCUDAToolkitPath()`.
192 std::optional
<std::string
> findTool(StringRef tool
);
195 gpu::TargetOptions targetOptions
;
199 NVPTXSerializer::NVPTXSerializer(Operation
&module
, NVVMTargetAttr target
,
200 const gpu::TargetOptions
&targetOptions
)
201 : SerializeGPUModuleBase(module
, target
, targetOptions
),
202 targetOptions(targetOptions
) {}
204 std::optional
<NVPTXSerializer::TmpFile
>
205 NVPTXSerializer::createTemp(StringRef name
, StringRef suffix
) {
206 llvm::SmallString
<128> filename
;
208 llvm::sys::fs::createTemporaryFile(name
, suffix
, filename
);
210 getOperation().emitError() << "Couldn't create the temp file: `" << filename
211 << "`, error message: " << ec
.message();
214 return TmpFile(filename
, llvm::FileRemover(filename
.c_str()));
217 gpu::GPUModuleOp
NVPTXSerializer::getOperation() {
218 return dyn_cast
<gpu::GPUModuleOp
>(&SerializeGPUModuleBase::getOperation());
221 std::optional
<std::string
> NVPTXSerializer::findTool(StringRef tool
) {
222 // Find the `tool` path.
223 // 1. Check the toolkit path given in the command line.
224 StringRef pathRef
= targetOptions
.getToolkitPath();
225 SmallVector
<char, 256> path
;
226 if (!pathRef
.empty()) {
227 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
228 llvm::sys::path::append(path
, "bin", tool
);
229 if (llvm::sys::fs::can_execute(path
))
230 return StringRef(path
.data(), path
.size()).str();
234 if (std::optional
<std::string
> toolPath
=
235 llvm::sys::Process::FindInEnvPath("PATH", tool
))
238 // 3. Check `getCUDAToolkitPath()`.
239 pathRef
= getCUDAToolkitPath();
241 if (!pathRef
.empty()) {
242 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
243 llvm::sys::path::append(path
, "bin", tool
);
244 if (llvm::sys::fs::can_execute(path
))
245 return StringRef(path
.data(), path
.size()).str();
247 getOperation().emitError()
248 << "Couldn't find the `" << tool
249 << "` binary. Please specify the toolkit "
250 "path, add the compiler to $PATH, or set one of the environment "
251 "variables in `NVVM::getCUDAToolkitPath()`.";
255 // TODO: clean this method & have a generic tool driver or never emit binaries
256 // with this mechanism and let another stage take care of it.
257 std::optional
<SmallVector
<char, 0>>
258 NVPTXSerializer::compileToBinary(const std::string
&ptxCode
) {
259 // Determine if the serializer should create a fatbinary with the PTX embeded
260 // or a simple CUBIN binary.
261 const bool createFatbin
=
262 targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Fatbin
;
264 // Find the `ptxas` & `fatbinary` tools.
265 std::optional
<std::string
> ptxasCompiler
= findTool("ptxas");
268 std::optional
<std::string
> fatbinaryTool
= findTool("fatbinary");
269 if (createFatbin
&& !fatbinaryTool
)
271 Location loc
= getOperation().getLoc();
273 // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
274 std::string basename
=
275 llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
276 getTarget().getTriple(), getTarget().getChip());
278 // Create temp files:
279 std::optional
<TmpFile
> ptxFile
= createTemp(basename
, "ptx");
282 std::optional
<TmpFile
> logFile
= createTemp(basename
, "log");
285 std::optional
<TmpFile
> binaryFile
= createTemp(basename
, "bin");
290 Twine cubinFilename
= ptxFile
->first
+ ".cubin";
291 cubinFile
= TmpFile(cubinFilename
.str(), llvm::FileRemover(cubinFilename
));
293 cubinFile
.first
= binaryFile
->first
;
297 // Dump the PTX to a temp file.
299 llvm::raw_fd_ostream
ptxStream(ptxFile
->first
, ec
);
301 emitError(loc
) << "Couldn't open the file: `" << ptxFile
->first
302 << "`, error message: " << ec
.message();
305 ptxStream
<< ptxCode
;
306 if (ptxStream
.has_error()) {
307 emitError(loc
) << "An error occurred while writing the PTX to: `"
308 << ptxFile
->first
<< "`.";
314 // Command redirects.
315 std::optional
<StringRef
> redirects
[] = {
321 // Get any extra args passed in `targetOptions`.
322 std::pair
<llvm::BumpPtrAllocator
, SmallVector
<const char *>> cmdOpts
=
323 targetOptions
.tokenizeCmdOptions();
325 // Create ptxas args.
326 std::string optLevel
= std::to_string(this->optLevel
);
327 SmallVector
<StringRef
, 12> ptxasArgs(
328 {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
329 StringRef(ptxFile
->first
), StringRef("-o"), StringRef(cubinFile
.first
),
330 "--opt-level", optLevel
});
332 bool useFatbin32
= false;
333 for (const auto *cArg
: cmdOpts
.second
) {
334 // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
335 // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
337 if (StringRef
arg(cArg
); arg
!= "-32")
338 ptxasArgs
.push_back(arg
);
343 // Create the `fatbinary` args.
344 StringRef chip
= getTarget().getChip();
345 // Remove the arch prefix to obtain the compute capability.
346 chip
.consume_front("sm_"), chip
.consume_front("compute_");
347 // Embed the cubin object.
348 std::string cubinArg
=
349 llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip
, cubinFile
.first
)
351 // Embed the PTX file so the driver can JIT if needed.
353 llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip
, ptxFile
->first
)
355 SmallVector
<StringRef
, 6> fatbinArgs({StringRef("fatbinary"),
356 useFatbin32
? "-32" : "-64", cubinArg
,
357 ptxArg
, "--create", binaryFile
->first
});
359 // Dump tool invocation commands.
360 #define DEBUG_TYPE "serialize-to-binary"
362 llvm::dbgs() << "Tool invocation for module: "
363 << getOperation().getNameAttr() << "\n";
364 llvm::interleave(ptxasArgs
, llvm::dbgs(), " ");
365 llvm::dbgs() << "\n";
367 llvm::interleave(fatbinArgs
, llvm::dbgs(), " ");
368 llvm::dbgs() << "\n";
373 // Helper function for printing tool error logs.
376 [&](StringRef toolName
) -> std::optional
<SmallVector
<char, 0>> {
377 if (message
.empty()) {
378 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> toolStderr
=
379 llvm::MemoryBuffer::getFile(logFile
->first
);
381 emitError(loc
) << toolName
<< " invocation failed. Log:\n"
382 << toolStderr
->get()->getBuffer();
384 emitError(loc
) << toolName
<< " invocation failed.";
387 emitError(loc
) << toolName
388 << " invocation failed, error message: " << message
;
393 if (llvm::sys::ExecuteAndWait(ptxasCompiler
.value(), ptxasArgs
,
394 /*Env=*/std::nullopt
,
395 /*Redirects=*/redirects
,
398 /*ErrMsg=*/&message
))
399 return emitLogError("`ptxas`");
403 if (createFatbin
&& llvm::sys::ExecuteAndWait(*fatbinaryTool
, fatbinArgs
,
404 /*Env=*/std::nullopt
,
405 /*Redirects=*/redirects
,
408 /*ErrMsg=*/&message
))
409 return emitLogError("`fatbinary`");
411 // Dump the output of the tools, helpful if the verbose flag was passed.
412 #define DEBUG_TYPE "serialize-to-binary"
414 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> logBuffer
=
415 llvm::MemoryBuffer::getFile(logFile
->first
);
416 if (logBuffer
&& !(*logBuffer
)->getBuffer().empty()) {
417 llvm::dbgs() << "Output:\n" << (*logBuffer
)->getBuffer() << "\n";
418 llvm::dbgs().flush();
424 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> binaryBuffer
=
425 llvm::MemoryBuffer::getFile(binaryFile
->first
);
427 emitError(loc
) << "Couldn't open the file: `" << binaryFile
->first
428 << "`, error message: " << binaryBuffer
.getError().message();
431 StringRef fatbin
= (*binaryBuffer
)->getBuffer();
432 return SmallVector
<char, 0>(fatbin
.begin(), fatbin
.end());
435 #if MLIR_ENABLE_NVPTXCOMPILER
436 #include "nvPTXCompiler.h"
// Evaluates `expr` (an nvPTXCompiler API call); on a non-success status emits
// a diagnostic at `loc` (expected in scope at each use site) and returns
// `std::nullopt` from the enclosing function.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
447 std::optional
<SmallVector
<char, 0>>
448 NVPTXSerializer::compileToBinaryNVPTX(const std::string
&ptxCode
) {
449 Location loc
= getOperation().getLoc();
450 nvPTXCompilerHandle compiler
= nullptr;
451 nvPTXCompileResult status
;
454 // Create the options.
455 std::string optLevel
= std::to_string(this->optLevel
);
456 std::pair
<llvm::BumpPtrAllocator
, SmallVector
<const char *>> cmdOpts
=
457 targetOptions
.tokenizeCmdOptions();
458 cmdOpts
.second
.append(
459 {"-arch", getTarget().getChip().data(), "--opt-level", optLevel
.c_str()});
461 // Create the compiler handle.
462 RETURN_ON_NVPTXCOMPILER_ERROR(
463 nvPTXCompilerCreate(&compiler
, ptxCode
.size(), ptxCode
.c_str()));
465 // Try to compile the binary.
466 status
= nvPTXCompilerCompile(compiler
, cmdOpts
.second
.size(),
467 cmdOpts
.second
.data());
469 // Check if compilation failed.
470 if (status
!= NVPTXCOMPILE_SUCCESS
) {
471 RETURN_ON_NVPTXCOMPILER_ERROR(
472 nvPTXCompilerGetErrorLogSize(compiler
, &logSize
));
474 SmallVector
<char> log(logSize
+ 1, 0);
475 RETURN_ON_NVPTXCOMPILER_ERROR(
476 nvPTXCompilerGetErrorLog(compiler
, log
.data()));
477 emitError(loc
) << "NVPTX compiler invocation failed, error log: "
480 emitError(loc
) << "NVPTX compiler invocation failed with error code: "
485 // Retrieve the binary.
487 RETURN_ON_NVPTXCOMPILER_ERROR(
488 nvPTXCompilerGetCompiledProgramSize(compiler
, &elfSize
));
489 SmallVector
<char, 0> binary(elfSize
, 0);
490 RETURN_ON_NVPTXCOMPILER_ERROR(
491 nvPTXCompilerGetCompiledProgram(compiler
, (void *)binary
.data()));
493 // Dump the log of the compiler, helpful if the verbose flag was passed.
494 #define DEBUG_TYPE "serialize-to-binary"
496 RETURN_ON_NVPTXCOMPILER_ERROR(
497 nvPTXCompilerGetInfoLogSize(compiler
, &logSize
));
499 SmallVector
<char> log(logSize
+ 1, 0);
500 RETURN_ON_NVPTXCOMPILER_ERROR(
501 nvPTXCompilerGetInfoLog(compiler
, log
.data()));
502 llvm::dbgs() << "NVPTX compiler invocation for module: "
503 << getOperation().getNameAttr() << "\n";
504 llvm::dbgs() << "Arguments: ";
505 llvm::interleave(cmdOpts
.second
, llvm::dbgs(), " ");
506 llvm::dbgs() << "\nOutput\n" << log
.data() << "\n";
507 llvm::dbgs().flush();
511 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler
));
514 #endif // MLIR_ENABLE_NVPTXCOMPILER
516 std::optional
<SmallVector
<char, 0>>
517 NVPTXSerializer::moduleToObject(llvm::Module
&llvmModule
) {
518 // Return LLVM IR if the compilation target is offload.
519 #define DEBUG_TYPE "serialize-to-llvm"
521 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
523 llvm::dbgs() << llvmModule
<< "\n";
524 llvm::dbgs().flush();
527 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Offload
)
528 return SerializeGPUModuleBase::moduleToObject(llvmModule
);
531 std::optional
<llvm::TargetMachine
*> targetMachine
=
532 getOrCreateTargetMachine();
533 if (!targetMachine
) {
534 getOperation().emitError() << "Target Machine unavailable for triple "
535 << triple
<< ", can't optimize with LLVM\n";
538 std::optional
<std::string
> serializedISA
=
539 translateToISA(llvmModule
, **targetMachine
);
540 if (!serializedISA
) {
541 getOperation().emitError() << "Failed translating the module to ISA.";
544 #define DEBUG_TYPE "serialize-to-isa"
546 llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
547 llvm::dbgs() << *serializedISA
<< "\n";
548 llvm::dbgs().flush();
552 // Return PTX if the compilation target is assembly.
553 if (targetOptions
.getCompilationTarget() ==
554 gpu::CompilationTarget::Assembly
) {
555 // Make sure to include the null terminator.
556 StringRef
bin(serializedISA
->c_str(), serializedISA
->size() + 1);
557 return SmallVector
<char, 0>(bin
.begin(), bin
.end());
560 // Compile to binary.
561 #if MLIR_ENABLE_NVPTXCOMPILER
562 return compileToBinaryNVPTX(*serializedISA
);
564 return compileToBinary(*serializedISA
);
565 #endif // MLIR_ENABLE_NVPTXCOMPILER
567 #endif // MLIR_ENABLE_CUDA_CONVERSIONS
569 std::optional
<SmallVector
<char, 0>>
570 NVVMTargetAttrImpl::serializeToObject(Attribute attribute
, Operation
*module
,
571 const gpu::TargetOptions
&options
) const {
572 assert(module
&& "The module must be non null.");
575 if (!mlir::isa
<gpu::GPUModuleOp
>(module
)) {
576 module
->emitError("Module must be a GPU module.");
579 #if MLIR_ENABLE_CUDA_CONVERSIONS
580 NVPTXSerializer
serializer(*module
, cast
<NVVMTargetAttr
>(attribute
), options
);
582 return serializer
.run();
585 "The `NVPTX` target was not built. Please enable it when building LLVM.");
587 #endif // MLIR_ENABLE_CUDA_CONVERSIONS
591 NVVMTargetAttrImpl::createObject(Attribute attribute
,
592 const SmallVector
<char, 0> &object
,
593 const gpu::TargetOptions
&options
) const {
594 auto target
= cast
<NVVMTargetAttr
>(attribute
);
595 gpu::CompilationTarget format
= options
.getCompilationTarget();
596 DictionaryAttr objectProps
;
597 Builder
builder(attribute
.getContext());
598 if (format
== gpu::CompilationTarget::Assembly
)
599 objectProps
= builder
.getDictionaryAttr(
600 {builder
.getNamedAttr("O", builder
.getI32IntegerAttr(target
.getO()))});
601 return builder
.getAttr
<gpu::ObjectAttr
>(
603 builder
.getStringAttr(StringRef(object
.data(), object
.size())),