[OpenACC] Implement 'collapse' for combined constructs.
[llvm-project.git] / mlir / lib / Target / LLVM / NVVM / Target.cpp
blobbca26e3a0e84a931efda9e9130a76f101ecbdee1
1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/NVVM/Target.h"
16 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
18 #include "mlir/Target/LLVM/NVVM/Utils.h"
19 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
20 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
21 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Export.h"
24 #include "llvm/Config/llvm-config.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/FileUtilities.h"
27 #include "llvm/Support/FormatVariadic.h"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/Path.h"
30 #include "llvm/Support/Process.h"
31 #include "llvm/Support/Program.h"
32 #include "llvm/Support/TargetSelect.h"
33 #include "llvm/Support/raw_ostream.h"
35 #include <cstdlib>
37 using namespace mlir;
38 using namespace mlir::NVVM;
40 #ifndef __DEFAULT_CUDATOOLKIT_PATH__
41 #define __DEFAULT_CUDATOOLKIT_PATH__ ""
42 #endif
44 namespace {
45 // Implementation of the `TargetAttrInterface` model.
46 class NVVMTargetAttrImpl
47 : public gpu::TargetAttrInterface::FallbackModel<NVVMTargetAttrImpl> {
48 public:
49 std::optional<SmallVector<char, 0>>
50 serializeToObject(Attribute attribute, Operation *module,
51 const gpu::TargetOptions &options) const;
53 Attribute createObject(Attribute attribute, Operation *module,
54 const SmallVector<char, 0> &object,
55 const gpu::TargetOptions &options) const;
57 } // namespace
59 // Register the NVVM dialect, the NVVM translation & the target interface.
60 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
61 DialectRegistry &registry) {
62 registry.addExtension(+[](MLIRContext *ctx, NVVM::NVVMDialect *dialect) {
63 NVVMTargetAttr::attachInterface<NVVMTargetAttrImpl>(*ctx);
64 });
67 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
68 MLIRContext &context) {
69 DialectRegistry registry;
70 registerNVVMTargetInterfaceExternalModels(registry);
71 context.appendDialectRegistry(registry);
74 // Search for the CUDA toolkit path.
75 StringRef mlir::NVVM::getCUDAToolkitPath() {
76 if (const char *var = std::getenv("CUDA_ROOT"))
77 return var;
78 if (const char *var = std::getenv("CUDA_HOME"))
79 return var;
80 if (const char *var = std::getenv("CUDA_PATH"))
81 return var;
82 return __DEFAULT_CUDATOOLKIT_PATH__;
85 SerializeGPUModuleBase::SerializeGPUModuleBase(
86 Operation &module, NVVMTargetAttr target,
87 const gpu::TargetOptions &targetOptions)
88 : ModuleToObject(module, target.getTriple(), target.getChip(),
89 target.getFeatures(), target.getO(),
90 targetOptions.getInitialLlvmIRCallback(),
91 targetOptions.getLinkedLlvmIRCallback(),
92 targetOptions.getOptimizedLlvmIRCallback(),
93 targetOptions.getISACallback()),
94 target(target), toolkitPath(targetOptions.getToolkitPath()),
95 fileList(targetOptions.getLinkFiles()) {
97 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
98 if (toolkitPath.empty())
99 toolkitPath = getCUDAToolkitPath();
101 // Append the files in the target attribute.
102 if (ArrayAttr files = target.getLink())
103 for (Attribute attr : files.getValue())
104 if (auto file = dyn_cast<StringAttr>(attr))
105 fileList.push_back(file.str());
107 // Append libdevice to the files to be loaded.
108 (void)appendStandardLibs();
111 void SerializeGPUModuleBase::init() {
112 static llvm::once_flag initializeBackendOnce;
113 llvm::call_once(initializeBackendOnce, []() {
114 // If the `NVPTX` LLVM target was built, initialize it.
115 #if LLVM_HAS_NVPTX_TARGET
116 LLVMInitializeNVPTXTarget();
117 LLVMInitializeNVPTXTargetInfo();
118 LLVMInitializeNVPTXTargetMC();
119 LLVMInitializeNVPTXAsmPrinter();
120 #endif
124 NVVMTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
126 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
128 ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
129 return fileList;
132 // Try to append `libdevice` from a CUDA toolkit installation.
133 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
134 StringRef pathRef = getToolkitPath();
135 if (!pathRef.empty()) {
136 SmallVector<char, 256> path;
137 path.insert(path.begin(), pathRef.begin(), pathRef.end());
138 pathRef = StringRef(path.data(), path.size());
139 if (!llvm::sys::fs::is_directory(pathRef)) {
140 getOperation().emitError() << "CUDA path: " << pathRef
141 << " does not exist or is not a directory.\n";
142 return failure();
144 llvm::sys::path::append(path, "nvvm", "libdevice", "libdevice.10.bc");
145 pathRef = StringRef(path.data(), path.size());
146 if (!llvm::sys::fs::is_regular_file(pathRef)) {
147 getOperation().emitError() << "LibDevice path: " << pathRef
148 << " does not exist or is not a file.\n";
149 return failure();
151 fileList.push_back(pathRef.str());
153 return success();
156 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
157 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
158 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
159 if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
160 true)))
161 return std::nullopt;
162 return std::move(bcFiles);
165 namespace {
166 class NVPTXSerializer : public SerializeGPUModuleBase {
167 public:
168 NVPTXSerializer(Operation &module, NVVMTargetAttr target,
169 const gpu::TargetOptions &targetOptions);
171 /// Returns the GPU module op being serialized.
172 gpu::GPUModuleOp getOperation();
174 /// Compiles PTX to cubin using `ptxas`.
175 std::optional<SmallVector<char, 0>>
176 compileToBinary(const std::string &ptxCode);
178 /// Compiles PTX to cubin using the `nvptxcompiler` library.
179 std::optional<SmallVector<char, 0>>
180 compileToBinaryNVPTX(const std::string &ptxCode);
182 /// Serializes the LLVM module to an object format, depending on the
183 /// compilation target selected in target options.
184 std::optional<SmallVector<char, 0>>
185 moduleToObject(llvm::Module &llvmModule) override;
187 private:
188 using TmpFile = std::pair<llvm::SmallString<128>, llvm::FileRemover>;
190 /// Creates a temp file.
191 std::optional<TmpFile> createTemp(StringRef name, StringRef suffix);
193 /// Finds the `tool` path, where `tool` is the name of the binary to search,
194 /// i.e. `ptxas` or `fatbinary`. The search order is:
195 /// 1. The toolkit path in `targetOptions`.
196 /// 2. In the system PATH.
197 /// 3. The path from `getCUDAToolkitPath()`.
198 std::optional<std::string> findTool(StringRef tool);
200 /// Target options.
201 gpu::TargetOptions targetOptions;
203 } // namespace
205 NVPTXSerializer::NVPTXSerializer(Operation &module, NVVMTargetAttr target,
206 const gpu::TargetOptions &targetOptions)
207 : SerializeGPUModuleBase(module, target, targetOptions),
208 targetOptions(targetOptions) {}
210 std::optional<NVPTXSerializer::TmpFile>
211 NVPTXSerializer::createTemp(StringRef name, StringRef suffix) {
212 llvm::SmallString<128> filename;
213 std::error_code ec =
214 llvm::sys::fs::createTemporaryFile(name, suffix, filename);
215 if (ec) {
216 getOperation().emitError() << "Couldn't create the temp file: `" << filename
217 << "`, error message: " << ec.message();
218 return std::nullopt;
220 return TmpFile(filename, llvm::FileRemover(filename.c_str()));
223 gpu::GPUModuleOp NVPTXSerializer::getOperation() {
224 return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
227 std::optional<std::string> NVPTXSerializer::findTool(StringRef tool) {
228 // Find the `tool` path.
229 // 1. Check the toolkit path given in the command line.
230 StringRef pathRef = targetOptions.getToolkitPath();
231 SmallVector<char, 256> path;
232 if (!pathRef.empty()) {
233 path.insert(path.begin(), pathRef.begin(), pathRef.end());
234 llvm::sys::path::append(path, "bin", tool);
235 if (llvm::sys::fs::can_execute(path))
236 return StringRef(path.data(), path.size()).str();
239 // 2. Check PATH.
240 if (std::optional<std::string> toolPath =
241 llvm::sys::Process::FindInEnvPath("PATH", tool))
242 return *toolPath;
244 // 3. Check `getCUDAToolkitPath()`.
245 pathRef = getCUDAToolkitPath();
246 path.clear();
247 if (!pathRef.empty()) {
248 path.insert(path.begin(), pathRef.begin(), pathRef.end());
249 llvm::sys::path::append(path, "bin", tool);
250 if (llvm::sys::fs::can_execute(path))
251 return StringRef(path.data(), path.size()).str();
253 getOperation().emitError()
254 << "Couldn't find the `" << tool
255 << "` binary. Please specify the toolkit "
256 "path, add the compiler to $PATH, or set one of the environment "
257 "variables in `NVVM::getCUDAToolkitPath()`.";
258 return std::nullopt;
261 // TODO: clean this method & have a generic tool driver or never emit binaries
262 // with this mechanism and let another stage take care of it.
263 std::optional<SmallVector<char, 0>>
264 NVPTXSerializer::compileToBinary(const std::string &ptxCode) {
265 // Determine if the serializer should create a fatbinary with the PTX embeded
266 // or a simple CUBIN binary.
267 const bool createFatbin =
268 targetOptions.getCompilationTarget() == gpu::CompilationTarget::Fatbin;
270 // Find the `ptxas` & `fatbinary` tools.
271 std::optional<std::string> ptxasCompiler = findTool("ptxas");
272 if (!ptxasCompiler)
273 return std::nullopt;
274 std::optional<std::string> fatbinaryTool;
275 if (createFatbin) {
276 fatbinaryTool = findTool("fatbinary");
277 if (!fatbinaryTool)
278 return std::nullopt;
280 Location loc = getOperation().getLoc();
282 // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
283 std::string basename =
284 llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
285 getTarget().getTriple(), getTarget().getChip());
287 // Create temp files:
288 std::optional<TmpFile> ptxFile = createTemp(basename, "ptx");
289 if (!ptxFile)
290 return std::nullopt;
291 std::optional<TmpFile> logFile = createTemp(basename, "log");
292 if (!logFile)
293 return std::nullopt;
294 std::optional<TmpFile> binaryFile = createTemp(basename, "bin");
295 if (!binaryFile)
296 return std::nullopt;
297 TmpFile cubinFile;
298 if (createFatbin) {
299 Twine cubinFilename = ptxFile->first + ".cubin";
300 cubinFile = TmpFile(cubinFilename.str(), llvm::FileRemover(cubinFilename));
301 } else {
302 cubinFile.first = binaryFile->first;
305 std::error_code ec;
306 // Dump the PTX to a temp file.
308 llvm::raw_fd_ostream ptxStream(ptxFile->first, ec);
309 if (ec) {
310 emitError(loc) << "Couldn't open the file: `" << ptxFile->first
311 << "`, error message: " << ec.message();
312 return std::nullopt;
314 ptxStream << ptxCode;
315 if (ptxStream.has_error()) {
316 emitError(loc) << "An error occurred while writing the PTX to: `"
317 << ptxFile->first << "`.";
318 return std::nullopt;
320 ptxStream.flush();
323 // Command redirects.
324 std::optional<StringRef> redirects[] = {
325 std::nullopt,
326 logFile->first,
327 logFile->first,
330 // Get any extra args passed in `targetOptions`.
331 std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
332 targetOptions.tokenizeCmdOptions();
334 // Create ptxas args.
335 std::string optLevel = std::to_string(this->optLevel);
336 SmallVector<StringRef, 12> ptxasArgs(
337 {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
338 StringRef(ptxFile->first), StringRef("-o"), StringRef(cubinFile.first),
339 "--opt-level", optLevel});
341 bool useFatbin32 = false;
342 for (const auto *cArg : cmdOpts.second) {
343 // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
344 // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
345 // assumed.
346 if (StringRef arg(cArg); arg != "-32")
347 ptxasArgs.push_back(arg);
348 else
349 useFatbin32 = true;
352 // Create the `fatbinary` args.
353 StringRef chip = getTarget().getChip();
354 // Remove the arch prefix to obtain the compute capability.
355 chip.consume_front("sm_"), chip.consume_front("compute_");
356 // Embed the cubin object.
357 std::string cubinArg =
358 llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip, cubinFile.first)
359 .str();
360 // Embed the PTX file so the driver can JIT if needed.
361 std::string ptxArg =
362 llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip, ptxFile->first)
363 .str();
364 SmallVector<StringRef, 6> fatbinArgs({StringRef("fatbinary"),
365 useFatbin32 ? "-32" : "-64", cubinArg,
366 ptxArg, "--create", binaryFile->first});
368 // Dump tool invocation commands.
369 #define DEBUG_TYPE "serialize-to-binary"
370 LLVM_DEBUG({
371 llvm::dbgs() << "Tool invocation for module: "
372 << getOperation().getNameAttr() << "\n";
373 llvm::interleave(ptxasArgs, llvm::dbgs(), " ");
374 llvm::dbgs() << "\n";
375 if (createFatbin) {
376 llvm::interleave(fatbinArgs, llvm::dbgs(), " ");
377 llvm::dbgs() << "\n";
380 #undef DEBUG_TYPE
382 // Helper function for printing tool error logs.
383 std::string message;
384 auto emitLogError =
385 [&](StringRef toolName) -> std::optional<SmallVector<char, 0>> {
386 if (message.empty()) {
387 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> toolStderr =
388 llvm::MemoryBuffer::getFile(logFile->first);
389 if (toolStderr)
390 emitError(loc) << toolName << " invocation failed. Log:\n"
391 << toolStderr->get()->getBuffer();
392 else
393 emitError(loc) << toolName << " invocation failed.";
394 return std::nullopt;
396 emitError(loc) << toolName
397 << " invocation failed, error message: " << message;
398 return std::nullopt;
401 // Invoke PTXAS.
402 if (llvm::sys::ExecuteAndWait(ptxasCompiler.value(), ptxasArgs,
403 /*Env=*/std::nullopt,
404 /*Redirects=*/redirects,
405 /*SecondsToWait=*/0,
406 /*MemoryLimit=*/0,
407 /*ErrMsg=*/&message))
408 return emitLogError("`ptxas`");
409 #define DEBUG_TYPE "dump-sass"
410 LLVM_DEBUG({
411 std::optional<std::string> nvdisasm = findTool("nvdisasm");
412 SmallVector<StringRef> nvdisasmArgs(
413 {StringRef("nvdisasm"), StringRef(cubinFile.first)});
414 if (llvm::sys::ExecuteAndWait(nvdisasm.value(), nvdisasmArgs,
415 /*Env=*/std::nullopt,
416 /*Redirects=*/redirects,
417 /*SecondsToWait=*/0,
418 /*MemoryLimit=*/0,
419 /*ErrMsg=*/&message))
420 return emitLogError("`nvdisasm`");
421 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
422 llvm::MemoryBuffer::getFile(logFile->first);
423 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
424 llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
425 llvm::dbgs().flush();
428 #undef DEBUG_TYPE
430 // Invoke `fatbin`.
431 message.clear();
432 if (createFatbin && llvm::sys::ExecuteAndWait(*fatbinaryTool, fatbinArgs,
433 /*Env=*/std::nullopt,
434 /*Redirects=*/redirects,
435 /*SecondsToWait=*/0,
436 /*MemoryLimit=*/0,
437 /*ErrMsg=*/&message))
438 return emitLogError("`fatbinary`");
440 // Dump the output of the tools, helpful if the verbose flag was passed.
441 #define DEBUG_TYPE "serialize-to-binary"
442 LLVM_DEBUG({
443 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> logBuffer =
444 llvm::MemoryBuffer::getFile(logFile->first);
445 if (logBuffer && !(*logBuffer)->getBuffer().empty()) {
446 llvm::dbgs() << "Output:\n" << (*logBuffer)->getBuffer() << "\n";
447 llvm::dbgs().flush();
450 #undef DEBUG_TYPE
452 // Read the fatbin.
453 llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> binaryBuffer =
454 llvm::MemoryBuffer::getFile(binaryFile->first);
455 if (!binaryBuffer) {
456 emitError(loc) << "Couldn't open the file: `" << binaryFile->first
457 << "`, error message: " << binaryBuffer.getError().message();
458 return std::nullopt;
460 StringRef fatbin = (*binaryBuffer)->getBuffer();
461 return SmallVector<char, 0>(fatbin.begin(), fatbin.end());
#if MLIR_ENABLE_NVPTXCOMPILER
#include "nvPTXCompiler.h"

// Evaluates `expr` (an nvPTXCompiler API call); on a non-zero status, emits an
// error at `loc` (which must be in scope) and returns std::nullopt.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)

/// Compiles `ptxCode` in-process with the nvPTXCompiler library, avoiding the
/// external `ptxas` invocation. Returns the ELF binary or std::nullopt.
std::optional<SmallVector<char, 0>>
NVPTXSerializer::compileToBinaryNVPTX(const std::string &ptxCode) {
  Location loc = getOperation().getLoc();
  nvPTXCompilerHandle compiler = nullptr;
  nvPTXCompileResult status;
  size_t logSize;

  // Create the options.
  std::string optLevel = std::to_string(this->optLevel);
  std::pair<llvm::BumpPtrAllocator, SmallVector<const char *>> cmdOpts =
      targetOptions.tokenizeCmdOptions();
  cmdOpts.second.append(
      {"-arch", getTarget().getChip().data(), "--opt-level", optLevel.c_str()});

  // Create the compiler handle.
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerCreate(&compiler, ptxCode.size(), ptxCode.c_str()));

  // Try to compile the binary.
  status = nvPTXCompilerCompile(compiler, cmdOpts.second.size(),
                                cmdOpts.second.data());

  // Check if compilation failed.
  if (status != NVPTXCOMPILE_SUCCESS) {
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetErrorLogSize(compiler, &logSize));
    if (logSize != 0) {
      // +1 for the null terminator the C API does not write.
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetErrorLog(compiler, log.data()));
      emitError(loc) << "NVPTX compiler invocation failed, error log: "
                     << log.data();
    } else
      emitError(loc) << "NVPTX compiler invocation failed with error code: "
                     << status;
    return std::nullopt;
  }

  // Retrieve the binary.
  size_t elfSize;
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgramSize(compiler, &elfSize));
  SmallVector<char, 0> binary(elfSize, 0);
  RETURN_ON_NVPTXCOMPILER_ERROR(
      nvPTXCompilerGetCompiledProgram(compiler, (void *)binary.data()));

  // Dump the log of the compiler, helpful if the verbose flag was passed.
#define DEBUG_TYPE "serialize-to-binary"
  LLVM_DEBUG({
    RETURN_ON_NVPTXCOMPILER_ERROR(
        nvPTXCompilerGetInfoLogSize(compiler, &logSize));
    if (logSize != 0) {
      SmallVector<char> log(logSize + 1, 0);
      RETURN_ON_NVPTXCOMPILER_ERROR(
          nvPTXCompilerGetInfoLog(compiler, log.data()));
      llvm::dbgs() << "NVPTX compiler invocation for module: "
                   << getOperation().getNameAttr() << "\n";
      llvm::dbgs() << "Arguments: ";
      llvm::interleave(cmdOpts.second, llvm::dbgs(), " ");
      llvm::dbgs() << "\nOutput\n" << log.data() << "\n";
      llvm::dbgs().flush();
    }
  });
#undef DEBUG_TYPE
  RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler));
  return binary;
}
#endif // MLIR_ENABLE_NVPTXCOMPILER
545 std::optional<SmallVector<char, 0>>
546 NVPTXSerializer::moduleToObject(llvm::Module &llvmModule) {
547 // Return LLVM IR if the compilation target is `offload`.
548 #define DEBUG_TYPE "serialize-to-llvm"
549 LLVM_DEBUG({
550 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
551 << "\n";
552 llvm::dbgs() << llvmModule << "\n";
553 llvm::dbgs().flush();
555 #undef DEBUG_TYPE
556 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
557 return SerializeGPUModuleBase::moduleToObject(llvmModule);
559 #if !LLVM_HAS_NVPTX_TARGET
560 getOperation()->emitError(
561 "The `NVPTX` target was not built. Please enable it when building LLVM.");
562 return std::nullopt;
563 #endif // LLVM_HAS_NVPTX_TARGET
565 // Emit PTX code.
566 std::optional<llvm::TargetMachine *> targetMachine =
567 getOrCreateTargetMachine();
568 if (!targetMachine) {
569 getOperation().emitError() << "Target Machine unavailable for triple "
570 << triple << ", can't optimize with LLVM\n";
571 return std::nullopt;
573 std::optional<std::string> serializedISA =
574 translateToISA(llvmModule, **targetMachine);
575 if (!serializedISA) {
576 getOperation().emitError() << "Failed translating the module to ISA.";
577 return std::nullopt;
579 if (isaCallback)
580 isaCallback(serializedISA.value());
582 #define DEBUG_TYPE "serialize-to-isa"
583 LLVM_DEBUG({
584 llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
585 llvm::dbgs() << *serializedISA << "\n";
586 llvm::dbgs().flush();
588 #undef DEBUG_TYPE
590 // Return PTX if the compilation target is `assembly`.
591 if (targetOptions.getCompilationTarget() ==
592 gpu::CompilationTarget::Assembly) {
593 // Make sure to include the null terminator.
594 StringRef bin(serializedISA->c_str(), serializedISA->size() + 1);
595 return SmallVector<char, 0>(bin.begin(), bin.end());
598 // Compile to binary.
599 #if MLIR_ENABLE_NVPTXCOMPILER
600 return compileToBinaryNVPTX(*serializedISA);
601 #else
602 return compileToBinary(*serializedISA);
603 #endif // MLIR_ENABLE_NVPTXCOMPILER
606 std::optional<SmallVector<char, 0>>
607 NVVMTargetAttrImpl::serializeToObject(Attribute attribute, Operation *module,
608 const gpu::TargetOptions &options) const {
609 assert(module && "The module must be non null.");
610 if (!module)
611 return std::nullopt;
612 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
613 module->emitError("Module must be a GPU module.");
614 return std::nullopt;
616 NVPTXSerializer serializer(*module, cast<NVVMTargetAttr>(attribute), options);
617 serializer.init();
618 return serializer.run();
621 Attribute
622 NVVMTargetAttrImpl::createObject(Attribute attribute, Operation *module,
623 const SmallVector<char, 0> &object,
624 const gpu::TargetOptions &options) const {
625 auto target = cast<NVVMTargetAttr>(attribute);
626 gpu::CompilationTarget format = options.getCompilationTarget();
627 DictionaryAttr objectProps;
628 Builder builder(attribute.getContext());
629 if (format == gpu::CompilationTarget::Assembly)
630 objectProps = builder.getDictionaryAttr(
631 {builder.getNamedAttr("O", builder.getI32IntegerAttr(target.getO()))});
632 return builder.getAttr<gpu::ObjectAttr>(
633 attribute, format,
634 builder.getStringAttr(StringRef(object.data(), object.size())),
635 objectProps, /*kernels=*/nullptr);