1 //===- Target.cpp - MLIR LLVM NVVM target compilation -----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
// This file defines NVVM target related functions including registration
10 // calls for the `#nvvm.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/NVVM/Target.h"
16 #include "mlir/Config/mlir-config.h"
17 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
18 #include "mlir/Dialect/LLVMIR/NVVMDialect.h"
19 #include "mlir/Target/LLVM/NVVM/Utils.h"
20 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Dialect/NVVM/NVVMToLLVMIRTranslation.h"
23 #include "mlir/Target/LLVMIR/Export.h"
25 #include "llvm/Config/llvm-config.h"
26 #include "llvm/Support/FileSystem.h"
27 #include "llvm/Support/FileUtilities.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/Path.h"
31 #include "llvm/Support/Process.h"
32 #include "llvm/Support/Program.h"
33 #include "llvm/Support/TargetSelect.h"
38 using namespace mlir::NVVM
;
// Fallback CUDA toolkit path baked in at configure time; empty when the build
// system did not provide one. The conditional block was missing its `#endif`.
#ifndef __DEFAULT_CUDATOOLKIT_PATH__
#define __DEFAULT_CUDATOOLKIT_PATH__ ""
#endif
45 // Implementation of the `TargetAttrInterface` model.
46 class NVVMTargetAttrImpl
47 : public gpu::TargetAttrInterface::FallbackModel
<NVVMTargetAttrImpl
> {
49 std::optional
<SmallVector
<char, 0>>
50 serializeToObject(Attribute attribute
, Operation
*module
,
51 const gpu::TargetOptions
&options
) const;
53 Attribute
createObject(Attribute attribute
,
54 const SmallVector
<char, 0> &object
,
55 const gpu::TargetOptions
&options
) const;
59 // Register the NVVM dialect, the NVVM translation & the target interface.
60 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
61 DialectRegistry
®istry
) {
62 registry
.addExtension(+[](MLIRContext
*ctx
, NVVM::NVVMDialect
*dialect
) {
63 NVVMTargetAttr::attachInterface
<NVVMTargetAttrImpl
>(*ctx
);
67 void mlir::NVVM::registerNVVMTargetInterfaceExternalModels(
68 MLIRContext
&context
) {
69 DialectRegistry registry
;
70 registerNVVMTargetInterfaceExternalModels(registry
);
71 context
.appendDialectRegistry(registry
);
74 // Search for the CUDA toolkit path.
75 StringRef
mlir::NVVM::getCUDAToolkitPath() {
76 if (const char *var
= std::getenv("CUDA_ROOT"))
78 if (const char *var
= std::getenv("CUDA_HOME"))
80 if (const char *var
= std::getenv("CUDA_PATH"))
82 return __DEFAULT_CUDATOOLKIT_PATH__
;
85 SerializeGPUModuleBase::SerializeGPUModuleBase(
86 Operation
&module
, NVVMTargetAttr target
,
87 const gpu::TargetOptions
&targetOptions
)
88 : ModuleToObject(module
, target
.getTriple(), target
.getChip(),
89 target
.getFeatures(), target
.getO()),
90 target(target
), toolkitPath(targetOptions
.getToolkitPath()),
91 fileList(targetOptions
.getLinkFiles()) {
93 // If `targetOptions` have an empty toolkitPath use `getCUDAToolkitPath`
94 if (toolkitPath
.empty())
95 toolkitPath
= getCUDAToolkitPath();
97 // Append the files in the target attribute.
98 if (ArrayAttr files
= target
.getLink())
99 for (Attribute attr
: files
.getValue())
100 if (auto file
= dyn_cast
<StringAttr
>(attr
))
101 fileList
.push_back(file
.str());
103 // Append libdevice to the files to be loaded.
104 (void)appendStandardLibs();
107 void SerializeGPUModuleBase::init() {
108 static llvm::once_flag initializeBackendOnce
;
109 llvm::call_once(initializeBackendOnce
, []() {
110 // If the `NVPTX` LLVM target was built, initialize it.
111 #if LLVM_HAS_NVPTX_TARGET
112 LLVMInitializeNVPTXTarget();
113 LLVMInitializeNVPTXTargetInfo();
114 LLVMInitializeNVPTXTargetMC();
115 LLVMInitializeNVPTXAsmPrinter();
120 NVVMTargetAttr
SerializeGPUModuleBase::getTarget() const { return target
; }
122 StringRef
SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath
; }
124 ArrayRef
<std::string
> SerializeGPUModuleBase::getFileList() const {
128 // Try to append `libdevice` from a CUDA toolkit installation.
129 LogicalResult
SerializeGPUModuleBase::appendStandardLibs() {
130 StringRef pathRef
= getToolkitPath();
131 if (!pathRef
.empty()) {
132 SmallVector
<char, 256> path
;
133 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
134 pathRef
= StringRef(path
.data(), path
.size());
135 if (!llvm::sys::fs::is_directory(pathRef
)) {
136 getOperation().emitError() << "CUDA path: " << pathRef
137 << " does not exist or is not a directory.\n";
140 llvm::sys::path::append(path
, "nvvm", "libdevice", "libdevice.10.bc");
141 pathRef
= StringRef(path
.data(), path
.size());
142 if (!llvm::sys::fs::is_regular_file(pathRef
)) {
143 getOperation().emitError() << "LibDevice path: " << pathRef
144 << " does not exist or is not a file.\n";
147 fileList
.push_back(pathRef
.str());
152 std::optional
<SmallVector
<std::unique_ptr
<llvm::Module
>>>
153 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module
&module
) {
154 SmallVector
<std::unique_ptr
<llvm::Module
>> bcFiles
;
155 if (failed(loadBitcodeFilesFromList(module
.getContext(), fileList
, bcFiles
,
158 return std::move(bcFiles
);
161 #if MLIR_ENABLE_CUDA_CONVERSIONS
163 class NVPTXSerializer
: public SerializeGPUModuleBase
{
165 NVPTXSerializer(Operation
&module
, NVVMTargetAttr target
,
166 const gpu::TargetOptions
&targetOptions
);
168 gpu::GPUModuleOp
getOperation();
170 // Compile PTX to cubin using `ptxas`.
171 std::optional
<SmallVector
<char, 0>>
172 compileToBinary(const std::string
&ptxCode
);
174 // Compile PTX to cubin using the `nvptxcompiler` library.
175 std::optional
<SmallVector
<char, 0>>
176 compileToBinaryNVPTX(const std::string
&ptxCode
);
178 std::optional
<SmallVector
<char, 0>>
179 moduleToObject(llvm::Module
&llvmModule
) override
;
182 using TmpFile
= std::pair
<llvm::SmallString
<128>, llvm::FileRemover
>;
184 // Create a temp file.
185 std::optional
<TmpFile
> createTemp(StringRef name
, StringRef suffix
);
187 // Find the `tool` path, where `tool` is the name of the binary to search,
188 // i.e. `ptxas` or `fatbinary`. The search order is:
189 // 1. The toolkit path in `targetOptions`.
190 // 2. In the system PATH.
191 // 3. The path from `getCUDAToolkitPath()`.
192 std::optional
<std::string
> findTool(StringRef tool
);
195 gpu::TargetOptions targetOptions
;
199 NVPTXSerializer::NVPTXSerializer(Operation
&module
, NVVMTargetAttr target
,
200 const gpu::TargetOptions
&targetOptions
)
201 : SerializeGPUModuleBase(module
, target
, targetOptions
),
202 targetOptions(targetOptions
) {}
204 std::optional
<NVPTXSerializer::TmpFile
>
205 NVPTXSerializer::createTemp(StringRef name
, StringRef suffix
) {
206 llvm::SmallString
<128> filename
;
208 llvm::sys::fs::createTemporaryFile(name
, suffix
, filename
);
210 getOperation().emitError() << "Couldn't create the temp file: `" << filename
211 << "`, error message: " << ec
.message();
214 return TmpFile(filename
, llvm::FileRemover(filename
.c_str()));
217 gpu::GPUModuleOp
NVPTXSerializer::getOperation() {
218 return dyn_cast
<gpu::GPUModuleOp
>(&SerializeGPUModuleBase::getOperation());
221 std::optional
<std::string
> NVPTXSerializer::findTool(StringRef tool
) {
222 // Find the `tool` path.
223 // 1. Check the toolkit path given in the command line.
224 StringRef pathRef
= targetOptions
.getToolkitPath();
225 SmallVector
<char, 256> path
;
226 if (!pathRef
.empty()) {
227 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
228 llvm::sys::path::append(path
, "bin", tool
);
229 if (llvm::sys::fs::can_execute(path
))
230 return StringRef(path
.data(), path
.size()).str();
234 if (std::optional
<std::string
> toolPath
=
235 llvm::sys::Process::FindInEnvPath("PATH", tool
))
238 // 3. Check `getCUDAToolkitPath()`.
239 pathRef
= getCUDAToolkitPath();
241 if (!pathRef
.empty()) {
242 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
243 llvm::sys::path::append(path
, "bin", tool
);
244 if (llvm::sys::fs::can_execute(path
))
245 return StringRef(path
.data(), path
.size()).str();
247 getOperation().emitError()
248 << "Couldn't find the `" << tool
249 << "` binary. Please specify the toolkit "
250 "path, add the compiler to $PATH, or set one of the environment "
251 "variables in `NVVM::getCUDAToolkitPath()`.";
255 // TODO: clean this method & have a generic tool driver or never emit binaries
256 // with this mechanism and let another stage take care of it.
257 std::optional
<SmallVector
<char, 0>>
258 NVPTXSerializer::compileToBinary(const std::string
&ptxCode
) {
259 // Determine if the serializer should create a fatbinary with the PTX embeded
260 // or a simple CUBIN binary.
261 const bool createFatbin
=
262 targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Fatbin
;
264 // Find the `ptxas` & `fatbinary` tools.
265 std::optional
<std::string
> ptxasCompiler
= findTool("ptxas");
268 std::optional
<std::string
> fatbinaryTool
= findTool("fatbinary");
269 if (createFatbin
&& !fatbinaryTool
)
271 Location loc
= getOperation().getLoc();
273 // Base name for all temp files: mlir-<module name>-<target triple>-<chip>.
274 std::string basename
=
275 llvm::formatv("mlir-{0}-{1}-{2}", getOperation().getNameAttr().getValue(),
276 getTarget().getTriple(), getTarget().getChip());
278 // Create temp files:
279 std::optional
<TmpFile
> ptxFile
= createTemp(basename
, "ptx");
282 std::optional
<TmpFile
> logFile
= createTemp(basename
, "log");
285 std::optional
<TmpFile
> binaryFile
= createTemp(basename
, "bin");
290 Twine cubinFilename
= ptxFile
->first
+ ".cubin";
291 cubinFile
= TmpFile(cubinFilename
.str(), llvm::FileRemover(cubinFilename
));
293 cubinFile
.first
= binaryFile
->first
;
297 // Dump the PTX to a temp file.
299 llvm::raw_fd_ostream
ptxStream(ptxFile
->first
, ec
);
301 emitError(loc
) << "Couldn't open the file: `" << ptxFile
->first
302 << "`, error message: " << ec
.message();
305 ptxStream
<< ptxCode
;
306 if (ptxStream
.has_error()) {
307 emitError(loc
) << "An error occurred while writing the PTX to: `"
308 << ptxFile
->first
<< "`.";
314 // Command redirects.
315 std::optional
<StringRef
> redirects
[] = {
321 // Get any extra args passed in `targetOptions`.
322 std::pair
<llvm::BumpPtrAllocator
, SmallVector
<const char *>> cmdOpts
=
323 targetOptions
.tokenizeCmdOptions();
325 // Create ptxas args.
326 std::string optLevel
= std::to_string(this->optLevel
);
327 SmallVector
<StringRef
, 12> ptxasArgs(
328 {StringRef("ptxas"), StringRef("-arch"), getTarget().getChip(),
329 StringRef(ptxFile
->first
), StringRef("-o"), StringRef(cubinFile
.first
),
330 "--opt-level", optLevel
});
332 bool useFatbin32
= false;
333 for (const auto *cArg
: cmdOpts
.second
) {
334 // All `cmdOpts` are for `ptxas` except `-32` which passes `-32` to
335 // `fatbinary`, indicating a 32-bit target. By default a 64-bit target is
337 if (StringRef
arg(cArg
); arg
!= "-32")
338 ptxasArgs
.push_back(arg
);
343 // Create the `fatbinary` args.
344 StringRef chip
= getTarget().getChip();
345 // Remove the arch prefix to obtain the compute capability.
346 chip
.consume_front("sm_"), chip
.consume_front("compute_");
347 // Embed the cubin object.
348 std::string cubinArg
=
349 llvm::formatv("--image3=kind=elf,sm={0},file={1}", chip
, cubinFile
.first
)
351 // Embed the PTX file so the driver can JIT if needed.
353 llvm::formatv("--image3=kind=ptx,sm={0},file={1}", chip
, ptxFile
->first
)
355 SmallVector
<StringRef
, 6> fatbinArgs({StringRef("fatbinary"),
356 useFatbin32
? "-32" : "-64", cubinArg
,
357 ptxArg
, "--create", binaryFile
->first
});
359 // Dump tool invocation commands.
360 #define DEBUG_TYPE "serialize-to-binary"
362 llvm::dbgs() << "Tool invocation for module: "
363 << getOperation().getNameAttr() << "\n";
364 llvm::interleave(ptxasArgs
, llvm::dbgs(), " ");
365 llvm::dbgs() << "\n";
367 llvm::interleave(fatbinArgs
, llvm::dbgs(), " ");
368 llvm::dbgs() << "\n";
373 // Helper function for printing tool error logs.
376 [&](StringRef toolName
) -> std::optional
<SmallVector
<char, 0>> {
377 if (message
.empty()) {
378 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> toolStderr
=
379 llvm::MemoryBuffer::getFile(logFile
->first
);
381 emitError(loc
) << toolName
<< " invocation failed. Log:\n"
382 << toolStderr
->get()->getBuffer();
384 emitError(loc
) << toolName
<< " invocation failed.";
387 emitError(loc
) << toolName
388 << " invocation failed, error message: " << message
;
393 if (llvm::sys::ExecuteAndWait(ptxasCompiler
.value(), ptxasArgs
,
394 /*Env=*/std::nullopt
,
395 /*Redirects=*/redirects
,
398 /*ErrMsg=*/&message
))
399 return emitLogError("`ptxas`");
403 if (createFatbin
&& llvm::sys::ExecuteAndWait(*fatbinaryTool
, fatbinArgs
,
404 /*Env=*/std::nullopt
,
405 /*Redirects=*/redirects
,
408 /*ErrMsg=*/&message
))
409 return emitLogError("`fatbinary`");
411 // Dump the output of the tools, helpful if the verbose flag was passed.
412 #define DEBUG_TYPE "serialize-to-binary"
414 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> logBuffer
=
415 llvm::MemoryBuffer::getFile(logFile
->first
);
416 if (logBuffer
&& !(*logBuffer
)->getBuffer().empty()) {
417 llvm::dbgs() << "Output:\n" << (*logBuffer
)->getBuffer() << "\n";
418 llvm::dbgs().flush();
424 llvm::ErrorOr
<std::unique_ptr
<llvm::MemoryBuffer
>> binaryBuffer
=
425 llvm::MemoryBuffer::getFile(binaryFile
->first
);
427 emitError(loc
) << "Couldn't open the file: `" << binaryFile
->first
428 << "`, error message: " << binaryBuffer
.getError().message();
431 StringRef fatbin
= (*binaryBuffer
)->getBuffer();
432 return SmallVector
<char, 0>(fatbin
.begin(), fatbin
.end());
435 #if MLIR_ENABLE_NVPTXCOMPILER
436 #include "nvPTXCompiler.h"
// Evaluates `expr` (an nvPTXCompiler API call); on a non-success status emits
// a diagnostic at `loc` (expected in scope at each use site) and returns
// `std::nullopt` from the enclosing function.
#define RETURN_ON_NVPTXCOMPILER_ERROR(expr)                                    \
  do {                                                                         \
    if (auto status = (expr)) {                                                \
      emitError(loc) << llvm::Twine(#expr).concat(" failed with error code ")  \
                     << status;                                                \
      return std::nullopt;                                                     \
    }                                                                          \
  } while (false)
447 std::optional
<SmallVector
<char, 0>>
448 NVPTXSerializer::compileToBinaryNVPTX(const std::string
&ptxCode
) {
449 Location loc
= getOperation().getLoc();
450 nvPTXCompilerHandle compiler
= nullptr;
451 nvPTXCompileResult status
;
454 // Create the options.
455 std::string optLevel
= std::to_string(this->optLevel
);
456 std::pair
<llvm::BumpPtrAllocator
, SmallVector
<const char *>> cmdOpts
=
457 targetOptions
.tokenizeCmdOptions();
458 cmdOpts
.second
.append(
459 {"-arch", getTarget().getChip().data(), "--opt-level", optLevel
.c_str()});
461 // Create the compiler handle.
462 RETURN_ON_NVPTXCOMPILER_ERROR(
463 nvPTXCompilerCreate(&compiler
, ptxCode
.size(), ptxCode
.c_str()));
465 // Try to compile the binary.
466 status
= nvPTXCompilerCompile(compiler
, cmdOpts
.second
.size(),
467 cmdOpts
.second
.data());
469 // Check if compilation failed.
470 if (status
!= NVPTXCOMPILE_SUCCESS
) {
471 RETURN_ON_NVPTXCOMPILER_ERROR(
472 nvPTXCompilerGetErrorLogSize(compiler
, &logSize
));
474 SmallVector
<char> log(logSize
+ 1, 0);
475 RETURN_ON_NVPTXCOMPILER_ERROR(
476 nvPTXCompilerGetErrorLog(compiler
, log
.data()));
477 emitError(loc
) << "NVPTX compiler invocation failed, error log: "
480 emitError(loc
) << "NVPTX compiler invocation failed with error code: "
485 // Retrieve the binary.
487 RETURN_ON_NVPTXCOMPILER_ERROR(
488 nvPTXCompilerGetCompiledProgramSize(compiler
, &elfSize
));
489 SmallVector
<char, 0> binary(elfSize
, 0);
490 RETURN_ON_NVPTXCOMPILER_ERROR(
491 nvPTXCompilerGetCompiledProgram(compiler
, (void *)binary
.data()));
493 // Dump the log of the compiler, helpful if the verbose flag was passed.
494 #define DEBUG_TYPE "serialize-to-binary"
496 RETURN_ON_NVPTXCOMPILER_ERROR(
497 nvPTXCompilerGetInfoLogSize(compiler
, &logSize
));
499 SmallVector
<char> log(logSize
+ 1, 0);
500 RETURN_ON_NVPTXCOMPILER_ERROR(
501 nvPTXCompilerGetInfoLog(compiler
, log
.data()));
502 llvm::dbgs() << "NVPTX compiler invocation for module: "
503 << getOperation().getNameAttr() << "\n";
504 llvm::dbgs() << "Arguments: ";
505 llvm::interleave(cmdOpts
.second
, llvm::dbgs(), " ");
506 llvm::dbgs() << "\nOutput\n" << log
.data() << "\n";
507 llvm::dbgs().flush();
511 RETURN_ON_NVPTXCOMPILER_ERROR(nvPTXCompilerDestroy(&compiler
));
514 #endif // MLIR_ENABLE_NVPTXCOMPILER
516 std::optional
<SmallVector
<char, 0>>
517 NVPTXSerializer::moduleToObject(llvm::Module
&llvmModule
) {
518 // Return LLVM IR if the compilation target is offload.
519 #define DEBUG_TYPE "serialize-to-llvm"
521 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
523 llvm::dbgs() << llvmModule
<< "\n";
524 llvm::dbgs().flush();
527 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Offload
)
528 return SerializeGPUModuleBase::moduleToObject(llvmModule
);
531 std::optional
<llvm::TargetMachine
*> targetMachine
=
532 getOrCreateTargetMachine();
533 if (!targetMachine
) {
534 getOperation().emitError() << "Target Machine unavailable for triple "
535 << triple
<< ", can't optimize with LLVM\n";
538 std::optional
<std::string
> serializedISA
=
539 translateToISA(llvmModule
, **targetMachine
);
540 if (!serializedISA
) {
541 getOperation().emitError() << "Failed translating the module to ISA.";
544 #define DEBUG_TYPE "serialize-to-isa"
546 llvm::dbgs() << "PTX for module: " << getOperation().getNameAttr() << "\n";
547 llvm::dbgs() << *serializedISA
<< "\n";
548 llvm::dbgs().flush();
552 // Return PTX if the compilation target is assembly.
553 if (targetOptions
.getCompilationTarget() ==
554 gpu::CompilationTarget::Assembly
) {
555 // Make sure to include the null terminator.
556 StringRef
bin(serializedISA
->c_str(), serializedISA
->size() + 1);
557 return SmallVector
<char, 0>(bin
.begin(), bin
.end());
560 // Compile to binary.
561 #if MLIR_ENABLE_NVPTXCOMPILER
562 return compileToBinaryNVPTX(*serializedISA
);
564 return compileToBinary(*serializedISA
);
565 #endif // MLIR_ENABLE_NVPTXCOMPILER
567 #endif // MLIR_ENABLE_CUDA_CONVERSIONS
569 std::optional
<SmallVector
<char, 0>>
570 NVVMTargetAttrImpl::serializeToObject(Attribute attribute
, Operation
*module
,
571 const gpu::TargetOptions
&options
) const {
572 assert(module
&& "The module must be non null.");
575 if (!mlir::isa
<gpu::GPUModuleOp
>(module
)) {
576 module
->emitError("Module must be a GPU module.");
579 #if MLIR_ENABLE_CUDA_CONVERSIONS
580 NVPTXSerializer
serializer(*module
, cast
<NVVMTargetAttr
>(attribute
), options
);
582 return serializer
.run();
585 "The `NVPTX` target was not built. Please enable it when building LLVM.");
587 #endif // MLIR_ENABLE_CUDA_CONVERSIONS
591 NVVMTargetAttrImpl::createObject(Attribute attribute
,
592 const SmallVector
<char, 0> &object
,
593 const gpu::TargetOptions
&options
) const {
594 auto target
= cast
<NVVMTargetAttr
>(attribute
);
595 gpu::CompilationTarget format
= options
.getCompilationTarget();
596 DictionaryAttr objectProps
;
597 Builder
builder(attribute
.getContext());
598 if (format
== gpu::CompilationTarget::Assembly
)
599 objectProps
= builder
.getDictionaryAttr(
600 {builder
.getNamedAttr("O", builder
.getI32IntegerAttr(target
.getO()))});
601 return builder
.getAttr
<gpu::ObjectAttr
>(
603 builder
.getStringAttr(StringRef(object
.data(), object
.size())),