1 //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines ROCDL target related functions including registration
10 // calls for the `#rocdl.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/ROCDL/Target.h"
16 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18 #include "mlir/Support/FileUtilities.h"
19 #include "mlir/Target/LLVM/ROCDL/Utils.h"
20 #include "mlir/Target/LLVMIR/Export.h"
22 #include "llvm/IR/Constants.h"
23 #include "llvm/MC/MCAsmBackend.h"
24 #include "llvm/MC/MCAsmInfo.h"
25 #include "llvm/MC/MCCodeEmitter.h"
26 #include "llvm/MC/MCContext.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCObjectFileInfo.h"
29 #include "llvm/MC/MCObjectWriter.h"
30 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
31 #include "llvm/MC/MCRegisterInfo.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/TargetRegistry.h"
35 #include "llvm/Support/FileSystem.h"
36 #include "llvm/Support/FileUtilities.h"
37 #include "llvm/Support/Path.h"
38 #include "llvm/Support/Program.h"
39 #include "llvm/Support/SourceMgr.h"
40 #include "llvm/Support/TargetSelect.h"
41 #include "llvm/TargetParser/TargetParser.h"
// Bring the `mlir::ROCDL` namespace into scope for the definitions below.
47 using namespace mlir::ROCDL
;
// Fallback for the ROCm installation path: when the build has not defined
// `__DEFAULT_ROCM_PATH__`, it is defined here as the empty string.
// NOTE(review): the matching `#endif` is not visible in this listing (the
// embedded line numbers skip from 50 to 54) - confirm against the original.
49 #ifndef __DEFAULT_ROCM_PATH__
50 #define __DEFAULT_ROCM_PATH__ ""
54 // Implementation of the `TargetAttrInterface` model.
// `ROCDLTargetAttrImpl` derives from
// `gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl>` and declares
// two const member functions, `serializeToObject` and `createObject`.
// NOTE(review): the access specifier and the end of the class are not visible
// in this listing (the embedded line numbers skip 57 and 65-67) - confirm
// against the original file.
55 class ROCDLTargetAttrImpl
56 : public gpu::TargetAttrInterface::FallbackModel
<ROCDLTargetAttrImpl
> {
// Takes the target attribute, the module operation and the target options;
// returns an optional byte buffer.
58 std::optional
<SmallVector
<char, 0>>
59 serializeToObject(Attribute attribute
, Operation
*module
,
60 const gpu::TargetOptions
&options
) const;
// Takes the target attribute, the module operation, a byte buffer and the
// target options; returns an `Attribute`.
62 Attribute
createObject(Attribute attribute
, Operation
*module
,
63 const SmallVector
<char, 0> &object
,
64 const gpu::TargetOptions
&options
) const;
68 // Register the ROCDL dialect, the ROCDL translation and the target interface.
// Adds an extension to `registry` whose callback receives the `MLIRContext`
// and the `ROCDL::ROCDLDialect` and attaches `ROCDLTargetAttrImpl` to
// `ROCDLTargetAttr` as an interface model.
// The parameter is spelled `&registry` here: the listing carried a mis-encoded
// `®istry` (an HTML-entity corruption of `&reg`), which left the `registry`
// used in the body undeclared.
// NOTE(review): the closing of the lambda and of the function is not visible
// in this listing (the embedded line numbers skip 73-75).
69 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
70 DialectRegistry
&registry
) {
71 registry
.addExtension(+[](MLIRContext
*ctx
, ROCDL::ROCDLDialect
*dialect
) {
72 ROCDLTargetAttr::attachInterface
<ROCDLTargetAttrImpl
>(*ctx
);
// Overload taking an `MLIRContext`: builds a local `DialectRegistry`, fills it
// through the `DialectRegistry` overload of this function, and appends that
// registry to `context`.
76 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
77 MLIRContext
&context
) {
78 DialectRegistry registry
;
79 registerROCDLTargetInterfaceExternalModels(registry
);
80 context
.appendDialectRegistry(registry
);
83 // Search for the ROCM path.
// Queries the environment variables `ROCM_PATH`, `ROCM_ROOT` and `ROCM_HOME`,
// in that order, with `std::getenv`; the last visible statement returns the
// compile-time fallback `__DEFAULT_ROCM_PATH__`.
// NOTE(review): the statement guarded by each `if` is not visible in this
// listing (the embedded line numbers skip 86, 88 and 90); presumably each one
// returns `var` - confirm against the original file.
84 StringRef
mlir::ROCDL::getROCMPath() {
85 if (const char *var
= std::getenv("ROCM_PATH"))
87 if (const char *var
= std::getenv("ROCM_ROOT"))
89 if (const char *var
= std::getenv("ROCM_HOME"))
91 return __DEFAULT_ROCM_PATH__
;
// Constructor. Forwards the module together with the target's triple, chip,
// features and `getO()` value to the `ModuleToObject` base, and stores the
// target attribute plus the toolkit path and link libraries taken from
// `targetOptions`.
94 SerializeGPUModuleBase::SerializeGPUModuleBase(
95 Operation
&module
, ROCDLTargetAttr target
,
96 const gpu::TargetOptions
&targetOptions
)
97 : ModuleToObject(module
, target
.getTriple(), target
.getChip(),
98 target
.getFeatures(), target
.getO()),
99 target(target
), toolkitPath(targetOptions
.getToolkitPath()),
100 librariesToLink(targetOptions
.getLibrariesToLink()) {
102 // If `targetOptions` has an empty toolkitPath use `getROCMPath`
103 if (toolkitPath
.empty())
104 toolkitPath
= getROCMPath();
106 // Append the files in the target attribute.
// Only done when `target.getLink()` is non-null; its entries go after the
// libraries that came from `targetOptions`.
107 if (target
.getLink())
108 librariesToLink
.append(target
.getLink().begin(), target
.getLink().end());
// Initializes the AMDGPU LLVM backend components. The work is wrapped in
// `llvm::call_once` on a function-local static `llvm::once_flag`, so the
// lambda body runs once no matter how often `init()` is called.
// NOTE(review): the `#endif` and the closing of the lambda/function are not
// visible in this listing (the embedded line numbers skip 121-124).
111 void SerializeGPUModuleBase::init() {
112 static llvm::once_flag initializeBackendOnce
;
113 llvm::call_once(initializeBackendOnce
, []() {
114 // If the `AMDGPU` LLVM target was built, initialize it.
115 #if MLIR_ENABLE_ROCM_CONVERSIONS
116 LLVMInitializeAMDGPUTarget();
117 LLVMInitializeAMDGPUTargetInfo();
118 LLVMInitializeAMDGPUTargetMC();
119 LLVMInitializeAMDGPUAsmParser();
120 LLVMInitializeAMDGPUAsmPrinter();
// Returns the `ROCDLTargetAttr` stored in the `target` member.
125 ROCDLTargetAttr
SerializeGPUModuleBase::getTarget() const { return target
; }
// Returns the `toolkitPath` member as a `StringRef`.
127 StringRef
SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath
; }
// Returns a view (`ArrayRef<Attribute>`) over the `librariesToLink` member.
129 ArrayRef
<Attribute
> SerializeGPUModuleBase::getLibrariesToLink() const {
130 return librariesToLink
;
// Appends the ROCm device bitcode libraries selected by `libs` to
// `librariesToLink`. The libraries are looked up in the directory
// `<toolkit path>/amdgcn/bitcode`.
// NOTE(review): several lines are not visible in this listing (the embedded
// line numbers skip 135, 148-150, 159-160, 163-165 and 172-175), among them
// the `return` statements; the return values described in the comments below
// are therefore inferred from the surrounding comments - confirm against the
// original file.
133 LogicalResult
SerializeGPUModuleBase::appendStandardLibs(AMDGCNLibraries libs
) {
// Guard for the case where no device library was requested (guarded statement
// not visible).
134 if (libs
== AMDGCNLibraries::None
)
136 StringRef pathRef
= getToolkitPath();
138 // Get the path for the device libraries
139 SmallString
<256> path
;
140 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
141 llvm::sys::path::append(path
, "amdgcn", "bitcode");
// Re-point `pathRef` at the extended buffer so it now names the bitcode
// directory rather than the toolkit root.
142 pathRef
= StringRef(path
.data(), path
.size());
144 // Fail if the path is invalid.
145 if (!llvm::sys::fs::is_directory(pathRef
)) {
146 getOperation().emitError() << "ROCm amdgcn bitcode path: " << pathRef
147 << " does not exist or is not a directory";
151 // Helper function for adding a library.
// Appends `lib` to `path`, emits a remark when the result is not a regular
// file, otherwise records the full path as a `StringAttr` in
// `librariesToLink` and truncates `path` back to the directory.
// Presumably returns true on failure, given how the result is used in the
// condition further down - TODO confirm.
152 auto addLib
= [&](const Twine
&lib
) -> bool {
// Remember the directory length so the file name can be stripped again.
153 auto baseSize
= path
.size();
154 llvm::sys::path::append(path
, lib
);
155 StringRef
pathRef(path
.data(), path
.size());
156 if (!llvm::sys::fs::is_regular_file(pathRef
)) {
157 getOperation().emitRemark() << "bitcode library path: " << pathRef
158 << " does not exist or is not a file";
161 librariesToLink
.push_back(StringAttr::get(target
.getContext(), pathRef
));
162 path
.truncate(baseSize
);
166 // Add ROCm device libraries. Fail if any of the libraries is not found, i.e.
167 // if any of the `addLib` failed.
// `addLib` is only invoked for libraries whose bit is set in `libs`; because
// of `||` short-circuiting, evaluation stops at the first operand that is
// true.
168 if ((any(libs
& AMDGCNLibraries::Ocml
) && addLib("ocml.bc")) ||
169 (any(libs
& AMDGCNLibraries::Ockl
) && addLib("ockl.bc")) ||
170 (any(libs
& AMDGCNLibraries::Hip
) && addLib("hip.bc")) ||
171 (any(libs
& AMDGCNLibraries::OpenCL
) && addLib("opencl.bc")))
// Loads the bitcode libraries to link into `module` and returns them as a
// vector of `llvm::Module`s wrapped in `std::optional`.
// NOTE(review): the failure branches and the trailing arguments of
// `loadBitcodeFilesFromList` are not visible in this listing (the embedded
// line numbers skip 182 and 185-186) - confirm against the original file.
176 std::optional
<SmallVector
<std::unique_ptr
<llvm::Module
>>>
177 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module
&module
) {
178 // Return if there are no libs to load.
179 if (deviceLibs
== AMDGCNLibraries::None
&& librariesToLink
.empty())
180 return SmallVector
<std::unique_ptr
<llvm::Module
>>();
// Resolve the selected device libraries into `librariesToLink` first.
181 if (failed(appendStandardLibs(deviceLibs
)))
183 SmallVector
<std::unique_ptr
<llvm::Module
>> bcFiles
;
184 if (failed(loadBitcodeFilesFromList(module
.getContext(), librariesToLink
,
187 return std::move(bcFiles
);
// Cleans up a bitcode module: erases the `opencl.ocl.version` and
// `llvm.ident` named metadata when present, then calls
// `setDataLayoutAndTriple` on the module.
// NOTE(review): the `return` statement is not visible in this listing (the
// embedded line numbers skip 200-202).
190 LogicalResult
SerializeGPUModuleBase::handleBitcodeFile(llvm::Module
&module
) {
191 // Some ROCM builds don't strip this like they should
192 if (auto *openclVersion
= module
.getNamedMetadata("opencl.ocl.version"))
193 module
.eraseNamedMetadata(openclVersion
);
194 // Stop spamming us with clang version numbers
195 if (auto *ident
= module
.getNamedMetadata("llvm.ident"))
196 module
.eraseNamedMetadata(ident
);
197 // Override the libModules datalayout and target triple with the compiler's
198 // data layout should there be a discrepancy.
199 setDataLayoutAndTriple(module
);
// Pre-link hook: works out which device libraries `module` needs, based on
// the functions it references, and then adds the control variables through
// `addControlVariables`.
// NOTE(review): the closing braces of the scan and the last argument of the
// `addControlVariables` call are not visible in this listing (the embedded
// line numbers skip 219-221 and 225-227).
203 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module
&module
) {
204 // If all libraries are not set, traverse the module to determine which
205 // libraries are required.
206 if (deviceLibs
!= AMDGCNLibraries::All
) {
207 for (llvm::Function
&f
: module
.functions()) {
// Only named functions with external linkage and no exact definition are
// considered.
208 if (f
.hasExternalLinkage() && f
.hasName() && !f
.hasExactDefinition()) {
209 StringRef funcName
= f
.getName();
// `printf` enables the OpenCL, Ockl and Ocml libraries together.
210 if ("printf" == funcName
)
211 deviceLibs
|= AMDGCNLibraries::OpenCL
| AMDGCNLibraries::Ockl
|
212 AMDGCNLibraries::Ocml
;
// Name prefixes select Ockl and Ocml respectively.
213 if (funcName
.starts_with("__ockl_"))
214 deviceLibs
|= AMDGCNLibraries::Ockl
;
215 if (funcName
.starts_with("__ocml_"))
216 deviceLibs
|= AMDGCNLibraries::Ocml
;
// This exact name enables the Hip library.
217 if (funcName
== "__atomic_work_item_fence")
218 deviceLibs
|= AMDGCNLibraries::Hip
;
// Pass the (possibly extended) library set and the target's flags on.
222 addControlVariables(module
, deviceLibs
, target
.hasWave64(), target
.hasDaz(),
223 target
.hasFiniteOnly(), target
.hasUnsafeMath(),
224 target
.hasFastMath(), target
.hasCorrectSqrt(),
// Adds the `amdhsa_code_object_version` module flag to `module` and, depending
// on `libs`, the `__oclc_*` global control variables.
// NOTE(review): several lines are not visible in this listing (the embedded
// line numbers skip 231, 234, 236, 244, 249-251, 256, 264, 274 and 276-278).
// The declarations of `abiVer`, `bitwidth` and `abi`, the early `return`s and
// the last constructor argument are among them - confirm against the original
// file.
228 void SerializeGPUModuleBase::addControlVariables(
229 llvm::Module
&module
, AMDGCNLibraries libs
, bool wave64
, bool daz
,
230 bool finiteOnly
, bool unsafeMath
, bool fastMath
, bool correctSqrt
,
232 // Helper function for adding control variables.
// Creates a constant integer global named `name` holding `value`, with
// link-once ODR linkage, protected visibility, an alignment of
// `bitwidth / 8` and a local unnamed address.
233 auto addControlVariable
= [&module
](StringRef name
, uint32_t value
,
// Guard for a global of that name already existing in the module (guarded
// statement not visible).
235 if (module
.getNamedGlobal(name
))
237 llvm::IntegerType
*type
=
238 llvm::IntegerType::getIntNTy(module
.getContext(), bitwidth
);
239 llvm::GlobalVariable
*controlVariable
= new llvm::GlobalVariable(
240 module
, type
, /*isConstant=*/true,
241 llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage
,
242 llvm::ConstantInt::get(type
, value
), name
, /*before=*/nullptr,
243 /*threadLocalMode=*/llvm::GlobalValue::ThreadLocalMode::NotThreadLocal
,
245 controlVariable
->setVisibility(
246 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility
);
247 controlVariable
->setAlignment(llvm::MaybeAlign(bitwidth
/ 8));
248 controlVariable
->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local
);
// Parse the ABI version string into `abi`. The result of `getAsInteger` is
// not checked here.
252 abiVer
.getAsInteger(0, abi
);
253 module
.addModuleFlag(llvm::Module::Error
, "amdhsa_code_object_version", abi
);
254 // Return if no device libraries are required.
255 if (libs
== AMDGCNLibraries::None
)
257 // Add ocml related control variables.
// `fastMath` forces finite-only, DAZ and unsafe-math on, and correctly
// rounded sqrt off.
258 if (any(libs
& AMDGCNLibraries::Ocml
)) {
259 addControlVariable("__oclc_finite_only_opt", finiteOnly
|| fastMath
, 8);
260 addControlVariable("__oclc_daz_opt", daz
|| fastMath
, 8);
261 addControlVariable("__oclc_correctly_rounded_sqrt32",
262 correctSqrt
&& !fastMath
, 8);
263 addControlVariable("__oclc_unsafe_math_opt", unsafeMath
|| fastMath
, 8);
265 // Add ocml or ockl related control variables.
266 if (any(libs
& (AMDGCNLibraries::Ocml
| AMDGCNLibraries::Ockl
))) {
267 addControlVariable("__oclc_wavefrontsize64", wave64
, 8);
268 // Get the ISA version.
269 llvm::AMDGPU::IsaVersion isaVersion
= llvm::AMDGPU::getIsaVersion(chip
);
270 // Add the ISA control variable.
// NOTE(review): the value is computed as
// Minor + 100 * Stepping + 1000 * Major; verify that the weights given to
// Minor and Stepping match the encoding the ROCm device libraries expect
// for `__oclc_ISA_version`.
271 addControlVariable("__oclc_ISA_version",
272 isaVersion
.Minor
+ 100 * isaVersion
.Stepping
+
273 1000 * isaVersion
.Major
,
275 addControlVariable("__oclc_ABI_version", abi
, 32);
// Assembles the ISA text `isa` with the LLVM MC layer and returns the bytes
// written to an in-memory buffer (`result`).
// NOTE(review): many lines are not visible in this listing (the embedded line
// numbers skip 282, 284, 287, 289, 292, 294-296, 299, 307, 309, 313, 317,
// 320, 326-327, 332-333, 335-337 and 339). The declaration of `error`, the
// conditions guarding the two `emitError` calls, the trailing arguments of
// some calls and the statement that runs the parser are among them - confirm
// against the original file.
279 std::optional
<SmallVector
<char, 0>>
280 SerializeGPUModuleBase::assembleIsa(StringRef isa
) {
281 auto loc
= getOperation().getLoc();
283 StringRef targetTriple
= this->triple
;
// Output buffer, plus a stream that writes into it.
285 SmallVector
<char, 0> result
;
286 llvm::raw_svector_ostream
os(result
);
// Look the target up from the normalized triple.
288 llvm::Triple
triple(llvm::Triple::normalize(targetTriple
));
290 const llvm::Target
*target
=
291 llvm::TargetRegistry::lookupTarget(triple
.normalize(), error
);
293 emitError(loc
, Twine("failed to lookup target: ") + error
);
// Hand the ISA text to a source manager as a memory buffer.
297 llvm::SourceMgr srcMgr
;
298 srcMgr
.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa
), SMLoc());
// Create the MC register, asm and subtarget descriptions for the target.
300 const llvm::MCTargetOptions mcOptions
;
301 std::unique_ptr
<llvm::MCRegisterInfo
> mri(
302 target
->createMCRegInfo(targetTriple
));
303 std::unique_ptr
<llvm::MCAsmInfo
> mai(
304 target
->createMCAsmInfo(*mri
, targetTriple
, mcOptions
));
305 std::unique_ptr
<llvm::MCSubtargetInfo
> sti(
306 target
->createMCSubtargetInfo(targetTriple
, chip
, features
));
308 llvm::MCContext
ctx(triple
, mai
.get(), mri
.get(), sti
.get(), &srcMgr
,
310 std::unique_ptr
<llvm::MCObjectFileInfo
> mofi(target
->createMCObjectFileInfo(
311 ctx
, /*PIC=*/false, /*LargeCodeModel=*/false));
312 ctx
.setObjectFileInfo(mofi
.get());
// Record the current working directory as the compilation directory when it
// can be obtained (`current_path` yields a non-error result).
314 SmallString
<128> cwd
;
315 if (!llvm::sys::fs::current_path(cwd
))
316 ctx
.setCompilationDir(cwd
);
// Build the object streamer that writes into `os`. The raw `ce` and `mab`
// pointers are wrapped in `std::unique_ptr` as they are passed to
// `createMCObjectStreamer`.
318 std::unique_ptr
<llvm::MCStreamer
> mcStreamer
;
319 std::unique_ptr
<llvm::MCInstrInfo
> mcii(target
->createMCInstrInfo());
321 llvm::MCCodeEmitter
*ce
= target
->createMCCodeEmitter(*mcii
, ctx
);
322 llvm::MCAsmBackend
*mab
= target
->createMCAsmBackend(*sti
, *mri
, mcOptions
);
323 mcStreamer
.reset(target
->createMCObjectStreamer(
324 triple
, ctx
, std::unique_ptr
<llvm::MCAsmBackend
>(mab
),
325 mab
->createObjectWriter(os
), std::unique_ptr
<llvm::MCCodeEmitter
>(ce
),
// Create the generic asm parser and the target-specific asm parser.
328 std::unique_ptr
<llvm::MCAsmParser
> parser(
329 createMCAsmParser(srcMgr
, ctx
, *mcStreamer
, *mai
));
330 std::unique_ptr
<llvm::MCTargetAsmParser
> tap(
331 target
->createMCAsmParser(*sti
, *parser
, *mcii
, mcOptions
));
334 emitError(loc
, "assembler initialization error");
338 parser
->setTargetParser(*tap
);
340 return std::move(result
);
// Turns serialized ISA text into an HSA code object: assembles the ISA, writes
// the object to a temporary file, invokes `ld.lld -shared` on it, and returns
// the bytes of the linker output.
// NOTE(review): many lines are not visible in this listing (the embedded line
// numbers skip 345, 347-348, 350-352, 360-361, 363, 367-368, 375-376, 378,
// 382, 386-388, 390, 392, 395-397 and 399). The failure checks, their
// `return`s, the first argument of `ExecuteAndWait` and the declaration of
// `hsacoFile` are among them - confirm against the original file.
343 std::optional
<SmallVector
<char, 0>>
344 SerializeGPUModuleBase::compileToBinary(const std::string
&serializedISA
) {
346 std::optional
<SmallVector
<char, 0>> isaBinary
= assembleIsa(serializedISA
);
349 getOperation().emitError() << "failed during ISA assembling";
353 // Save the ISA binary to a temp file.
354 int tempIsaBinaryFd
= -1;
355 SmallString
<128> tempIsaBinaryFilename
;
356 if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd
,
357 tempIsaBinaryFilename
)) {
358 getOperation().emitError()
359 << "failed to create a temporary file for dumping the ISA binary";
// The remover is bound to the temp file name for the rest of the function.
362 llvm::FileRemover
cleanupIsaBinary(tempIsaBinaryFilename
);
// Write the assembled bytes through the file descriptor and flush them.
364 llvm::raw_fd_ostream
tempIsaBinaryOs(tempIsaBinaryFd
, true);
365 tempIsaBinaryOs
<< StringRef(isaBinary
->data(), isaBinary
->size());
366 tempIsaBinaryOs
.flush();
369 // Create a temp file for HSA code object.
370 SmallString
<128> tempHsacoFilename
;
371 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
372 tempHsacoFilename
)) {
373 getOperation().emitError()
374 << "failed to create a temporary file for the HSA code object";
377 llvm::FileRemover
cleanupHsaco(tempHsacoFilename
);
// The linker path is built as `<toolkitPath>/llvm/bin/ld.lld`.
379 llvm::SmallString
<128> lldPath(toolkitPath
);
380 llvm::sys::path::append(lldPath
, "llvm", "bin", "ld.lld");
// A non-zero result from the linker run is reported as an error.
381 int lldResult
= llvm::sys::ExecuteAndWait(
383 {"ld.lld", "-shared", tempIsaBinaryFilename
, "-o", tempHsacoFilename
});
384 if (lldResult
!= 0) {
385 getOperation().emitError() << "lld invocation failed";
389 // Load the HSA code object.
391 llvm::MemoryBuffer::getFile(tempHsacoFilename
, /*IsText=*/false);
393 getOperation().emitError()
394 << "failed to read the HSA code object from the temp file";
// Copy the file contents into the returned vector.
398 StringRef buffer
= (*hsacoFile
)->getBuffer();
400 return SmallVector
<char, 0>(buffer
.begin(), buffer
.end());
// Serializes `llvmModule` according to the compilation target in
// `targetOptions`:
//   - `Offload`:  returns the result of `SerializeGPUModuleBase::moduleToObject`;
//   - `Assembly`: returns the ISA text produced by `translateToISA`;
//   - otherwise:  returns the result of `compileToBinary` on that ISA text.
// NOTE(review): the debug-print wrappers, the `#undef`s and the `return`s of
// the failure branches are not visible in this listing (the embedded line
// numbers skip 407, 411-412, 421-423, 429-430, 432, 436-437 and 445-447) -
// confirm against the original file.
403 std::optional
<SmallVector
<char, 0>> SerializeGPUModuleBase::moduleToObjectImpl(
404 const gpu::TargetOptions
&targetOptions
, llvm::Module
&llvmModule
) {
405 // Return LLVM IR if the compilation target is offload.
406 #define DEBUG_TYPE "serialize-to-llvm"
408 llvm::dbgs() << "LLVM IR for module: "
409 << cast
<gpu::GPUModuleOp
>(getOperation()).getNameAttr() << "\n"
410 << llvmModule
<< "\n";
413 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Offload
)
414 return SerializeGPUModuleBase::moduleToObject(llvmModule
);
// A target machine is needed for everything past this point.
416 std::optional
<llvm::TargetMachine
*> targetMachine
=
417 getOrCreateTargetMachine();
418 if (!targetMachine
) {
419 getOperation().emitError() << "target Machine unavailable for triple "
420 << triple
<< ", can't compile with LLVM";
424 // Translate the Module to ISA.
425 std::optional
<std::string
> serializedISA
=
426 translateToISA(llvmModule
, **targetMachine
);
427 if (!serializedISA
) {
428 getOperation().emitError() << "failed translating the module to ISA";
431 #define DEBUG_TYPE "serialize-to-isa"
433 llvm::dbgs() << "ISA for module: "
434 << cast
<gpu::GPUModuleOp
>(getOperation()).getNameAttr() << "\n"
435 << *serializedISA
<< "\n";
438 // Return ISA assembly code if the compilation target is assembly.
439 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Assembly
)
440 return SmallVector
<char, 0>(serializedISA
->begin(), serializedISA
->end());
442 // Compiling to binary requires a valid ROCm path, fail if it's not found.
443 if (getToolkitPath().empty()) {
444 getOperation().emitError() << "invalid ROCm path, please set a valid path";
448 // Compile to binary.
449 return compileToBinary(*serializedISA
);
452 #if MLIR_ENABLE_ROCM_CONVERSIONS
// Serializer derived from `SerializeGPUModuleBase`. It keeps its own copy of
// the `gpu::TargetOptions` and overrides `moduleToObject`.
// NOTE(review): the access specifiers and the end of the class are not visible
// in this listing (the embedded line numbers skip 455, 458, 461-463 and
// 465-467).
454 class AMDGPUSerializer
: public SerializeGPUModuleBase
{
456 AMDGPUSerializer(Operation
&module
, ROCDLTargetAttr target
,
457 const gpu::TargetOptions
&targetOptions
);
459 std::optional
<SmallVector
<char, 0>>
460 moduleToObject(llvm::Module
&llvmModule
) override
;
// Target options, stored by value.
464 gpu::TargetOptions targetOptions
;
// Constructor: forwards all three arguments to `SerializeGPUModuleBase` and
// copies `targetOptions` into the member of the same name.
468 AMDGPUSerializer::AMDGPUSerializer(Operation
&module
, ROCDLTargetAttr target
,
469 const gpu::TargetOptions
&targetOptions
)
470 : SerializeGPUModuleBase(module
, target
, targetOptions
),
471 targetOptions(targetOptions
) {}
// Override that delegates to `moduleToObjectImpl`, passing the stored
// `targetOptions` together with `llvmModule`.
473 std::optional
<SmallVector
<char, 0>>
474 AMDGPUSerializer::moduleToObject(llvm::Module
&llvmModule
) {
475 return moduleToObjectImpl(targetOptions
, llvmModule
);
477 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
// Serializes a GPU module for the ROCDL target. Asserts that `module` is
// non-null and reports an error when it is not a `gpu::GPUModuleOp`. When
// `MLIR_ENABLE_ROCM_CONVERSIONS` is set, an `AMDGPUSerializer` is built for
// the module and the result of its `run()` is returned.
// NOTE(review): several lines are not visible in this listing (the embedded
// line numbers skip 483-484, 487-488, 491-492, 494 and 496-497). The failure
// `return`s, the remaining constructor arguments, the `#else` and the rest of
// the final error message are among them - confirm against the original file.
479 std::optional
<SmallVector
<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
480 Attribute attribute
, Operation
*module
,
481 const gpu::TargetOptions
&options
) const {
482 assert(module
&& "The module must be non null.");
485 if (!mlir::isa
<gpu::GPUModuleOp
>(module
)) {
486 module
->emitError("module must be a GPU module");
489 #if MLIR_ENABLE_ROCM_CONVERSIONS
490 AMDGPUSerializer
serializer(*module
, cast
<ROCDLTargetAttr
>(attribute
),
493 return serializer
.run();
// Error reported on the path where the AMDGPU target is unavailable.
495 module
->emitError("the `AMDGPU` target was not built. Please enable it when "
498 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
// Wraps the serialized bytes in `object` into a `gpu::ObjectAttr` built from
// the target attribute, the compilation format, the bytes as a `StringAttr`,
// an empty properties dictionary and the kernel table.
// NOTE(review): the return type line, the rest of the `fatbin` comment and the
// closing braces are not visible in this listing (the embedded line numbers
// skip 501, 507, 512 and 519) - confirm against the original file.
502 ROCDLTargetAttrImpl::createObject(Attribute attribute
, Operation
*module
,
503 const SmallVector
<char, 0> &object
,
504 const gpu::TargetOptions
&options
) const {
505 gpu::CompilationTarget format
= options
.getCompilationTarget();
506 // If format is `fatbin` transform it to binary as `fatbin` is not yet
508 gpu::KernelTableAttr kernels
;
// Formats that compare greater than `Binary` are downgraded to `Binary`, and
// the kernel metadata is read from the object.
509 if (format
> gpu::CompilationTarget::Binary
) {
510 format
= gpu::CompilationTarget::Binary
;
511 kernels
= ROCDL::getKernelMetadata(module
, object
);
513 DictionaryAttr properties
{};
514 Builder
builder(attribute
.getContext());
515 StringAttr objectStr
=
516 builder
.getStringAttr(StringRef(object
.data(), object
.size()));
517 return builder
.getAttr
<gpu::ObjectAttr
>(attribute
, format
, objectStr
,
518 properties
, kernels
);