//===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines ROCDL target related functions including registration
// calls for the `#rocdl.target` compilation attribute.
//
//===----------------------------------------------------------------------===//
#include "mlir/Target/LLVM/ROCDL/Target.h"

#include "mlir/Dialect/GPU/IR/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Target/LLVM/ROCDL/Utils.h"
#include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
#include "mlir/Target/LLVMIR/Export.h"

#include "llvm/IR/Constants.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/TargetParser/TargetParser.h"

#include <cstdlib>
#include <optional>
50 using namespace mlir::ROCDL
;
52 #ifndef __DEFAULT_ROCM_PATH__
53 #define __DEFAULT_ROCM_PATH__ ""
57 // Implementation of the `TargetAttrInterface` model.
58 class ROCDLTargetAttrImpl
59 : public gpu::TargetAttrInterface::FallbackModel
<ROCDLTargetAttrImpl
> {
61 std::optional
<SmallVector
<char, 0>>
62 serializeToObject(Attribute attribute
, Operation
*module
,
63 const gpu::TargetOptions
&options
) const;
65 Attribute
createObject(Attribute attribute
,
66 const SmallVector
<char, 0> &object
,
67 const gpu::TargetOptions
&options
) const;
71 // Register the ROCDL dialect, the ROCDL translation and the target interface.
72 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
73 DialectRegistry
®istry
) {
74 registry
.addExtension(+[](MLIRContext
*ctx
, ROCDL::ROCDLDialect
*dialect
) {
75 ROCDLTargetAttr::attachInterface
<ROCDLTargetAttrImpl
>(*ctx
);
79 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
80 MLIRContext
&context
) {
81 DialectRegistry registry
;
82 registerROCDLTargetInterfaceExternalModels(registry
);
83 context
.appendDialectRegistry(registry
);
86 // Search for the ROCM path.
87 StringRef
mlir::ROCDL::getROCMPath() {
88 if (const char *var
= std::getenv("ROCM_PATH"))
90 if (const char *var
= std::getenv("ROCM_ROOT"))
92 if (const char *var
= std::getenv("ROCM_HOME"))
94 return __DEFAULT_ROCM_PATH__
;
97 SerializeGPUModuleBase::SerializeGPUModuleBase(
98 Operation
&module
, ROCDLTargetAttr target
,
99 const gpu::TargetOptions
&targetOptions
)
100 : ModuleToObject(module
, target
.getTriple(), target
.getChip(),
101 target
.getFeatures(), target
.getO()),
102 target(target
), toolkitPath(targetOptions
.getToolkitPath()),
103 fileList(targetOptions
.getLinkFiles()) {
105 // If `targetOptions` has an empty toolkitPath use `getROCMPath`
106 if (toolkitPath
.empty())
107 toolkitPath
= getROCMPath();
109 // Append the files in the target attribute.
110 if (ArrayAttr files
= target
.getLink())
111 for (Attribute attr
: files
.getValue())
112 if (auto file
= dyn_cast
<StringAttr
>(attr
))
113 fileList
.push_back(file
.str());
115 // Append standard ROCm device bitcode libraries to the files to be loaded.
116 (void)appendStandardLibs();
119 void SerializeGPUModuleBase::init() {
120 static llvm::once_flag initializeBackendOnce
;
121 llvm::call_once(initializeBackendOnce
, []() {
122 // If the `AMDGPU` LLVM target was built, initialize it.
123 #if MLIR_ENABLE_ROCM_CONVERSIONS
124 LLVMInitializeAMDGPUTarget();
125 LLVMInitializeAMDGPUTargetInfo();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmParser();
128 LLVMInitializeAMDGPUAsmPrinter();
133 ROCDLTargetAttr
SerializeGPUModuleBase::getTarget() const { return target
; }
135 StringRef
SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath
; }
137 ArrayRef
<std::string
> SerializeGPUModuleBase::getFileList() const {
141 LogicalResult
SerializeGPUModuleBase::appendStandardLibs() {
142 StringRef pathRef
= getToolkitPath();
143 if (!pathRef
.empty()) {
144 SmallVector
<char, 256> path
;
145 path
.insert(path
.begin(), pathRef
.begin(), pathRef
.end());
146 llvm::sys::path::append(path
, "amdgcn", "bitcode");
147 pathRef
= StringRef(path
.data(), path
.size());
148 if (!llvm::sys::fs::is_directory(pathRef
)) {
149 getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
150 << " does not exist or is not a directory.";
153 StringRef isaVersion
=
154 llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip
));
155 isaVersion
.consume_front("gfx");
156 return getCommonBitcodeLibs(fileList
, path
, isaVersion
);
161 std::optional
<SmallVector
<std::unique_ptr
<llvm::Module
>>>
162 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module
&module
) {
163 SmallVector
<std::unique_ptr
<llvm::Module
>> bcFiles
;
164 if (failed(loadBitcodeFilesFromList(module
.getContext(), fileList
, bcFiles
,
167 return std::move(bcFiles
);
170 LogicalResult
SerializeGPUModuleBase::handleBitcodeFile(llvm::Module
&module
) {
171 // Some ROCM builds don't strip this like they should
172 if (auto *openclVersion
= module
.getNamedMetadata("opencl.ocl.version"))
173 module
.eraseNamedMetadata(openclVersion
);
174 // Stop spamming us with clang version numbers
175 if (auto *ident
= module
.getNamedMetadata("llvm.ident"))
176 module
.eraseNamedMetadata(ident
);
180 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module
&module
) {
181 [[maybe_unused
]] std::optional
<llvm::TargetMachine
*> targetMachine
=
182 getOrCreateTargetMachine();
183 assert(targetMachine
&& "expect a TargetMachine");
184 addControlVariables(module
, target
.hasWave64(), target
.hasDaz(),
185 target
.hasFiniteOnly(), target
.hasUnsafeMath(),
186 target
.hasFastMath(), target
.hasCorrectSqrt(),
190 // Get the paths of ROCm device libraries.
191 LogicalResult
SerializeGPUModuleBase::getCommonBitcodeLibs(
192 llvm::SmallVector
<std::string
> &libs
, SmallVector
<char, 256> &libPath
,
193 StringRef isaVersion
) {
194 auto addLib
= [&](StringRef path
) -> bool {
195 if (!llvm::sys::fs::is_regular_file(path
)) {
196 getOperation().emitRemark() << "Bitcode library path: " << path
197 << " does not exist or is not a file.\n";
200 libs
.push_back(path
.str());
203 auto getLibPath
= [&libPath
](Twine lib
) {
204 auto baseSize
= libPath
.size();
205 llvm::sys::path::append(libPath
, lib
+ ".bc");
206 std::string
path(StringRef(libPath
.data(), libPath
.size()).str());
207 libPath
.truncate(baseSize
);
211 // Add ROCm device libraries. Fail if any of the libraries is not found.
212 if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
213 addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
214 addLib(getLibPath("oclc_isa_version_" + isaVersion
)))
219 void SerializeGPUModuleBase::addControlVariables(
220 llvm::Module
&module
, bool wave64
, bool daz
, bool finiteOnly
,
221 bool unsafeMath
, bool fastMath
, bool correctSqrt
, StringRef abiVer
) {
222 llvm::Type
*i8Ty
= llvm::Type::getInt8Ty(module
.getContext());
223 auto addControlVariable
= [i8Ty
, &module
](StringRef name
, bool enable
) {
224 llvm::GlobalVariable
*controlVariable
= new llvm::GlobalVariable(
225 module
, i8Ty
, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage
,
226 llvm::ConstantInt::get(i8Ty
, enable
), name
, nullptr,
227 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal
, 4);
228 controlVariable
->setVisibility(
229 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility
);
230 controlVariable
->setAlignment(llvm::MaybeAlign(1));
231 controlVariable
->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local
);
233 addControlVariable("__oclc_finite_only_opt", finiteOnly
|| fastMath
);
234 addControlVariable("__oclc_unsafe_math_opt", unsafeMath
|| fastMath
);
235 addControlVariable("__oclc_daz_opt", daz
|| fastMath
);
236 addControlVariable("__oclc_correctly_rounded_sqrt32",
237 correctSqrt
&& !fastMath
);
238 addControlVariable("__oclc_wavefrontsize64", wave64
);
240 llvm::Type
*i32Ty
= llvm::Type::getInt32Ty(module
.getContext());
242 abiVer
.getAsInteger(0, abi
);
243 llvm::GlobalVariable
*abiVersion
= new llvm::GlobalVariable(
244 module
, i32Ty
, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage
,
245 llvm::ConstantInt::get(i32Ty
, abi
), "__oclc_ABI_version", nullptr,
246 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal
, 4);
247 abiVersion
->setVisibility(
248 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility
);
249 abiVersion
->setAlignment(llvm::MaybeAlign(4));
250 abiVersion
->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local
);
253 std::optional
<SmallVector
<char, 0>>
254 SerializeGPUModuleBase::assembleIsa(StringRef isa
) {
255 auto loc
= getOperation().getLoc();
257 StringRef targetTriple
= this->triple
;
259 SmallVector
<char, 0> result
;
260 llvm::raw_svector_ostream
os(result
);
262 llvm::Triple
triple(llvm::Triple::normalize(targetTriple
));
264 const llvm::Target
*target
=
265 llvm::TargetRegistry::lookupTarget(triple
.normalize(), error
);
267 emitError(loc
, Twine("failed to lookup target: ") + error
);
271 llvm::SourceMgr srcMgr
;
272 srcMgr
.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa
), SMLoc());
274 const llvm::MCTargetOptions mcOptions
;
275 std::unique_ptr
<llvm::MCRegisterInfo
> mri(
276 target
->createMCRegInfo(targetTriple
));
277 std::unique_ptr
<llvm::MCAsmInfo
> mai(
278 target
->createMCAsmInfo(*mri
, targetTriple
, mcOptions
));
279 std::unique_ptr
<llvm::MCSubtargetInfo
> sti(
280 target
->createMCSubtargetInfo(targetTriple
, chip
, features
));
282 llvm::MCContext
ctx(triple
, mai
.get(), mri
.get(), sti
.get(), &srcMgr
,
284 std::unique_ptr
<llvm::MCObjectFileInfo
> mofi(target
->createMCObjectFileInfo(
285 ctx
, /*PIC=*/false, /*LargeCodeModel=*/false));
286 ctx
.setObjectFileInfo(mofi
.get());
288 SmallString
<128> cwd
;
289 if (!llvm::sys::fs::current_path(cwd
))
290 ctx
.setCompilationDir(cwd
);
292 std::unique_ptr
<llvm::MCStreamer
> mcStreamer
;
293 std::unique_ptr
<llvm::MCInstrInfo
> mcii(target
->createMCInstrInfo());
295 llvm::MCCodeEmitter
*ce
= target
->createMCCodeEmitter(*mcii
, ctx
);
296 llvm::MCAsmBackend
*mab
= target
->createMCAsmBackend(*sti
, *mri
, mcOptions
);
297 mcStreamer
.reset(target
->createMCObjectStreamer(
298 triple
, ctx
, std::unique_ptr
<llvm::MCAsmBackend
>(mab
),
299 mab
->createObjectWriter(os
), std::unique_ptr
<llvm::MCCodeEmitter
>(ce
),
300 *sti
, mcOptions
.MCRelaxAll
, mcOptions
.MCIncrementalLinkerCompatible
,
301 /*DWARFMustBeAtTheEnd*/ false));
302 mcStreamer
->setUseAssemblerInfoForParsing(true);
304 std::unique_ptr
<llvm::MCAsmParser
> parser(
305 createMCAsmParser(srcMgr
, ctx
, *mcStreamer
, *mai
));
306 std::unique_ptr
<llvm::MCTargetAsmParser
> tap(
307 target
->createMCAsmParser(*sti
, *parser
, *mcii
, mcOptions
));
310 emitError(loc
, "assembler initialization error");
314 parser
->setTargetParser(*tap
);
320 #if MLIR_ENABLE_ROCM_CONVERSIONS
322 class AMDGPUSerializer
: public SerializeGPUModuleBase
{
324 AMDGPUSerializer(Operation
&module
, ROCDLTargetAttr target
,
325 const gpu::TargetOptions
&targetOptions
);
327 gpu::GPUModuleOp
getOperation();
330 std::optional
<SmallVector
<char, 0>>
331 compileToBinary(const std::string
&serializedISA
);
333 std::optional
<SmallVector
<char, 0>>
334 moduleToObject(llvm::Module
&llvmModule
) override
;
338 gpu::TargetOptions targetOptions
;
342 AMDGPUSerializer::AMDGPUSerializer(Operation
&module
, ROCDLTargetAttr target
,
343 const gpu::TargetOptions
&targetOptions
)
344 : SerializeGPUModuleBase(module
, target
, targetOptions
),
345 targetOptions(targetOptions
) {}
347 gpu::GPUModuleOp
AMDGPUSerializer::getOperation() {
348 return dyn_cast
<gpu::GPUModuleOp
>(&SerializeGPUModuleBase::getOperation());
351 std::optional
<SmallVector
<char, 0>>
352 AMDGPUSerializer::compileToBinary(const std::string
&serializedISA
) {
354 std::optional
<SmallVector
<char, 0>> isaBinary
= assembleIsa(serializedISA
);
357 getOperation().emitError() << "Failed during ISA assembling.";
361 // Save the ISA binary to a temp file.
362 int tempIsaBinaryFd
= -1;
363 SmallString
<128> tempIsaBinaryFilename
;
364 if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd
,
365 tempIsaBinaryFilename
)) {
366 getOperation().emitError()
367 << "Failed to create a temporary file for dumping the ISA binary.";
370 llvm::FileRemover
cleanupIsaBinary(tempIsaBinaryFilename
);
372 llvm::raw_fd_ostream
tempIsaBinaryOs(tempIsaBinaryFd
, true);
373 tempIsaBinaryOs
<< StringRef(isaBinary
->data(), isaBinary
->size());
374 tempIsaBinaryOs
.flush();
377 // Create a temp file for HSA code object.
378 SmallString
<128> tempHsacoFilename
;
379 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
380 tempHsacoFilename
)) {
381 getOperation().emitError()
382 << "Failed to create a temporary file for the HSA code object.";
385 llvm::FileRemover
cleanupHsaco(tempHsacoFilename
);
387 llvm::SmallString
<128> lldPath(toolkitPath
);
388 llvm::sys::path::append(lldPath
, "llvm", "bin", "ld.lld");
389 int lldResult
= llvm::sys::ExecuteAndWait(
391 {"ld.lld", "-shared", tempIsaBinaryFilename
, "-o", tempHsacoFilename
});
392 if (lldResult
!= 0) {
393 getOperation().emitError() << "lld invocation failed.";
397 // Load the HSA code object.
399 llvm::MemoryBuffer::getFile(tempHsacoFilename
, /*IsText=*/false);
401 getOperation().emitError()
402 << "Failed to read the HSA code object from the temp file.";
406 StringRef buffer
= (*hsacoFile
)->getBuffer();
408 return SmallVector
<char, 0>(buffer
.begin(), buffer
.end());
411 std::optional
<SmallVector
<char, 0>>
412 AMDGPUSerializer::moduleToObject(llvm::Module
&llvmModule
) {
413 // Return LLVM IR if the compilation target is offload.
414 #define DEBUG_TYPE "serialize-to-llvm"
416 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
418 << llvmModule
<< "\n";
421 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Offload
)
422 return SerializeGPUModuleBase::moduleToObject(llvmModule
);
424 std::optional
<llvm::TargetMachine
*> targetMachine
=
425 getOrCreateTargetMachine();
426 if (!targetMachine
) {
427 getOperation().emitError() << "Target Machine unavailable for triple "
428 << triple
<< ", can't compile with LLVM\n";
432 // Translate the Module to ISA.
433 std::optional
<std::string
> serializedISA
=
434 translateToISA(llvmModule
, **targetMachine
);
435 if (!serializedISA
) {
436 getOperation().emitError() << "Failed translating the module to ISA.";
439 #define DEBUG_TYPE "serialize-to-isa"
441 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
442 << *serializedISA
<< "\n";
445 // Return ISA assembly code if the compilation target is assembly.
446 if (targetOptions
.getCompilationTarget() == gpu::CompilationTarget::Assembly
)
447 return SmallVector
<char, 0>(serializedISA
->begin(), serializedISA
->end());
449 // Compile to binary.
450 return compileToBinary(*serializedISA
);
452 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
454 std::optional
<SmallVector
<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
455 Attribute attribute
, Operation
*module
,
456 const gpu::TargetOptions
&options
) const {
457 assert(module
&& "The module must be non null.");
460 if (!mlir::isa
<gpu::GPUModuleOp
>(module
)) {
461 module
->emitError("Module must be a GPU module.");
464 #if MLIR_ENABLE_ROCM_CONVERSIONS
465 AMDGPUSerializer
serializer(*module
, cast
<ROCDLTargetAttr
>(attribute
),
468 return serializer
.run();
470 module
->emitError("The `AMDGPU` target was not built. Please enable it when "
473 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
477 ROCDLTargetAttrImpl::createObject(Attribute attribute
,
478 const SmallVector
<char, 0> &object
,
479 const gpu::TargetOptions
&options
) const {
480 gpu::CompilationTarget format
= options
.getCompilationTarget();
481 Builder
builder(attribute
.getContext());
482 return builder
.getAttr
<gpu::ObjectAttr
>(
484 format
> gpu::CompilationTarget::Binary
? gpu::CompilationTarget::Binary
486 builder
.getStringAttr(StringRef(object
.data(), object
.size())), nullptr);