[SampleProfileLoader] Fix integer overflow in generateMDProfMetadata (#90217)
[llvm-project.git] / mlir / lib / Target / LLVM / ROCDL / Target.cpp
blob66593fd8a55fa63d1f15f275b36fe82ab351dfc3
1 //===- Target.cpp - MLIR LLVM ROCDL target compilation ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines ROCDL target related functions including registration
10 // calls for the `#rocdl.target` compilation attribute.
12 //===----------------------------------------------------------------------===//
14 #include "mlir/Target/LLVM/ROCDL/Target.h"
16 #include "mlir/Dialect/GPU/IR/GPUDialect.h"
17 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
18 #include "mlir/Support/FileUtilities.h"
19 #include "mlir/Target/LLVM/ROCDL/Utils.h"
20 #include "mlir/Target/LLVMIR/Dialect/GPU/GPUToLLVMIRTranslation.h"
21 #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h"
22 #include "mlir/Target/LLVMIR/Dialect/ROCDL/ROCDLToLLVMIRTranslation.h"
23 #include "mlir/Target/LLVMIR/Export.h"
25 #include "llvm/IR/Constants.h"
26 #include "llvm/MC/MCAsmBackend.h"
27 #include "llvm/MC/MCAsmInfo.h"
28 #include "llvm/MC/MCCodeEmitter.h"
29 #include "llvm/MC/MCContext.h"
30 #include "llvm/MC/MCInstrInfo.h"
31 #include "llvm/MC/MCObjectFileInfo.h"
32 #include "llvm/MC/MCObjectWriter.h"
33 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
34 #include "llvm/MC/MCRegisterInfo.h"
35 #include "llvm/MC/MCStreamer.h"
36 #include "llvm/MC/MCSubtargetInfo.h"
37 #include "llvm/MC/TargetRegistry.h"
38 #include "llvm/Support/FileSystem.h"
39 #include "llvm/Support/FileUtilities.h"
40 #include "llvm/Support/Path.h"
41 #include "llvm/Support/Program.h"
42 #include "llvm/Support/SourceMgr.h"
43 #include "llvm/Support/TargetSelect.h"
44 #include "llvm/TargetParser/TargetParser.h"
46 #include <cstdlib>
47 #include <optional>
49 using namespace mlir;
50 using namespace mlir::ROCDL;
52 #ifndef __DEFAULT_ROCM_PATH__
53 #define __DEFAULT_ROCM_PATH__ ""
54 #endif
56 namespace {
57 // Implementation of the `TargetAttrInterface` model.
58 class ROCDLTargetAttrImpl
59 : public gpu::TargetAttrInterface::FallbackModel<ROCDLTargetAttrImpl> {
60 public:
61 std::optional<SmallVector<char, 0>>
62 serializeToObject(Attribute attribute, Operation *module,
63 const gpu::TargetOptions &options) const;
65 Attribute createObject(Attribute attribute,
66 const SmallVector<char, 0> &object,
67 const gpu::TargetOptions &options) const;
69 } // namespace
71 // Register the ROCDL dialect, the ROCDL translation and the target interface.
72 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
73 DialectRegistry &registry) {
74 registry.addExtension(+[](MLIRContext *ctx, ROCDL::ROCDLDialect *dialect) {
75 ROCDLTargetAttr::attachInterface<ROCDLTargetAttrImpl>(*ctx);
76 });
79 void mlir::ROCDL::registerROCDLTargetInterfaceExternalModels(
80 MLIRContext &context) {
81 DialectRegistry registry;
82 registerROCDLTargetInterfaceExternalModels(registry);
83 context.appendDialectRegistry(registry);
86 // Search for the ROCM path.
87 StringRef mlir::ROCDL::getROCMPath() {
88 if (const char *var = std::getenv("ROCM_PATH"))
89 return var;
90 if (const char *var = std::getenv("ROCM_ROOT"))
91 return var;
92 if (const char *var = std::getenv("ROCM_HOME"))
93 return var;
94 return __DEFAULT_ROCM_PATH__;
97 SerializeGPUModuleBase::SerializeGPUModuleBase(
98 Operation &module, ROCDLTargetAttr target,
99 const gpu::TargetOptions &targetOptions)
100 : ModuleToObject(module, target.getTriple(), target.getChip(),
101 target.getFeatures(), target.getO()),
102 target(target), toolkitPath(targetOptions.getToolkitPath()),
103 fileList(targetOptions.getLinkFiles()) {
105 // If `targetOptions` has an empty toolkitPath use `getROCMPath`
106 if (toolkitPath.empty())
107 toolkitPath = getROCMPath();
109 // Append the files in the target attribute.
110 if (ArrayAttr files = target.getLink())
111 for (Attribute attr : files.getValue())
112 if (auto file = dyn_cast<StringAttr>(attr))
113 fileList.push_back(file.str());
115 // Append standard ROCm device bitcode libraries to the files to be loaded.
116 (void)appendStandardLibs();
119 void SerializeGPUModuleBase::init() {
120 static llvm::once_flag initializeBackendOnce;
121 llvm::call_once(initializeBackendOnce, []() {
122 // If the `AMDGPU` LLVM target was built, initialize it.
123 #if MLIR_ENABLE_ROCM_CONVERSIONS
124 LLVMInitializeAMDGPUTarget();
125 LLVMInitializeAMDGPUTargetInfo();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmParser();
128 LLVMInitializeAMDGPUAsmPrinter();
129 #endif
133 ROCDLTargetAttr SerializeGPUModuleBase::getTarget() const { return target; }
135 StringRef SerializeGPUModuleBase::getToolkitPath() const { return toolkitPath; }
137 ArrayRef<std::string> SerializeGPUModuleBase::getFileList() const {
138 return fileList;
141 LogicalResult SerializeGPUModuleBase::appendStandardLibs() {
142 StringRef pathRef = getToolkitPath();
143 if (!pathRef.empty()) {
144 SmallVector<char, 256> path;
145 path.insert(path.begin(), pathRef.begin(), pathRef.end());
146 llvm::sys::path::append(path, "amdgcn", "bitcode");
147 pathRef = StringRef(path.data(), path.size());
148 if (!llvm::sys::fs::is_directory(pathRef)) {
149 getOperation().emitRemark() << "ROCm amdgcn bitcode path: " << pathRef
150 << " does not exist or is not a directory.";
151 return failure();
153 StringRef isaVersion =
154 llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(chip));
155 isaVersion.consume_front("gfx");
156 return getCommonBitcodeLibs(fileList, path, isaVersion);
158 return success();
161 std::optional<SmallVector<std::unique_ptr<llvm::Module>>>
162 SerializeGPUModuleBase::loadBitcodeFiles(llvm::Module &module) {
163 SmallVector<std::unique_ptr<llvm::Module>> bcFiles;
164 if (failed(loadBitcodeFilesFromList(module.getContext(), fileList, bcFiles,
165 true)))
166 return std::nullopt;
167 return std::move(bcFiles);
170 LogicalResult SerializeGPUModuleBase::handleBitcodeFile(llvm::Module &module) {
171 // Some ROCM builds don't strip this like they should
172 if (auto *openclVersion = module.getNamedMetadata("opencl.ocl.version"))
173 module.eraseNamedMetadata(openclVersion);
174 // Stop spamming us with clang version numbers
175 if (auto *ident = module.getNamedMetadata("llvm.ident"))
176 module.eraseNamedMetadata(ident);
177 return success();
180 void SerializeGPUModuleBase::handleModulePreLink(llvm::Module &module) {
181 [[maybe_unused]] std::optional<llvm::TargetMachine *> targetMachine =
182 getOrCreateTargetMachine();
183 assert(targetMachine && "expect a TargetMachine");
184 addControlVariables(module, target.hasWave64(), target.hasDaz(),
185 target.hasFiniteOnly(), target.hasUnsafeMath(),
186 target.hasFastMath(), target.hasCorrectSqrt(),
187 target.getAbi());
190 // Get the paths of ROCm device libraries.
191 LogicalResult SerializeGPUModuleBase::getCommonBitcodeLibs(
192 llvm::SmallVector<std::string> &libs, SmallVector<char, 256> &libPath,
193 StringRef isaVersion) {
194 auto addLib = [&](StringRef path) -> bool {
195 if (!llvm::sys::fs::is_regular_file(path)) {
196 getOperation().emitRemark() << "Bitcode library path: " << path
197 << " does not exist or is not a file.\n";
198 return true;
200 libs.push_back(path.str());
201 return false;
203 auto getLibPath = [&libPath](Twine lib) {
204 auto baseSize = libPath.size();
205 llvm::sys::path::append(libPath, lib + ".bc");
206 std::string path(StringRef(libPath.data(), libPath.size()).str());
207 libPath.truncate(baseSize);
208 return path;
211 // Add ROCm device libraries. Fail if any of the libraries is not found.
212 if (addLib(getLibPath("ocml")) || addLib(getLibPath("ockl")) ||
213 addLib(getLibPath("hip")) || addLib(getLibPath("opencl")) ||
214 addLib(getLibPath("oclc_isa_version_" + isaVersion)))
215 return failure();
216 return success();
219 void SerializeGPUModuleBase::addControlVariables(
220 llvm::Module &module, bool wave64, bool daz, bool finiteOnly,
221 bool unsafeMath, bool fastMath, bool correctSqrt, StringRef abiVer) {
222 llvm::Type *i8Ty = llvm::Type::getInt8Ty(module.getContext());
223 auto addControlVariable = [i8Ty, &module](StringRef name, bool enable) {
224 llvm::GlobalVariable *controlVariable = new llvm::GlobalVariable(
225 module, i8Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
226 llvm::ConstantInt::get(i8Ty, enable), name, nullptr,
227 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
228 controlVariable->setVisibility(
229 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
230 controlVariable->setAlignment(llvm::MaybeAlign(1));
231 controlVariable->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
233 addControlVariable("__oclc_finite_only_opt", finiteOnly || fastMath);
234 addControlVariable("__oclc_unsafe_math_opt", unsafeMath || fastMath);
235 addControlVariable("__oclc_daz_opt", daz || fastMath);
236 addControlVariable("__oclc_correctly_rounded_sqrt32",
237 correctSqrt && !fastMath);
238 addControlVariable("__oclc_wavefrontsize64", wave64);
240 llvm::Type *i32Ty = llvm::Type::getInt32Ty(module.getContext());
241 int abi = 500;
242 abiVer.getAsInteger(0, abi);
243 llvm::GlobalVariable *abiVersion = new llvm::GlobalVariable(
244 module, i32Ty, true, llvm::GlobalValue::LinkageTypes::LinkOnceODRLinkage,
245 llvm::ConstantInt::get(i32Ty, abi), "__oclc_ABI_version", nullptr,
246 llvm::GlobalValue::ThreadLocalMode::NotThreadLocal, 4);
247 abiVersion->setVisibility(
248 llvm::GlobalValue::VisibilityTypes::ProtectedVisibility);
249 abiVersion->setAlignment(llvm::MaybeAlign(4));
250 abiVersion->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
253 std::optional<SmallVector<char, 0>>
254 SerializeGPUModuleBase::assembleIsa(StringRef isa) {
255 auto loc = getOperation().getLoc();
257 StringRef targetTriple = this->triple;
259 SmallVector<char, 0> result;
260 llvm::raw_svector_ostream os(result);
262 llvm::Triple triple(llvm::Triple::normalize(targetTriple));
263 std::string error;
264 const llvm::Target *target =
265 llvm::TargetRegistry::lookupTarget(triple.normalize(), error);
266 if (!target) {
267 emitError(loc, Twine("failed to lookup target: ") + error);
268 return std::nullopt;
271 llvm::SourceMgr srcMgr;
272 srcMgr.AddNewSourceBuffer(llvm::MemoryBuffer::getMemBuffer(isa), SMLoc());
274 const llvm::MCTargetOptions mcOptions;
275 std::unique_ptr<llvm::MCRegisterInfo> mri(
276 target->createMCRegInfo(targetTriple));
277 std::unique_ptr<llvm::MCAsmInfo> mai(
278 target->createMCAsmInfo(*mri, targetTriple, mcOptions));
279 std::unique_ptr<llvm::MCSubtargetInfo> sti(
280 target->createMCSubtargetInfo(targetTriple, chip, features));
282 llvm::MCContext ctx(triple, mai.get(), mri.get(), sti.get(), &srcMgr,
283 &mcOptions);
284 std::unique_ptr<llvm::MCObjectFileInfo> mofi(target->createMCObjectFileInfo(
285 ctx, /*PIC=*/false, /*LargeCodeModel=*/false));
286 ctx.setObjectFileInfo(mofi.get());
288 SmallString<128> cwd;
289 if (!llvm::sys::fs::current_path(cwd))
290 ctx.setCompilationDir(cwd);
292 std::unique_ptr<llvm::MCStreamer> mcStreamer;
293 std::unique_ptr<llvm::MCInstrInfo> mcii(target->createMCInstrInfo());
295 llvm::MCCodeEmitter *ce = target->createMCCodeEmitter(*mcii, ctx);
296 llvm::MCAsmBackend *mab = target->createMCAsmBackend(*sti, *mri, mcOptions);
297 mcStreamer.reset(target->createMCObjectStreamer(
298 triple, ctx, std::unique_ptr<llvm::MCAsmBackend>(mab),
299 mab->createObjectWriter(os), std::unique_ptr<llvm::MCCodeEmitter>(ce),
300 *sti, mcOptions.MCRelaxAll, mcOptions.MCIncrementalLinkerCompatible,
301 /*DWARFMustBeAtTheEnd*/ false));
302 mcStreamer->setUseAssemblerInfoForParsing(true);
304 std::unique_ptr<llvm::MCAsmParser> parser(
305 createMCAsmParser(srcMgr, ctx, *mcStreamer, *mai));
306 std::unique_ptr<llvm::MCTargetAsmParser> tap(
307 target->createMCAsmParser(*sti, *parser, *mcii, mcOptions));
309 if (!tap) {
310 emitError(loc, "assembler initialization error");
311 return {};
314 parser->setTargetParser(*tap);
315 parser->Run(false);
317 return result;
320 #if MLIR_ENABLE_ROCM_CONVERSIONS
321 namespace {
322 class AMDGPUSerializer : public SerializeGPUModuleBase {
323 public:
324 AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
325 const gpu::TargetOptions &targetOptions);
327 gpu::GPUModuleOp getOperation();
329 // Compile to HSA.
330 std::optional<SmallVector<char, 0>>
331 compileToBinary(const std::string &serializedISA);
333 std::optional<SmallVector<char, 0>>
334 moduleToObject(llvm::Module &llvmModule) override;
336 private:
337 // Target options.
338 gpu::TargetOptions targetOptions;
340 } // namespace
342 AMDGPUSerializer::AMDGPUSerializer(Operation &module, ROCDLTargetAttr target,
343 const gpu::TargetOptions &targetOptions)
344 : SerializeGPUModuleBase(module, target, targetOptions),
345 targetOptions(targetOptions) {}
347 gpu::GPUModuleOp AMDGPUSerializer::getOperation() {
348 return dyn_cast<gpu::GPUModuleOp>(&SerializeGPUModuleBase::getOperation());
351 std::optional<SmallVector<char, 0>>
352 AMDGPUSerializer::compileToBinary(const std::string &serializedISA) {
353 // Assemble the ISA.
354 std::optional<SmallVector<char, 0>> isaBinary = assembleIsa(serializedISA);
356 if (!isaBinary) {
357 getOperation().emitError() << "Failed during ISA assembling.";
358 return std::nullopt;
361 // Save the ISA binary to a temp file.
362 int tempIsaBinaryFd = -1;
363 SmallString<128> tempIsaBinaryFilename;
364 if (llvm::sys::fs::createTemporaryFile("kernel%%", "o", tempIsaBinaryFd,
365 tempIsaBinaryFilename)) {
366 getOperation().emitError()
367 << "Failed to create a temporary file for dumping the ISA binary.";
368 return std::nullopt;
370 llvm::FileRemover cleanupIsaBinary(tempIsaBinaryFilename);
372 llvm::raw_fd_ostream tempIsaBinaryOs(tempIsaBinaryFd, true);
373 tempIsaBinaryOs << StringRef(isaBinary->data(), isaBinary->size());
374 tempIsaBinaryOs.flush();
377 // Create a temp file for HSA code object.
378 SmallString<128> tempHsacoFilename;
379 if (llvm::sys::fs::createTemporaryFile("kernel", "hsaco",
380 tempHsacoFilename)) {
381 getOperation().emitError()
382 << "Failed to create a temporary file for the HSA code object.";
383 return std::nullopt;
385 llvm::FileRemover cleanupHsaco(tempHsacoFilename);
387 llvm::SmallString<128> lldPath(toolkitPath);
388 llvm::sys::path::append(lldPath, "llvm", "bin", "ld.lld");
389 int lldResult = llvm::sys::ExecuteAndWait(
390 lldPath,
391 {"ld.lld", "-shared", tempIsaBinaryFilename, "-o", tempHsacoFilename});
392 if (lldResult != 0) {
393 getOperation().emitError() << "lld invocation failed.";
394 return std::nullopt;
397 // Load the HSA code object.
398 auto hsacoFile =
399 llvm::MemoryBuffer::getFile(tempHsacoFilename, /*IsText=*/false);
400 if (!hsacoFile) {
401 getOperation().emitError()
402 << "Failed to read the HSA code object from the temp file.";
403 return std::nullopt;
406 StringRef buffer = (*hsacoFile)->getBuffer();
408 return SmallVector<char, 0>(buffer.begin(), buffer.end());
411 std::optional<SmallVector<char, 0>>
412 AMDGPUSerializer::moduleToObject(llvm::Module &llvmModule) {
413 // Return LLVM IR if the compilation target is offload.
414 #define DEBUG_TYPE "serialize-to-llvm"
415 LLVM_DEBUG({
416 llvm::dbgs() << "LLVM IR for module: " << getOperation().getNameAttr()
417 << "\n"
418 << llvmModule << "\n";
420 #undef DEBUG_TYPE
421 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Offload)
422 return SerializeGPUModuleBase::moduleToObject(llvmModule);
424 std::optional<llvm::TargetMachine *> targetMachine =
425 getOrCreateTargetMachine();
426 if (!targetMachine) {
427 getOperation().emitError() << "Target Machine unavailable for triple "
428 << triple << ", can't compile with LLVM\n";
429 return std::nullopt;
432 // Translate the Module to ISA.
433 std::optional<std::string> serializedISA =
434 translateToISA(llvmModule, **targetMachine);
435 if (!serializedISA) {
436 getOperation().emitError() << "Failed translating the module to ISA.";
437 return std::nullopt;
439 #define DEBUG_TYPE "serialize-to-isa"
440 LLVM_DEBUG({
441 llvm::dbgs() << "ISA for module: " << getOperation().getNameAttr() << "\n"
442 << *serializedISA << "\n";
444 #undef DEBUG_TYPE
445 // Return ISA assembly code if the compilation target is assembly.
446 if (targetOptions.getCompilationTarget() == gpu::CompilationTarget::Assembly)
447 return SmallVector<char, 0>(serializedISA->begin(), serializedISA->end());
449 // Compile to binary.
450 return compileToBinary(*serializedISA);
452 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
454 std::optional<SmallVector<char, 0>> ROCDLTargetAttrImpl::serializeToObject(
455 Attribute attribute, Operation *module,
456 const gpu::TargetOptions &options) const {
457 assert(module && "The module must be non null.");
458 if (!module)
459 return std::nullopt;
460 if (!mlir::isa<gpu::GPUModuleOp>(module)) {
461 module->emitError("Module must be a GPU module.");
462 return std::nullopt;
464 #if MLIR_ENABLE_ROCM_CONVERSIONS
465 AMDGPUSerializer serializer(*module, cast<ROCDLTargetAttr>(attribute),
466 options);
467 serializer.init();
468 return serializer.run();
469 #else
470 module->emitError("The `AMDGPU` target was not built. Please enable it when "
471 "building LLVM.");
472 return std::nullopt;
473 #endif // MLIR_ENABLE_ROCM_CONVERSIONS
476 Attribute
477 ROCDLTargetAttrImpl::createObject(Attribute attribute,
478 const SmallVector<char, 0> &object,
479 const gpu::TargetOptions &options) const {
480 gpu::CompilationTarget format = options.getCompilationTarget();
481 Builder builder(attribute.getContext());
482 return builder.getAttr<gpu::ObjectAttr>(
483 attribute,
484 format > gpu::CompilationTarget::Binary ? gpu::CompilationTarget::Binary
485 : format,
486 builder.getStringAttr(StringRef(object.data(), object.size())), nullptr);