[Clang/AMDGPU] Zero sized arrays not allowed in HIP device code. (#113470)
[llvm-project.git] / llvm / lib / Target / AMDGPU / AMDGPUHSAMetadataStreamer.cpp
blobee8a700f988dc55473191f16e40635b1ff378262
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 using namespace llvm;
26 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
27 const DataLayout &DL) {
28 Type *Ty = Arg.getType();
29 MaybeAlign ArgAlign;
30 if (Arg.hasByRefAttr()) {
31 Ty = Arg.getParamByRefType();
32 ArgAlign = Arg.getParamAlign();
35 if (!ArgAlign)
36 ArgAlign = DL.getABITypeAlign(Ty);
38 return std::pair(Ty, *ArgAlign);
41 namespace llvm {
43 static cl::opt<bool> DumpHSAMetadata(
44 "amdgpu-dump-hsa-metadata",
45 cl::desc("Dump AMDGPU HSA Metadata"));
46 static cl::opt<bool> VerifyHSAMetadata(
47 "amdgpu-verify-hsa-metadata",
48 cl::desc("Verify AMDGPU HSA Metadata"));
50 namespace AMDGPU::HSAMD {
52 //===----------------------------------------------------------------------===//
53 // HSAMetadataStreamerV4
54 //===----------------------------------------------------------------------===//
56 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
57 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
60 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
61 errs() << "AMDGPU HSA Metadata Parser Test: ";
63 msgpack::Document FromHSAMetadataString;
65 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
66 errs() << "FAIL\n";
67 return;
70 std::string ToHSAMetadataString;
71 raw_string_ostream StrOS(ToHSAMetadataString);
72 FromHSAMetadataString.toYAML(StrOS);
74 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << StrOS.str() << '\n';
81 std::optional<StringRef>
82 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
83 return StringSwitch<std::optional<StringRef>>(AccQual)
84 .Case("read_only", StringRef("read_only"))
85 .Case("write_only", StringRef("write_only"))
86 .Case("read_write", StringRef("read_write"))
87 .Default(std::nullopt);
90 std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
91 unsigned AddressSpace) const {
92 switch (AddressSpace) {
93 case AMDGPUAS::PRIVATE_ADDRESS:
94 return StringRef("private");
95 case AMDGPUAS::GLOBAL_ADDRESS:
96 return StringRef("global");
97 case AMDGPUAS::CONSTANT_ADDRESS:
98 return StringRef("constant");
99 case AMDGPUAS::LOCAL_ADDRESS:
100 return StringRef("local");
101 case AMDGPUAS::FLAT_ADDRESS:
102 return StringRef("generic");
103 case AMDGPUAS::REGION_ADDRESS:
104 return StringRef("region");
105 default:
106 return std::nullopt;
110 StringRef
111 MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
112 StringRef BaseTypeName) const {
113 if (TypeQual.contains("pipe"))
114 return "pipe";
116 return StringSwitch<StringRef>(BaseTypeName)
117 .Case("image1d_t", "image")
118 .Case("image1d_array_t", "image")
119 .Case("image1d_buffer_t", "image")
120 .Case("image2d_t", "image")
121 .Case("image2d_array_t", "image")
122 .Case("image2d_array_depth_t", "image")
123 .Case("image2d_array_msaa_t", "image")
124 .Case("image2d_array_msaa_depth_t", "image")
125 .Case("image2d_depth_t", "image")
126 .Case("image2d_msaa_t", "image")
127 .Case("image2d_msaa_depth_t", "image")
128 .Case("image3d_t", "image")
129 .Case("sampler_t", "sampler")
130 .Case("queue_t", "queue")
131 .Default(isa<PointerType>(Ty)
132 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
133 ? "dynamic_shared_pointer"
134 : "global_buffer")
135 : "by_value");
138 std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
139 bool Signed) const {
140 switch (Ty->getTypeID()) {
141 case Type::IntegerTyID: {
142 if (!Signed)
143 return (Twine('u') + getTypeName(Ty, true)).str();
145 auto BitWidth = Ty->getIntegerBitWidth();
146 switch (BitWidth) {
147 case 8:
148 return "char";
149 case 16:
150 return "short";
151 case 32:
152 return "int";
153 case 64:
154 return "long";
155 default:
156 return (Twine('i') + Twine(BitWidth)).str();
159 case Type::HalfTyID:
160 return "half";
161 case Type::FloatTyID:
162 return "float";
163 case Type::DoubleTyID:
164 return "double";
165 case Type::FixedVectorTyID: {
166 auto *VecTy = cast<FixedVectorType>(Ty);
167 auto *ElTy = VecTy->getElementType();
168 auto NumElements = VecTy->getNumElements();
169 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
171 default:
172 return "unknown";
176 msgpack::ArrayDocNode
177 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
178 auto Dims = HSAMetadataDoc->getArrayNode();
179 if (Node->getNumOperands() != 3)
180 return Dims;
182 for (auto &Op : Node->operands())
183 Dims.push_back(Dims.getDocument()->getNode(
184 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
185 return Dims;
188 void MetadataStreamerMsgPackV4::emitVersion() {
189 auto Version = HSAMetadataDoc->getArrayNode();
190 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
191 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
192 getRootMetadata("amdhsa.version") = Version;
195 void MetadataStreamerMsgPackV4::emitTargetID(
196 const IsaInfo::AMDGPUTargetID &TargetID) {
197 getRootMetadata("amdhsa.target") =
198 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
201 void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
202 auto *Node = Mod.getNamedMetadata("llvm.printf.fmts");
203 if (!Node)
204 return;
206 auto Printf = HSAMetadataDoc->getArrayNode();
207 for (auto *Op : Node->operands())
208 if (Op->getNumOperands())
209 Printf.push_back(Printf.getDocument()->getNode(
210 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
211 getRootMetadata("amdhsa.printf") = Printf;
214 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
215 msgpack::MapDocNode Kern) {
216 // TODO: What about other languages?
217 auto *Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
218 if (!Node || !Node->getNumOperands())
219 return;
220 auto *Op0 = Node->getOperand(0);
221 if (Op0->getNumOperands() <= 1)
222 return;
224 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
225 auto LanguageVersion = Kern.getDocument()->getArrayNode();
226 LanguageVersion.push_back(Kern.getDocument()->getNode(
227 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
228 LanguageVersion.push_back(Kern.getDocument()->getNode(
229 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
230 Kern[".language_version"] = LanguageVersion;
233 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
234 msgpack::MapDocNode Kern) {
236 if (auto *Node = Func.getMetadata("reqd_work_group_size"))
237 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
238 if (auto *Node = Func.getMetadata("work_group_size_hint"))
239 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
240 if (auto *Node = Func.getMetadata("vec_type_hint")) {
241 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
242 getTypeName(
243 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
244 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
245 /*Copy=*/true);
247 if (Func.hasFnAttribute("runtime-handle")) {
248 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
249 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
250 /*Copy=*/true);
252 if (Func.hasFnAttribute("device-init"))
253 Kern[".kind"] = Kern.getDocument()->getNode("init");
254 else if (Func.hasFnAttribute("device-fini"))
255 Kern[".kind"] = Kern.getDocument()->getNode("fini");
258 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
259 msgpack::MapDocNode Kern) {
260 auto &Func = MF.getFunction();
261 unsigned Offset = 0;
262 auto Args = HSAMetadataDoc->getArrayNode();
263 for (auto &Arg : Func.args()) {
264 if (Arg.hasAttribute("amdgpu-hidden-argument"))
265 continue;
267 emitKernelArg(Arg, Offset, Args);
270 emitHiddenKernelArgs(MF, Offset, Args);
272 Kern[".args"] = Args;
275 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
276 unsigned &Offset,
277 msgpack::ArrayDocNode Args) {
278 const auto *Func = Arg.getParent();
279 auto ArgNo = Arg.getArgNo();
280 const MDNode *Node;
282 StringRef Name;
283 Node = Func->getMetadata("kernel_arg_name");
284 if (Node && ArgNo < Node->getNumOperands())
285 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
286 else if (Arg.hasName())
287 Name = Arg.getName();
289 StringRef TypeName;
290 Node = Func->getMetadata("kernel_arg_type");
291 if (Node && ArgNo < Node->getNumOperands())
292 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
294 StringRef BaseTypeName;
295 Node = Func->getMetadata("kernel_arg_base_type");
296 if (Node && ArgNo < Node->getNumOperands())
297 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
299 StringRef ActAccQual;
300 // Do we really need NoAlias check here?
301 if (Arg.getType()->isPointerTy() && Arg.hasNoAliasAttr()) {
302 if (Arg.onlyReadsMemory())
303 ActAccQual = "read_only";
304 else if (Arg.hasAttribute(Attribute::WriteOnly))
305 ActAccQual = "write_only";
308 StringRef AccQual;
309 Node = Func->getMetadata("kernel_arg_access_qual");
310 if (Node && ArgNo < Node->getNumOperands())
311 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
313 StringRef TypeQual;
314 Node = Func->getMetadata("kernel_arg_type_qual");
315 if (Node && ArgNo < Node->getNumOperands())
316 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
318 const DataLayout &DL = Func->getDataLayout();
320 MaybeAlign PointeeAlign;
321 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
323 // FIXME: Need to distinguish in memory alignment from pointer alignment.
324 if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
325 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS)
326 PointeeAlign = Arg.getParamAlign().valueOrOne();
329 // There's no distinction between byval aggregates and raw aggregates.
330 Type *ArgTy;
331 Align ArgAlign;
332 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
334 emitKernelArg(DL, ArgTy, ArgAlign,
335 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
336 PointeeAlign, Name, TypeName, BaseTypeName, ActAccQual,
337 AccQual, TypeQual);
340 void MetadataStreamerMsgPackV4::emitKernelArg(
341 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
342 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
343 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
344 StringRef ActAccQual, StringRef AccQual, StringRef TypeQual) {
345 auto Arg = Args.getDocument()->getMapNode();
347 if (!Name.empty())
348 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
349 if (!TypeName.empty())
350 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
351 auto Size = DL.getTypeAllocSize(Ty);
352 Arg[".size"] = Arg.getDocument()->getNode(Size);
353 Offset = alignTo(Offset, Alignment);
354 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
355 Offset += Size;
356 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
357 if (PointeeAlign)
358 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
360 if (auto *PtrTy = dyn_cast<PointerType>(Ty))
361 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
362 // Limiting address space to emit only for a certain ValueKind.
363 if (ValueKind == "global_buffer" || ValueKind == "dynamic_shared_pointer")
364 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier,
365 /*Copy=*/true);
367 if (auto AQ = getAccessQualifier(AccQual))
368 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
370 if (auto AAQ = getAccessQualifier(ActAccQual))
371 Arg[".actual_access"] = Arg.getDocument()->getNode(*AAQ, /*Copy=*/true);
373 SmallVector<StringRef, 1> SplitTypeQuals;
374 TypeQual.split(SplitTypeQuals, " ", -1, false);
375 for (StringRef Key : SplitTypeQuals) {
376 if (Key == "const")
377 Arg[".is_const"] = Arg.getDocument()->getNode(true);
378 else if (Key == "restrict")
379 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
380 else if (Key == "volatile")
381 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
382 else if (Key == "pipe")
383 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
386 Args.push_back(Arg);
389 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
390 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
391 auto &Func = MF.getFunction();
392 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
394 unsigned HiddenArgNumBytes = ST.getImplicitArgNumBytes(Func);
395 if (!HiddenArgNumBytes)
396 return;
398 const Module *M = Func.getParent();
399 auto &DL = M->getDataLayout();
400 auto *Int64Ty = Type::getInt64Ty(Func.getContext());
402 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
404 if (HiddenArgNumBytes >= 8)
405 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
406 Args);
407 if (HiddenArgNumBytes >= 16)
408 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
409 Args);
410 if (HiddenArgNumBytes >= 24)
411 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
412 Args);
414 auto *Int8PtrTy =
415 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
417 if (HiddenArgNumBytes >= 32) {
418 // We forbid the use of features requiring hostcall when compiling OpenCL
419 // before code object V5, which makes the mutual exclusion between the
420 // "printf buffer" and "hostcall buffer" here sound.
421 if (M->getNamedMetadata("llvm.printf.fmts"))
422 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
423 Args);
424 else if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr"))
425 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
426 Args);
427 else
428 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
431 // Emit "default queue" and "completion action" arguments if enqueue kernel is
432 // used, otherwise emit dummy "none" arguments.
433 if (HiddenArgNumBytes >= 40) {
434 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
435 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
436 Args);
437 } else {
438 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
442 if (HiddenArgNumBytes >= 48) {
443 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
444 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
445 Args);
446 } else {
447 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
451 // Emit the pointer argument for multi-grid object.
452 if (HiddenArgNumBytes >= 56) {
453 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
454 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
455 Args);
456 } else {
457 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
462 msgpack::MapDocNode
463 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
464 const SIProgramInfo &ProgramInfo,
465 unsigned CodeObjectVersion) const {
466 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
467 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
468 const Function &F = MF.getFunction();
470 auto Kern = HSAMetadataDoc->getMapNode();
472 Align MaxKernArgAlign;
473 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
474 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
475 Kern[".group_segment_fixed_size"] =
476 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
477 DelayedExprs->assignDocNode(Kern[".private_segment_fixed_size"],
478 msgpack::Type::UInt, ProgramInfo.ScratchSize);
479 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5) {
480 DelayedExprs->assignDocNode(Kern[".uses_dynamic_stack"],
481 msgpack::Type::Boolean,
482 ProgramInfo.DynamicCallStack);
485 if (CodeObjectVersion >= AMDGPU::AMDHSA_COV5 && STM.supportsWGP())
486 Kern[".workgroup_processor_mode"] =
487 Kern.getDocument()->getNode(ProgramInfo.WgpMode);
489 // FIXME: The metadata treats the minimum as 16?
490 Kern[".kernarg_segment_align"] =
491 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
492 Kern[".wavefront_size"] =
493 Kern.getDocument()->getNode(STM.getWavefrontSize());
494 DelayedExprs->assignDocNode(Kern[".sgpr_count"], msgpack::Type::UInt,
495 ProgramInfo.NumSGPR);
496 DelayedExprs->assignDocNode(Kern[".vgpr_count"], msgpack::Type::UInt,
497 ProgramInfo.NumVGPR);
499 // Only add AGPR count to metadata for supported devices
500 if (STM.hasMAIInsts()) {
501 DelayedExprs->assignDocNode(Kern[".agpr_count"], msgpack::Type::UInt,
502 ProgramInfo.NumAccVGPR);
505 Kern[".max_flat_workgroup_size"] =
506 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
508 uint32_t NumWGY = MFI.getMaxNumWorkGroupsY();
509 uint32_t NumWGZ = MFI.getMaxNumWorkGroupsZ();
510 uint32_t NumWGX = MFI.getMaxNumWorkGroupsX();
512 // TODO: Should consider 0 invalid and reject in IR verifier.
513 if (NumWGX != std::numeric_limits<uint32_t>::max() && NumWGX != 0)
514 Kern[".max_num_workgroups_x"] = Kern.getDocument()->getNode(NumWGX);
516 if (NumWGY != std::numeric_limits<uint32_t>::max() && NumWGY != 0)
517 Kern[".max_num_workgroups_y"] = Kern.getDocument()->getNode(NumWGY);
519 if (NumWGZ != std::numeric_limits<uint32_t>::max() && NumWGZ != 0)
520 Kern[".max_num_workgroups_z"] = Kern.getDocument()->getNode(NumWGZ);
522 Kern[".sgpr_spill_count"] =
523 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
524 Kern[".vgpr_spill_count"] =
525 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
527 return Kern;
530 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
531 DelayedExprs->resolveDelayedExpressions();
532 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
535 void MetadataStreamerMsgPackV4::begin(const Module &Mod,
536 const IsaInfo::AMDGPUTargetID &TargetID) {
537 emitVersion();
538 emitTargetID(TargetID);
539 emitPrintf(Mod);
540 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
541 DelayedExprs->clear();
544 void MetadataStreamerMsgPackV4::end() {
545 DelayedExprs->resolveDelayedExpressions();
546 std::string HSAMetadataString;
547 raw_string_ostream StrOS(HSAMetadataString);
548 HSAMetadataDoc->toYAML(StrOS);
550 if (DumpHSAMetadata)
551 dump(StrOS.str());
552 if (VerifyHSAMetadata)
553 verify(StrOS.str());
556 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
557 const SIProgramInfo &ProgramInfo) {
558 auto &Func = MF.getFunction();
559 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
560 Func.getCallingConv() != CallingConv::SPIR_KERNEL)
561 return;
563 auto CodeObjectVersion =
564 AMDGPU::getAMDHSACodeObjectVersion(*Func.getParent());
565 auto Kern = getHSAKernelProps(MF, ProgramInfo, CodeObjectVersion);
567 auto Kernels =
568 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
571 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
572 Kern[".symbol"] = Kern.getDocument()->getNode(
573 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
574 emitKernelLanguage(Func, Kern);
575 emitKernelAttrs(Func, Kern);
576 emitKernelArgs(MF, Kern);
579 Kernels.push_back(Kern);
582 //===----------------------------------------------------------------------===//
583 // HSAMetadataStreamerV5
584 //===----------------------------------------------------------------------===//
586 void MetadataStreamerMsgPackV5::emitVersion() {
587 auto Version = HSAMetadataDoc->getArrayNode();
588 Version.push_back(Version.getDocument()->getNode(VersionMajorV5));
589 Version.push_back(Version.getDocument()->getNode(VersionMinorV5));
590 getRootMetadata("amdhsa.version") = Version;
593 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
594 const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
595 auto &Func = MF.getFunction();
596 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
598 // No implicit kernel argument is used.
599 if (ST.getImplicitArgNumBytes(Func) == 0)
600 return;
602 const Module *M = Func.getParent();
603 auto &DL = M->getDataLayout();
604 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
606 auto *Int64Ty = Type::getInt64Ty(Func.getContext());
607 auto *Int32Ty = Type::getInt32Ty(Func.getContext());
608 auto *Int16Ty = Type::getInt16Ty(Func.getContext());
610 Offset = alignTo(Offset, ST.getAlignmentForImplicitArgPtr());
611 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_x", Offset, Args);
612 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_y", Offset, Args);
613 emitKernelArg(DL, Int32Ty, Align(4), "hidden_block_count_z", Offset, Args);
615 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_x", Offset, Args);
616 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_y", Offset, Args);
617 emitKernelArg(DL, Int16Ty, Align(2), "hidden_group_size_z", Offset, Args);
619 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_x", Offset, Args);
620 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_y", Offset, Args);
621 emitKernelArg(DL, Int16Ty, Align(2), "hidden_remainder_z", Offset, Args);
623 // Reserved for hidden_tool_correlation_id.
624 Offset += 8;
626 Offset += 8; // Reserved.
628 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset, Args);
629 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset, Args);
630 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset, Args);
632 emitKernelArg(DL, Int16Ty, Align(2), "hidden_grid_dims", Offset, Args);
634 Offset += 6; // Reserved.
635 auto *Int8PtrTy =
636 PointerType::get(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
638 if (M->getNamedMetadata("llvm.printf.fmts")) {
639 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
640 Args);
641 } else {
642 Offset += 8; // Skipped.
645 if (!Func.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
646 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
647 Args);
648 } else {
649 Offset += 8; // Skipped.
652 if (!Func.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
653 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
654 Args);
655 } else {
656 Offset += 8; // Skipped.
659 if (!Func.hasFnAttribute("amdgpu-no-heap-ptr"))
660 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_heap_v1", Offset, Args);
661 else
662 Offset += 8; // Skipped.
664 if (!Func.hasFnAttribute("amdgpu-no-default-queue")) {
665 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
666 Args);
667 } else {
668 Offset += 8; // Skipped.
671 if (!Func.hasFnAttribute("amdgpu-no-completion-action")) {
672 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
673 Args);
674 } else {
675 Offset += 8; // Skipped.
678 // Emit argument for hidden dynamic lds size
679 if (MFI.isDynamicLDSUsed()) {
680 emitKernelArg(DL, Int32Ty, Align(4), "hidden_dynamic_lds_size", Offset,
681 Args);
682 } else {
683 Offset += 4; // skipped
686 Offset += 68; // Reserved.
688 // hidden_private_base and hidden_shared_base are only when the subtarget has
689 // ApertureRegs.
690 if (!ST.hasApertureRegs()) {
691 emitKernelArg(DL, Int32Ty, Align(4), "hidden_private_base", Offset, Args);
692 emitKernelArg(DL, Int32Ty, Align(4), "hidden_shared_base", Offset, Args);
693 } else {
694 Offset += 8; // Skipped.
697 if (MFI.getUserSGPRInfo().hasQueuePtr())
698 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_queue_ptr", Offset, Args);
701 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
702 msgpack::MapDocNode Kern) {
703 MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
705 if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
706 Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
709 //===----------------------------------------------------------------------===//
710 // HSAMetadataStreamerV6
711 //===----------------------------------------------------------------------===//
713 void MetadataStreamerMsgPackV6::emitVersion() {
714 auto Version = HSAMetadataDoc->getArrayNode();
715 Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
716 Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
717 getRootMetadata("amdhsa.version") = Version;
720 } // end namespace AMDGPU::HSAMD
721 } // end namespace llvm