1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
26 static std::pair
<Type
*, Align
> getArgumentTypeAlign(const Argument
&Arg
,
27 const DataLayout
&DL
) {
28 Type
*Ty
= Arg
.getType();
30 if (Arg
.hasByRefAttr()) {
31 Ty
= Arg
.getParamByRefType();
32 ArgAlign
= Arg
.getParamAlign();
36 ArgAlign
= DL
.getABITypeAlign(Ty
);
38 return std::pair(Ty
, *ArgAlign
);
43 static cl::opt
<bool> DumpHSAMetadata(
44 "amdgpu-dump-hsa-metadata",
45 cl::desc("Dump AMDGPU HSA Metadata"));
46 static cl::opt
<bool> VerifyHSAMetadata(
47 "amdgpu-verify-hsa-metadata",
48 cl::desc("Verify AMDGPU HSA Metadata"));
50 namespace AMDGPU::HSAMD
{
52 //===----------------------------------------------------------------------===//
53 // HSAMetadataStreamerV4
54 //===----------------------------------------------------------------------===//
56 void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString
) const {
57 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
60 void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString
) const {
61 errs() << "AMDGPU HSA Metadata Parser Test: ";
63 msgpack::Document FromHSAMetadataString
;
65 if (!FromHSAMetadataString
.fromYAML(HSAMetadataString
)) {
70 std::string ToHSAMetadataString
;
71 raw_string_ostream
StrOS(ToHSAMetadataString
);
72 FromHSAMetadataString
.toYAML(StrOS
);
74 errs() << (HSAMetadataString
== StrOS
.str() ? "PASS" : "FAIL") << '\n';
75 if (HSAMetadataString
!= ToHSAMetadataString
) {
76 errs() << "Original input: " << HSAMetadataString
<< '\n'
77 << "Produced output: " << StrOS
.str() << '\n';
81 std::optional
<StringRef
>
82 MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual
) const {
83 return StringSwitch
<std::optional
<StringRef
>>(AccQual
)
84 .Case("read_only", StringRef("read_only"))
85 .Case("write_only", StringRef("write_only"))
86 .Case("read_write", StringRef("read_write"))
87 .Default(std::nullopt
);
90 std::optional
<StringRef
> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
91 unsigned AddressSpace
) const {
92 switch (AddressSpace
) {
93 case AMDGPUAS::PRIVATE_ADDRESS
:
94 return StringRef("private");
95 case AMDGPUAS::GLOBAL_ADDRESS
:
96 return StringRef("global");
97 case AMDGPUAS::CONSTANT_ADDRESS
:
98 return StringRef("constant");
99 case AMDGPUAS::LOCAL_ADDRESS
:
100 return StringRef("local");
101 case AMDGPUAS::FLAT_ADDRESS
:
102 return StringRef("generic");
103 case AMDGPUAS::REGION_ADDRESS
:
104 return StringRef("region");
111 MetadataStreamerMsgPackV4::getValueKind(Type
*Ty
, StringRef TypeQual
,
112 StringRef BaseTypeName
) const {
113 if (TypeQual
.contains("pipe"))
116 return StringSwitch
<StringRef
>(BaseTypeName
)
117 .Case("image1d_t", "image")
118 .Case("image1d_array_t", "image")
119 .Case("image1d_buffer_t", "image")
120 .Case("image2d_t", "image")
121 .Case("image2d_array_t", "image")
122 .Case("image2d_array_depth_t", "image")
123 .Case("image2d_array_msaa_t", "image")
124 .Case("image2d_array_msaa_depth_t", "image")
125 .Case("image2d_depth_t", "image")
126 .Case("image2d_msaa_t", "image")
127 .Case("image2d_msaa_depth_t", "image")
128 .Case("image3d_t", "image")
129 .Case("sampler_t", "sampler")
130 .Case("queue_t", "queue")
131 .Default(isa
<PointerType
>(Ty
)
132 ? (Ty
->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
133 ? "dynamic_shared_pointer"
138 std::string
MetadataStreamerMsgPackV4::getTypeName(Type
*Ty
,
140 switch (Ty
->getTypeID()) {
141 case Type::IntegerTyID
: {
143 return (Twine('u') + getTypeName(Ty
, true)).str();
145 auto BitWidth
= Ty
->getIntegerBitWidth();
156 return (Twine('i') + Twine(BitWidth
)).str();
161 case Type::FloatTyID
:
163 case Type::DoubleTyID
:
165 case Type::FixedVectorTyID
: {
166 auto *VecTy
= cast
<FixedVectorType
>(Ty
);
167 auto *ElTy
= VecTy
->getElementType();
168 auto NumElements
= VecTy
->getNumElements();
169 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
176 msgpack::ArrayDocNode
177 MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode
*Node
) const {
178 auto Dims
= HSAMetadataDoc
->getArrayNode();
179 if (Node
->getNumOperands() != 3)
182 for (auto &Op
: Node
->operands())
183 Dims
.push_back(Dims
.getDocument()->getNode(
184 uint64_t(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue())));
188 void MetadataStreamerMsgPackV4::emitVersion() {
189 auto Version
= HSAMetadataDoc
->getArrayNode();
190 Version
.push_back(Version
.getDocument()->getNode(VersionMajorV4
));
191 Version
.push_back(Version
.getDocument()->getNode(VersionMinorV4
));
192 getRootMetadata("amdhsa.version") = Version
;
195 void MetadataStreamerMsgPackV4::emitTargetID(
196 const IsaInfo::AMDGPUTargetID
&TargetID
) {
197 getRootMetadata("amdhsa.target") =
198 HSAMetadataDoc
->getNode(TargetID
.toString(), /*Copy=*/true);
201 void MetadataStreamerMsgPackV4::emitPrintf(const Module
&Mod
) {
202 auto *Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
206 auto Printf
= HSAMetadataDoc
->getArrayNode();
207 for (auto *Op
: Node
->operands())
208 if (Op
->getNumOperands())
209 Printf
.push_back(Printf
.getDocument()->getNode(
210 cast
<MDString
>(Op
->getOperand(0))->getString(), /*Copy=*/true));
211 getRootMetadata("amdhsa.printf") = Printf
;
214 void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function
&Func
,
215 msgpack::MapDocNode Kern
) {
216 // TODO: What about other languages?
217 auto *Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
218 if (!Node
|| !Node
->getNumOperands())
220 auto *Op0
= Node
->getOperand(0);
221 if (Op0
->getNumOperands() <= 1)
224 Kern
[".language"] = Kern
.getDocument()->getNode("OpenCL C");
225 auto LanguageVersion
= Kern
.getDocument()->getArrayNode();
226 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
227 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue()));
228 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
229 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue()));
230 Kern
[".language_version"] = LanguageVersion
;
233 void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function
&Func
,
234 msgpack::MapDocNode Kern
) {
236 if (auto *Node
= Func
.getMetadata("reqd_work_group_size"))
237 Kern
[".reqd_workgroup_size"] = getWorkGroupDimensions(Node
);
238 if (auto *Node
= Func
.getMetadata("work_group_size_hint"))
239 Kern
[".workgroup_size_hint"] = getWorkGroupDimensions(Node
);
240 if (auto *Node
= Func
.getMetadata("vec_type_hint")) {
241 Kern
[".vec_type_hint"] = Kern
.getDocument()->getNode(
243 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
244 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue()),
247 if (Func
.hasFnAttribute("runtime-handle")) {
248 Kern
[".device_enqueue_symbol"] = Kern
.getDocument()->getNode(
249 Func
.getFnAttribute("runtime-handle").getValueAsString().str(),
252 if (Func
.hasFnAttribute("device-init"))
253 Kern
[".kind"] = Kern
.getDocument()->getNode("init");
254 else if (Func
.hasFnAttribute("device-fini"))
255 Kern
[".kind"] = Kern
.getDocument()->getNode("fini");
258 void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction
&MF
,
259 msgpack::MapDocNode Kern
) {
260 auto &Func
= MF
.getFunction();
262 auto Args
= HSAMetadataDoc
->getArrayNode();
263 for (auto &Arg
: Func
.args()) {
264 if (Arg
.hasAttribute("amdgpu-hidden-argument"))
267 emitKernelArg(Arg
, Offset
, Args
);
270 emitHiddenKernelArgs(MF
, Offset
, Args
);
272 Kern
[".args"] = Args
;
275 void MetadataStreamerMsgPackV4::emitKernelArg(const Argument
&Arg
,
277 msgpack::ArrayDocNode Args
) {
278 const auto *Func
= Arg
.getParent();
279 auto ArgNo
= Arg
.getArgNo();
283 Node
= Func
->getMetadata("kernel_arg_name");
284 if (Node
&& ArgNo
< Node
->getNumOperands())
285 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
286 else if (Arg
.hasName())
287 Name
= Arg
.getName();
290 Node
= Func
->getMetadata("kernel_arg_type");
291 if (Node
&& ArgNo
< Node
->getNumOperands())
292 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
294 StringRef BaseTypeName
;
295 Node
= Func
->getMetadata("kernel_arg_base_type");
296 if (Node
&& ArgNo
< Node
->getNumOperands())
297 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
299 StringRef ActAccQual
;
300 // Do we really need NoAlias check here?
301 if (Arg
.getType()->isPointerTy() && Arg
.hasNoAliasAttr()) {
302 if (Arg
.onlyReadsMemory())
303 ActAccQual
= "read_only";
304 else if (Arg
.hasAttribute(Attribute::WriteOnly
))
305 ActAccQual
= "write_only";
309 Node
= Func
->getMetadata("kernel_arg_access_qual");
310 if (Node
&& ArgNo
< Node
->getNumOperands())
311 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
314 Node
= Func
->getMetadata("kernel_arg_type_qual");
315 if (Node
&& ArgNo
< Node
->getNumOperands())
316 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
318 const DataLayout
&DL
= Func
->getDataLayout();
320 MaybeAlign PointeeAlign
;
321 Type
*Ty
= Arg
.hasByRefAttr() ? Arg
.getParamByRefType() : Arg
.getType();
323 // FIXME: Need to distinguish in memory alignment from pointer alignment.
324 if (auto *PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
325 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
)
326 PointeeAlign
= Arg
.getParamAlign().valueOrOne();
329 // There's no distinction between byval aggregates and raw aggregates.
332 std::tie(ArgTy
, ArgAlign
) = getArgumentTypeAlign(Arg
, DL
);
334 emitKernelArg(DL
, ArgTy
, ArgAlign
,
335 getValueKind(ArgTy
, TypeQual
, BaseTypeName
), Offset
, Args
,
336 PointeeAlign
, Name
, TypeName
, BaseTypeName
, ActAccQual
,
340 void MetadataStreamerMsgPackV4::emitKernelArg(
341 const DataLayout
&DL
, Type
*Ty
, Align Alignment
, StringRef ValueKind
,
342 unsigned &Offset
, msgpack::ArrayDocNode Args
, MaybeAlign PointeeAlign
,
343 StringRef Name
, StringRef TypeName
, StringRef BaseTypeName
,
344 StringRef ActAccQual
, StringRef AccQual
, StringRef TypeQual
) {
345 auto Arg
= Args
.getDocument()->getMapNode();
348 Arg
[".name"] = Arg
.getDocument()->getNode(Name
, /*Copy=*/true);
349 if (!TypeName
.empty())
350 Arg
[".type_name"] = Arg
.getDocument()->getNode(TypeName
, /*Copy=*/true);
351 auto Size
= DL
.getTypeAllocSize(Ty
);
352 Arg
[".size"] = Arg
.getDocument()->getNode(Size
);
353 Offset
= alignTo(Offset
, Alignment
);
354 Arg
[".offset"] = Arg
.getDocument()->getNode(Offset
);
356 Arg
[".value_kind"] = Arg
.getDocument()->getNode(ValueKind
, /*Copy=*/true);
358 Arg
[".pointee_align"] = Arg
.getDocument()->getNode(PointeeAlign
->value());
360 if (auto *PtrTy
= dyn_cast
<PointerType
>(Ty
))
361 if (auto Qualifier
= getAddressSpaceQualifier(PtrTy
->getAddressSpace()))
362 // Limiting address space to emit only for a certain ValueKind.
363 if (ValueKind
== "global_buffer" || ValueKind
== "dynamic_shared_pointer")
364 Arg
[".address_space"] = Arg
.getDocument()->getNode(*Qualifier
,
367 if (auto AQ
= getAccessQualifier(AccQual
))
368 Arg
[".access"] = Arg
.getDocument()->getNode(*AQ
, /*Copy=*/true);
370 if (auto AAQ
= getAccessQualifier(ActAccQual
))
371 Arg
[".actual_access"] = Arg
.getDocument()->getNode(*AAQ
, /*Copy=*/true);
373 SmallVector
<StringRef
, 1> SplitTypeQuals
;
374 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
375 for (StringRef Key
: SplitTypeQuals
) {
377 Arg
[".is_const"] = Arg
.getDocument()->getNode(true);
378 else if (Key
== "restrict")
379 Arg
[".is_restrict"] = Arg
.getDocument()->getNode(true);
380 else if (Key
== "volatile")
381 Arg
[".is_volatile"] = Arg
.getDocument()->getNode(true);
382 else if (Key
== "pipe")
383 Arg
[".is_pipe"] = Arg
.getDocument()->getNode(true);
389 void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
390 const MachineFunction
&MF
, unsigned &Offset
, msgpack::ArrayDocNode Args
) {
391 auto &Func
= MF
.getFunction();
392 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
394 unsigned HiddenArgNumBytes
= ST
.getImplicitArgNumBytes(Func
);
395 if (!HiddenArgNumBytes
)
398 const Module
*M
= Func
.getParent();
399 auto &DL
= M
->getDataLayout();
400 auto *Int64Ty
= Type::getInt64Ty(Func
.getContext());
402 Offset
= alignTo(Offset
, ST
.getAlignmentForImplicitArgPtr());
404 if (HiddenArgNumBytes
>= 8)
405 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_x", Offset
,
407 if (HiddenArgNumBytes
>= 16)
408 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_y", Offset
,
410 if (HiddenArgNumBytes
>= 24)
411 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_z", Offset
,
415 PointerType::get(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
417 if (HiddenArgNumBytes
>= 32) {
418 // We forbid the use of features requiring hostcall when compiling OpenCL
419 // before code object V5, which makes the mutual exclusion between the
420 // "printf buffer" and "hostcall buffer" here sound.
421 if (M
->getNamedMetadata("llvm.printf.fmts"))
422 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_printf_buffer", Offset
,
424 else if (!Func
.hasFnAttribute("amdgpu-no-hostcall-ptr"))
425 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_hostcall_buffer", Offset
,
428 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
431 // Emit "default queue" and "completion action" arguments if enqueue kernel is
432 // used, otherwise emit dummy "none" arguments.
433 if (HiddenArgNumBytes
>= 40) {
434 if (!Func
.hasFnAttribute("amdgpu-no-default-queue")) {
435 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_default_queue", Offset
,
438 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
442 if (HiddenArgNumBytes
>= 48) {
443 if (!Func
.hasFnAttribute("amdgpu-no-completion-action")) {
444 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_completion_action", Offset
,
447 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
451 // Emit the pointer argument for multi-grid object.
452 if (HiddenArgNumBytes
>= 56) {
453 if (!Func
.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
454 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_multigrid_sync_arg", Offset
,
457 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
463 MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction
&MF
,
464 const SIProgramInfo
&ProgramInfo
,
465 unsigned CodeObjectVersion
) const {
466 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
467 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
468 const Function
&F
= MF
.getFunction();
470 auto Kern
= HSAMetadataDoc
->getMapNode();
472 Align MaxKernArgAlign
;
473 Kern
[".kernarg_segment_size"] = Kern
.getDocument()->getNode(
474 STM
.getKernArgSegmentSize(F
, MaxKernArgAlign
));
475 Kern
[".group_segment_fixed_size"] =
476 Kern
.getDocument()->getNode(ProgramInfo
.LDSSize
);
477 DelayedExprs
->assignDocNode(Kern
[".private_segment_fixed_size"],
478 msgpack::Type::UInt
, ProgramInfo
.ScratchSize
);
479 if (CodeObjectVersion
>= AMDGPU::AMDHSA_COV5
) {
480 DelayedExprs
->assignDocNode(Kern
[".uses_dynamic_stack"],
481 msgpack::Type::Boolean
,
482 ProgramInfo
.DynamicCallStack
);
485 if (CodeObjectVersion
>= AMDGPU::AMDHSA_COV5
&& STM
.supportsWGP())
486 Kern
[".workgroup_processor_mode"] =
487 Kern
.getDocument()->getNode(ProgramInfo
.WgpMode
);
489 // FIXME: The metadata treats the minimum as 16?
490 Kern
[".kernarg_segment_align"] =
491 Kern
.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign
).value());
492 Kern
[".wavefront_size"] =
493 Kern
.getDocument()->getNode(STM
.getWavefrontSize());
494 DelayedExprs
->assignDocNode(Kern
[".sgpr_count"], msgpack::Type::UInt
,
495 ProgramInfo
.NumSGPR
);
496 DelayedExprs
->assignDocNode(Kern
[".vgpr_count"], msgpack::Type::UInt
,
497 ProgramInfo
.NumVGPR
);
499 // Only add AGPR count to metadata for supported devices
500 if (STM
.hasMAIInsts()) {
501 DelayedExprs
->assignDocNode(Kern
[".agpr_count"], msgpack::Type::UInt
,
502 ProgramInfo
.NumAccVGPR
);
505 Kern
[".max_flat_workgroup_size"] =
506 Kern
.getDocument()->getNode(MFI
.getMaxFlatWorkGroupSize());
508 uint32_t NumWGY
= MFI
.getMaxNumWorkGroupsY();
509 uint32_t NumWGZ
= MFI
.getMaxNumWorkGroupsZ();
510 uint32_t NumWGX
= MFI
.getMaxNumWorkGroupsX();
512 // TODO: Should consider 0 invalid and reject in IR verifier.
513 if (NumWGX
!= std::numeric_limits
<uint32_t>::max() && NumWGX
!= 0)
514 Kern
[".max_num_workgroups_x"] = Kern
.getDocument()->getNode(NumWGX
);
516 if (NumWGY
!= std::numeric_limits
<uint32_t>::max() && NumWGY
!= 0)
517 Kern
[".max_num_workgroups_y"] = Kern
.getDocument()->getNode(NumWGY
);
519 if (NumWGZ
!= std::numeric_limits
<uint32_t>::max() && NumWGZ
!= 0)
520 Kern
[".max_num_workgroups_z"] = Kern
.getDocument()->getNode(NumWGZ
);
522 Kern
[".sgpr_spill_count"] =
523 Kern
.getDocument()->getNode(MFI
.getNumSpilledSGPRs());
524 Kern
[".vgpr_spill_count"] =
525 Kern
.getDocument()->getNode(MFI
.getNumSpilledVGPRs());
530 bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
531 DelayedExprs
->resolveDelayedExpressions();
532 return TargetStreamer
.EmitHSAMetadata(*HSAMetadataDoc
, true);
535 void MetadataStreamerMsgPackV4::begin(const Module
&Mod
,
536 const IsaInfo::AMDGPUTargetID
&TargetID
) {
538 emitTargetID(TargetID
);
540 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc
->getArrayNode();
541 DelayedExprs
->clear();
544 void MetadataStreamerMsgPackV4::end() {
545 DelayedExprs
->resolveDelayedExpressions();
546 std::string HSAMetadataString
;
547 raw_string_ostream
StrOS(HSAMetadataString
);
548 HSAMetadataDoc
->toYAML(StrOS
);
552 if (VerifyHSAMetadata
)
556 void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction
&MF
,
557 const SIProgramInfo
&ProgramInfo
) {
558 auto &Func
= MF
.getFunction();
559 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
&&
560 Func
.getCallingConv() != CallingConv::SPIR_KERNEL
)
563 auto CodeObjectVersion
=
564 AMDGPU::getAMDHSACodeObjectVersion(*Func
.getParent());
565 auto Kern
= getHSAKernelProps(MF
, ProgramInfo
, CodeObjectVersion
);
568 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
571 Kern
[".name"] = Kern
.getDocument()->getNode(Func
.getName());
572 Kern
[".symbol"] = Kern
.getDocument()->getNode(
573 (Twine(Func
.getName()) + Twine(".kd")).str(), /*Copy=*/true);
574 emitKernelLanguage(Func
, Kern
);
575 emitKernelAttrs(Func
, Kern
);
576 emitKernelArgs(MF
, Kern
);
579 Kernels
.push_back(Kern
);
582 //===----------------------------------------------------------------------===//
583 // HSAMetadataStreamerV5
584 //===----------------------------------------------------------------------===//
586 void MetadataStreamerMsgPackV5::emitVersion() {
587 auto Version
= HSAMetadataDoc
->getArrayNode();
588 Version
.push_back(Version
.getDocument()->getNode(VersionMajorV5
));
589 Version
.push_back(Version
.getDocument()->getNode(VersionMinorV5
));
590 getRootMetadata("amdhsa.version") = Version
;
593 void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
594 const MachineFunction
&MF
, unsigned &Offset
, msgpack::ArrayDocNode Args
) {
595 auto &Func
= MF
.getFunction();
596 const GCNSubtarget
&ST
= MF
.getSubtarget
<GCNSubtarget
>();
598 // No implicit kernel argument is used.
599 if (ST
.getImplicitArgNumBytes(Func
) == 0)
602 const Module
*M
= Func
.getParent();
603 auto &DL
= M
->getDataLayout();
604 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
606 auto *Int64Ty
= Type::getInt64Ty(Func
.getContext());
607 auto *Int32Ty
= Type::getInt32Ty(Func
.getContext());
608 auto *Int16Ty
= Type::getInt16Ty(Func
.getContext());
610 Offset
= alignTo(Offset
, ST
.getAlignmentForImplicitArgPtr());
611 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_block_count_x", Offset
, Args
);
612 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_block_count_y", Offset
, Args
);
613 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_block_count_z", Offset
, Args
);
615 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_group_size_x", Offset
, Args
);
616 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_group_size_y", Offset
, Args
);
617 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_group_size_z", Offset
, Args
);
619 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_remainder_x", Offset
, Args
);
620 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_remainder_y", Offset
, Args
);
621 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_remainder_z", Offset
, Args
);
623 // Reserved for hidden_tool_correlation_id.
626 Offset
+= 8; // Reserved.
628 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_x", Offset
, Args
);
629 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_y", Offset
, Args
);
630 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_z", Offset
, Args
);
632 emitKernelArg(DL
, Int16Ty
, Align(2), "hidden_grid_dims", Offset
, Args
);
634 Offset
+= 6; // Reserved.
636 PointerType::get(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
638 if (M
->getNamedMetadata("llvm.printf.fmts")) {
639 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_printf_buffer", Offset
,
642 Offset
+= 8; // Skipped.
645 if (!Func
.hasFnAttribute("amdgpu-no-hostcall-ptr")) {
646 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_hostcall_buffer", Offset
,
649 Offset
+= 8; // Skipped.
652 if (!Func
.hasFnAttribute("amdgpu-no-multigrid-sync-arg")) {
653 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_multigrid_sync_arg", Offset
,
656 Offset
+= 8; // Skipped.
659 if (!Func
.hasFnAttribute("amdgpu-no-heap-ptr"))
660 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_heap_v1", Offset
, Args
);
662 Offset
+= 8; // Skipped.
664 if (!Func
.hasFnAttribute("amdgpu-no-default-queue")) {
665 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_default_queue", Offset
,
668 Offset
+= 8; // Skipped.
671 if (!Func
.hasFnAttribute("amdgpu-no-completion-action")) {
672 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_completion_action", Offset
,
675 Offset
+= 8; // Skipped.
678 // Emit argument for hidden dynamic lds size
679 if (MFI
.isDynamicLDSUsed()) {
680 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_dynamic_lds_size", Offset
,
683 Offset
+= 4; // skipped
686 Offset
+= 68; // Reserved.
688 // hidden_private_base and hidden_shared_base are only when the subtarget has
690 if (!ST
.hasApertureRegs()) {
691 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_private_base", Offset
, Args
);
692 emitKernelArg(DL
, Int32Ty
, Align(4), "hidden_shared_base", Offset
, Args
);
694 Offset
+= 8; // Skipped.
697 if (MFI
.getUserSGPRInfo().hasQueuePtr())
698 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_queue_ptr", Offset
, Args
);
701 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function
&Func
,
702 msgpack::MapDocNode Kern
) {
703 MetadataStreamerMsgPackV4::emitKernelAttrs(Func
, Kern
);
705 if (Func
.getFnAttribute("uniform-work-group-size").getValueAsBool())
706 Kern
[".uniform_work_group_size"] = Kern
.getDocument()->getNode(1);
709 //===----------------------------------------------------------------------===//
710 // HSAMetadataStreamerV6
711 //===----------------------------------------------------------------------===//
713 void MetadataStreamerMsgPackV6::emitVersion() {
714 auto Version
= HSAMetadataDoc
->getArrayNode();
715 Version
.push_back(Version
.getDocument()->getNode(VersionMajorV6
));
716 Version
.push_back(Version
.getDocument()->getNode(VersionMinorV6
));
717 getRootMetadata("amdhsa.version") = Version
;
720 } // end namespace AMDGPU::HSAMD
721 } // end namespace llvm