1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
29 static cl::opt
<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt
<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString
) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString
) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
49 HSAMD::Metadata FromHSAMetadataString
;
50 if (fromString(HSAMetadataString
, FromHSAMetadataString
)) {
55 std::string ToHSAMetadataString
;
56 if (toString(FromHSAMetadataString
, ToHSAMetadataString
)) {
61 errs() << (HSAMetadataString
== ToHSAMetadataString
? "PASS" : "FAIL")
63 if (HSAMetadataString
!= ToHSAMetadataString
) {
64 errs() << "Original input: " << HSAMetadataString
<< '\n'
65 << "Produced output: " << ToHSAMetadataString
<< '\n';
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual
) const {
72 return AccessQualifier::Unknown
;
74 return StringSwitch
<AccessQualifier
>(AccQual
)
75 .Case("read_only", AccessQualifier::ReadOnly
)
76 .Case("write_only", AccessQualifier::WriteOnly
)
77 .Case("read_write", AccessQualifier::ReadWrite
)
78 .Default(AccessQualifier::Default
);
82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace
) const {
84 switch (AddressSpace
) {
85 case AMDGPUAS::PRIVATE_ADDRESS
:
86 return AddressSpaceQualifier::Private
;
87 case AMDGPUAS::GLOBAL_ADDRESS
:
88 return AddressSpaceQualifier::Global
;
89 case AMDGPUAS::CONSTANT_ADDRESS
:
90 return AddressSpaceQualifier::Constant
;
91 case AMDGPUAS::LOCAL_ADDRESS
:
92 return AddressSpaceQualifier::Local
;
93 case AMDGPUAS::FLAT_ADDRESS
:
94 return AddressSpaceQualifier::Generic
;
95 case AMDGPUAS::REGION_ADDRESS
:
96 return AddressSpaceQualifier::Region
;
98 return AddressSpaceQualifier::Unknown
;
102 ValueKind
MetadataStreamerV2::getValueKind(Type
*Ty
, StringRef TypeQual
,
103 StringRef BaseTypeName
) const {
104 if (TypeQual
.find("pipe") != StringRef::npos
)
105 return ValueKind::Pipe
;
107 return StringSwitch
<ValueKind
>(BaseTypeName
)
108 .Case("image1d_t", ValueKind::Image
)
109 .Case("image1d_array_t", ValueKind::Image
)
110 .Case("image1d_buffer_t", ValueKind::Image
)
111 .Case("image2d_t", ValueKind::Image
)
112 .Case("image2d_array_t", ValueKind::Image
)
113 .Case("image2d_array_depth_t", ValueKind::Image
)
114 .Case("image2d_array_msaa_t", ValueKind::Image
)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image
)
116 .Case("image2d_depth_t", ValueKind::Image
)
117 .Case("image2d_msaa_t", ValueKind::Image
)
118 .Case("image2d_msaa_depth_t", ValueKind::Image
)
119 .Case("image3d_t", ValueKind::Image
)
120 .Case("sampler_t", ValueKind::Sampler
)
121 .Case("queue_t", ValueKind::Queue
)
122 .Default(isa
<PointerType
>(Ty
) ?
123 (Ty
->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS
?
125 ValueKind::DynamicSharedPointer
:
126 ValueKind::GlobalBuffer
) :
130 ValueType
MetadataStreamerV2::getValueType(Type
*Ty
, StringRef TypeName
) const {
131 switch (Ty
->getTypeID()) {
132 case Type::IntegerTyID
: {
133 auto Signed
= !TypeName
.startswith("u");
134 switch (Ty
->getIntegerBitWidth()) {
136 return Signed
? ValueType::I8
: ValueType::U8
;
138 return Signed
? ValueType::I16
: ValueType::U16
;
140 return Signed
? ValueType::I32
: ValueType::U32
;
142 return Signed
? ValueType::I64
: ValueType::U64
;
144 return ValueType::Struct
;
148 return ValueType::F16
;
149 case Type::FloatTyID
:
150 return ValueType::F32
;
151 case Type::DoubleTyID
:
152 return ValueType::F64
;
153 case Type::PointerTyID
:
154 return getValueType(Ty
->getPointerElementType(), TypeName
);
155 case Type::VectorTyID
:
156 return getValueType(Ty
->getVectorElementType(), TypeName
);
158 return ValueType::Struct
;
162 std::string
MetadataStreamerV2::getTypeName(Type
*Ty
, bool Signed
) const {
163 switch (Ty
->getTypeID()) {
164 case Type::IntegerTyID
: {
166 return (Twine('u') + getTypeName(Ty
, true)).str();
168 auto BitWidth
= Ty
->getIntegerBitWidth();
179 return (Twine('i') + Twine(BitWidth
)).str();
184 case Type::FloatTyID
:
186 case Type::DoubleTyID
:
188 case Type::VectorTyID
: {
189 auto VecTy
= cast
<VectorType
>(Ty
);
190 auto ElTy
= VecTy
->getElementType();
191 auto NumElements
= VecTy
->getVectorNumElements();
192 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
199 std::vector
<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode
*Node
) const {
201 std::vector
<uint32_t> Dims
;
202 if (Node
->getNumOperands() != 3)
205 for (auto &Op
: Node
->operands())
206 Dims
.push_back(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue());
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction
&MF
,
212 const SIProgramInfo
&ProgramInfo
) const {
213 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
214 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps
;
216 const Function
&F
= MF
.getFunction();
218 assert(F
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
219 F
.getCallingConv() == CallingConv::SPIR_KERNEL
);
221 unsigned MaxKernArgAlign
;
222 HSACodeProps
.mKernargSegmentSize
= STM
.getKernArgSegmentSize(F
,
224 HSACodeProps
.mGroupSegmentFixedSize
= ProgramInfo
.LDSSize
;
225 HSACodeProps
.mPrivateSegmentFixedSize
= ProgramInfo
.ScratchSize
;
226 HSACodeProps
.mKernargSegmentAlign
= std::max(MaxKernArgAlign
, 4u);
227 HSACodeProps
.mWavefrontSize
= STM
.getWavefrontSize();
228 HSACodeProps
.mNumSGPRs
= ProgramInfo
.NumSGPR
;
229 HSACodeProps
.mNumVGPRs
= ProgramInfo
.NumVGPR
;
230 HSACodeProps
.mMaxFlatWorkGroupSize
= MFI
.getMaxFlatWorkGroupSize();
231 HSACodeProps
.mIsDynamicCallStack
= ProgramInfo
.DynamicCallStack
;
232 HSACodeProps
.mIsXNACKEnabled
= STM
.isXNACKEnabled();
233 HSACodeProps
.mNumSpilledSGPRs
= MFI
.getNumSpilledSGPRs();
234 HSACodeProps
.mNumSpilledVGPRs
= MFI
.getNumSpilledVGPRs();
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction
&MF
,
241 const SIProgramInfo
&ProgramInfo
) const {
242 return HSAMD::Kernel::DebugProps::Metadata();
245 void MetadataStreamerV2::emitVersion() {
246 auto &Version
= HSAMetadata
.mVersion
;
248 Version
.push_back(VersionMajor
);
249 Version
.push_back(VersionMinor
);
252 void MetadataStreamerV2::emitPrintf(const Module
&Mod
) {
253 auto &Printf
= HSAMetadata
.mPrintf
;
255 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
259 for (auto Op
: Node
->operands())
260 if (Op
->getNumOperands())
261 Printf
.push_back(cast
<MDString
>(Op
->getOperand(0))->getString());
264 void MetadataStreamerV2::emitKernelLanguage(const Function
&Func
) {
265 auto &Kernel
= HSAMetadata
.mKernels
.back();
267 // TODO: What about other languages?
268 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
269 if (!Node
|| !Node
->getNumOperands())
271 auto Op0
= Node
->getOperand(0);
272 if (Op0
->getNumOperands() <= 1)
275 Kernel
.mLanguage
= "OpenCL C";
276 Kernel
.mLanguageVersion
.push_back(
277 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue());
278 Kernel
.mLanguageVersion
.push_back(
279 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue());
282 void MetadataStreamerV2::emitKernelAttrs(const Function
&Func
) {
283 auto &Attrs
= HSAMetadata
.mKernels
.back().mAttrs
;
285 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
286 Attrs
.mReqdWorkGroupSize
= getWorkGroupDimensions(Node
);
287 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
288 Attrs
.mWorkGroupSizeHint
= getWorkGroupDimensions(Node
);
289 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
290 Attrs
.mVecTypeHint
= getTypeName(
291 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
292 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue());
294 if (Func
.hasFnAttribute("runtime-handle")) {
295 Attrs
.mRuntimeHandle
=
296 Func
.getFnAttribute("runtime-handle").getValueAsString().str();
300 void MetadataStreamerV2::emitKernelArgs(const Function
&Func
) {
301 for (auto &Arg
: Func
.args())
304 emitHiddenKernelArgs(Func
);
307 void MetadataStreamerV2::emitKernelArg(const Argument
&Arg
) {
308 auto Func
= Arg
.getParent();
309 auto ArgNo
= Arg
.getArgNo();
313 Node
= Func
->getMetadata("kernel_arg_name");
314 if (Node
&& ArgNo
< Node
->getNumOperands())
315 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
316 else if (Arg
.hasName())
317 Name
= Arg
.getName();
320 Node
= Func
->getMetadata("kernel_arg_type");
321 if (Node
&& ArgNo
< Node
->getNumOperands())
322 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
324 StringRef BaseTypeName
;
325 Node
= Func
->getMetadata("kernel_arg_base_type");
326 if (Node
&& ArgNo
< Node
->getNumOperands())
327 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
330 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
331 Arg
.hasNoAliasAttr()) {
332 AccQual
= "read_only";
334 Node
= Func
->getMetadata("kernel_arg_access_qual");
335 if (Node
&& ArgNo
< Node
->getNumOperands())
336 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
340 Node
= Func
->getMetadata("kernel_arg_type_qual");
341 if (Node
&& ArgNo
< Node
->getNumOperands())
342 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
344 Type
*Ty
= Arg
.getType();
345 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
347 unsigned PointeeAlign
= 0;
348 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
349 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
350 PointeeAlign
= Arg
.getParamAlignment();
351 if (PointeeAlign
== 0)
352 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
356 emitKernelArg(DL
, Ty
, getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
),
357 PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
, TypeQual
);
360 void MetadataStreamerV2::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
362 unsigned PointeeAlign
, StringRef Name
,
364 StringRef BaseTypeName
,
365 StringRef AccQual
, StringRef TypeQual
) {
366 HSAMetadata
.mKernels
.back().mArgs
.push_back(Kernel::Arg::Metadata());
367 auto &Arg
= HSAMetadata
.mKernels
.back().mArgs
.back();
370 Arg
.mTypeName
= TypeName
;
371 Arg
.mSize
= DL
.getTypeAllocSize(Ty
);
372 Arg
.mAlign
= DL
.getABITypeAlignment(Ty
);
373 Arg
.mValueKind
= ValueKind
;
374 Arg
.mValueType
= getValueType(Ty
, BaseTypeName
);
375 Arg
.mPointeeAlign
= PointeeAlign
;
377 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
378 Arg
.mAddrSpaceQual
= getAddressSpaceQualifier(PtrTy
->getAddressSpace());
380 Arg
.mAccQual
= getAccessQualifier(AccQual
);
382 // TODO: Emit Arg.mActualAccQual.
384 SmallVector
<StringRef
, 1> SplitTypeQuals
;
385 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
386 for (StringRef Key
: SplitTypeQuals
) {
387 auto P
= StringSwitch
<bool*>(Key
)
388 .Case("const", &Arg
.mIsConst
)
389 .Case("restrict", &Arg
.mIsRestrict
)
390 .Case("volatile", &Arg
.mIsVolatile
)
391 .Case("pipe", &Arg
.mIsPipe
)
398 void MetadataStreamerV2::emitHiddenKernelArgs(const Function
&Func
) {
399 int HiddenArgNumBytes
=
400 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
402 if (!HiddenArgNumBytes
)
405 auto &DL
= Func
.getParent()->getDataLayout();
406 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
408 if (HiddenArgNumBytes
>= 8)
409 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetX
);
410 if (HiddenArgNumBytes
>= 16)
411 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetY
);
412 if (HiddenArgNumBytes
>= 24)
413 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetZ
);
415 auto Int8PtrTy
= Type::getInt8PtrTy(Func
.getContext(),
416 AMDGPUAS::GLOBAL_ADDRESS
);
418 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
420 if (HiddenArgNumBytes
>= 32) {
421 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
422 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenPrintfBuffer
);
424 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
427 // Emit "default queue" and "completion action" arguments if enqueue kernel is
428 // used, otherwise emit dummy "none" arguments.
429 if (HiddenArgNumBytes
>= 48) {
430 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
431 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenDefaultQueue
);
432 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenCompletionAction
);
434 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
435 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
439 // Emit the pointer argument for multi-grid object.
440 if (HiddenArgNumBytes
>= 56)
441 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenMultiGridSyncArg
);
444 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
445 return TargetStreamer
.EmitHSAMetadata(getHSAMetadata());
448 void MetadataStreamerV2::begin(const Module
&Mod
) {
453 void MetadataStreamerV2::end() {
454 std::string HSAMetadataString
;
455 if (toString(HSAMetadata
, HSAMetadataString
))
459 dump(HSAMetadataString
);
460 if (VerifyHSAMetadata
)
461 verify(HSAMetadataString
);
464 void MetadataStreamerV2::emitKernel(const MachineFunction
&MF
,
465 const SIProgramInfo
&ProgramInfo
) {
466 auto &Func
= MF
.getFunction();
467 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
470 auto CodeProps
= getHSACodeProps(MF
, ProgramInfo
);
471 auto DebugProps
= getHSADebugProps(MF
, ProgramInfo
);
473 HSAMetadata
.mKernels
.push_back(Kernel::Metadata());
474 auto &Kernel
= HSAMetadata
.mKernels
.back();
476 Kernel
.mName
= Func
.getName();
477 Kernel
.mSymbolName
= (Twine(Func
.getName()) + Twine("@kd")).str();
478 emitKernelLanguage(Func
);
479 emitKernelAttrs(Func
);
480 emitKernelArgs(Func
);
481 HSAMetadata
.mKernels
.back().mCodeProps
= CodeProps
;
482 HSAMetadata
.mKernels
.back().mDebugProps
= DebugProps
;
485 //===----------------------------------------------------------------------===//
486 // HSAMetadataStreamerV3
487 //===----------------------------------------------------------------------===//
489 void MetadataStreamerV3::dump(StringRef HSAMetadataString
) const {
490 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
493 void MetadataStreamerV3::verify(StringRef HSAMetadataString
) const {
494 errs() << "AMDGPU HSA Metadata Parser Test: ";
496 msgpack::Document FromHSAMetadataString
;
498 if (!FromHSAMetadataString
.fromYAML(HSAMetadataString
)) {
503 std::string ToHSAMetadataString
;
504 raw_string_ostream
StrOS(ToHSAMetadataString
);
505 FromHSAMetadataString
.toYAML(StrOS
);
507 errs() << (HSAMetadataString
== StrOS
.str() ? "PASS" : "FAIL") << '\n';
508 if (HSAMetadataString
!= ToHSAMetadataString
) {
509 errs() << "Original input: " << HSAMetadataString
<< '\n'
510 << "Produced output: " << StrOS
.str() << '\n';
515 MetadataStreamerV3::getAccessQualifier(StringRef AccQual
) const {
516 return StringSwitch
<Optional
<StringRef
>>(AccQual
)
517 .Case("read_only", StringRef("read_only"))
518 .Case("write_only", StringRef("write_only"))
519 .Case("read_write", StringRef("read_write"))
524 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace
) const {
525 switch (AddressSpace
) {
526 case AMDGPUAS::PRIVATE_ADDRESS
:
527 return StringRef("private");
528 case AMDGPUAS::GLOBAL_ADDRESS
:
529 return StringRef("global");
530 case AMDGPUAS::CONSTANT_ADDRESS
:
531 return StringRef("constant");
532 case AMDGPUAS::LOCAL_ADDRESS
:
533 return StringRef("local");
534 case AMDGPUAS::FLAT_ADDRESS
:
535 return StringRef("generic");
536 case AMDGPUAS::REGION_ADDRESS
:
537 return StringRef("region");
543 StringRef
MetadataStreamerV3::getValueKind(Type
*Ty
, StringRef TypeQual
,
544 StringRef BaseTypeName
) const {
545 if (TypeQual
.find("pipe") != StringRef::npos
)
548 return StringSwitch
<StringRef
>(BaseTypeName
)
549 .Case("image1d_t", "image")
550 .Case("image1d_array_t", "image")
551 .Case("image1d_buffer_t", "image")
552 .Case("image2d_t", "image")
553 .Case("image2d_array_t", "image")
554 .Case("image2d_array_depth_t", "image")
555 .Case("image2d_array_msaa_t", "image")
556 .Case("image2d_array_msaa_depth_t", "image")
557 .Case("image2d_depth_t", "image")
558 .Case("image2d_msaa_t", "image")
559 .Case("image2d_msaa_depth_t", "image")
560 .Case("image3d_t", "image")
561 .Case("sampler_t", "sampler")
562 .Case("queue_t", "queue")
563 .Default(isa
<PointerType
>(Ty
)
564 ? (Ty
->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
565 ? "dynamic_shared_pointer"
570 StringRef
MetadataStreamerV3::getValueType(Type
*Ty
, StringRef TypeName
) const {
571 switch (Ty
->getTypeID()) {
572 case Type::IntegerTyID
: {
573 auto Signed
= !TypeName
.startswith("u");
574 switch (Ty
->getIntegerBitWidth()) {
576 return Signed
? "i8" : "u8";
578 return Signed
? "i16" : "u16";
580 return Signed
? "i32" : "u32";
582 return Signed
? "i64" : "u64";
589 case Type::FloatTyID
:
591 case Type::DoubleTyID
:
593 case Type::PointerTyID
:
594 return getValueType(Ty
->getPointerElementType(), TypeName
);
595 case Type::VectorTyID
:
596 return getValueType(Ty
->getVectorElementType(), TypeName
);
602 std::string
MetadataStreamerV3::getTypeName(Type
*Ty
, bool Signed
) const {
603 switch (Ty
->getTypeID()) {
604 case Type::IntegerTyID
: {
606 return (Twine('u') + getTypeName(Ty
, true)).str();
608 auto BitWidth
= Ty
->getIntegerBitWidth();
619 return (Twine('i') + Twine(BitWidth
)).str();
624 case Type::FloatTyID
:
626 case Type::DoubleTyID
:
628 case Type::VectorTyID
: {
629 auto VecTy
= cast
<VectorType
>(Ty
);
630 auto ElTy
= VecTy
->getElementType();
631 auto NumElements
= VecTy
->getVectorNumElements();
632 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
639 msgpack::ArrayDocNode
640 MetadataStreamerV3::getWorkGroupDimensions(MDNode
*Node
) const {
641 auto Dims
= HSAMetadataDoc
->getArrayNode();
642 if (Node
->getNumOperands() != 3)
645 for (auto &Op
: Node
->operands())
646 Dims
.push_back(Dims
.getDocument()->getNode(
647 uint64_t(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue())));
651 void MetadataStreamerV3::emitVersion() {
652 auto Version
= HSAMetadataDoc
->getArrayNode();
653 Version
.push_back(Version
.getDocument()->getNode(VersionMajor
));
654 Version
.push_back(Version
.getDocument()->getNode(VersionMinor
));
655 getRootMetadata("amdhsa.version") = Version
;
658 void MetadataStreamerV3::emitPrintf(const Module
&Mod
) {
659 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
663 auto Printf
= HSAMetadataDoc
->getArrayNode();
664 for (auto Op
: Node
->operands())
665 if (Op
->getNumOperands())
666 Printf
.push_back(Printf
.getDocument()->getNode(
667 cast
<MDString
>(Op
->getOperand(0))->getString(), /*Copy=*/true));
668 getRootMetadata("amdhsa.printf") = Printf
;
671 void MetadataStreamerV3::emitKernelLanguage(const Function
&Func
,
672 msgpack::MapDocNode Kern
) {
673 // TODO: What about other languages?
674 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
675 if (!Node
|| !Node
->getNumOperands())
677 auto Op0
= Node
->getOperand(0);
678 if (Op0
->getNumOperands() <= 1)
681 Kern
[".language"] = Kern
.getDocument()->getNode("OpenCL C");
682 auto LanguageVersion
= Kern
.getDocument()->getArrayNode();
683 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
684 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue()));
685 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
686 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue()));
687 Kern
[".language_version"] = LanguageVersion
;
690 void MetadataStreamerV3::emitKernelAttrs(const Function
&Func
,
691 msgpack::MapDocNode Kern
) {
693 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
694 Kern
[".reqd_workgroup_size"] = getWorkGroupDimensions(Node
);
695 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
696 Kern
[".workgroup_size_hint"] = getWorkGroupDimensions(Node
);
697 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
698 Kern
[".vec_type_hint"] = Kern
.getDocument()->getNode(
700 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
701 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue()),
704 if (Func
.hasFnAttribute("runtime-handle")) {
705 Kern
[".device_enqueue_symbol"] = Kern
.getDocument()->getNode(
706 Func
.getFnAttribute("runtime-handle").getValueAsString().str(),
711 void MetadataStreamerV3::emitKernelArgs(const Function
&Func
,
712 msgpack::MapDocNode Kern
) {
714 auto Args
= HSAMetadataDoc
->getArrayNode();
715 for (auto &Arg
: Func
.args())
716 emitKernelArg(Arg
, Offset
, Args
);
718 emitHiddenKernelArgs(Func
, Offset
, Args
);
720 Kern
[".args"] = Args
;
723 void MetadataStreamerV3::emitKernelArg(const Argument
&Arg
, unsigned &Offset
,
724 msgpack::ArrayDocNode Args
) {
725 auto Func
= Arg
.getParent();
726 auto ArgNo
= Arg
.getArgNo();
730 Node
= Func
->getMetadata("kernel_arg_name");
731 if (Node
&& ArgNo
< Node
->getNumOperands())
732 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
733 else if (Arg
.hasName())
734 Name
= Arg
.getName();
737 Node
= Func
->getMetadata("kernel_arg_type");
738 if (Node
&& ArgNo
< Node
->getNumOperands())
739 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
741 StringRef BaseTypeName
;
742 Node
= Func
->getMetadata("kernel_arg_base_type");
743 if (Node
&& ArgNo
< Node
->getNumOperands())
744 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
747 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
748 Arg
.hasNoAliasAttr()) {
749 AccQual
= "read_only";
751 Node
= Func
->getMetadata("kernel_arg_access_qual");
752 if (Node
&& ArgNo
< Node
->getNumOperands())
753 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
757 Node
= Func
->getMetadata("kernel_arg_type_qual");
758 if (Node
&& ArgNo
< Node
->getNumOperands())
759 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
761 Type
*Ty
= Arg
.getType();
762 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
764 unsigned PointeeAlign
= 0;
765 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
766 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
767 PointeeAlign
= Arg
.getParamAlignment();
768 if (PointeeAlign
== 0)
769 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
773 emitKernelArg(Func
->getParent()->getDataLayout(), Arg
.getType(),
774 getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
), Offset
,
775 Args
, PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
,
779 void MetadataStreamerV3::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
780 StringRef ValueKind
, unsigned &Offset
,
781 msgpack::ArrayDocNode Args
,
782 unsigned PointeeAlign
, StringRef Name
,
784 StringRef BaseTypeName
,
785 StringRef AccQual
, StringRef TypeQual
) {
786 auto Arg
= Args
.getDocument()->getMapNode();
789 Arg
[".name"] = Arg
.getDocument()->getNode(Name
, /*Copy=*/true);
790 if (!TypeName
.empty())
791 Arg
[".type_name"] = Arg
.getDocument()->getNode(TypeName
, /*Copy=*/true);
792 auto Size
= DL
.getTypeAllocSize(Ty
);
793 auto Align
= DL
.getABITypeAlignment(Ty
);
794 Arg
[".size"] = Arg
.getDocument()->getNode(Size
);
795 Offset
= alignTo(Offset
, Align
);
796 Arg
[".offset"] = Arg
.getDocument()->getNode(Offset
);
798 Arg
[".value_kind"] = Arg
.getDocument()->getNode(ValueKind
, /*Copy=*/true);
800 Arg
.getDocument()->getNode(getValueType(Ty
, BaseTypeName
), /*Copy=*/true);
802 Arg
[".pointee_align"] = Arg
.getDocument()->getNode(PointeeAlign
);
804 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
805 if (auto Qualifier
= getAddressSpaceQualifier(PtrTy
->getAddressSpace()))
806 Arg
[".address_space"] = Arg
.getDocument()->getNode(*Qualifier
, /*Copy=*/true);
808 if (auto AQ
= getAccessQualifier(AccQual
))
809 Arg
[".access"] = Arg
.getDocument()->getNode(*AQ
, /*Copy=*/true);
811 // TODO: Emit Arg[".actual_access"].
813 SmallVector
<StringRef
, 1> SplitTypeQuals
;
814 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
815 for (StringRef Key
: SplitTypeQuals
) {
817 Arg
[".is_const"] = Arg
.getDocument()->getNode(true);
818 else if (Key
== "restrict")
819 Arg
[".is_restrict"] = Arg
.getDocument()->getNode(true);
820 else if (Key
== "volatile")
821 Arg
[".is_volatile"] = Arg
.getDocument()->getNode(true);
822 else if (Key
== "pipe")
823 Arg
[".is_pipe"] = Arg
.getDocument()->getNode(true);
829 void MetadataStreamerV3::emitHiddenKernelArgs(const Function
&Func
,
831 msgpack::ArrayDocNode Args
) {
832 int HiddenArgNumBytes
=
833 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
835 if (!HiddenArgNumBytes
)
838 auto &DL
= Func
.getParent()->getDataLayout();
839 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
841 if (HiddenArgNumBytes
>= 8)
842 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_x", Offset
, Args
);
843 if (HiddenArgNumBytes
>= 16)
844 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_y", Offset
, Args
);
845 if (HiddenArgNumBytes
>= 24)
846 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_z", Offset
, Args
);
849 Type::getInt8PtrTy(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
851 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
853 if (HiddenArgNumBytes
>= 32) {
854 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
855 emitKernelArg(DL
, Int8PtrTy
, "hidden_printf_buffer", Offset
, Args
);
857 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
860 // Emit "default queue" and "completion action" arguments if enqueue kernel is
861 // used, otherwise emit dummy "none" arguments.
862 if (HiddenArgNumBytes
>= 48) {
863 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
864 emitKernelArg(DL
, Int8PtrTy
, "hidden_default_queue", Offset
, Args
);
865 emitKernelArg(DL
, Int8PtrTy
, "hidden_completion_action", Offset
, Args
);
867 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
868 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
872 // Emit the pointer argument for multi-grid object.
873 if (HiddenArgNumBytes
>= 56)
874 emitKernelArg(DL
, Int8PtrTy
, "hidden_multigrid_sync_arg", Offset
, Args
);
878 MetadataStreamerV3::getHSAKernelProps(const MachineFunction
&MF
,
879 const SIProgramInfo
&ProgramInfo
) const {
880 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
881 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
882 const Function
&F
= MF
.getFunction();
884 auto Kern
= HSAMetadataDoc
->getMapNode();
886 unsigned MaxKernArgAlign
;
887 Kern
[".kernarg_segment_size"] = Kern
.getDocument()->getNode(
888 STM
.getKernArgSegmentSize(F
, MaxKernArgAlign
));
889 Kern
[".group_segment_fixed_size"] =
890 Kern
.getDocument()->getNode(ProgramInfo
.LDSSize
);
891 Kern
[".private_segment_fixed_size"] =
892 Kern
.getDocument()->getNode(ProgramInfo
.ScratchSize
);
893 Kern
[".kernarg_segment_align"] =
894 Kern
.getDocument()->getNode(std::max(uint32_t(4), MaxKernArgAlign
));
895 Kern
[".wavefront_size"] =
896 Kern
.getDocument()->getNode(STM
.getWavefrontSize());
897 Kern
[".sgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumSGPR
);
898 Kern
[".vgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumVGPR
);
899 Kern
[".max_flat_workgroup_size"] =
900 Kern
.getDocument()->getNode(MFI
.getMaxFlatWorkGroupSize());
901 Kern
[".sgpr_spill_count"] =
902 Kern
.getDocument()->getNode(MFI
.getNumSpilledSGPRs());
903 Kern
[".vgpr_spill_count"] =
904 Kern
.getDocument()->getNode(MFI
.getNumSpilledVGPRs());
909 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
910 return TargetStreamer
.EmitHSAMetadata(*HSAMetadataDoc
, true);
913 void MetadataStreamerV3::begin(const Module
&Mod
) {
916 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc
->getArrayNode();
919 void MetadataStreamerV3::end() {
920 std::string HSAMetadataString
;
921 raw_string_ostream
StrOS(HSAMetadataString
);
922 HSAMetadataDoc
->toYAML(StrOS
);
926 if (VerifyHSAMetadata
)
930 void MetadataStreamerV3::emitKernel(const MachineFunction
&MF
,
931 const SIProgramInfo
&ProgramInfo
) {
932 auto &Func
= MF
.getFunction();
933 auto Kern
= getHSAKernelProps(MF
, ProgramInfo
);
935 assert(Func
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
936 Func
.getCallingConv() == CallingConv::SPIR_KERNEL
);
939 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
942 Kern
[".name"] = Kern
.getDocument()->getNode(Func
.getName());
943 Kern
[".symbol"] = Kern
.getDocument()->getNode(
944 (Twine(Func
.getName()) + Twine(".kd")).str(), /*Copy=*/true);
945 emitKernelLanguage(Func
, Kern
);
946 emitKernelAttrs(Func
, Kern
);
947 emitKernelArgs(Func
, Kern
);
950 Kernels
.push_back(Kern
);
953 } // end namespace HSAMD
954 } // end namespace AMDGPU
955 } // end namespace llvm