1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
29 static cl::opt
<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt
<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString
) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString
) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
49 HSAMD::Metadata FromHSAMetadataString
;
50 if (fromString(HSAMetadataString
, FromHSAMetadataString
)) {
55 std::string ToHSAMetadataString
;
56 if (toString(FromHSAMetadataString
, ToHSAMetadataString
)) {
61 errs() << (HSAMetadataString
== ToHSAMetadataString
? "PASS" : "FAIL")
63 if (HSAMetadataString
!= ToHSAMetadataString
) {
64 errs() << "Original input: " << HSAMetadataString
<< '\n'
65 << "Produced output: " << ToHSAMetadataString
<< '\n';
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual
) const {
72 return AccessQualifier::Unknown
;
74 return StringSwitch
<AccessQualifier
>(AccQual
)
75 .Case("read_only", AccessQualifier::ReadOnly
)
76 .Case("write_only", AccessQualifier::WriteOnly
)
77 .Case("read_write", AccessQualifier::ReadWrite
)
78 .Default(AccessQualifier::Default
);
82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace
) const {
84 switch (AddressSpace
) {
85 case AMDGPUAS::PRIVATE_ADDRESS
:
86 return AddressSpaceQualifier::Private
;
87 case AMDGPUAS::GLOBAL_ADDRESS
:
88 return AddressSpaceQualifier::Global
;
89 case AMDGPUAS::CONSTANT_ADDRESS
:
90 return AddressSpaceQualifier::Constant
;
91 case AMDGPUAS::LOCAL_ADDRESS
:
92 return AddressSpaceQualifier::Local
;
93 case AMDGPUAS::FLAT_ADDRESS
:
94 return AddressSpaceQualifier::Generic
;
95 case AMDGPUAS::REGION_ADDRESS
:
96 return AddressSpaceQualifier::Region
;
98 return AddressSpaceQualifier::Unknown
;
102 ValueKind
MetadataStreamerV2::getValueKind(Type
*Ty
, StringRef TypeQual
,
103 StringRef BaseTypeName
) const {
104 if (TypeQual
.find("pipe") != StringRef::npos
)
105 return ValueKind::Pipe
;
107 return StringSwitch
<ValueKind
>(BaseTypeName
)
108 .Case("image1d_t", ValueKind::Image
)
109 .Case("image1d_array_t", ValueKind::Image
)
110 .Case("image1d_buffer_t", ValueKind::Image
)
111 .Case("image2d_t", ValueKind::Image
)
112 .Case("image2d_array_t", ValueKind::Image
)
113 .Case("image2d_array_depth_t", ValueKind::Image
)
114 .Case("image2d_array_msaa_t", ValueKind::Image
)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image
)
116 .Case("image2d_depth_t", ValueKind::Image
)
117 .Case("image2d_msaa_t", ValueKind::Image
)
118 .Case("image2d_msaa_depth_t", ValueKind::Image
)
119 .Case("image3d_t", ValueKind::Image
)
120 .Case("sampler_t", ValueKind::Sampler
)
121 .Case("queue_t", ValueKind::Queue
)
122 .Default(isa
<PointerType
>(Ty
) ?
123 (Ty
->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS
?
125 ValueKind::DynamicSharedPointer
:
126 ValueKind::GlobalBuffer
) :
130 ValueType
MetadataStreamerV2::getValueType(Type
*Ty
, StringRef TypeName
) const {
131 switch (Ty
->getTypeID()) {
132 case Type::IntegerTyID
: {
133 auto Signed
= !TypeName
.startswith("u");
134 switch (Ty
->getIntegerBitWidth()) {
136 return Signed
? ValueType::I8
: ValueType::U8
;
138 return Signed
? ValueType::I16
: ValueType::U16
;
140 return Signed
? ValueType::I32
: ValueType::U32
;
142 return Signed
? ValueType::I64
: ValueType::U64
;
144 return ValueType::Struct
;
148 return ValueType::F16
;
149 case Type::FloatTyID
:
150 return ValueType::F32
;
151 case Type::DoubleTyID
:
152 return ValueType::F64
;
153 case Type::PointerTyID
:
154 return getValueType(Ty
->getPointerElementType(), TypeName
);
155 case Type::VectorTyID
:
156 return getValueType(Ty
->getVectorElementType(), TypeName
);
158 return ValueType::Struct
;
162 std::string
MetadataStreamerV2::getTypeName(Type
*Ty
, bool Signed
) const {
163 switch (Ty
->getTypeID()) {
164 case Type::IntegerTyID
: {
166 return (Twine('u') + getTypeName(Ty
, true)).str();
168 auto BitWidth
= Ty
->getIntegerBitWidth();
179 return (Twine('i') + Twine(BitWidth
)).str();
184 case Type::FloatTyID
:
186 case Type::DoubleTyID
:
188 case Type::VectorTyID
: {
189 auto VecTy
= cast
<VectorType
>(Ty
);
190 auto ElTy
= VecTy
->getElementType();
191 auto NumElements
= VecTy
->getVectorNumElements();
192 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
199 std::vector
<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode
*Node
) const {
201 std::vector
<uint32_t> Dims
;
202 if (Node
->getNumOperands() != 3)
205 for (auto &Op
: Node
->operands())
206 Dims
.push_back(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue());
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction
&MF
,
212 const SIProgramInfo
&ProgramInfo
) const {
213 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
214 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps
;
216 const Function
&F
= MF
.getFunction();
218 assert(F
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
219 F
.getCallingConv() == CallingConv::SPIR_KERNEL
);
221 Align MaxKernArgAlign
;
222 HSACodeProps
.mKernargSegmentSize
= STM
.getKernArgSegmentSize(F
,
224 HSACodeProps
.mGroupSegmentFixedSize
= ProgramInfo
.LDSSize
;
225 HSACodeProps
.mPrivateSegmentFixedSize
= ProgramInfo
.ScratchSize
;
226 HSACodeProps
.mKernargSegmentAlign
=
227 std::max(MaxKernArgAlign
, Align(4)).value();
228 HSACodeProps
.mWavefrontSize
= STM
.getWavefrontSize();
229 HSACodeProps
.mNumSGPRs
= ProgramInfo
.NumSGPR
;
230 HSACodeProps
.mNumVGPRs
= ProgramInfo
.NumVGPR
;
231 HSACodeProps
.mMaxFlatWorkGroupSize
= MFI
.getMaxFlatWorkGroupSize();
232 HSACodeProps
.mIsDynamicCallStack
= ProgramInfo
.DynamicCallStack
;
233 HSACodeProps
.mIsXNACKEnabled
= STM
.isXNACKEnabled();
234 HSACodeProps
.mNumSpilledSGPRs
= MFI
.getNumSpilledSGPRs();
235 HSACodeProps
.mNumSpilledVGPRs
= MFI
.getNumSpilledVGPRs();
240 Kernel::DebugProps::Metadata
241 MetadataStreamerV2::getHSADebugProps(const MachineFunction
&MF
,
242 const SIProgramInfo
&ProgramInfo
) const {
243 return HSAMD::Kernel::DebugProps::Metadata();
246 void MetadataStreamerV2::emitVersion() {
247 auto &Version
= HSAMetadata
.mVersion
;
249 Version
.push_back(VersionMajor
);
250 Version
.push_back(VersionMinor
);
253 void MetadataStreamerV2::emitPrintf(const Module
&Mod
) {
254 auto &Printf
= HSAMetadata
.mPrintf
;
256 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
260 for (auto Op
: Node
->operands())
261 if (Op
->getNumOperands())
262 Printf
.push_back(cast
<MDString
>(Op
->getOperand(0))->getString());
265 void MetadataStreamerV2::emitKernelLanguage(const Function
&Func
) {
266 auto &Kernel
= HSAMetadata
.mKernels
.back();
268 // TODO: What about other languages?
269 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
270 if (!Node
|| !Node
->getNumOperands())
272 auto Op0
= Node
->getOperand(0);
273 if (Op0
->getNumOperands() <= 1)
276 Kernel
.mLanguage
= "OpenCL C";
277 Kernel
.mLanguageVersion
.push_back(
278 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue());
279 Kernel
.mLanguageVersion
.push_back(
280 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue());
283 void MetadataStreamerV2::emitKernelAttrs(const Function
&Func
) {
284 auto &Attrs
= HSAMetadata
.mKernels
.back().mAttrs
;
286 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
287 Attrs
.mReqdWorkGroupSize
= getWorkGroupDimensions(Node
);
288 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
289 Attrs
.mWorkGroupSizeHint
= getWorkGroupDimensions(Node
);
290 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
291 Attrs
.mVecTypeHint
= getTypeName(
292 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
293 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue());
295 if (Func
.hasFnAttribute("runtime-handle")) {
296 Attrs
.mRuntimeHandle
=
297 Func
.getFnAttribute("runtime-handle").getValueAsString().str();
301 void MetadataStreamerV2::emitKernelArgs(const Function
&Func
) {
302 for (auto &Arg
: Func
.args())
305 emitHiddenKernelArgs(Func
);
308 void MetadataStreamerV2::emitKernelArg(const Argument
&Arg
) {
309 auto Func
= Arg
.getParent();
310 auto ArgNo
= Arg
.getArgNo();
314 Node
= Func
->getMetadata("kernel_arg_name");
315 if (Node
&& ArgNo
< Node
->getNumOperands())
316 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
317 else if (Arg
.hasName())
318 Name
= Arg
.getName();
321 Node
= Func
->getMetadata("kernel_arg_type");
322 if (Node
&& ArgNo
< Node
->getNumOperands())
323 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
325 StringRef BaseTypeName
;
326 Node
= Func
->getMetadata("kernel_arg_base_type");
327 if (Node
&& ArgNo
< Node
->getNumOperands())
328 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
331 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
332 Arg
.hasNoAliasAttr()) {
333 AccQual
= "read_only";
335 Node
= Func
->getMetadata("kernel_arg_access_qual");
336 if (Node
&& ArgNo
< Node
->getNumOperands())
337 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
341 Node
= Func
->getMetadata("kernel_arg_type_qual");
342 if (Node
&& ArgNo
< Node
->getNumOperands())
343 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
345 Type
*Ty
= Arg
.getType();
346 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
348 unsigned PointeeAlign
= 0;
349 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
350 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
351 PointeeAlign
= Arg
.getParamAlignment();
352 if (PointeeAlign
== 0)
353 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
357 emitKernelArg(DL
, Ty
, getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
),
358 PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
, TypeQual
);
361 void MetadataStreamerV2::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
363 unsigned PointeeAlign
, StringRef Name
,
365 StringRef BaseTypeName
,
366 StringRef AccQual
, StringRef TypeQual
) {
367 HSAMetadata
.mKernels
.back().mArgs
.push_back(Kernel::Arg::Metadata());
368 auto &Arg
= HSAMetadata
.mKernels
.back().mArgs
.back();
371 Arg
.mTypeName
= TypeName
;
372 Arg
.mSize
= DL
.getTypeAllocSize(Ty
);
373 Arg
.mAlign
= DL
.getABITypeAlignment(Ty
);
374 Arg
.mValueKind
= ValueKind
;
375 Arg
.mValueType
= getValueType(Ty
, BaseTypeName
);
376 Arg
.mPointeeAlign
= PointeeAlign
;
378 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
379 Arg
.mAddrSpaceQual
= getAddressSpaceQualifier(PtrTy
->getAddressSpace());
381 Arg
.mAccQual
= getAccessQualifier(AccQual
);
383 // TODO: Emit Arg.mActualAccQual.
385 SmallVector
<StringRef
, 1> SplitTypeQuals
;
386 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
387 for (StringRef Key
: SplitTypeQuals
) {
388 auto P
= StringSwitch
<bool*>(Key
)
389 .Case("const", &Arg
.mIsConst
)
390 .Case("restrict", &Arg
.mIsRestrict
)
391 .Case("volatile", &Arg
.mIsVolatile
)
392 .Case("pipe", &Arg
.mIsPipe
)
399 void MetadataStreamerV2::emitHiddenKernelArgs(const Function
&Func
) {
400 int HiddenArgNumBytes
=
401 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
403 if (!HiddenArgNumBytes
)
406 auto &DL
= Func
.getParent()->getDataLayout();
407 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
409 if (HiddenArgNumBytes
>= 8)
410 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetX
);
411 if (HiddenArgNumBytes
>= 16)
412 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetY
);
413 if (HiddenArgNumBytes
>= 24)
414 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetZ
);
416 auto Int8PtrTy
= Type::getInt8PtrTy(Func
.getContext(),
417 AMDGPUAS::GLOBAL_ADDRESS
);
419 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
421 if (HiddenArgNumBytes
>= 32) {
422 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
423 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenPrintfBuffer
);
425 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
428 // Emit "default queue" and "completion action" arguments if enqueue kernel is
429 // used, otherwise emit dummy "none" arguments.
430 if (HiddenArgNumBytes
>= 48) {
431 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
432 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenDefaultQueue
);
433 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenCompletionAction
);
435 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
436 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
440 // Emit the pointer argument for multi-grid object.
441 if (HiddenArgNumBytes
>= 56)
442 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenMultiGridSyncArg
);
445 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
446 return TargetStreamer
.EmitHSAMetadata(getHSAMetadata());
449 void MetadataStreamerV2::begin(const Module
&Mod
) {
454 void MetadataStreamerV2::end() {
455 std::string HSAMetadataString
;
456 if (toString(HSAMetadata
, HSAMetadataString
))
460 dump(HSAMetadataString
);
461 if (VerifyHSAMetadata
)
462 verify(HSAMetadataString
);
465 void MetadataStreamerV2::emitKernel(const MachineFunction
&MF
,
466 const SIProgramInfo
&ProgramInfo
) {
467 auto &Func
= MF
.getFunction();
468 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
471 auto CodeProps
= getHSACodeProps(MF
, ProgramInfo
);
472 auto DebugProps
= getHSADebugProps(MF
, ProgramInfo
);
474 HSAMetadata
.mKernels
.push_back(Kernel::Metadata());
475 auto &Kernel
= HSAMetadata
.mKernels
.back();
477 Kernel
.mName
= Func
.getName();
478 Kernel
.mSymbolName
= (Twine(Func
.getName()) + Twine("@kd")).str();
479 emitKernelLanguage(Func
);
480 emitKernelAttrs(Func
);
481 emitKernelArgs(Func
);
482 HSAMetadata
.mKernels
.back().mCodeProps
= CodeProps
;
483 HSAMetadata
.mKernels
.back().mDebugProps
= DebugProps
;
486 //===----------------------------------------------------------------------===//
487 // HSAMetadataStreamerV3
488 //===----------------------------------------------------------------------===//
490 void MetadataStreamerV3::dump(StringRef HSAMetadataString
) const {
491 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
494 void MetadataStreamerV3::verify(StringRef HSAMetadataString
) const {
495 errs() << "AMDGPU HSA Metadata Parser Test: ";
497 msgpack::Document FromHSAMetadataString
;
499 if (!FromHSAMetadataString
.fromYAML(HSAMetadataString
)) {
504 std::string ToHSAMetadataString
;
505 raw_string_ostream
StrOS(ToHSAMetadataString
);
506 FromHSAMetadataString
.toYAML(StrOS
);
508 errs() << (HSAMetadataString
== StrOS
.str() ? "PASS" : "FAIL") << '\n';
509 if (HSAMetadataString
!= ToHSAMetadataString
) {
510 errs() << "Original input: " << HSAMetadataString
<< '\n'
511 << "Produced output: " << StrOS
.str() << '\n';
516 MetadataStreamerV3::getAccessQualifier(StringRef AccQual
) const {
517 return StringSwitch
<Optional
<StringRef
>>(AccQual
)
518 .Case("read_only", StringRef("read_only"))
519 .Case("write_only", StringRef("write_only"))
520 .Case("read_write", StringRef("read_write"))
525 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace
) const {
526 switch (AddressSpace
) {
527 case AMDGPUAS::PRIVATE_ADDRESS
:
528 return StringRef("private");
529 case AMDGPUAS::GLOBAL_ADDRESS
:
530 return StringRef("global");
531 case AMDGPUAS::CONSTANT_ADDRESS
:
532 return StringRef("constant");
533 case AMDGPUAS::LOCAL_ADDRESS
:
534 return StringRef("local");
535 case AMDGPUAS::FLAT_ADDRESS
:
536 return StringRef("generic");
537 case AMDGPUAS::REGION_ADDRESS
:
538 return StringRef("region");
544 StringRef
MetadataStreamerV3::getValueKind(Type
*Ty
, StringRef TypeQual
,
545 StringRef BaseTypeName
) const {
546 if (TypeQual
.find("pipe") != StringRef::npos
)
549 return StringSwitch
<StringRef
>(BaseTypeName
)
550 .Case("image1d_t", "image")
551 .Case("image1d_array_t", "image")
552 .Case("image1d_buffer_t", "image")
553 .Case("image2d_t", "image")
554 .Case("image2d_array_t", "image")
555 .Case("image2d_array_depth_t", "image")
556 .Case("image2d_array_msaa_t", "image")
557 .Case("image2d_array_msaa_depth_t", "image")
558 .Case("image2d_depth_t", "image")
559 .Case("image2d_msaa_t", "image")
560 .Case("image2d_msaa_depth_t", "image")
561 .Case("image3d_t", "image")
562 .Case("sampler_t", "sampler")
563 .Case("queue_t", "queue")
564 .Default(isa
<PointerType
>(Ty
)
565 ? (Ty
->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
566 ? "dynamic_shared_pointer"
571 StringRef
MetadataStreamerV3::getValueType(Type
*Ty
, StringRef TypeName
) const {
572 switch (Ty
->getTypeID()) {
573 case Type::IntegerTyID
: {
574 auto Signed
= !TypeName
.startswith("u");
575 switch (Ty
->getIntegerBitWidth()) {
577 return Signed
? "i8" : "u8";
579 return Signed
? "i16" : "u16";
581 return Signed
? "i32" : "u32";
583 return Signed
? "i64" : "u64";
590 case Type::FloatTyID
:
592 case Type::DoubleTyID
:
594 case Type::PointerTyID
:
595 return getValueType(Ty
->getPointerElementType(), TypeName
);
596 case Type::VectorTyID
:
597 return getValueType(Ty
->getVectorElementType(), TypeName
);
603 std::string
MetadataStreamerV3::getTypeName(Type
*Ty
, bool Signed
) const {
604 switch (Ty
->getTypeID()) {
605 case Type::IntegerTyID
: {
607 return (Twine('u') + getTypeName(Ty
, true)).str();
609 auto BitWidth
= Ty
->getIntegerBitWidth();
620 return (Twine('i') + Twine(BitWidth
)).str();
625 case Type::FloatTyID
:
627 case Type::DoubleTyID
:
629 case Type::VectorTyID
: {
630 auto VecTy
= cast
<VectorType
>(Ty
);
631 auto ElTy
= VecTy
->getElementType();
632 auto NumElements
= VecTy
->getVectorNumElements();
633 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
640 msgpack::ArrayDocNode
641 MetadataStreamerV3::getWorkGroupDimensions(MDNode
*Node
) const {
642 auto Dims
= HSAMetadataDoc
->getArrayNode();
643 if (Node
->getNumOperands() != 3)
646 for (auto &Op
: Node
->operands())
647 Dims
.push_back(Dims
.getDocument()->getNode(
648 uint64_t(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue())));
652 void MetadataStreamerV3::emitVersion() {
653 auto Version
= HSAMetadataDoc
->getArrayNode();
654 Version
.push_back(Version
.getDocument()->getNode(VersionMajor
));
655 Version
.push_back(Version
.getDocument()->getNode(VersionMinor
));
656 getRootMetadata("amdhsa.version") = Version
;
659 void MetadataStreamerV3::emitPrintf(const Module
&Mod
) {
660 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
664 auto Printf
= HSAMetadataDoc
->getArrayNode();
665 for (auto Op
: Node
->operands())
666 if (Op
->getNumOperands())
667 Printf
.push_back(Printf
.getDocument()->getNode(
668 cast
<MDString
>(Op
->getOperand(0))->getString(), /*Copy=*/true));
669 getRootMetadata("amdhsa.printf") = Printf
;
672 void MetadataStreamerV3::emitKernelLanguage(const Function
&Func
,
673 msgpack::MapDocNode Kern
) {
674 // TODO: What about other languages?
675 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
676 if (!Node
|| !Node
->getNumOperands())
678 auto Op0
= Node
->getOperand(0);
679 if (Op0
->getNumOperands() <= 1)
682 Kern
[".language"] = Kern
.getDocument()->getNode("OpenCL C");
683 auto LanguageVersion
= Kern
.getDocument()->getArrayNode();
684 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
685 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue()));
686 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
687 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue()));
688 Kern
[".language_version"] = LanguageVersion
;
691 void MetadataStreamerV3::emitKernelAttrs(const Function
&Func
,
692 msgpack::MapDocNode Kern
) {
694 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
695 Kern
[".reqd_workgroup_size"] = getWorkGroupDimensions(Node
);
696 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
697 Kern
[".workgroup_size_hint"] = getWorkGroupDimensions(Node
);
698 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
699 Kern
[".vec_type_hint"] = Kern
.getDocument()->getNode(
701 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
702 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue()),
705 if (Func
.hasFnAttribute("runtime-handle")) {
706 Kern
[".device_enqueue_symbol"] = Kern
.getDocument()->getNode(
707 Func
.getFnAttribute("runtime-handle").getValueAsString().str(),
712 void MetadataStreamerV3::emitKernelArgs(const Function
&Func
,
713 msgpack::MapDocNode Kern
) {
715 auto Args
= HSAMetadataDoc
->getArrayNode();
716 for (auto &Arg
: Func
.args())
717 emitKernelArg(Arg
, Offset
, Args
);
719 emitHiddenKernelArgs(Func
, Offset
, Args
);
721 Kern
[".args"] = Args
;
724 void MetadataStreamerV3::emitKernelArg(const Argument
&Arg
, unsigned &Offset
,
725 msgpack::ArrayDocNode Args
) {
726 auto Func
= Arg
.getParent();
727 auto ArgNo
= Arg
.getArgNo();
731 Node
= Func
->getMetadata("kernel_arg_name");
732 if (Node
&& ArgNo
< Node
->getNumOperands())
733 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
734 else if (Arg
.hasName())
735 Name
= Arg
.getName();
738 Node
= Func
->getMetadata("kernel_arg_type");
739 if (Node
&& ArgNo
< Node
->getNumOperands())
740 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
742 StringRef BaseTypeName
;
743 Node
= Func
->getMetadata("kernel_arg_base_type");
744 if (Node
&& ArgNo
< Node
->getNumOperands())
745 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
748 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
749 Arg
.hasNoAliasAttr()) {
750 AccQual
= "read_only";
752 Node
= Func
->getMetadata("kernel_arg_access_qual");
753 if (Node
&& ArgNo
< Node
->getNumOperands())
754 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
758 Node
= Func
->getMetadata("kernel_arg_type_qual");
759 if (Node
&& ArgNo
< Node
->getNumOperands())
760 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
762 Type
*Ty
= Arg
.getType();
763 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
765 unsigned PointeeAlign
= 0;
766 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
767 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
768 PointeeAlign
= Arg
.getParamAlignment();
769 if (PointeeAlign
== 0)
770 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
774 emitKernelArg(Func
->getParent()->getDataLayout(), Arg
.getType(),
775 getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
), Offset
,
776 Args
, PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
,
780 void MetadataStreamerV3::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
781 StringRef ValueKind
, unsigned &Offset
,
782 msgpack::ArrayDocNode Args
,
783 unsigned PointeeAlign
, StringRef Name
,
785 StringRef BaseTypeName
,
786 StringRef AccQual
, StringRef TypeQual
) {
787 auto Arg
= Args
.getDocument()->getMapNode();
790 Arg
[".name"] = Arg
.getDocument()->getNode(Name
, /*Copy=*/true);
791 if (!TypeName
.empty())
792 Arg
[".type_name"] = Arg
.getDocument()->getNode(TypeName
, /*Copy=*/true);
793 auto Size
= DL
.getTypeAllocSize(Ty
);
794 auto Align
= DL
.getABITypeAlignment(Ty
);
795 Arg
[".size"] = Arg
.getDocument()->getNode(Size
);
796 Offset
= alignTo(Offset
, Align
);
797 Arg
[".offset"] = Arg
.getDocument()->getNode(Offset
);
799 Arg
[".value_kind"] = Arg
.getDocument()->getNode(ValueKind
, /*Copy=*/true);
801 Arg
.getDocument()->getNode(getValueType(Ty
, BaseTypeName
), /*Copy=*/true);
803 Arg
[".pointee_align"] = Arg
.getDocument()->getNode(PointeeAlign
);
805 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
806 if (auto Qualifier
= getAddressSpaceQualifier(PtrTy
->getAddressSpace()))
807 Arg
[".address_space"] = Arg
.getDocument()->getNode(*Qualifier
, /*Copy=*/true);
809 if (auto AQ
= getAccessQualifier(AccQual
))
810 Arg
[".access"] = Arg
.getDocument()->getNode(*AQ
, /*Copy=*/true);
812 // TODO: Emit Arg[".actual_access"].
814 SmallVector
<StringRef
, 1> SplitTypeQuals
;
815 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
816 for (StringRef Key
: SplitTypeQuals
) {
818 Arg
[".is_const"] = Arg
.getDocument()->getNode(true);
819 else if (Key
== "restrict")
820 Arg
[".is_restrict"] = Arg
.getDocument()->getNode(true);
821 else if (Key
== "volatile")
822 Arg
[".is_volatile"] = Arg
.getDocument()->getNode(true);
823 else if (Key
== "pipe")
824 Arg
[".is_pipe"] = Arg
.getDocument()->getNode(true);
830 void MetadataStreamerV3::emitHiddenKernelArgs(const Function
&Func
,
832 msgpack::ArrayDocNode Args
) {
833 int HiddenArgNumBytes
=
834 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
836 if (!HiddenArgNumBytes
)
839 auto &DL
= Func
.getParent()->getDataLayout();
840 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
842 if (HiddenArgNumBytes
>= 8)
843 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_x", Offset
, Args
);
844 if (HiddenArgNumBytes
>= 16)
845 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_y", Offset
, Args
);
846 if (HiddenArgNumBytes
>= 24)
847 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_z", Offset
, Args
);
850 Type::getInt8PtrTy(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
852 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
854 if (HiddenArgNumBytes
>= 32) {
855 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
856 emitKernelArg(DL
, Int8PtrTy
, "hidden_printf_buffer", Offset
, Args
);
858 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
861 // Emit "default queue" and "completion action" arguments if enqueue kernel is
862 // used, otherwise emit dummy "none" arguments.
863 if (HiddenArgNumBytes
>= 48) {
864 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
865 emitKernelArg(DL
, Int8PtrTy
, "hidden_default_queue", Offset
, Args
);
866 emitKernelArg(DL
, Int8PtrTy
, "hidden_completion_action", Offset
, Args
);
868 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
869 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
873 // Emit the pointer argument for multi-grid object.
874 if (HiddenArgNumBytes
>= 56)
875 emitKernelArg(DL
, Int8PtrTy
, "hidden_multigrid_sync_arg", Offset
, Args
);
879 MetadataStreamerV3::getHSAKernelProps(const MachineFunction
&MF
,
880 const SIProgramInfo
&ProgramInfo
) const {
881 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
882 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
883 const Function
&F
= MF
.getFunction();
885 auto Kern
= HSAMetadataDoc
->getMapNode();
887 Align MaxKernArgAlign
;
888 Kern
[".kernarg_segment_size"] = Kern
.getDocument()->getNode(
889 STM
.getKernArgSegmentSize(F
, MaxKernArgAlign
));
890 Kern
[".group_segment_fixed_size"] =
891 Kern
.getDocument()->getNode(ProgramInfo
.LDSSize
);
892 Kern
[".private_segment_fixed_size"] =
893 Kern
.getDocument()->getNode(ProgramInfo
.ScratchSize
);
894 Kern
[".kernarg_segment_align"] =
895 Kern
.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign
).value());
896 Kern
[".wavefront_size"] =
897 Kern
.getDocument()->getNode(STM
.getWavefrontSize());
898 Kern
[".sgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumSGPR
);
899 Kern
[".vgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumVGPR
);
900 Kern
[".max_flat_workgroup_size"] =
901 Kern
.getDocument()->getNode(MFI
.getMaxFlatWorkGroupSize());
902 Kern
[".sgpr_spill_count"] =
903 Kern
.getDocument()->getNode(MFI
.getNumSpilledSGPRs());
904 Kern
[".vgpr_spill_count"] =
905 Kern
.getDocument()->getNode(MFI
.getNumSpilledVGPRs());
910 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
911 return TargetStreamer
.EmitHSAMetadata(*HSAMetadataDoc
, true);
914 void MetadataStreamerV3::begin(const Module
&Mod
) {
917 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc
->getArrayNode();
920 void MetadataStreamerV3::end() {
921 std::string HSAMetadataString
;
922 raw_string_ostream
StrOS(HSAMetadataString
);
923 HSAMetadataDoc
->toYAML(StrOS
);
927 if (VerifyHSAMetadata
)
931 void MetadataStreamerV3::emitKernel(const MachineFunction
&MF
,
932 const SIProgramInfo
&ProgramInfo
) {
933 auto &Func
= MF
.getFunction();
934 auto Kern
= getHSAKernelProps(MF
, ProgramInfo
);
936 assert(Func
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
937 Func
.getCallingConv() == CallingConv::SPIR_KERNEL
);
940 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
943 Kern
[".name"] = Kern
.getDocument()->getNode(Func
.getName());
944 Kern
[".symbol"] = Kern
.getDocument()->getNode(
945 (Twine(Func
.getName()) + Twine(".kd")).str(), /*Copy=*/true);
946 emitKernelLanguage(Func
, Kern
);
947 emitKernelAttrs(Func
, Kern
);
948 emitKernelArgs(Func
, Kern
);
951 Kernels
.push_back(Kern
);
954 } // end namespace HSAMD
955 } // end namespace AMDGPU
956 } // end namespace llvm