1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
29 static cl::opt
<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt
<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString
) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString
) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
49 HSAMD::Metadata FromHSAMetadataString
;
50 if (fromString(HSAMetadataString
, FromHSAMetadataString
)) {
55 std::string ToHSAMetadataString
;
56 if (toString(FromHSAMetadataString
, ToHSAMetadataString
)) {
61 errs() << (HSAMetadataString
== ToHSAMetadataString
? "PASS" : "FAIL")
63 if (HSAMetadataString
!= ToHSAMetadataString
) {
64 errs() << "Original input: " << HSAMetadataString
<< '\n'
65 << "Produced output: " << ToHSAMetadataString
<< '\n';
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual
) const {
72 return AccessQualifier::Unknown
;
74 return StringSwitch
<AccessQualifier
>(AccQual
)
75 .Case("read_only", AccessQualifier::ReadOnly
)
76 .Case("write_only", AccessQualifier::WriteOnly
)
77 .Case("read_write", AccessQualifier::ReadWrite
)
78 .Default(AccessQualifier::Default
);
82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace
) const {
84 switch (AddressSpace
) {
85 case AMDGPUAS::PRIVATE_ADDRESS
:
86 return AddressSpaceQualifier::Private
;
87 case AMDGPUAS::GLOBAL_ADDRESS
:
88 return AddressSpaceQualifier::Global
;
89 case AMDGPUAS::CONSTANT_ADDRESS
:
90 return AddressSpaceQualifier::Constant
;
91 case AMDGPUAS::LOCAL_ADDRESS
:
92 return AddressSpaceQualifier::Local
;
93 case AMDGPUAS::FLAT_ADDRESS
:
94 return AddressSpaceQualifier::Generic
;
95 case AMDGPUAS::REGION_ADDRESS
:
96 return AddressSpaceQualifier::Region
;
98 return AddressSpaceQualifier::Unknown
;
102 ValueKind
MetadataStreamerV2::getValueKind(Type
*Ty
, StringRef TypeQual
,
103 StringRef BaseTypeName
) const {
104 if (TypeQual
.find("pipe") != StringRef::npos
)
105 return ValueKind::Pipe
;
107 return StringSwitch
<ValueKind
>(BaseTypeName
)
108 .Case("image1d_t", ValueKind::Image
)
109 .Case("image1d_array_t", ValueKind::Image
)
110 .Case("image1d_buffer_t", ValueKind::Image
)
111 .Case("image2d_t", ValueKind::Image
)
112 .Case("image2d_array_t", ValueKind::Image
)
113 .Case("image2d_array_depth_t", ValueKind::Image
)
114 .Case("image2d_array_msaa_t", ValueKind::Image
)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image
)
116 .Case("image2d_depth_t", ValueKind::Image
)
117 .Case("image2d_msaa_t", ValueKind::Image
)
118 .Case("image2d_msaa_depth_t", ValueKind::Image
)
119 .Case("image3d_t", ValueKind::Image
)
120 .Case("sampler_t", ValueKind::Sampler
)
121 .Case("queue_t", ValueKind::Queue
)
122 .Default(isa
<PointerType
>(Ty
) ?
123 (Ty
->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS
?
125 ValueKind::DynamicSharedPointer
:
126 ValueKind::GlobalBuffer
) :
130 ValueType
MetadataStreamerV2::getValueType(Type
*Ty
, StringRef TypeName
) const {
131 switch (Ty
->getTypeID()) {
132 case Type::IntegerTyID
: {
133 auto Signed
= !TypeName
.startswith("u");
134 switch (Ty
->getIntegerBitWidth()) {
136 return Signed
? ValueType::I8
: ValueType::U8
;
138 return Signed
? ValueType::I16
: ValueType::U16
;
140 return Signed
? ValueType::I32
: ValueType::U32
;
142 return Signed
? ValueType::I64
: ValueType::U64
;
144 return ValueType::Struct
;
148 return ValueType::F16
;
149 case Type::FloatTyID
:
150 return ValueType::F32
;
151 case Type::DoubleTyID
:
152 return ValueType::F64
;
153 case Type::PointerTyID
:
154 return getValueType(Ty
->getPointerElementType(), TypeName
);
155 case Type::VectorTyID
:
156 return getValueType(Ty
->getVectorElementType(), TypeName
);
158 return ValueType::Struct
;
162 std::string
MetadataStreamerV2::getTypeName(Type
*Ty
, bool Signed
) const {
163 switch (Ty
->getTypeID()) {
164 case Type::IntegerTyID
: {
166 return (Twine('u') + getTypeName(Ty
, true)).str();
168 auto BitWidth
= Ty
->getIntegerBitWidth();
179 return (Twine('i') + Twine(BitWidth
)).str();
184 case Type::FloatTyID
:
186 case Type::DoubleTyID
:
188 case Type::VectorTyID
: {
189 auto VecTy
= cast
<VectorType
>(Ty
);
190 auto ElTy
= VecTy
->getElementType();
191 auto NumElements
= VecTy
->getVectorNumElements();
192 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
199 std::vector
<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode
*Node
) const {
201 std::vector
<uint32_t> Dims
;
202 if (Node
->getNumOperands() != 3)
205 for (auto &Op
: Node
->operands())
206 Dims
.push_back(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue());
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction
&MF
,
212 const SIProgramInfo
&ProgramInfo
) const {
213 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
214 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps
;
216 const Function
&F
= MF
.getFunction();
218 assert(F
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
219 F
.getCallingConv() == CallingConv::SPIR_KERNEL
);
221 unsigned MaxKernArgAlign
;
222 HSACodeProps
.mKernargSegmentSize
= STM
.getKernArgSegmentSize(F
,
224 HSACodeProps
.mGroupSegmentFixedSize
= ProgramInfo
.LDSSize
;
225 HSACodeProps
.mPrivateSegmentFixedSize
= ProgramInfo
.ScratchSize
;
226 HSACodeProps
.mKernargSegmentAlign
= std::max(MaxKernArgAlign
, 4u);
227 HSACodeProps
.mWavefrontSize
= STM
.getWavefrontSize();
228 HSACodeProps
.mNumSGPRs
= ProgramInfo
.NumSGPR
;
229 HSACodeProps
.mNumVGPRs
= ProgramInfo
.NumVGPR
;
230 HSACodeProps
.mMaxFlatWorkGroupSize
= MFI
.getMaxFlatWorkGroupSize();
231 HSACodeProps
.mIsDynamicCallStack
= ProgramInfo
.DynamicCallStack
;
232 HSACodeProps
.mIsXNACKEnabled
= STM
.isXNACKEnabled();
233 HSACodeProps
.mNumSpilledSGPRs
= MFI
.getNumSpilledSGPRs();
234 HSACodeProps
.mNumSpilledVGPRs
= MFI
.getNumSpilledVGPRs();
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction
&MF
,
241 const SIProgramInfo
&ProgramInfo
) const {
242 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
243 HSAMD::Kernel::DebugProps::Metadata HSADebugProps
;
245 if (!STM
.debuggerSupported())
246 return HSADebugProps
;
248 HSADebugProps
.mDebuggerABIVersion
.push_back(1);
249 HSADebugProps
.mDebuggerABIVersion
.push_back(0);
251 if (STM
.debuggerEmitPrologue()) {
252 HSADebugProps
.mPrivateSegmentBufferSGPR
=
253 ProgramInfo
.DebuggerPrivateSegmentBufferSGPR
;
254 HSADebugProps
.mWavefrontPrivateSegmentOffsetSGPR
=
255 ProgramInfo
.DebuggerWavefrontPrivateSegmentOffsetSGPR
;
258 return HSADebugProps
;
261 void MetadataStreamerV2::emitVersion() {
262 auto &Version
= HSAMetadata
.mVersion
;
264 Version
.push_back(VersionMajor
);
265 Version
.push_back(VersionMinor
);
268 void MetadataStreamerV2::emitPrintf(const Module
&Mod
) {
269 auto &Printf
= HSAMetadata
.mPrintf
;
271 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
275 for (auto Op
: Node
->operands())
276 if (Op
->getNumOperands())
277 Printf
.push_back(cast
<MDString
>(Op
->getOperand(0))->getString());
280 void MetadataStreamerV2::emitKernelLanguage(const Function
&Func
) {
281 auto &Kernel
= HSAMetadata
.mKernels
.back();
283 // TODO: What about other languages?
284 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
285 if (!Node
|| !Node
->getNumOperands())
287 auto Op0
= Node
->getOperand(0);
288 if (Op0
->getNumOperands() <= 1)
291 Kernel
.mLanguage
= "OpenCL C";
292 Kernel
.mLanguageVersion
.push_back(
293 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue());
294 Kernel
.mLanguageVersion
.push_back(
295 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue());
298 void MetadataStreamerV2::emitKernelAttrs(const Function
&Func
) {
299 auto &Attrs
= HSAMetadata
.mKernels
.back().mAttrs
;
301 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
302 Attrs
.mReqdWorkGroupSize
= getWorkGroupDimensions(Node
);
303 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
304 Attrs
.mWorkGroupSizeHint
= getWorkGroupDimensions(Node
);
305 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
306 Attrs
.mVecTypeHint
= getTypeName(
307 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
308 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue());
310 if (Func
.hasFnAttribute("runtime-handle")) {
311 Attrs
.mRuntimeHandle
=
312 Func
.getFnAttribute("runtime-handle").getValueAsString().str();
316 void MetadataStreamerV2::emitKernelArgs(const Function
&Func
) {
317 for (auto &Arg
: Func
.args())
320 emitHiddenKernelArgs(Func
);
323 void MetadataStreamerV2::emitKernelArg(const Argument
&Arg
) {
324 auto Func
= Arg
.getParent();
325 auto ArgNo
= Arg
.getArgNo();
329 Node
= Func
->getMetadata("kernel_arg_name");
330 if (Node
&& ArgNo
< Node
->getNumOperands())
331 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
332 else if (Arg
.hasName())
333 Name
= Arg
.getName();
336 Node
= Func
->getMetadata("kernel_arg_type");
337 if (Node
&& ArgNo
< Node
->getNumOperands())
338 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
340 StringRef BaseTypeName
;
341 Node
= Func
->getMetadata("kernel_arg_base_type");
342 if (Node
&& ArgNo
< Node
->getNumOperands())
343 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
346 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
347 Arg
.hasNoAliasAttr()) {
348 AccQual
= "read_only";
350 Node
= Func
->getMetadata("kernel_arg_access_qual");
351 if (Node
&& ArgNo
< Node
->getNumOperands())
352 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
356 Node
= Func
->getMetadata("kernel_arg_type_qual");
357 if (Node
&& ArgNo
< Node
->getNumOperands())
358 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
360 Type
*Ty
= Arg
.getType();
361 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
363 unsigned PointeeAlign
= 0;
364 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
365 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
366 PointeeAlign
= Arg
.getParamAlignment();
367 if (PointeeAlign
== 0)
368 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
372 emitKernelArg(DL
, Ty
, getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
),
373 PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
, TypeQual
);
376 void MetadataStreamerV2::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
378 unsigned PointeeAlign
, StringRef Name
,
380 StringRef BaseTypeName
,
381 StringRef AccQual
, StringRef TypeQual
) {
382 HSAMetadata
.mKernels
.back().mArgs
.push_back(Kernel::Arg::Metadata());
383 auto &Arg
= HSAMetadata
.mKernels
.back().mArgs
.back();
386 Arg
.mTypeName
= TypeName
;
387 Arg
.mSize
= DL
.getTypeAllocSize(Ty
);
388 Arg
.mAlign
= DL
.getABITypeAlignment(Ty
);
389 Arg
.mValueKind
= ValueKind
;
390 Arg
.mValueType
= getValueType(Ty
, BaseTypeName
);
391 Arg
.mPointeeAlign
= PointeeAlign
;
393 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
394 Arg
.mAddrSpaceQual
= getAddressSpaceQualifier(PtrTy
->getAddressSpace());
396 Arg
.mAccQual
= getAccessQualifier(AccQual
);
398 // TODO: Emit Arg.mActualAccQual.
400 SmallVector
<StringRef
, 1> SplitTypeQuals
;
401 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
402 for (StringRef Key
: SplitTypeQuals
) {
403 auto P
= StringSwitch
<bool*>(Key
)
404 .Case("const", &Arg
.mIsConst
)
405 .Case("restrict", &Arg
.mIsRestrict
)
406 .Case("volatile", &Arg
.mIsVolatile
)
407 .Case("pipe", &Arg
.mIsPipe
)
414 void MetadataStreamerV2::emitHiddenKernelArgs(const Function
&Func
) {
415 int HiddenArgNumBytes
=
416 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
418 if (!HiddenArgNumBytes
)
421 auto &DL
= Func
.getParent()->getDataLayout();
422 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
424 if (HiddenArgNumBytes
>= 8)
425 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetX
);
426 if (HiddenArgNumBytes
>= 16)
427 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetY
);
428 if (HiddenArgNumBytes
>= 24)
429 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetZ
);
431 auto Int8PtrTy
= Type::getInt8PtrTy(Func
.getContext(),
432 AMDGPUAS::GLOBAL_ADDRESS
);
434 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
436 if (HiddenArgNumBytes
>= 32) {
437 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
438 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenPrintfBuffer
);
440 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
443 // Emit "default queue" and "completion action" arguments if enqueue kernel is
444 // used, otherwise emit dummy "none" arguments.
445 if (HiddenArgNumBytes
>= 48) {
446 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
447 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenDefaultQueue
);
448 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenCompletionAction
);
450 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
451 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
456 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
457 return TargetStreamer
.EmitHSAMetadata(getHSAMetadata());
460 void MetadataStreamerV2::begin(const Module
&Mod
) {
465 void MetadataStreamerV2::end() {
466 std::string HSAMetadataString
;
467 if (toString(HSAMetadata
, HSAMetadataString
))
471 dump(HSAMetadataString
);
472 if (VerifyHSAMetadata
)
473 verify(HSAMetadataString
);
476 void MetadataStreamerV2::emitKernel(const MachineFunction
&MF
,
477 const SIProgramInfo
&ProgramInfo
) {
478 auto &Func
= MF
.getFunction();
479 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
482 auto CodeProps
= getHSACodeProps(MF
, ProgramInfo
);
483 auto DebugProps
= getHSADebugProps(MF
, ProgramInfo
);
485 HSAMetadata
.mKernels
.push_back(Kernel::Metadata());
486 auto &Kernel
= HSAMetadata
.mKernels
.back();
488 Kernel
.mName
= Func
.getName();
489 Kernel
.mSymbolName
= (Twine(Func
.getName()) + Twine("@kd")).str();
490 emitKernelLanguage(Func
);
491 emitKernelAttrs(Func
);
492 emitKernelArgs(Func
);
493 HSAMetadata
.mKernels
.back().mCodeProps
= CodeProps
;
494 HSAMetadata
.mKernels
.back().mDebugProps
= DebugProps
;
497 //===----------------------------------------------------------------------===//
498 // HSAMetadataStreamerV3
499 //===----------------------------------------------------------------------===//
501 void MetadataStreamerV3::dump(StringRef HSAMetadataString
) const {
502 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
505 void MetadataStreamerV3::verify(StringRef HSAMetadataString
) const {
506 errs() << "AMDGPU HSA Metadata Parser Test: ";
508 std::shared_ptr
<msgpack::Node
> FromHSAMetadataString
=
509 std::make_shared
<msgpack::MapNode
>();
511 yaml::Input
YIn(HSAMetadataString
);
512 YIn
>> FromHSAMetadataString
;
518 std::string ToHSAMetadataString
;
519 raw_string_ostream
StrOS(ToHSAMetadataString
);
520 yaml::Output
YOut(StrOS
);
521 YOut
<< FromHSAMetadataString
;
523 errs() << (HSAMetadataString
== StrOS
.str() ? "PASS" : "FAIL") << '\n';
524 if (HSAMetadataString
!= ToHSAMetadataString
) {
525 errs() << "Original input: " << HSAMetadataString
<< '\n'
526 << "Produced output: " << StrOS
.str() << '\n';
531 MetadataStreamerV3::getAccessQualifier(StringRef AccQual
) const {
532 return StringSwitch
<Optional
<StringRef
>>(AccQual
)
533 .Case("read_only", StringRef("read_only"))
534 .Case("write_only", StringRef("write_only"))
535 .Case("read_write", StringRef("read_write"))
540 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace
) const {
541 switch (AddressSpace
) {
542 case AMDGPUAS::PRIVATE_ADDRESS
:
543 return StringRef("private");
544 case AMDGPUAS::GLOBAL_ADDRESS
:
545 return StringRef("global");
546 case AMDGPUAS::CONSTANT_ADDRESS
:
547 return StringRef("constant");
548 case AMDGPUAS::LOCAL_ADDRESS
:
549 return StringRef("local");
550 case AMDGPUAS::FLAT_ADDRESS
:
551 return StringRef("generic");
552 case AMDGPUAS::REGION_ADDRESS
:
553 return StringRef("region");
559 StringRef
MetadataStreamerV3::getValueKind(Type
*Ty
, StringRef TypeQual
,
560 StringRef BaseTypeName
) const {
561 if (TypeQual
.find("pipe") != StringRef::npos
)
564 return StringSwitch
<StringRef
>(BaseTypeName
)
565 .Case("image1d_t", "image")
566 .Case("image1d_array_t", "image")
567 .Case("image1d_buffer_t", "image")
568 .Case("image2d_t", "image")
569 .Case("image2d_array_t", "image")
570 .Case("image2d_array_depth_t", "image")
571 .Case("image2d_array_msaa_t", "image")
572 .Case("image2d_array_msaa_depth_t", "image")
573 .Case("image2d_depth_t", "image")
574 .Case("image2d_msaa_t", "image")
575 .Case("image2d_msaa_depth_t", "image")
576 .Case("image3d_t", "image")
577 .Case("sampler_t", "sampler")
578 .Case("queue_t", "queue")
579 .Default(isa
<PointerType
>(Ty
)
580 ? (Ty
->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
581 ? "dynamic_shared_pointer"
586 StringRef
MetadataStreamerV3::getValueType(Type
*Ty
, StringRef TypeName
) const {
587 switch (Ty
->getTypeID()) {
588 case Type::IntegerTyID
: {
589 auto Signed
= !TypeName
.startswith("u");
590 switch (Ty
->getIntegerBitWidth()) {
592 return Signed
? "i8" : "u8";
594 return Signed
? "i16" : "u16";
596 return Signed
? "i32" : "u32";
598 return Signed
? "i64" : "u64";
605 case Type::FloatTyID
:
607 case Type::DoubleTyID
:
609 case Type::PointerTyID
:
610 return getValueType(Ty
->getPointerElementType(), TypeName
);
611 case Type::VectorTyID
:
612 return getValueType(Ty
->getVectorElementType(), TypeName
);
618 std::string
MetadataStreamerV3::getTypeName(Type
*Ty
, bool Signed
) const {
619 switch (Ty
->getTypeID()) {
620 case Type::IntegerTyID
: {
622 return (Twine('u') + getTypeName(Ty
, true)).str();
624 auto BitWidth
= Ty
->getIntegerBitWidth();
635 return (Twine('i') + Twine(BitWidth
)).str();
640 case Type::FloatTyID
:
642 case Type::DoubleTyID
:
644 case Type::VectorTyID
: {
645 auto VecTy
= cast
<VectorType
>(Ty
);
646 auto ElTy
= VecTy
->getElementType();
647 auto NumElements
= VecTy
->getVectorNumElements();
648 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
655 std::shared_ptr
<msgpack::ArrayNode
>
656 MetadataStreamerV3::getWorkGroupDimensions(MDNode
*Node
) const {
657 auto Dims
= std::make_shared
<msgpack::ArrayNode
>();
658 if (Node
->getNumOperands() != 3)
661 for (auto &Op
: Node
->operands())
662 Dims
->push_back(std::make_shared
<msgpack::ScalarNode
>(
663 mdconst::extract
<ConstantInt
>(Op
)->getZExtValue()));
667 void MetadataStreamerV3::emitVersion() {
668 auto Version
= std::make_shared
<msgpack::ArrayNode
>();
669 Version
->push_back(std::make_shared
<msgpack::ScalarNode
>(V3::VersionMajor
));
670 Version
->push_back(std::make_shared
<msgpack::ScalarNode
>(V3::VersionMinor
));
671 getRootMetadata("amdhsa.version") = std::move(Version
);
674 void MetadataStreamerV3::emitPrintf(const Module
&Mod
) {
675 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
679 auto Printf
= std::make_shared
<msgpack::ArrayNode
>();
680 for (auto Op
: Node
->operands())
681 if (Op
->getNumOperands())
682 Printf
->push_back(std::make_shared
<msgpack::ScalarNode
>(
683 cast
<MDString
>(Op
->getOperand(0))->getString()));
684 getRootMetadata("amdhsa.printf") = std::move(Printf
);
687 void MetadataStreamerV3::emitKernelLanguage(const Function
&Func
,
688 msgpack::MapNode
&Kern
) {
689 // TODO: What about other languages?
690 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
691 if (!Node
|| !Node
->getNumOperands())
693 auto Op0
= Node
->getOperand(0);
694 if (Op0
->getNumOperands() <= 1)
697 Kern
[".language"] = std::make_shared
<msgpack::ScalarNode
>("OpenCL C");
698 auto LanguageVersion
= std::make_shared
<msgpack::ArrayNode
>();
699 LanguageVersion
->push_back(std::make_shared
<msgpack::ScalarNode
>(
700 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue()));
701 LanguageVersion
->push_back(std::make_shared
<msgpack::ScalarNode
>(
702 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue()));
703 Kern
[".language_version"] = std::move(LanguageVersion
);
706 void MetadataStreamerV3::emitKernelAttrs(const Function
&Func
,
707 msgpack::MapNode
&Kern
) {
709 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
710 Kern
[".reqd_workgroup_size"] = getWorkGroupDimensions(Node
);
711 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
712 Kern
[".workgroup_size_hint"] = getWorkGroupDimensions(Node
);
713 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
714 Kern
[".vec_type_hint"] = std::make_shared
<msgpack::ScalarNode
>(getTypeName(
715 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
716 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue()));
718 if (Func
.hasFnAttribute("runtime-handle")) {
719 Kern
[".device_enqueue_symbol"] = std::make_shared
<msgpack::ScalarNode
>(
720 Func
.getFnAttribute("runtime-handle").getValueAsString().str());
724 void MetadataStreamerV3::emitKernelArgs(const Function
&Func
,
725 msgpack::MapNode
&Kern
) {
727 auto Args
= std::make_shared
<msgpack::ArrayNode
>();
728 for (auto &Arg
: Func
.args())
729 emitKernelArg(Arg
, Offset
, *Args
);
731 emitHiddenKernelArgs(Func
, Offset
, *Args
);
733 // TODO: What about other languages?
734 if (Func
.getParent()->getNamedMetadata("opencl.ocl.version")) {
735 auto &DL
= Func
.getParent()->getDataLayout();
736 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
738 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_x", Offset
, *Args
);
739 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_y", Offset
, *Args
);
740 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_z", Offset
, *Args
);
743 Type::getInt8PtrTy(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
745 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
747 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
748 emitKernelArg(DL
, Int8PtrTy
, "hidden_printf_buffer", Offset
, *Args
);
750 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, *Args
);
752 // Emit "default queue" and "completion action" arguments if enqueue kernel
753 // is used, otherwise emit dummy "none" arguments.
754 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
755 emitKernelArg(DL
, Int8PtrTy
, "hidden_default_queue", Offset
, *Args
);
756 emitKernelArg(DL
, Int8PtrTy
, "hidden_completion_action", Offset
, *Args
);
758 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, *Args
);
759 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, *Args
);
763 Kern
[".args"] = std::move(Args
);
766 void MetadataStreamerV3::emitKernelArg(const Argument
&Arg
, unsigned &Offset
,
767 msgpack::ArrayNode
&Args
) {
768 auto Func
= Arg
.getParent();
769 auto ArgNo
= Arg
.getArgNo();
773 Node
= Func
->getMetadata("kernel_arg_name");
774 if (Node
&& ArgNo
< Node
->getNumOperands())
775 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
776 else if (Arg
.hasName())
777 Name
= Arg
.getName();
780 Node
= Func
->getMetadata("kernel_arg_type");
781 if (Node
&& ArgNo
< Node
->getNumOperands())
782 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
784 StringRef BaseTypeName
;
785 Node
= Func
->getMetadata("kernel_arg_base_type");
786 if (Node
&& ArgNo
< Node
->getNumOperands())
787 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
790 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
791 Arg
.hasNoAliasAttr()) {
792 AccQual
= "read_only";
794 Node
= Func
->getMetadata("kernel_arg_access_qual");
795 if (Node
&& ArgNo
< Node
->getNumOperands())
796 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
800 Node
= Func
->getMetadata("kernel_arg_type_qual");
801 if (Node
&& ArgNo
< Node
->getNumOperands())
802 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
804 Type
*Ty
= Arg
.getType();
805 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
807 unsigned PointeeAlign
= 0;
808 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
809 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
810 PointeeAlign
= Arg
.getParamAlignment();
811 if (PointeeAlign
== 0)
812 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
816 emitKernelArg(Func
->getParent()->getDataLayout(), Arg
.getType(),
817 getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
), Offset
,
818 Args
, PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
,
822 void MetadataStreamerV3::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
823 StringRef ValueKind
, unsigned &Offset
,
824 msgpack::ArrayNode
&Args
,
825 unsigned PointeeAlign
, StringRef Name
,
827 StringRef BaseTypeName
,
828 StringRef AccQual
, StringRef TypeQual
) {
829 auto ArgPtr
= std::make_shared
<msgpack::MapNode
>();
833 Arg
[".name"] = std::make_shared
<msgpack::ScalarNode
>(Name
);
834 if (!TypeName
.empty())
835 Arg
[".type_name"] = std::make_shared
<msgpack::ScalarNode
>(TypeName
);
836 auto Size
= DL
.getTypeAllocSize(Ty
);
837 auto Align
= DL
.getABITypeAlignment(Ty
);
838 Arg
[".size"] = std::make_shared
<msgpack::ScalarNode
>(Size
);
839 Offset
= alignTo(Offset
, Align
);
840 Arg
[".offset"] = std::make_shared
<msgpack::ScalarNode
>(Offset
);
842 Arg
[".value_kind"] = std::make_shared
<msgpack::ScalarNode
>(ValueKind
);
844 std::make_shared
<msgpack::ScalarNode
>(getValueType(Ty
, BaseTypeName
));
846 Arg
[".pointee_align"] = std::make_shared
<msgpack::ScalarNode
>(PointeeAlign
);
848 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
849 if (auto Qualifier
= getAddressSpaceQualifier(PtrTy
->getAddressSpace()))
850 Arg
[".address_space"] = std::make_shared
<msgpack::ScalarNode
>(*Qualifier
);
852 if (auto AQ
= getAccessQualifier(AccQual
))
853 Arg
[".access"] = std::make_shared
<msgpack::ScalarNode
>(*AQ
);
855 // TODO: Emit Arg[".actual_access"].
857 SmallVector
<StringRef
, 1> SplitTypeQuals
;
858 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
859 for (StringRef Key
: SplitTypeQuals
) {
861 Arg
[".is_const"] = std::make_shared
<msgpack::ScalarNode
>(true);
862 else if (Key
== "restrict")
863 Arg
[".is_restrict"] = std::make_shared
<msgpack::ScalarNode
>(true);
864 else if (Key
== "volatile")
865 Arg
[".is_volatile"] = std::make_shared
<msgpack::ScalarNode
>(true);
866 else if (Key
== "pipe")
867 Arg
[".is_pipe"] = std::make_shared
<msgpack::ScalarNode
>(true);
870 Args
.push_back(std::move(ArgPtr
));
873 void MetadataStreamerV3::emitHiddenKernelArgs(const Function
&Func
,
875 msgpack::ArrayNode
&Args
) {
876 int HiddenArgNumBytes
=
877 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
879 if (!HiddenArgNumBytes
)
882 auto &DL
= Func
.getParent()->getDataLayout();
883 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
885 if (HiddenArgNumBytes
>= 8)
886 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_x", Offset
, Args
);
887 if (HiddenArgNumBytes
>= 16)
888 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_y", Offset
, Args
);
889 if (HiddenArgNumBytes
>= 24)
890 emitKernelArg(DL
, Int64Ty
, "hidden_global_offset_z", Offset
, Args
);
893 Type::getInt8PtrTy(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
895 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
897 if (HiddenArgNumBytes
>= 32) {
898 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
899 emitKernelArg(DL
, Int8PtrTy
, "hidden_printf_buffer", Offset
, Args
);
901 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
904 // Emit "default queue" and "completion action" arguments if enqueue kernel is
905 // used, otherwise emit dummy "none" arguments.
906 if (HiddenArgNumBytes
>= 48) {
907 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
908 emitKernelArg(DL
, Int8PtrTy
, "hidden_default_queue", Offset
, Args
);
909 emitKernelArg(DL
, Int8PtrTy
, "hidden_completion_action", Offset
, Args
);
911 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
912 emitKernelArg(DL
, Int8PtrTy
, "hidden_none", Offset
, Args
);
917 std::shared_ptr
<msgpack::MapNode
>
918 MetadataStreamerV3::getHSAKernelProps(const MachineFunction
&MF
,
919 const SIProgramInfo
&ProgramInfo
) const {
920 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
921 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
922 const Function
&F
= MF
.getFunction();
924 auto HSAKernelProps
= std::make_shared
<msgpack::MapNode
>();
925 auto &Kern
= *HSAKernelProps
;
927 unsigned MaxKernArgAlign
;
928 Kern
[".kernarg_segment_size"] = std::make_shared
<msgpack::ScalarNode
>(
929 STM
.getKernArgSegmentSize(F
, MaxKernArgAlign
));
930 Kern
[".group_segment_fixed_size"] =
931 std::make_shared
<msgpack::ScalarNode
>(ProgramInfo
.LDSSize
);
932 Kern
[".private_segment_fixed_size"] =
933 std::make_shared
<msgpack::ScalarNode
>(ProgramInfo
.ScratchSize
);
934 Kern
[".kernarg_segment_align"] =
935 std::make_shared
<msgpack::ScalarNode
>(std::max(uint32_t(4), MaxKernArgAlign
));
936 Kern
[".wavefront_size"] =
937 std::make_shared
<msgpack::ScalarNode
>(STM
.getWavefrontSize());
938 Kern
[".sgpr_count"] = std::make_shared
<msgpack::ScalarNode
>(ProgramInfo
.NumSGPR
);
939 Kern
[".vgpr_count"] = std::make_shared
<msgpack::ScalarNode
>(ProgramInfo
.NumVGPR
);
940 Kern
[".max_flat_workgroup_size"] =
941 std::make_shared
<msgpack::ScalarNode
>(MFI
.getMaxFlatWorkGroupSize());
942 Kern
[".sgpr_spill_count"] =
943 std::make_shared
<msgpack::ScalarNode
>(MFI
.getNumSpilledSGPRs());
944 Kern
[".vgpr_spill_count"] =
945 std::make_shared
<msgpack::ScalarNode
>(MFI
.getNumSpilledVGPRs());
947 return HSAKernelProps
;
950 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
951 return TargetStreamer
.EmitHSAMetadata(getHSAMetadataRoot(), true);
954 void MetadataStreamerV3::begin(const Module
&Mod
) {
957 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
960 void MetadataStreamerV3::end() {
961 std::string HSAMetadataString
;
962 raw_string_ostream
StrOS(HSAMetadataString
);
963 yaml::Output
YOut(StrOS
);
964 YOut
<< HSAMetadataRoot
;
968 if (VerifyHSAMetadata
)
972 void MetadataStreamerV3::emitKernel(const MachineFunction
&MF
,
973 const SIProgramInfo
&ProgramInfo
) {
974 auto &Func
= MF
.getFunction();
975 auto KernelProps
= getHSAKernelProps(MF
, ProgramInfo
);
977 assert(Func
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
978 Func
.getCallingConv() == CallingConv::SPIR_KERNEL
);
980 auto &KernelsNode
= getRootMetadata("amdhsa.kernels");
981 auto Kernels
= cast
<msgpack::ArrayNode
>(KernelsNode
.get());
984 auto &Kern
= *KernelProps
;
985 Kern
[".name"] = std::make_shared
<msgpack::ScalarNode
>(Func
.getName());
986 Kern
[".symbol"] = std::make_shared
<msgpack::ScalarNode
>(
987 (Twine(Func
.getName()) + Twine(".kd")).str());
988 emitKernelLanguage(Func
, Kern
);
989 emitKernelAttrs(Func
, Kern
);
990 emitKernelArgs(Func
, Kern
);
993 Kernels
->push_back(std::move(KernelProps
));
996 } // end namespace HSAMD
997 } // end namespace AMDGPU
998 } // end namespace llvm