//===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// AMDGPU HSA Metadata Streamer.
///
//
//===----------------------------------------------------------------------===//
16 #include "AMDGPUHSAMetadataStreamer.h"
18 #include "AMDGPUSubtarget.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
29 static cl::opt
<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt
<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
39 void MetadataStreamer::dump(StringRef HSAMetadataString
) const {
40 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
43 void MetadataStreamer::verify(StringRef HSAMetadataString
) const {
44 errs() << "AMDGPU HSA Metadata Parser Test: ";
46 HSAMD::Metadata FromHSAMetadataString
;
47 if (fromString(HSAMetadataString
, FromHSAMetadataString
)) {
52 std::string ToHSAMetadataString
;
53 if (toString(FromHSAMetadataString
, ToHSAMetadataString
)) {
58 errs() << (HSAMetadataString
== ToHSAMetadataString
? "PASS" : "FAIL")
60 if (HSAMetadataString
!= ToHSAMetadataString
) {
61 errs() << "Original input: " << HSAMetadataString
<< '\n'
62 << "Produced output: " << ToHSAMetadataString
<< '\n';
66 AccessQualifier
MetadataStreamer::getAccessQualifier(StringRef AccQual
) const {
68 return AccessQualifier::Unknown
;
70 return StringSwitch
<AccessQualifier
>(AccQual
)
71 .Case("read_only", AccessQualifier::ReadOnly
)
72 .Case("write_only", AccessQualifier::WriteOnly
)
73 .Case("read_write", AccessQualifier::ReadWrite
)
74 .Default(AccessQualifier::Default
);
77 AddressSpaceQualifier
MetadataStreamer::getAddressSpaceQualifer(
78 unsigned AddressSpace
) const {
79 if (AddressSpace
== AMDGPUASI
.PRIVATE_ADDRESS
)
80 return AddressSpaceQualifier::Private
;
81 if (AddressSpace
== AMDGPUASI
.GLOBAL_ADDRESS
)
82 return AddressSpaceQualifier::Global
;
83 if (AddressSpace
== AMDGPUASI
.CONSTANT_ADDRESS
)
84 return AddressSpaceQualifier::Constant
;
85 if (AddressSpace
== AMDGPUASI
.LOCAL_ADDRESS
)
86 return AddressSpaceQualifier::Local
;
87 if (AddressSpace
== AMDGPUASI
.FLAT_ADDRESS
)
88 return AddressSpaceQualifier::Generic
;
89 if (AddressSpace
== AMDGPUASI
.REGION_ADDRESS
)
90 return AddressSpaceQualifier::Region
;
92 llvm_unreachable("Unknown address space qualifier");
95 ValueKind
MetadataStreamer::getValueKind(Type
*Ty
, StringRef TypeQual
,
96 StringRef BaseTypeName
) const {
97 if (TypeQual
.find("pipe") != StringRef::npos
)
98 return ValueKind::Pipe
;
100 return StringSwitch
<ValueKind
>(BaseTypeName
)
101 .Case("image1d_t", ValueKind::Image
)
102 .Case("image1d_array_t", ValueKind::Image
)
103 .Case("image1d_buffer_t", ValueKind::Image
)
104 .Case("image2d_t", ValueKind::Image
)
105 .Case("image2d_array_t", ValueKind::Image
)
106 .Case("image2d_array_depth_t", ValueKind::Image
)
107 .Case("image2d_array_msaa_t", ValueKind::Image
)
108 .Case("image2d_array_msaa_depth_t", ValueKind::Image
)
109 .Case("image2d_depth_t", ValueKind::Image
)
110 .Case("image2d_msaa_t", ValueKind::Image
)
111 .Case("image2d_msaa_depth_t", ValueKind::Image
)
112 .Case("image3d_t", ValueKind::Image
)
113 .Case("sampler_t", ValueKind::Sampler
)
114 .Case("queue_t", ValueKind::Queue
)
115 .Default(isa
<PointerType
>(Ty
) ?
116 (Ty
->getPointerAddressSpace() ==
117 AMDGPUASI
.LOCAL_ADDRESS
?
118 ValueKind::DynamicSharedPointer
:
119 ValueKind::GlobalBuffer
) :
123 ValueType
MetadataStreamer::getValueType(Type
*Ty
, StringRef TypeName
) const {
124 switch (Ty
->getTypeID()) {
125 case Type::IntegerTyID
: {
126 auto Signed
= !TypeName
.startswith("u");
127 switch (Ty
->getIntegerBitWidth()) {
129 return Signed
? ValueType::I8
: ValueType::U8
;
131 return Signed
? ValueType::I16
: ValueType::U16
;
133 return Signed
? ValueType::I32
: ValueType::U32
;
135 return Signed
? ValueType::I64
: ValueType::U64
;
137 return ValueType::Struct
;
141 return ValueType::F16
;
142 case Type::FloatTyID
:
143 return ValueType::F32
;
144 case Type::DoubleTyID
:
145 return ValueType::F64
;
146 case Type::PointerTyID
:
147 return getValueType(Ty
->getPointerElementType(), TypeName
);
148 case Type::VectorTyID
:
149 return getValueType(Ty
->getVectorElementType(), TypeName
);
151 return ValueType::Struct
;
155 std::string
MetadataStreamer::getTypeName(Type
*Ty
, bool Signed
) const {
156 switch (Ty
->getTypeID()) {
157 case Type::IntegerTyID
: {
159 return (Twine('u') + getTypeName(Ty
, true)).str();
161 auto BitWidth
= Ty
->getIntegerBitWidth();
172 return (Twine('i') + Twine(BitWidth
)).str();
177 case Type::FloatTyID
:
179 case Type::DoubleTyID
:
181 case Type::VectorTyID
: {
182 auto VecTy
= cast
<VectorType
>(Ty
);
183 auto ElTy
= VecTy
->getElementType();
184 auto NumElements
= VecTy
->getVectorNumElements();
185 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
192 std::vector
<uint32_t> MetadataStreamer::getWorkGroupDimensions(
193 MDNode
*Node
) const {
194 std::vector
<uint32_t> Dims
;
195 if (Node
->getNumOperands() != 3)
198 for (auto &Op
: Node
->operands())
199 Dims
.push_back(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue());
203 Kernel::CodeProps::Metadata
MetadataStreamer::getHSACodeProps(
204 const MachineFunction
&MF
,
205 const SIProgramInfo
&ProgramInfo
) const {
206 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
207 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
208 HSAMD::Kernel::CodeProps::Metadata HSACodeProps
;
209 const Function
&F
= MF
.getFunction();
211 // Avoid asserting on erroneous cases.
212 if (F
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
215 HSACodeProps
.mKernargSegmentSize
=
216 STM
.getKernArgSegmentSize(F
, MFI
.getExplicitKernArgSize());
217 HSACodeProps
.mGroupSegmentFixedSize
= ProgramInfo
.LDSSize
;
218 HSACodeProps
.mPrivateSegmentFixedSize
= ProgramInfo
.ScratchSize
;
219 HSACodeProps
.mKernargSegmentAlign
=
220 std::max(uint32_t(4), MFI
.getMaxKernArgAlign());
221 HSACodeProps
.mWavefrontSize
= STM
.getWavefrontSize();
222 HSACodeProps
.mNumSGPRs
= ProgramInfo
.NumSGPR
;
223 HSACodeProps
.mNumVGPRs
= ProgramInfo
.NumVGPR
;
224 HSACodeProps
.mMaxFlatWorkGroupSize
= MFI
.getMaxFlatWorkGroupSize();
225 HSACodeProps
.mIsDynamicCallStack
= ProgramInfo
.DynamicCallStack
;
226 HSACodeProps
.mIsXNACKEnabled
= STM
.isXNACKEnabled();
227 HSACodeProps
.mNumSpilledSGPRs
= MFI
.getNumSpilledSGPRs();
228 HSACodeProps
.mNumSpilledVGPRs
= MFI
.getNumSpilledVGPRs();
233 Kernel::DebugProps::Metadata
MetadataStreamer::getHSADebugProps(
234 const MachineFunction
&MF
,
235 const SIProgramInfo
&ProgramInfo
) const {
236 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
237 HSAMD::Kernel::DebugProps::Metadata HSADebugProps
;
239 if (!STM
.debuggerSupported())
240 return HSADebugProps
;
242 HSADebugProps
.mDebuggerABIVersion
.push_back(1);
243 HSADebugProps
.mDebuggerABIVersion
.push_back(0);
245 if (STM
.debuggerEmitPrologue()) {
246 HSADebugProps
.mPrivateSegmentBufferSGPR
=
247 ProgramInfo
.DebuggerPrivateSegmentBufferSGPR
;
248 HSADebugProps
.mWavefrontPrivateSegmentOffsetSGPR
=
249 ProgramInfo
.DebuggerWavefrontPrivateSegmentOffsetSGPR
;
252 return HSADebugProps
;
255 void MetadataStreamer::emitVersion() {
256 auto &Version
= HSAMetadata
.mVersion
;
258 Version
.push_back(VersionMajor
);
259 Version
.push_back(VersionMinor
);
262 void MetadataStreamer::emitPrintf(const Module
&Mod
) {
263 auto &Printf
= HSAMetadata
.mPrintf
;
265 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
269 for (auto Op
: Node
->operands())
270 if (Op
->getNumOperands())
271 Printf
.push_back(cast
<MDString
>(Op
->getOperand(0))->getString());
274 void MetadataStreamer::emitKernelLanguage(const Function
&Func
) {
275 auto &Kernel
= HSAMetadata
.mKernels
.back();
277 // TODO: What about other languages?
278 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
279 if (!Node
|| !Node
->getNumOperands())
281 auto Op0
= Node
->getOperand(0);
282 if (Op0
->getNumOperands() <= 1)
285 Kernel
.mLanguage
= "OpenCL C";
286 Kernel
.mLanguageVersion
.push_back(
287 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue());
288 Kernel
.mLanguageVersion
.push_back(
289 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue());
292 void MetadataStreamer::emitKernelAttrs(const Function
&Func
) {
293 auto &Attrs
= HSAMetadata
.mKernels
.back().mAttrs
;
295 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
296 Attrs
.mReqdWorkGroupSize
= getWorkGroupDimensions(Node
);
297 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
298 Attrs
.mWorkGroupSizeHint
= getWorkGroupDimensions(Node
);
299 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
300 Attrs
.mVecTypeHint
= getTypeName(
301 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
302 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue());
304 if (Func
.hasFnAttribute("runtime-handle")) {
305 Attrs
.mRuntimeHandle
=
306 Func
.getFnAttribute("runtime-handle").getValueAsString().str();
310 void MetadataStreamer::emitKernelArgs(const Function
&Func
) {
311 for (auto &Arg
: Func
.args())
314 emitHiddenKernelArgs(Func
);
317 void MetadataStreamer::emitKernelArg(const Argument
&Arg
) {
318 auto Func
= Arg
.getParent();
319 auto ArgNo
= Arg
.getArgNo();
323 Node
= Func
->getMetadata("kernel_arg_name");
324 if (Node
&& ArgNo
< Node
->getNumOperands())
325 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
326 else if (Arg
.hasName())
327 Name
= Arg
.getName();
330 Node
= Func
->getMetadata("kernel_arg_type");
331 if (Node
&& ArgNo
< Node
->getNumOperands())
332 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
334 StringRef BaseTypeName
;
335 Node
= Func
->getMetadata("kernel_arg_base_type");
336 if (Node
&& ArgNo
< Node
->getNumOperands())
337 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
340 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
341 Arg
.hasNoAliasAttr()) {
342 AccQual
= "read_only";
344 Node
= Func
->getMetadata("kernel_arg_access_qual");
345 if (Node
&& ArgNo
< Node
->getNumOperands())
346 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
350 Node
= Func
->getMetadata("kernel_arg_type_qual");
351 if (Node
&& ArgNo
< Node
->getNumOperands())
352 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
354 Type
*Ty
= Arg
.getType();
355 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
357 unsigned PointeeAlign
= 0;
358 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
359 if (PtrTy
->getAddressSpace() == AMDGPUASI
.LOCAL_ADDRESS
) {
360 PointeeAlign
= Arg
.getParamAlignment();
361 if (PointeeAlign
== 0)
362 PointeeAlign
= DL
.getABITypeAlignment(PtrTy
->getElementType());
366 emitKernelArg(DL
, Ty
, getValueKind(Arg
.getType(), TypeQual
, BaseTypeName
),
367 PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
, TypeQual
);
370 void MetadataStreamer::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
372 unsigned PointeeAlign
,
374 StringRef TypeName
, StringRef BaseTypeName
,
375 StringRef AccQual
, StringRef TypeQual
) {
376 HSAMetadata
.mKernels
.back().mArgs
.push_back(Kernel::Arg::Metadata());
377 auto &Arg
= HSAMetadata
.mKernels
.back().mArgs
.back();
380 Arg
.mTypeName
= TypeName
;
381 Arg
.mSize
= DL
.getTypeAllocSize(Ty
);
382 Arg
.mAlign
= DL
.getABITypeAlignment(Ty
);
383 Arg
.mValueKind
= ValueKind
;
384 Arg
.mValueType
= getValueType(Ty
, BaseTypeName
);
385 Arg
.mPointeeAlign
= PointeeAlign
;
387 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
388 Arg
.mAddrSpaceQual
= getAddressSpaceQualifer(PtrTy
->getAddressSpace());
390 Arg
.mAccQual
= getAccessQualifier(AccQual
);
392 // TODO: Emit Arg.mActualAccQual.
394 SmallVector
<StringRef
, 1> SplitTypeQuals
;
395 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
396 for (StringRef Key
: SplitTypeQuals
) {
397 auto P
= StringSwitch
<bool*>(Key
)
398 .Case("const", &Arg
.mIsConst
)
399 .Case("restrict", &Arg
.mIsRestrict
)
400 .Case("volatile", &Arg
.mIsVolatile
)
401 .Case("pipe", &Arg
.mIsPipe
)
408 void MetadataStreamer::emitHiddenKernelArgs(const Function
&Func
) {
409 int HiddenArgNumBytes
=
410 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
412 if (!HiddenArgNumBytes
)
415 auto &DL
= Func
.getParent()->getDataLayout();
416 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
418 if (HiddenArgNumBytes
>= 8)
419 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetX
);
420 if (HiddenArgNumBytes
>= 16)
421 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetY
);
422 if (HiddenArgNumBytes
>= 24)
423 emitKernelArg(DL
, Int64Ty
, ValueKind::HiddenGlobalOffsetZ
);
425 auto Int8PtrTy
= Type::getInt8PtrTy(Func
.getContext(),
426 AMDGPUASI
.GLOBAL_ADDRESS
);
428 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
430 if (HiddenArgNumBytes
>= 32) {
431 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
432 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenPrintfBuffer
);
434 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
437 // Emit "default queue" and "completion action" arguments if enqueue kernel is
438 // used, otherwise emit dummy "none" arguments.
439 if (HiddenArgNumBytes
>= 48) {
440 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
441 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenDefaultQueue
);
442 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenCompletionAction
);
444 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
445 emitKernelArg(DL
, Int8PtrTy
, ValueKind::HiddenNone
);
450 void MetadataStreamer::begin(const Module
&Mod
) {
451 AMDGPUASI
= getAMDGPUAS(Mod
);
456 void MetadataStreamer::end() {
457 std::string HSAMetadataString
;
458 if (toString(HSAMetadata
, HSAMetadataString
))
462 dump(HSAMetadataString
);
463 if (VerifyHSAMetadata
)
464 verify(HSAMetadataString
);
467 void MetadataStreamer::emitKernel(const MachineFunction
&MF
, const SIProgramInfo
&ProgramInfo
) {
468 auto &Func
= MF
.getFunction();
469 auto CodeProps
= getHSACodeProps(MF
, ProgramInfo
);
470 auto DebugProps
= getHSADebugProps(MF
, ProgramInfo
);
472 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
475 HSAMetadata
.mKernels
.push_back(Kernel::Metadata());
476 auto &Kernel
= HSAMetadata
.mKernels
.back();
478 Kernel
.mName
= Func
.getName();
479 Kernel
.mSymbolName
= (Twine(Func
.getName()) + Twine("@kd")).str();
480 emitKernelLanguage(Func
);
481 emitKernelAttrs(Func
);
482 emitKernelArgs(Func
);
483 HSAMetadata
.mKernels
.back().mCodeProps
= CodeProps
;
484 HSAMetadata
.mKernels
.back().mDebugProps
= DebugProps
;
487 } // end namespace HSAMD
488 } // end namespace AMDGPU
489 } // end namespace llvm