1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// AMDGPU HSA Metadata Streamer.
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
24 static std::pair
<Type
*, Align
> getArgumentTypeAlign(const Argument
&Arg
,
25 const DataLayout
&DL
) {
26 Type
*Ty
= Arg
.getType();
28 if (Arg
.hasByRefAttr()) {
29 Ty
= Arg
.getParamByRefType();
30 ArgAlign
= Arg
.getParamAlign();
34 ArgAlign
= DL
.getABITypeAlign(Ty
);
36 return std::make_pair(Ty
, *ArgAlign
);
41 static cl::opt
<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt
<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString
) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString
) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
61 HSAMD::Metadata FromHSAMetadataString
;
62 if (fromString(HSAMetadataString
, FromHSAMetadataString
)) {
67 std::string ToHSAMetadataString
;
68 if (toString(FromHSAMetadataString
, ToHSAMetadataString
)) {
73 errs() << (HSAMetadataString
== ToHSAMetadataString
? "PASS" : "FAIL")
75 if (HSAMetadataString
!= ToHSAMetadataString
) {
76 errs() << "Original input: " << HSAMetadataString
<< '\n'
77 << "Produced output: " << ToHSAMetadataString
<< '\n';
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual
) const {
84 return AccessQualifier::Unknown
;
86 return StringSwitch
<AccessQualifier
>(AccQual
)
87 .Case("read_only", AccessQualifier::ReadOnly
)
88 .Case("write_only", AccessQualifier::WriteOnly
)
89 .Case("read_write", AccessQualifier::ReadWrite
)
90 .Default(AccessQualifier::Default
);
94 MetadataStreamerV2::getAddressSpaceQualifier(
95 unsigned AddressSpace
) const {
96 switch (AddressSpace
) {
97 case AMDGPUAS::PRIVATE_ADDRESS
:
98 return AddressSpaceQualifier::Private
;
99 case AMDGPUAS::GLOBAL_ADDRESS
:
100 return AddressSpaceQualifier::Global
;
101 case AMDGPUAS::CONSTANT_ADDRESS
:
102 return AddressSpaceQualifier::Constant
;
103 case AMDGPUAS::LOCAL_ADDRESS
:
104 return AddressSpaceQualifier::Local
;
105 case AMDGPUAS::FLAT_ADDRESS
:
106 return AddressSpaceQualifier::Generic
;
107 case AMDGPUAS::REGION_ADDRESS
:
108 return AddressSpaceQualifier::Region
;
110 return AddressSpaceQualifier::Unknown
;
114 ValueKind
MetadataStreamerV2::getValueKind(Type
*Ty
, StringRef TypeQual
,
115 StringRef BaseTypeName
) const {
116 if (TypeQual
.find("pipe") != StringRef::npos
)
117 return ValueKind::Pipe
;
119 return StringSwitch
<ValueKind
>(BaseTypeName
)
120 .Case("image1d_t", ValueKind::Image
)
121 .Case("image1d_array_t", ValueKind::Image
)
122 .Case("image1d_buffer_t", ValueKind::Image
)
123 .Case("image2d_t", ValueKind::Image
)
124 .Case("image2d_array_t", ValueKind::Image
)
125 .Case("image2d_array_depth_t", ValueKind::Image
)
126 .Case("image2d_array_msaa_t", ValueKind::Image
)
127 .Case("image2d_array_msaa_depth_t", ValueKind::Image
)
128 .Case("image2d_depth_t", ValueKind::Image
)
129 .Case("image2d_msaa_t", ValueKind::Image
)
130 .Case("image2d_msaa_depth_t", ValueKind::Image
)
131 .Case("image3d_t", ValueKind::Image
)
132 .Case("sampler_t", ValueKind::Sampler
)
133 .Case("queue_t", ValueKind::Queue
)
134 .Default(isa
<PointerType
>(Ty
) ?
135 (Ty
->getPointerAddressSpace() ==
136 AMDGPUAS::LOCAL_ADDRESS
?
137 ValueKind::DynamicSharedPointer
:
138 ValueKind::GlobalBuffer
) :
142 std::string
MetadataStreamerV2::getTypeName(Type
*Ty
, bool Signed
) const {
143 switch (Ty
->getTypeID()) {
144 case Type::IntegerTyID
: {
146 return (Twine('u') + getTypeName(Ty
, true)).str();
148 auto BitWidth
= Ty
->getIntegerBitWidth();
159 return (Twine('i') + Twine(BitWidth
)).str();
164 case Type::FloatTyID
:
166 case Type::DoubleTyID
:
168 case Type::FixedVectorTyID
: {
169 auto VecTy
= cast
<FixedVectorType
>(Ty
);
170 auto ElTy
= VecTy
->getElementType();
171 auto NumElements
= VecTy
->getNumElements();
172 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
179 std::vector
<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode
*Node
) const {
181 std::vector
<uint32_t> Dims
;
182 if (Node
->getNumOperands() != 3)
185 for (auto &Op
: Node
->operands())
186 Dims
.push_back(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue());
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction
&MF
,
192 const SIProgramInfo
&ProgramInfo
) const {
193 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
194 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps
;
196 const Function
&F
= MF
.getFunction();
198 assert(F
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
199 F
.getCallingConv() == CallingConv::SPIR_KERNEL
);
201 Align MaxKernArgAlign
;
202 HSACodeProps
.mKernargSegmentSize
= STM
.getKernArgSegmentSize(F
,
204 HSACodeProps
.mGroupSegmentFixedSize
= ProgramInfo
.LDSSize
;
205 HSACodeProps
.mPrivateSegmentFixedSize
= ProgramInfo
.ScratchSize
;
206 HSACodeProps
.mKernargSegmentAlign
=
207 std::max(MaxKernArgAlign
, Align(4)).value();
208 HSACodeProps
.mWavefrontSize
= STM
.getWavefrontSize();
209 HSACodeProps
.mNumSGPRs
= ProgramInfo
.NumSGPR
;
210 HSACodeProps
.mNumVGPRs
= ProgramInfo
.NumVGPR
;
211 HSACodeProps
.mMaxFlatWorkGroupSize
= MFI
.getMaxFlatWorkGroupSize();
212 HSACodeProps
.mIsDynamicCallStack
= ProgramInfo
.DynamicCallStack
;
213 HSACodeProps
.mIsXNACKEnabled
= STM
.isXNACKEnabled();
214 HSACodeProps
.mNumSpilledSGPRs
= MFI
.getNumSpilledSGPRs();
215 HSACodeProps
.mNumSpilledVGPRs
= MFI
.getNumSpilledVGPRs();
220 Kernel::DebugProps::Metadata
221 MetadataStreamerV2::getHSADebugProps(const MachineFunction
&MF
,
222 const SIProgramInfo
&ProgramInfo
) const {
223 return HSAMD::Kernel::DebugProps::Metadata();
226 void MetadataStreamerV2::emitVersion() {
227 auto &Version
= HSAMetadata
.mVersion
;
229 Version
.push_back(VersionMajorV2
);
230 Version
.push_back(VersionMinorV2
);
233 void MetadataStreamerV2::emitPrintf(const Module
&Mod
) {
234 auto &Printf
= HSAMetadata
.mPrintf
;
236 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
240 for (auto Op
: Node
->operands())
241 if (Op
->getNumOperands())
243 std::string(cast
<MDString
>(Op
->getOperand(0))->getString()));
246 void MetadataStreamerV2::emitKernelLanguage(const Function
&Func
) {
247 auto &Kernel
= HSAMetadata
.mKernels
.back();
249 // TODO: What about other languages?
250 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
251 if (!Node
|| !Node
->getNumOperands())
253 auto Op0
= Node
->getOperand(0);
254 if (Op0
->getNumOperands() <= 1)
257 Kernel
.mLanguage
= "OpenCL C";
258 Kernel
.mLanguageVersion
.push_back(
259 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue());
260 Kernel
.mLanguageVersion
.push_back(
261 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue());
264 void MetadataStreamerV2::emitKernelAttrs(const Function
&Func
) {
265 auto &Attrs
= HSAMetadata
.mKernels
.back().mAttrs
;
267 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
268 Attrs
.mReqdWorkGroupSize
= getWorkGroupDimensions(Node
);
269 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
270 Attrs
.mWorkGroupSizeHint
= getWorkGroupDimensions(Node
);
271 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
272 Attrs
.mVecTypeHint
= getTypeName(
273 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
274 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue());
276 if (Func
.hasFnAttribute("runtime-handle")) {
277 Attrs
.mRuntimeHandle
=
278 Func
.getFnAttribute("runtime-handle").getValueAsString().str();
282 void MetadataStreamerV2::emitKernelArgs(const Function
&Func
) {
283 for (auto &Arg
: Func
.args())
286 emitHiddenKernelArgs(Func
);
289 void MetadataStreamerV2::emitKernelArg(const Argument
&Arg
) {
290 auto Func
= Arg
.getParent();
291 auto ArgNo
= Arg
.getArgNo();
295 Node
= Func
->getMetadata("kernel_arg_name");
296 if (Node
&& ArgNo
< Node
->getNumOperands())
297 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
298 else if (Arg
.hasName())
299 Name
= Arg
.getName();
302 Node
= Func
->getMetadata("kernel_arg_type");
303 if (Node
&& ArgNo
< Node
->getNumOperands())
304 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
306 StringRef BaseTypeName
;
307 Node
= Func
->getMetadata("kernel_arg_base_type");
308 if (Node
&& ArgNo
< Node
->getNumOperands())
309 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
312 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
313 Arg
.hasNoAliasAttr()) {
314 AccQual
= "read_only";
316 Node
= Func
->getMetadata("kernel_arg_access_qual");
317 if (Node
&& ArgNo
< Node
->getNumOperands())
318 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
322 Node
= Func
->getMetadata("kernel_arg_type_qual");
323 if (Node
&& ArgNo
< Node
->getNumOperands())
324 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
326 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
328 MaybeAlign PointeeAlign
;
329 if (auto PtrTy
= dyn_cast
<PointerType
>(Arg
.getType())) {
330 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
331 // FIXME: Should report this for all address spaces
332 PointeeAlign
= DL
.getValueOrABITypeAlignment(Arg
.getParamAlign(),
333 PtrTy
->getElementType());
339 std::tie(ArgTy
, ArgAlign
) = getArgumentTypeAlign(Arg
, DL
);
341 emitKernelArg(DL
, ArgTy
, ArgAlign
,
342 getValueKind(ArgTy
, TypeQual
, BaseTypeName
), PointeeAlign
, Name
,
343 TypeName
, BaseTypeName
, AccQual
, TypeQual
);
346 void MetadataStreamerV2::emitKernelArg(const DataLayout
&DL
, Type
*Ty
,
347 Align Alignment
, ValueKind ValueKind
,
348 MaybeAlign PointeeAlign
, StringRef Name
,
350 StringRef BaseTypeName
,
351 StringRef AccQual
, StringRef TypeQual
) {
352 HSAMetadata
.mKernels
.back().mArgs
.push_back(Kernel::Arg::Metadata());
353 auto &Arg
= HSAMetadata
.mKernels
.back().mArgs
.back();
355 Arg
.mName
= std::string(Name
);
356 Arg
.mTypeName
= std::string(TypeName
);
357 Arg
.mSize
= DL
.getTypeAllocSize(Ty
);
358 Arg
.mAlign
= Alignment
.value();
359 Arg
.mValueKind
= ValueKind
;
360 Arg
.mPointeeAlign
= PointeeAlign
? PointeeAlign
->value() : 0;
362 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
363 Arg
.mAddrSpaceQual
= getAddressSpaceQualifier(PtrTy
->getAddressSpace());
365 Arg
.mAccQual
= getAccessQualifier(AccQual
);
367 // TODO: Emit Arg.mActualAccQual.
369 SmallVector
<StringRef
, 1> SplitTypeQuals
;
370 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
371 for (StringRef Key
: SplitTypeQuals
) {
372 auto P
= StringSwitch
<bool*>(Key
)
373 .Case("const", &Arg
.mIsConst
)
374 .Case("restrict", &Arg
.mIsRestrict
)
375 .Case("volatile", &Arg
.mIsVolatile
)
376 .Case("pipe", &Arg
.mIsPipe
)
383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function
&Func
) {
384 int HiddenArgNumBytes
=
385 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
387 if (!HiddenArgNumBytes
)
390 auto &DL
= Func
.getParent()->getDataLayout();
391 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
393 if (HiddenArgNumBytes
>= 8)
394 emitKernelArg(DL
, Int64Ty
, Align(8), ValueKind::HiddenGlobalOffsetX
);
395 if (HiddenArgNumBytes
>= 16)
396 emitKernelArg(DL
, Int64Ty
, Align(8), ValueKind::HiddenGlobalOffsetY
);
397 if (HiddenArgNumBytes
>= 24)
398 emitKernelArg(DL
, Int64Ty
, Align(8), ValueKind::HiddenGlobalOffsetZ
);
400 auto Int8PtrTy
= Type::getInt8PtrTy(Func
.getContext(),
401 AMDGPUAS::GLOBAL_ADDRESS
);
403 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
405 if (HiddenArgNumBytes
>= 32) {
406 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
407 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenPrintfBuffer
);
408 else if (Func
.getParent()->getFunction("__ockl_hostcall_internal")) {
409 // The printf runtime binding pass should have ensured that hostcall and
410 // printf are not used in the same module.
411 assert(!Func
.getParent()->getNamedMetadata("llvm.printf.fmts"));
412 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenHostcallBuffer
);
414 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenNone
);
417 // Emit "default queue" and "completion action" arguments if enqueue kernel is
418 // used, otherwise emit dummy "none" arguments.
419 if (HiddenArgNumBytes
>= 48) {
420 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
421 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenDefaultQueue
);
422 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenCompletionAction
);
424 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenNone
);
425 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenNone
);
429 // Emit the pointer argument for multi-grid object.
430 if (HiddenArgNumBytes
>= 56)
431 emitKernelArg(DL
, Int8PtrTy
, Align(8), ValueKind::HiddenMultiGridSyncArg
);
434 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
435 return TargetStreamer
.EmitHSAMetadata(getHSAMetadata());
438 void MetadataStreamerV2::begin(const Module
&Mod
,
439 const IsaInfo::AMDGPUTargetID
&TargetID
) {
444 void MetadataStreamerV2::end() {
445 std::string HSAMetadataString
;
446 if (toString(HSAMetadata
, HSAMetadataString
))
450 dump(HSAMetadataString
);
451 if (VerifyHSAMetadata
)
452 verify(HSAMetadataString
);
455 void MetadataStreamerV2::emitKernel(const MachineFunction
&MF
,
456 const SIProgramInfo
&ProgramInfo
) {
457 auto &Func
= MF
.getFunction();
458 if (Func
.getCallingConv() != CallingConv::AMDGPU_KERNEL
)
461 auto CodeProps
= getHSACodeProps(MF
, ProgramInfo
);
462 auto DebugProps
= getHSADebugProps(MF
, ProgramInfo
);
464 HSAMetadata
.mKernels
.push_back(Kernel::Metadata());
465 auto &Kernel
= HSAMetadata
.mKernels
.back();
467 Kernel
.mName
= std::string(Func
.getName());
468 Kernel
.mSymbolName
= (Twine(Func
.getName()) + Twine("@kd")).str();
469 emitKernelLanguage(Func
);
470 emitKernelAttrs(Func
);
471 emitKernelArgs(Func
);
472 HSAMetadata
.mKernels
.back().mCodeProps
= CodeProps
;
473 HSAMetadata
.mKernels
.back().mDebugProps
= DebugProps
;
476 //===----------------------------------------------------------------------===//
477 // HSAMetadataStreamerV3
478 //===----------------------------------------------------------------------===//
480 void MetadataStreamerV3::dump(StringRef HSAMetadataString
) const {
481 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString
<< '\n';
484 void MetadataStreamerV3::verify(StringRef HSAMetadataString
) const {
485 errs() << "AMDGPU HSA Metadata Parser Test: ";
487 msgpack::Document FromHSAMetadataString
;
489 if (!FromHSAMetadataString
.fromYAML(HSAMetadataString
)) {
494 std::string ToHSAMetadataString
;
495 raw_string_ostream
StrOS(ToHSAMetadataString
);
496 FromHSAMetadataString
.toYAML(StrOS
);
498 errs() << (HSAMetadataString
== StrOS
.str() ? "PASS" : "FAIL") << '\n';
499 if (HSAMetadataString
!= ToHSAMetadataString
) {
500 errs() << "Original input: " << HSAMetadataString
<< '\n'
501 << "Produced output: " << StrOS
.str() << '\n';
506 MetadataStreamerV3::getAccessQualifier(StringRef AccQual
) const {
507 return StringSwitch
<Optional
<StringRef
>>(AccQual
)
508 .Case("read_only", StringRef("read_only"))
509 .Case("write_only", StringRef("write_only"))
510 .Case("read_write", StringRef("read_write"))
515 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace
) const {
516 switch (AddressSpace
) {
517 case AMDGPUAS::PRIVATE_ADDRESS
:
518 return StringRef("private");
519 case AMDGPUAS::GLOBAL_ADDRESS
:
520 return StringRef("global");
521 case AMDGPUAS::CONSTANT_ADDRESS
:
522 return StringRef("constant");
523 case AMDGPUAS::LOCAL_ADDRESS
:
524 return StringRef("local");
525 case AMDGPUAS::FLAT_ADDRESS
:
526 return StringRef("generic");
527 case AMDGPUAS::REGION_ADDRESS
:
528 return StringRef("region");
534 StringRef
MetadataStreamerV3::getValueKind(Type
*Ty
, StringRef TypeQual
,
535 StringRef BaseTypeName
) const {
536 if (TypeQual
.find("pipe") != StringRef::npos
)
539 return StringSwitch
<StringRef
>(BaseTypeName
)
540 .Case("image1d_t", "image")
541 .Case("image1d_array_t", "image")
542 .Case("image1d_buffer_t", "image")
543 .Case("image2d_t", "image")
544 .Case("image2d_array_t", "image")
545 .Case("image2d_array_depth_t", "image")
546 .Case("image2d_array_msaa_t", "image")
547 .Case("image2d_array_msaa_depth_t", "image")
548 .Case("image2d_depth_t", "image")
549 .Case("image2d_msaa_t", "image")
550 .Case("image2d_msaa_depth_t", "image")
551 .Case("image3d_t", "image")
552 .Case("sampler_t", "sampler")
553 .Case("queue_t", "queue")
554 .Default(isa
<PointerType
>(Ty
)
555 ? (Ty
->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
556 ? "dynamic_shared_pointer"
561 std::string
MetadataStreamerV3::getTypeName(Type
*Ty
, bool Signed
) const {
562 switch (Ty
->getTypeID()) {
563 case Type::IntegerTyID
: {
565 return (Twine('u') + getTypeName(Ty
, true)).str();
567 auto BitWidth
= Ty
->getIntegerBitWidth();
578 return (Twine('i') + Twine(BitWidth
)).str();
583 case Type::FloatTyID
:
585 case Type::DoubleTyID
:
587 case Type::FixedVectorTyID
: {
588 auto VecTy
= cast
<FixedVectorType
>(Ty
);
589 auto ElTy
= VecTy
->getElementType();
590 auto NumElements
= VecTy
->getNumElements();
591 return (Twine(getTypeName(ElTy
, Signed
)) + Twine(NumElements
)).str();
598 msgpack::ArrayDocNode
599 MetadataStreamerV3::getWorkGroupDimensions(MDNode
*Node
) const {
600 auto Dims
= HSAMetadataDoc
->getArrayNode();
601 if (Node
->getNumOperands() != 3)
604 for (auto &Op
: Node
->operands())
605 Dims
.push_back(Dims
.getDocument()->getNode(
606 uint64_t(mdconst::extract
<ConstantInt
>(Op
)->getZExtValue())));
610 void MetadataStreamerV3::emitVersion() {
611 auto Version
= HSAMetadataDoc
->getArrayNode();
612 Version
.push_back(Version
.getDocument()->getNode(VersionMajorV3
));
613 Version
.push_back(Version
.getDocument()->getNode(VersionMinorV3
));
614 getRootMetadata("amdhsa.version") = Version
;
617 void MetadataStreamerV3::emitPrintf(const Module
&Mod
) {
618 auto Node
= Mod
.getNamedMetadata("llvm.printf.fmts");
622 auto Printf
= HSAMetadataDoc
->getArrayNode();
623 for (auto Op
: Node
->operands())
624 if (Op
->getNumOperands())
625 Printf
.push_back(Printf
.getDocument()->getNode(
626 cast
<MDString
>(Op
->getOperand(0))->getString(), /*Copy=*/true));
627 getRootMetadata("amdhsa.printf") = Printf
;
630 void MetadataStreamerV3::emitKernelLanguage(const Function
&Func
,
631 msgpack::MapDocNode Kern
) {
632 // TODO: What about other languages?
633 auto Node
= Func
.getParent()->getNamedMetadata("opencl.ocl.version");
634 if (!Node
|| !Node
->getNumOperands())
636 auto Op0
= Node
->getOperand(0);
637 if (Op0
->getNumOperands() <= 1)
640 Kern
[".language"] = Kern
.getDocument()->getNode("OpenCL C");
641 auto LanguageVersion
= Kern
.getDocument()->getArrayNode();
642 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
643 mdconst::extract
<ConstantInt
>(Op0
->getOperand(0))->getZExtValue()));
644 LanguageVersion
.push_back(Kern
.getDocument()->getNode(
645 mdconst::extract
<ConstantInt
>(Op0
->getOperand(1))->getZExtValue()));
646 Kern
[".language_version"] = LanguageVersion
;
649 void MetadataStreamerV3::emitKernelAttrs(const Function
&Func
,
650 msgpack::MapDocNode Kern
) {
652 if (auto Node
= Func
.getMetadata("reqd_work_group_size"))
653 Kern
[".reqd_workgroup_size"] = getWorkGroupDimensions(Node
);
654 if (auto Node
= Func
.getMetadata("work_group_size_hint"))
655 Kern
[".workgroup_size_hint"] = getWorkGroupDimensions(Node
);
656 if (auto Node
= Func
.getMetadata("vec_type_hint")) {
657 Kern
[".vec_type_hint"] = Kern
.getDocument()->getNode(
659 cast
<ValueAsMetadata
>(Node
->getOperand(0))->getType(),
660 mdconst::extract
<ConstantInt
>(Node
->getOperand(1))->getZExtValue()),
663 if (Func
.hasFnAttribute("runtime-handle")) {
664 Kern
[".device_enqueue_symbol"] = Kern
.getDocument()->getNode(
665 Func
.getFnAttribute("runtime-handle").getValueAsString().str(),
668 if (Func
.hasFnAttribute("device-init"))
669 Kern
[".kind"] = Kern
.getDocument()->getNode("init");
670 else if (Func
.hasFnAttribute("device-fini"))
671 Kern
[".kind"] = Kern
.getDocument()->getNode("fini");
674 void MetadataStreamerV3::emitKernelArgs(const Function
&Func
,
675 msgpack::MapDocNode Kern
) {
677 auto Args
= HSAMetadataDoc
->getArrayNode();
678 for (auto &Arg
: Func
.args())
679 emitKernelArg(Arg
, Offset
, Args
);
681 emitHiddenKernelArgs(Func
, Offset
, Args
);
683 Kern
[".args"] = Args
;
686 void MetadataStreamerV3::emitKernelArg(const Argument
&Arg
, unsigned &Offset
,
687 msgpack::ArrayDocNode Args
) {
688 auto Func
= Arg
.getParent();
689 auto ArgNo
= Arg
.getArgNo();
693 Node
= Func
->getMetadata("kernel_arg_name");
694 if (Node
&& ArgNo
< Node
->getNumOperands())
695 Name
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
696 else if (Arg
.hasName())
697 Name
= Arg
.getName();
700 Node
= Func
->getMetadata("kernel_arg_type");
701 if (Node
&& ArgNo
< Node
->getNumOperands())
702 TypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
704 StringRef BaseTypeName
;
705 Node
= Func
->getMetadata("kernel_arg_base_type");
706 if (Node
&& ArgNo
< Node
->getNumOperands())
707 BaseTypeName
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
710 if (Arg
.getType()->isPointerTy() && Arg
.onlyReadsMemory() &&
711 Arg
.hasNoAliasAttr()) {
712 AccQual
= "read_only";
714 Node
= Func
->getMetadata("kernel_arg_access_qual");
715 if (Node
&& ArgNo
< Node
->getNumOperands())
716 AccQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
720 Node
= Func
->getMetadata("kernel_arg_type_qual");
721 if (Node
&& ArgNo
< Node
->getNumOperands())
722 TypeQual
= cast
<MDString
>(Node
->getOperand(ArgNo
))->getString();
724 const DataLayout
&DL
= Func
->getParent()->getDataLayout();
726 MaybeAlign PointeeAlign
;
727 Type
*Ty
= Arg
.hasByRefAttr() ? Arg
.getParamByRefType() : Arg
.getType();
729 // FIXME: Need to distinguish in memory alignment from pointer alignment.
730 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
)) {
731 if (PtrTy
->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
) {
732 PointeeAlign
= DL
.getValueOrABITypeAlignment(Arg
.getParamAlign(),
733 PtrTy
->getElementType());
737 // There's no distinction between byval aggregates and raw aggregates.
740 std::tie(ArgTy
, ArgAlign
) = getArgumentTypeAlign(Arg
, DL
);
742 emitKernelArg(DL
, ArgTy
, ArgAlign
,
743 getValueKind(ArgTy
, TypeQual
, BaseTypeName
), Offset
, Args
,
744 PointeeAlign
, Name
, TypeName
, BaseTypeName
, AccQual
, TypeQual
);
747 void MetadataStreamerV3::emitKernelArg(
748 const DataLayout
&DL
, Type
*Ty
, Align Alignment
, StringRef ValueKind
,
749 unsigned &Offset
, msgpack::ArrayDocNode Args
, MaybeAlign PointeeAlign
,
750 StringRef Name
, StringRef TypeName
, StringRef BaseTypeName
,
751 StringRef AccQual
, StringRef TypeQual
) {
752 auto Arg
= Args
.getDocument()->getMapNode();
755 Arg
[".name"] = Arg
.getDocument()->getNode(Name
, /*Copy=*/true);
756 if (!TypeName
.empty())
757 Arg
[".type_name"] = Arg
.getDocument()->getNode(TypeName
, /*Copy=*/true);
758 auto Size
= DL
.getTypeAllocSize(Ty
);
759 Arg
[".size"] = Arg
.getDocument()->getNode(Size
);
760 Offset
= alignTo(Offset
, Alignment
);
761 Arg
[".offset"] = Arg
.getDocument()->getNode(Offset
);
763 Arg
[".value_kind"] = Arg
.getDocument()->getNode(ValueKind
, /*Copy=*/true);
765 Arg
[".pointee_align"] = Arg
.getDocument()->getNode(PointeeAlign
->value());
767 if (auto PtrTy
= dyn_cast
<PointerType
>(Ty
))
768 if (auto Qualifier
= getAddressSpaceQualifier(PtrTy
->getAddressSpace()))
769 Arg
[".address_space"] = Arg
.getDocument()->getNode(*Qualifier
, /*Copy=*/true);
771 if (auto AQ
= getAccessQualifier(AccQual
))
772 Arg
[".access"] = Arg
.getDocument()->getNode(*AQ
, /*Copy=*/true);
774 // TODO: Emit Arg[".actual_access"].
776 SmallVector
<StringRef
, 1> SplitTypeQuals
;
777 TypeQual
.split(SplitTypeQuals
, " ", -1, false);
778 for (StringRef Key
: SplitTypeQuals
) {
780 Arg
[".is_const"] = Arg
.getDocument()->getNode(true);
781 else if (Key
== "restrict")
782 Arg
[".is_restrict"] = Arg
.getDocument()->getNode(true);
783 else if (Key
== "volatile")
784 Arg
[".is_volatile"] = Arg
.getDocument()->getNode(true);
785 else if (Key
== "pipe")
786 Arg
[".is_pipe"] = Arg
.getDocument()->getNode(true);
792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function
&Func
,
794 msgpack::ArrayDocNode Args
) {
795 int HiddenArgNumBytes
=
796 getIntegerAttribute(Func
, "amdgpu-implicitarg-num-bytes", 0);
798 if (!HiddenArgNumBytes
)
801 auto &DL
= Func
.getParent()->getDataLayout();
802 auto Int64Ty
= Type::getInt64Ty(Func
.getContext());
804 if (HiddenArgNumBytes
>= 8)
805 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_x", Offset
,
807 if (HiddenArgNumBytes
>= 16)
808 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_y", Offset
,
810 if (HiddenArgNumBytes
>= 24)
811 emitKernelArg(DL
, Int64Ty
, Align(8), "hidden_global_offset_z", Offset
,
815 Type::getInt8PtrTy(Func
.getContext(), AMDGPUAS::GLOBAL_ADDRESS
);
817 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
819 if (HiddenArgNumBytes
>= 32) {
820 if (Func
.getParent()->getNamedMetadata("llvm.printf.fmts"))
821 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_printf_buffer", Offset
,
823 else if (Func
.getParent()->getFunction("__ockl_hostcall_internal")) {
824 // The printf runtime binding pass should have ensured that hostcall and
825 // printf are not used in the same module.
826 assert(!Func
.getParent()->getNamedMetadata("llvm.printf.fmts"));
827 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_hostcall_buffer", Offset
,
830 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
833 // Emit "default queue" and "completion action" arguments if enqueue kernel is
834 // used, otherwise emit dummy "none" arguments.
835 if (HiddenArgNumBytes
>= 48) {
836 if (Func
.hasFnAttribute("calls-enqueue-kernel")) {
837 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_default_queue", Offset
,
839 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_completion_action", Offset
,
842 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
843 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_none", Offset
, Args
);
847 // Emit the pointer argument for multi-grid object.
848 if (HiddenArgNumBytes
>= 56)
849 emitKernelArg(DL
, Int8PtrTy
, Align(8), "hidden_multigrid_sync_arg", Offset
,
854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction
&MF
,
855 const SIProgramInfo
&ProgramInfo
) const {
856 const GCNSubtarget
&STM
= MF
.getSubtarget
<GCNSubtarget
>();
857 const SIMachineFunctionInfo
&MFI
= *MF
.getInfo
<SIMachineFunctionInfo
>();
858 const Function
&F
= MF
.getFunction();
860 auto Kern
= HSAMetadataDoc
->getMapNode();
862 Align MaxKernArgAlign
;
863 Kern
[".kernarg_segment_size"] = Kern
.getDocument()->getNode(
864 STM
.getKernArgSegmentSize(F
, MaxKernArgAlign
));
865 Kern
[".group_segment_fixed_size"] =
866 Kern
.getDocument()->getNode(ProgramInfo
.LDSSize
);
867 Kern
[".private_segment_fixed_size"] =
868 Kern
.getDocument()->getNode(ProgramInfo
.ScratchSize
);
869 Kern
[".kernarg_segment_align"] =
870 Kern
.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign
).value());
871 Kern
[".wavefront_size"] =
872 Kern
.getDocument()->getNode(STM
.getWavefrontSize());
873 Kern
[".sgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumSGPR
);
874 Kern
[".vgpr_count"] = Kern
.getDocument()->getNode(ProgramInfo
.NumVGPR
);
875 Kern
[".max_flat_workgroup_size"] =
876 Kern
.getDocument()->getNode(MFI
.getMaxFlatWorkGroupSize());
877 Kern
[".sgpr_spill_count"] =
878 Kern
.getDocument()->getNode(MFI
.getNumSpilledSGPRs());
879 Kern
[".vgpr_spill_count"] =
880 Kern
.getDocument()->getNode(MFI
.getNumSpilledVGPRs());
885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer
&TargetStreamer
) {
886 return TargetStreamer
.EmitHSAMetadata(*HSAMetadataDoc
, true);
889 void MetadataStreamerV3::begin(const Module
&Mod
,
890 const IsaInfo::AMDGPUTargetID
&TargetID
) {
893 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc
->getArrayNode();
896 void MetadataStreamerV3::end() {
897 std::string HSAMetadataString
;
898 raw_string_ostream
StrOS(HSAMetadataString
);
899 HSAMetadataDoc
->toYAML(StrOS
);
903 if (VerifyHSAMetadata
)
907 void MetadataStreamerV3::emitKernel(const MachineFunction
&MF
,
908 const SIProgramInfo
&ProgramInfo
) {
909 auto &Func
= MF
.getFunction();
910 auto Kern
= getHSAKernelProps(MF
, ProgramInfo
);
912 assert(Func
.getCallingConv() == CallingConv::AMDGPU_KERNEL
||
913 Func
.getCallingConv() == CallingConv::SPIR_KERNEL
);
916 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
919 Kern
[".name"] = Kern
.getDocument()->getNode(Func
.getName());
920 Kern
[".symbol"] = Kern
.getDocument()->getNode(
921 (Twine(Func
.getName()) + Twine(".kd")).str(), /*Copy=*/true);
922 emitKernelLanguage(Func
, Kern
);
923 emitKernelAttrs(Func
, Kern
);
924 emitKernelArgs(Func
, Kern
);
927 Kernels
.push_back(Kern
);
930 //===----------------------------------------------------------------------===//
931 // HSAMetadataStreamerV4
932 //===----------------------------------------------------------------------===//
934 void MetadataStreamerV4::emitVersion() {
935 auto Version
= HSAMetadataDoc
->getArrayNode();
936 Version
.push_back(Version
.getDocument()->getNode(VersionMajorV4
));
937 Version
.push_back(Version
.getDocument()->getNode(VersionMinorV4
));
938 getRootMetadata("amdhsa.version") = Version
;
941 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID
&TargetID
) {
942 getRootMetadata("amdhsa.target") =
943 HSAMetadataDoc
->getNode(TargetID
.toString(), /*Copy=*/true);
946 void MetadataStreamerV4::begin(const Module
&Mod
,
947 const IsaInfo::AMDGPUTargetID
&TargetID
) {
949 emitTargetID(TargetID
);
951 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc
->getArrayNode();
954 } // end namespace HSAMD
955 } // end namespace AMDGPU
956 } // end namespace llvm