[ORC] Add std::tuple support to SimplePackedSerialization.
[llvm-project.git] / llvm / lib / Target / AMDGPU / AMDGPUHSAMetadataStreamer.cpp
bloba50093f5bb08464b735f7f1d13ae8530381c84f6
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "GCNSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "llvm/IR/Module.h"
22 using namespace llvm;
24 static std::pair<Type *, Align> getArgumentTypeAlign(const Argument &Arg,
25 const DataLayout &DL) {
26 Type *Ty = Arg.getType();
27 MaybeAlign ArgAlign;
28 if (Arg.hasByRefAttr()) {
29 Ty = Arg.getParamByRefType();
30 ArgAlign = Arg.getParamAlign();
33 if (!ArgAlign)
34 ArgAlign = DL.getABITypeAlign(Ty);
36 return std::make_pair(Ty, *ArgAlign);
39 namespace llvm {
41 static cl::opt<bool> DumpHSAMetadata(
42 "amdgpu-dump-hsa-metadata",
43 cl::desc("Dump AMDGPU HSA Metadata"));
44 static cl::opt<bool> VerifyHSAMetadata(
45 "amdgpu-verify-hsa-metadata",
46 cl::desc("Verify AMDGPU HSA Metadata"));
48 namespace AMDGPU {
49 namespace HSAMD {
51 //===----------------------------------------------------------------------===//
52 // HSAMetadataStreamerV2
53 //===----------------------------------------------------------------------===//
54 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
55 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
58 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
59 errs() << "AMDGPU HSA Metadata Parser Test: ";
61 HSAMD::Metadata FromHSAMetadataString;
62 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
63 errs() << "FAIL\n";
64 return;
67 std::string ToHSAMetadataString;
68 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
69 errs() << "FAIL\n";
70 return;
73 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
74 << '\n';
75 if (HSAMetadataString != ToHSAMetadataString) {
76 errs() << "Original input: " << HSAMetadataString << '\n'
77 << "Produced output: " << ToHSAMetadataString << '\n';
81 AccessQualifier
82 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
83 if (AccQual.empty())
84 return AccessQualifier::Unknown;
86 return StringSwitch<AccessQualifier>(AccQual)
87 .Case("read_only", AccessQualifier::ReadOnly)
88 .Case("write_only", AccessQualifier::WriteOnly)
89 .Case("read_write", AccessQualifier::ReadWrite)
90 .Default(AccessQualifier::Default);
93 AddressSpaceQualifier
94 MetadataStreamerV2::getAddressSpaceQualifier(
95 unsigned AddressSpace) const {
96 switch (AddressSpace) {
97 case AMDGPUAS::PRIVATE_ADDRESS:
98 return AddressSpaceQualifier::Private;
99 case AMDGPUAS::GLOBAL_ADDRESS:
100 return AddressSpaceQualifier::Global;
101 case AMDGPUAS::CONSTANT_ADDRESS:
102 return AddressSpaceQualifier::Constant;
103 case AMDGPUAS::LOCAL_ADDRESS:
104 return AddressSpaceQualifier::Local;
105 case AMDGPUAS::FLAT_ADDRESS:
106 return AddressSpaceQualifier::Generic;
107 case AMDGPUAS::REGION_ADDRESS:
108 return AddressSpaceQualifier::Region;
109 default:
110 return AddressSpaceQualifier::Unknown;
114 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
115 StringRef BaseTypeName) const {
116 if (TypeQual.find("pipe") != StringRef::npos)
117 return ValueKind::Pipe;
119 return StringSwitch<ValueKind>(BaseTypeName)
120 .Case("image1d_t", ValueKind::Image)
121 .Case("image1d_array_t", ValueKind::Image)
122 .Case("image1d_buffer_t", ValueKind::Image)
123 .Case("image2d_t", ValueKind::Image)
124 .Case("image2d_array_t", ValueKind::Image)
125 .Case("image2d_array_depth_t", ValueKind::Image)
126 .Case("image2d_array_msaa_t", ValueKind::Image)
127 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
128 .Case("image2d_depth_t", ValueKind::Image)
129 .Case("image2d_msaa_t", ValueKind::Image)
130 .Case("image2d_msaa_depth_t", ValueKind::Image)
131 .Case("image3d_t", ValueKind::Image)
132 .Case("sampler_t", ValueKind::Sampler)
133 .Case("queue_t", ValueKind::Queue)
134 .Default(isa<PointerType>(Ty) ?
135 (Ty->getPointerAddressSpace() ==
136 AMDGPUAS::LOCAL_ADDRESS ?
137 ValueKind::DynamicSharedPointer :
138 ValueKind::GlobalBuffer) :
139 ValueKind::ByValue);
142 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
143 switch (Ty->getTypeID()) {
144 case Type::IntegerTyID: {
145 if (!Signed)
146 return (Twine('u') + getTypeName(Ty, true)).str();
148 auto BitWidth = Ty->getIntegerBitWidth();
149 switch (BitWidth) {
150 case 8:
151 return "char";
152 case 16:
153 return "short";
154 case 32:
155 return "int";
156 case 64:
157 return "long";
158 default:
159 return (Twine('i') + Twine(BitWidth)).str();
162 case Type::HalfTyID:
163 return "half";
164 case Type::FloatTyID:
165 return "float";
166 case Type::DoubleTyID:
167 return "double";
168 case Type::FixedVectorTyID: {
169 auto VecTy = cast<FixedVectorType>(Ty);
170 auto ElTy = VecTy->getElementType();
171 auto NumElements = VecTy->getNumElements();
172 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
174 default:
175 return "unknown";
179 std::vector<uint32_t>
180 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
181 std::vector<uint32_t> Dims;
182 if (Node->getNumOperands() != 3)
183 return Dims;
185 for (auto &Op : Node->operands())
186 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
187 return Dims;
190 Kernel::CodeProps::Metadata
191 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
192 const SIProgramInfo &ProgramInfo) const {
193 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
194 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
195 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
196 const Function &F = MF.getFunction();
198 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
199 F.getCallingConv() == CallingConv::SPIR_KERNEL);
201 Align MaxKernArgAlign;
202 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
203 MaxKernArgAlign);
204 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
205 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
206 HSACodeProps.mKernargSegmentAlign =
207 std::max(MaxKernArgAlign, Align(4)).value();
208 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
209 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
210 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
211 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
212 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
213 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
214 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
215 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
217 return HSACodeProps;
220 Kernel::DebugProps::Metadata
221 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
222 const SIProgramInfo &ProgramInfo) const {
223 return HSAMD::Kernel::DebugProps::Metadata();
226 void MetadataStreamerV2::emitVersion() {
227 auto &Version = HSAMetadata.mVersion;
229 Version.push_back(VersionMajorV2);
230 Version.push_back(VersionMinorV2);
233 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
234 auto &Printf = HSAMetadata.mPrintf;
236 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
237 if (!Node)
238 return;
240 for (auto Op : Node->operands())
241 if (Op->getNumOperands())
242 Printf.push_back(
243 std::string(cast<MDString>(Op->getOperand(0))->getString()));
246 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
247 auto &Kernel = HSAMetadata.mKernels.back();
249 // TODO: What about other languages?
250 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
251 if (!Node || !Node->getNumOperands())
252 return;
253 auto Op0 = Node->getOperand(0);
254 if (Op0->getNumOperands() <= 1)
255 return;
257 Kernel.mLanguage = "OpenCL C";
258 Kernel.mLanguageVersion.push_back(
259 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
260 Kernel.mLanguageVersion.push_back(
261 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
264 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
265 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
267 if (auto Node = Func.getMetadata("reqd_work_group_size"))
268 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
269 if (auto Node = Func.getMetadata("work_group_size_hint"))
270 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
271 if (auto Node = Func.getMetadata("vec_type_hint")) {
272 Attrs.mVecTypeHint = getTypeName(
273 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
274 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
276 if (Func.hasFnAttribute("runtime-handle")) {
277 Attrs.mRuntimeHandle =
278 Func.getFnAttribute("runtime-handle").getValueAsString().str();
282 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
283 for (auto &Arg : Func.args())
284 emitKernelArg(Arg);
286 emitHiddenKernelArgs(Func);
289 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
290 auto Func = Arg.getParent();
291 auto ArgNo = Arg.getArgNo();
292 const MDNode *Node;
294 StringRef Name;
295 Node = Func->getMetadata("kernel_arg_name");
296 if (Node && ArgNo < Node->getNumOperands())
297 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
298 else if (Arg.hasName())
299 Name = Arg.getName();
301 StringRef TypeName;
302 Node = Func->getMetadata("kernel_arg_type");
303 if (Node && ArgNo < Node->getNumOperands())
304 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
306 StringRef BaseTypeName;
307 Node = Func->getMetadata("kernel_arg_base_type");
308 if (Node && ArgNo < Node->getNumOperands())
309 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
311 StringRef AccQual;
312 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
313 Arg.hasNoAliasAttr()) {
314 AccQual = "read_only";
315 } else {
316 Node = Func->getMetadata("kernel_arg_access_qual");
317 if (Node && ArgNo < Node->getNumOperands())
318 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
321 StringRef TypeQual;
322 Node = Func->getMetadata("kernel_arg_type_qual");
323 if (Node && ArgNo < Node->getNumOperands())
324 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
326 const DataLayout &DL = Func->getParent()->getDataLayout();
328 MaybeAlign PointeeAlign;
329 if (auto PtrTy = dyn_cast<PointerType>(Arg.getType())) {
330 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
331 // FIXME: Should report this for all address spaces
332 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
333 PtrTy->getElementType());
337 Type *ArgTy;
338 Align ArgAlign;
339 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
341 emitKernelArg(DL, ArgTy, ArgAlign,
342 getValueKind(ArgTy, TypeQual, BaseTypeName), PointeeAlign, Name,
343 TypeName, BaseTypeName, AccQual, TypeQual);
346 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
347 Align Alignment, ValueKind ValueKind,
348 MaybeAlign PointeeAlign, StringRef Name,
349 StringRef TypeName,
350 StringRef BaseTypeName,
351 StringRef AccQual, StringRef TypeQual) {
352 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
353 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
355 Arg.mName = std::string(Name);
356 Arg.mTypeName = std::string(TypeName);
357 Arg.mSize = DL.getTypeAllocSize(Ty);
358 Arg.mAlign = Alignment.value();
359 Arg.mValueKind = ValueKind;
360 Arg.mPointeeAlign = PointeeAlign ? PointeeAlign->value() : 0;
362 if (auto PtrTy = dyn_cast<PointerType>(Ty))
363 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
365 Arg.mAccQual = getAccessQualifier(AccQual);
367 // TODO: Emit Arg.mActualAccQual.
369 SmallVector<StringRef, 1> SplitTypeQuals;
370 TypeQual.split(SplitTypeQuals, " ", -1, false);
371 for (StringRef Key : SplitTypeQuals) {
372 auto P = StringSwitch<bool*>(Key)
373 .Case("const", &Arg.mIsConst)
374 .Case("restrict", &Arg.mIsRestrict)
375 .Case("volatile", &Arg.mIsVolatile)
376 .Case("pipe", &Arg.mIsPipe)
377 .Default(nullptr);
378 if (P)
379 *P = true;
383 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
384 int HiddenArgNumBytes =
385 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
387 if (!HiddenArgNumBytes)
388 return;
390 auto &DL = Func.getParent()->getDataLayout();
391 auto Int64Ty = Type::getInt64Ty(Func.getContext());
393 if (HiddenArgNumBytes >= 8)
394 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetX);
395 if (HiddenArgNumBytes >= 16)
396 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetY);
397 if (HiddenArgNumBytes >= 24)
398 emitKernelArg(DL, Int64Ty, Align(8), ValueKind::HiddenGlobalOffsetZ);
400 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
401 AMDGPUAS::GLOBAL_ADDRESS);
403 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
404 // "none" argument.
405 if (HiddenArgNumBytes >= 32) {
406 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
407 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenPrintfBuffer);
408 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
409 // The printf runtime binding pass should have ensured that hostcall and
410 // printf are not used in the same module.
411 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
412 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenHostcallBuffer);
413 } else
414 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
417 // Emit "default queue" and "completion action" arguments if enqueue kernel is
418 // used, otherwise emit dummy "none" arguments.
419 if (HiddenArgNumBytes >= 48) {
420 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
421 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenDefaultQueue);
422 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenCompletionAction);
423 } else {
424 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
425 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenNone);
429 // Emit the pointer argument for multi-grid object.
430 if (HiddenArgNumBytes >= 56)
431 emitKernelArg(DL, Int8PtrTy, Align(8), ValueKind::HiddenMultiGridSyncArg);
434 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
435 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
438 void MetadataStreamerV2::begin(const Module &Mod,
439 const IsaInfo::AMDGPUTargetID &TargetID) {
440 emitVersion();
441 emitPrintf(Mod);
444 void MetadataStreamerV2::end() {
445 std::string HSAMetadataString;
446 if (toString(HSAMetadata, HSAMetadataString))
447 return;
449 if (DumpHSAMetadata)
450 dump(HSAMetadataString);
451 if (VerifyHSAMetadata)
452 verify(HSAMetadataString);
455 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
456 const SIProgramInfo &ProgramInfo) {
457 auto &Func = MF.getFunction();
458 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
459 return;
461 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
462 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
464 HSAMetadata.mKernels.push_back(Kernel::Metadata());
465 auto &Kernel = HSAMetadata.mKernels.back();
467 Kernel.mName = std::string(Func.getName());
468 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
469 emitKernelLanguage(Func);
470 emitKernelAttrs(Func);
471 emitKernelArgs(Func);
472 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
473 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
476 //===----------------------------------------------------------------------===//
477 // HSAMetadataStreamerV3
478 //===----------------------------------------------------------------------===//
480 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
481 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
484 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
485 errs() << "AMDGPU HSA Metadata Parser Test: ";
487 msgpack::Document FromHSAMetadataString;
489 if (!FromHSAMetadataString.fromYAML(HSAMetadataString)) {
490 errs() << "FAIL\n";
491 return;
494 std::string ToHSAMetadataString;
495 raw_string_ostream StrOS(ToHSAMetadataString);
496 FromHSAMetadataString.toYAML(StrOS);
498 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
499 if (HSAMetadataString != ToHSAMetadataString) {
500 errs() << "Original input: " << HSAMetadataString << '\n'
501 << "Produced output: " << StrOS.str() << '\n';
505 Optional<StringRef>
506 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
507 return StringSwitch<Optional<StringRef>>(AccQual)
508 .Case("read_only", StringRef("read_only"))
509 .Case("write_only", StringRef("write_only"))
510 .Case("read_write", StringRef("read_write"))
511 .Default(None);
514 Optional<StringRef>
515 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
516 switch (AddressSpace) {
517 case AMDGPUAS::PRIVATE_ADDRESS:
518 return StringRef("private");
519 case AMDGPUAS::GLOBAL_ADDRESS:
520 return StringRef("global");
521 case AMDGPUAS::CONSTANT_ADDRESS:
522 return StringRef("constant");
523 case AMDGPUAS::LOCAL_ADDRESS:
524 return StringRef("local");
525 case AMDGPUAS::FLAT_ADDRESS:
526 return StringRef("generic");
527 case AMDGPUAS::REGION_ADDRESS:
528 return StringRef("region");
529 default:
530 return None;
534 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
535 StringRef BaseTypeName) const {
536 if (TypeQual.find("pipe") != StringRef::npos)
537 return "pipe";
539 return StringSwitch<StringRef>(BaseTypeName)
540 .Case("image1d_t", "image")
541 .Case("image1d_array_t", "image")
542 .Case("image1d_buffer_t", "image")
543 .Case("image2d_t", "image")
544 .Case("image2d_array_t", "image")
545 .Case("image2d_array_depth_t", "image")
546 .Case("image2d_array_msaa_t", "image")
547 .Case("image2d_array_msaa_depth_t", "image")
548 .Case("image2d_depth_t", "image")
549 .Case("image2d_msaa_t", "image")
550 .Case("image2d_msaa_depth_t", "image")
551 .Case("image3d_t", "image")
552 .Case("sampler_t", "sampler")
553 .Case("queue_t", "queue")
554 .Default(isa<PointerType>(Ty)
555 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
556 ? "dynamic_shared_pointer"
557 : "global_buffer")
558 : "by_value");
561 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
562 switch (Ty->getTypeID()) {
563 case Type::IntegerTyID: {
564 if (!Signed)
565 return (Twine('u') + getTypeName(Ty, true)).str();
567 auto BitWidth = Ty->getIntegerBitWidth();
568 switch (BitWidth) {
569 case 8:
570 return "char";
571 case 16:
572 return "short";
573 case 32:
574 return "int";
575 case 64:
576 return "long";
577 default:
578 return (Twine('i') + Twine(BitWidth)).str();
581 case Type::HalfTyID:
582 return "half";
583 case Type::FloatTyID:
584 return "float";
585 case Type::DoubleTyID:
586 return "double";
587 case Type::FixedVectorTyID: {
588 auto VecTy = cast<FixedVectorType>(Ty);
589 auto ElTy = VecTy->getElementType();
590 auto NumElements = VecTy->getNumElements();
591 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
593 default:
594 return "unknown";
598 msgpack::ArrayDocNode
599 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
600 auto Dims = HSAMetadataDoc->getArrayNode();
601 if (Node->getNumOperands() != 3)
602 return Dims;
604 for (auto &Op : Node->operands())
605 Dims.push_back(Dims.getDocument()->getNode(
606 uint64_t(mdconst::extract<ConstantInt>(Op)->getZExtValue())));
607 return Dims;
610 void MetadataStreamerV3::emitVersion() {
611 auto Version = HSAMetadataDoc->getArrayNode();
612 Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
613 Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
614 getRootMetadata("amdhsa.version") = Version;
617 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
618 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
619 if (!Node)
620 return;
622 auto Printf = HSAMetadataDoc->getArrayNode();
623 for (auto Op : Node->operands())
624 if (Op->getNumOperands())
625 Printf.push_back(Printf.getDocument()->getNode(
626 cast<MDString>(Op->getOperand(0))->getString(), /*Copy=*/true));
627 getRootMetadata("amdhsa.printf") = Printf;
630 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
631 msgpack::MapDocNode Kern) {
632 // TODO: What about other languages?
633 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
634 if (!Node || !Node->getNumOperands())
635 return;
636 auto Op0 = Node->getOperand(0);
637 if (Op0->getNumOperands() <= 1)
638 return;
640 Kern[".language"] = Kern.getDocument()->getNode("OpenCL C");
641 auto LanguageVersion = Kern.getDocument()->getArrayNode();
642 LanguageVersion.push_back(Kern.getDocument()->getNode(
643 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
644 LanguageVersion.push_back(Kern.getDocument()->getNode(
645 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
646 Kern[".language_version"] = LanguageVersion;
649 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
650 msgpack::MapDocNode Kern) {
652 if (auto Node = Func.getMetadata("reqd_work_group_size"))
653 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
654 if (auto Node = Func.getMetadata("work_group_size_hint"))
655 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
656 if (auto Node = Func.getMetadata("vec_type_hint")) {
657 Kern[".vec_type_hint"] = Kern.getDocument()->getNode(
658 getTypeName(
659 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
660 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()),
661 /*Copy=*/true);
663 if (Func.hasFnAttribute("runtime-handle")) {
664 Kern[".device_enqueue_symbol"] = Kern.getDocument()->getNode(
665 Func.getFnAttribute("runtime-handle").getValueAsString().str(),
666 /*Copy=*/true);
668 if (Func.hasFnAttribute("device-init"))
669 Kern[".kind"] = Kern.getDocument()->getNode("init");
670 else if (Func.hasFnAttribute("device-fini"))
671 Kern[".kind"] = Kern.getDocument()->getNode("fini");
674 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
675 msgpack::MapDocNode Kern) {
676 unsigned Offset = 0;
677 auto Args = HSAMetadataDoc->getArrayNode();
678 for (auto &Arg : Func.args())
679 emitKernelArg(Arg, Offset, Args);
681 emitHiddenKernelArgs(Func, Offset, Args);
683 Kern[".args"] = Args;
686 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
687 msgpack::ArrayDocNode Args) {
688 auto Func = Arg.getParent();
689 auto ArgNo = Arg.getArgNo();
690 const MDNode *Node;
692 StringRef Name;
693 Node = Func->getMetadata("kernel_arg_name");
694 if (Node && ArgNo < Node->getNumOperands())
695 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
696 else if (Arg.hasName())
697 Name = Arg.getName();
699 StringRef TypeName;
700 Node = Func->getMetadata("kernel_arg_type");
701 if (Node && ArgNo < Node->getNumOperands())
702 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
704 StringRef BaseTypeName;
705 Node = Func->getMetadata("kernel_arg_base_type");
706 if (Node && ArgNo < Node->getNumOperands())
707 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
709 StringRef AccQual;
710 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
711 Arg.hasNoAliasAttr()) {
712 AccQual = "read_only";
713 } else {
714 Node = Func->getMetadata("kernel_arg_access_qual");
715 if (Node && ArgNo < Node->getNumOperands())
716 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
719 StringRef TypeQual;
720 Node = Func->getMetadata("kernel_arg_type_qual");
721 if (Node && ArgNo < Node->getNumOperands())
722 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
724 const DataLayout &DL = Func->getParent()->getDataLayout();
726 MaybeAlign PointeeAlign;
727 Type *Ty = Arg.hasByRefAttr() ? Arg.getParamByRefType() : Arg.getType();
729 // FIXME: Need to distinguish in memory alignment from pointer alignment.
730 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
731 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
732 PointeeAlign = DL.getValueOrABITypeAlignment(Arg.getParamAlign(),
733 PtrTy->getElementType());
737 // There's no distinction between byval aggregates and raw aggregates.
738 Type *ArgTy;
739 Align ArgAlign;
740 std::tie(ArgTy, ArgAlign) = getArgumentTypeAlign(Arg, DL);
742 emitKernelArg(DL, ArgTy, ArgAlign,
743 getValueKind(ArgTy, TypeQual, BaseTypeName), Offset, Args,
744 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
747 void MetadataStreamerV3::emitKernelArg(
748 const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
749 unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
750 StringRef Name, StringRef TypeName, StringRef BaseTypeName,
751 StringRef AccQual, StringRef TypeQual) {
752 auto Arg = Args.getDocument()->getMapNode();
754 if (!Name.empty())
755 Arg[".name"] = Arg.getDocument()->getNode(Name, /*Copy=*/true);
756 if (!TypeName.empty())
757 Arg[".type_name"] = Arg.getDocument()->getNode(TypeName, /*Copy=*/true);
758 auto Size = DL.getTypeAllocSize(Ty);
759 Arg[".size"] = Arg.getDocument()->getNode(Size);
760 Offset = alignTo(Offset, Alignment);
761 Arg[".offset"] = Arg.getDocument()->getNode(Offset);
762 Offset += Size;
763 Arg[".value_kind"] = Arg.getDocument()->getNode(ValueKind, /*Copy=*/true);
764 if (PointeeAlign)
765 Arg[".pointee_align"] = Arg.getDocument()->getNode(PointeeAlign->value());
767 if (auto PtrTy = dyn_cast<PointerType>(Ty))
768 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
769 Arg[".address_space"] = Arg.getDocument()->getNode(*Qualifier, /*Copy=*/true);
771 if (auto AQ = getAccessQualifier(AccQual))
772 Arg[".access"] = Arg.getDocument()->getNode(*AQ, /*Copy=*/true);
774 // TODO: Emit Arg[".actual_access"].
776 SmallVector<StringRef, 1> SplitTypeQuals;
777 TypeQual.split(SplitTypeQuals, " ", -1, false);
778 for (StringRef Key : SplitTypeQuals) {
779 if (Key == "const")
780 Arg[".is_const"] = Arg.getDocument()->getNode(true);
781 else if (Key == "restrict")
782 Arg[".is_restrict"] = Arg.getDocument()->getNode(true);
783 else if (Key == "volatile")
784 Arg[".is_volatile"] = Arg.getDocument()->getNode(true);
785 else if (Key == "pipe")
786 Arg[".is_pipe"] = Arg.getDocument()->getNode(true);
789 Args.push_back(Arg);
792 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
793 unsigned &Offset,
794 msgpack::ArrayDocNode Args) {
795 int HiddenArgNumBytes =
796 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
798 if (!HiddenArgNumBytes)
799 return;
801 auto &DL = Func.getParent()->getDataLayout();
802 auto Int64Ty = Type::getInt64Ty(Func.getContext());
804 if (HiddenArgNumBytes >= 8)
805 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_x", Offset,
806 Args);
807 if (HiddenArgNumBytes >= 16)
808 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_y", Offset,
809 Args);
810 if (HiddenArgNumBytes >= 24)
811 emitKernelArg(DL, Int64Ty, Align(8), "hidden_global_offset_z", Offset,
812 Args);
814 auto Int8PtrTy =
815 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
817 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
818 // "none" argument.
819 if (HiddenArgNumBytes >= 32) {
820 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
821 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_printf_buffer", Offset,
822 Args);
823 else if (Func.getParent()->getFunction("__ockl_hostcall_internal")) {
824 // The printf runtime binding pass should have ensured that hostcall and
825 // printf are not used in the same module.
826 assert(!Func.getParent()->getNamedMetadata("llvm.printf.fmts"));
827 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_hostcall_buffer", Offset,
828 Args);
829 } else
830 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
833 // Emit "default queue" and "completion action" arguments if enqueue kernel is
834 // used, otherwise emit dummy "none" arguments.
835 if (HiddenArgNumBytes >= 48) {
836 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
837 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_default_queue", Offset,
838 Args);
839 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_completion_action", Offset,
840 Args);
841 } else {
842 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
843 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_none", Offset, Args);
847 // Emit the pointer argument for multi-grid object.
848 if (HiddenArgNumBytes >= 56)
849 emitKernelArg(DL, Int8PtrTy, Align(8), "hidden_multigrid_sync_arg", Offset,
850 Args);
853 msgpack::MapDocNode
854 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
855 const SIProgramInfo &ProgramInfo) const {
856 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
857 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
858 const Function &F = MF.getFunction();
860 auto Kern = HSAMetadataDoc->getMapNode();
862 Align MaxKernArgAlign;
863 Kern[".kernarg_segment_size"] = Kern.getDocument()->getNode(
864 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
865 Kern[".group_segment_fixed_size"] =
866 Kern.getDocument()->getNode(ProgramInfo.LDSSize);
867 Kern[".private_segment_fixed_size"] =
868 Kern.getDocument()->getNode(ProgramInfo.ScratchSize);
869 Kern[".kernarg_segment_align"] =
870 Kern.getDocument()->getNode(std::max(Align(4), MaxKernArgAlign).value());
871 Kern[".wavefront_size"] =
872 Kern.getDocument()->getNode(STM.getWavefrontSize());
873 Kern[".sgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumSGPR);
874 Kern[".vgpr_count"] = Kern.getDocument()->getNode(ProgramInfo.NumVGPR);
875 Kern[".max_flat_workgroup_size"] =
876 Kern.getDocument()->getNode(MFI.getMaxFlatWorkGroupSize());
877 Kern[".sgpr_spill_count"] =
878 Kern.getDocument()->getNode(MFI.getNumSpilledSGPRs());
879 Kern[".vgpr_spill_count"] =
880 Kern.getDocument()->getNode(MFI.getNumSpilledVGPRs());
882 return Kern;
885 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
886 return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
889 void MetadataStreamerV3::begin(const Module &Mod,
890 const IsaInfo::AMDGPUTargetID &TargetID) {
891 emitVersion();
892 emitPrintf(Mod);
893 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
896 void MetadataStreamerV3::end() {
897 std::string HSAMetadataString;
898 raw_string_ostream StrOS(HSAMetadataString);
899 HSAMetadataDoc->toYAML(StrOS);
901 if (DumpHSAMetadata)
902 dump(StrOS.str());
903 if (VerifyHSAMetadata)
904 verify(StrOS.str());
907 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
908 const SIProgramInfo &ProgramInfo) {
909 auto &Func = MF.getFunction();
910 auto Kern = getHSAKernelProps(MF, ProgramInfo);
912 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
913 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
915 auto Kernels =
916 getRootMetadata("amdhsa.kernels").getArray(/*Convert=*/true);
919 Kern[".name"] = Kern.getDocument()->getNode(Func.getName());
920 Kern[".symbol"] = Kern.getDocument()->getNode(
921 (Twine(Func.getName()) + Twine(".kd")).str(), /*Copy=*/true);
922 emitKernelLanguage(Func, Kern);
923 emitKernelAttrs(Func, Kern);
924 emitKernelArgs(Func, Kern);
927 Kernels.push_back(Kern);
930 //===----------------------------------------------------------------------===//
931 // HSAMetadataStreamerV4
932 //===----------------------------------------------------------------------===//
934 void MetadataStreamerV4::emitVersion() {
935 auto Version = HSAMetadataDoc->getArrayNode();
936 Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
937 Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
938 getRootMetadata("amdhsa.version") = Version;
941 void MetadataStreamerV4::emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID) {
942 getRootMetadata("amdhsa.target") =
943 HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
946 void MetadataStreamerV4::begin(const Module &Mod,
947 const IsaInfo::AMDGPUTargetID &TargetID) {
948 emitVersion();
949 emitTargetID(TargetID);
950 emitPrintf(Mod);
951 getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
954 } // end namespace HSAMD
955 } // end namespace AMDGPU
956 } // end namespace llvm