Revert r354244 "[DAGCombiner] Eliminate dead stores to stack."
[llvm-complete.git] / lib / Target / AMDGPU / AMDGPUHSAMetadataStreamer.cpp
blob15cee69e935a96cf7885e78b313dfbb5d211c542
1 //===--- AMDGPUHSAMetadataStreamer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// AMDGPU HSA Metadata Streamer.
11 ///
13 //===----------------------------------------------------------------------===//
15 #include "AMDGPUHSAMetadataStreamer.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUSubtarget.h"
18 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
19 #include "SIMachineFunctionInfo.h"
20 #include "SIProgramInfo.h"
21 #include "Utils/AMDGPUBaseInfo.h"
22 #include "llvm/ADT/StringSwitch.h"
23 #include "llvm/IR/Constants.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/raw_ostream.h"
27 namespace llvm {
29 static cl::opt<bool> DumpHSAMetadata(
30 "amdgpu-dump-hsa-metadata",
31 cl::desc("Dump AMDGPU HSA Metadata"));
32 static cl::opt<bool> VerifyHSAMetadata(
33 "amdgpu-verify-hsa-metadata",
34 cl::desc("Verify AMDGPU HSA Metadata"));
36 namespace AMDGPU {
37 namespace HSAMD {
39 //===----------------------------------------------------------------------===//
40 // HSAMetadataStreamerV2
41 //===----------------------------------------------------------------------===//
42 void MetadataStreamerV2::dump(StringRef HSAMetadataString) const {
43 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
46 void MetadataStreamerV2::verify(StringRef HSAMetadataString) const {
47 errs() << "AMDGPU HSA Metadata Parser Test: ";
49 HSAMD::Metadata FromHSAMetadataString;
50 if (fromString(HSAMetadataString, FromHSAMetadataString)) {
51 errs() << "FAIL\n";
52 return;
55 std::string ToHSAMetadataString;
56 if (toString(FromHSAMetadataString, ToHSAMetadataString)) {
57 errs() << "FAIL\n";
58 return;
61 errs() << (HSAMetadataString == ToHSAMetadataString ? "PASS" : "FAIL")
62 << '\n';
63 if (HSAMetadataString != ToHSAMetadataString) {
64 errs() << "Original input: " << HSAMetadataString << '\n'
65 << "Produced output: " << ToHSAMetadataString << '\n';
69 AccessQualifier
70 MetadataStreamerV2::getAccessQualifier(StringRef AccQual) const {
71 if (AccQual.empty())
72 return AccessQualifier::Unknown;
74 return StringSwitch<AccessQualifier>(AccQual)
75 .Case("read_only", AccessQualifier::ReadOnly)
76 .Case("write_only", AccessQualifier::WriteOnly)
77 .Case("read_write", AccessQualifier::ReadWrite)
78 .Default(AccessQualifier::Default);
81 AddressSpaceQualifier
82 MetadataStreamerV2::getAddressSpaceQualifier(
83 unsigned AddressSpace) const {
84 switch (AddressSpace) {
85 case AMDGPUAS::PRIVATE_ADDRESS:
86 return AddressSpaceQualifier::Private;
87 case AMDGPUAS::GLOBAL_ADDRESS:
88 return AddressSpaceQualifier::Global;
89 case AMDGPUAS::CONSTANT_ADDRESS:
90 return AddressSpaceQualifier::Constant;
91 case AMDGPUAS::LOCAL_ADDRESS:
92 return AddressSpaceQualifier::Local;
93 case AMDGPUAS::FLAT_ADDRESS:
94 return AddressSpaceQualifier::Generic;
95 case AMDGPUAS::REGION_ADDRESS:
96 return AddressSpaceQualifier::Region;
97 default:
98 return AddressSpaceQualifier::Unknown;
102 ValueKind MetadataStreamerV2::getValueKind(Type *Ty, StringRef TypeQual,
103 StringRef BaseTypeName) const {
104 if (TypeQual.find("pipe") != StringRef::npos)
105 return ValueKind::Pipe;
107 return StringSwitch<ValueKind>(BaseTypeName)
108 .Case("image1d_t", ValueKind::Image)
109 .Case("image1d_array_t", ValueKind::Image)
110 .Case("image1d_buffer_t", ValueKind::Image)
111 .Case("image2d_t", ValueKind::Image)
112 .Case("image2d_array_t", ValueKind::Image)
113 .Case("image2d_array_depth_t", ValueKind::Image)
114 .Case("image2d_array_msaa_t", ValueKind::Image)
115 .Case("image2d_array_msaa_depth_t", ValueKind::Image)
116 .Case("image2d_depth_t", ValueKind::Image)
117 .Case("image2d_msaa_t", ValueKind::Image)
118 .Case("image2d_msaa_depth_t", ValueKind::Image)
119 .Case("image3d_t", ValueKind::Image)
120 .Case("sampler_t", ValueKind::Sampler)
121 .Case("queue_t", ValueKind::Queue)
122 .Default(isa<PointerType>(Ty) ?
123 (Ty->getPointerAddressSpace() ==
124 AMDGPUAS::LOCAL_ADDRESS ?
125 ValueKind::DynamicSharedPointer :
126 ValueKind::GlobalBuffer) :
127 ValueKind::ByValue);
130 ValueType MetadataStreamerV2::getValueType(Type *Ty, StringRef TypeName) const {
131 switch (Ty->getTypeID()) {
132 case Type::IntegerTyID: {
133 auto Signed = !TypeName.startswith("u");
134 switch (Ty->getIntegerBitWidth()) {
135 case 8:
136 return Signed ? ValueType::I8 : ValueType::U8;
137 case 16:
138 return Signed ? ValueType::I16 : ValueType::U16;
139 case 32:
140 return Signed ? ValueType::I32 : ValueType::U32;
141 case 64:
142 return Signed ? ValueType::I64 : ValueType::U64;
143 default:
144 return ValueType::Struct;
147 case Type::HalfTyID:
148 return ValueType::F16;
149 case Type::FloatTyID:
150 return ValueType::F32;
151 case Type::DoubleTyID:
152 return ValueType::F64;
153 case Type::PointerTyID:
154 return getValueType(Ty->getPointerElementType(), TypeName);
155 case Type::VectorTyID:
156 return getValueType(Ty->getVectorElementType(), TypeName);
157 default:
158 return ValueType::Struct;
162 std::string MetadataStreamerV2::getTypeName(Type *Ty, bool Signed) const {
163 switch (Ty->getTypeID()) {
164 case Type::IntegerTyID: {
165 if (!Signed)
166 return (Twine('u') + getTypeName(Ty, true)).str();
168 auto BitWidth = Ty->getIntegerBitWidth();
169 switch (BitWidth) {
170 case 8:
171 return "char";
172 case 16:
173 return "short";
174 case 32:
175 return "int";
176 case 64:
177 return "long";
178 default:
179 return (Twine('i') + Twine(BitWidth)).str();
182 case Type::HalfTyID:
183 return "half";
184 case Type::FloatTyID:
185 return "float";
186 case Type::DoubleTyID:
187 return "double";
188 case Type::VectorTyID: {
189 auto VecTy = cast<VectorType>(Ty);
190 auto ElTy = VecTy->getElementType();
191 auto NumElements = VecTy->getVectorNumElements();
192 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
194 default:
195 return "unknown";
199 std::vector<uint32_t>
200 MetadataStreamerV2::getWorkGroupDimensions(MDNode *Node) const {
201 std::vector<uint32_t> Dims;
202 if (Node->getNumOperands() != 3)
203 return Dims;
205 for (auto &Op : Node->operands())
206 Dims.push_back(mdconst::extract<ConstantInt>(Op)->getZExtValue());
207 return Dims;
210 Kernel::CodeProps::Metadata
211 MetadataStreamerV2::getHSACodeProps(const MachineFunction &MF,
212 const SIProgramInfo &ProgramInfo) const {
213 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
214 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
215 HSAMD::Kernel::CodeProps::Metadata HSACodeProps;
216 const Function &F = MF.getFunction();
218 assert(F.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
219 F.getCallingConv() == CallingConv::SPIR_KERNEL);
221 unsigned MaxKernArgAlign;
222 HSACodeProps.mKernargSegmentSize = STM.getKernArgSegmentSize(F,
223 MaxKernArgAlign);
224 HSACodeProps.mGroupSegmentFixedSize = ProgramInfo.LDSSize;
225 HSACodeProps.mPrivateSegmentFixedSize = ProgramInfo.ScratchSize;
226 HSACodeProps.mKernargSegmentAlign = std::max(MaxKernArgAlign, 4u);
227 HSACodeProps.mWavefrontSize = STM.getWavefrontSize();
228 HSACodeProps.mNumSGPRs = ProgramInfo.NumSGPR;
229 HSACodeProps.mNumVGPRs = ProgramInfo.NumVGPR;
230 HSACodeProps.mMaxFlatWorkGroupSize = MFI.getMaxFlatWorkGroupSize();
231 HSACodeProps.mIsDynamicCallStack = ProgramInfo.DynamicCallStack;
232 HSACodeProps.mIsXNACKEnabled = STM.isXNACKEnabled();
233 HSACodeProps.mNumSpilledSGPRs = MFI.getNumSpilledSGPRs();
234 HSACodeProps.mNumSpilledVGPRs = MFI.getNumSpilledVGPRs();
236 return HSACodeProps;
239 Kernel::DebugProps::Metadata
240 MetadataStreamerV2::getHSADebugProps(const MachineFunction &MF,
241 const SIProgramInfo &ProgramInfo) const {
242 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
243 HSAMD::Kernel::DebugProps::Metadata HSADebugProps;
245 if (!STM.debuggerSupported())
246 return HSADebugProps;
248 HSADebugProps.mDebuggerABIVersion.push_back(1);
249 HSADebugProps.mDebuggerABIVersion.push_back(0);
251 if (STM.debuggerEmitPrologue()) {
252 HSADebugProps.mPrivateSegmentBufferSGPR =
253 ProgramInfo.DebuggerPrivateSegmentBufferSGPR;
254 HSADebugProps.mWavefrontPrivateSegmentOffsetSGPR =
255 ProgramInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
258 return HSADebugProps;
261 void MetadataStreamerV2::emitVersion() {
262 auto &Version = HSAMetadata.mVersion;
264 Version.push_back(VersionMajor);
265 Version.push_back(VersionMinor);
268 void MetadataStreamerV2::emitPrintf(const Module &Mod) {
269 auto &Printf = HSAMetadata.mPrintf;
271 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
272 if (!Node)
273 return;
275 for (auto Op : Node->operands())
276 if (Op->getNumOperands())
277 Printf.push_back(cast<MDString>(Op->getOperand(0))->getString());
280 void MetadataStreamerV2::emitKernelLanguage(const Function &Func) {
281 auto &Kernel = HSAMetadata.mKernels.back();
283 // TODO: What about other languages?
284 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
285 if (!Node || !Node->getNumOperands())
286 return;
287 auto Op0 = Node->getOperand(0);
288 if (Op0->getNumOperands() <= 1)
289 return;
291 Kernel.mLanguage = "OpenCL C";
292 Kernel.mLanguageVersion.push_back(
293 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue());
294 Kernel.mLanguageVersion.push_back(
295 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue());
298 void MetadataStreamerV2::emitKernelAttrs(const Function &Func) {
299 auto &Attrs = HSAMetadata.mKernels.back().mAttrs;
301 if (auto Node = Func.getMetadata("reqd_work_group_size"))
302 Attrs.mReqdWorkGroupSize = getWorkGroupDimensions(Node);
303 if (auto Node = Func.getMetadata("work_group_size_hint"))
304 Attrs.mWorkGroupSizeHint = getWorkGroupDimensions(Node);
305 if (auto Node = Func.getMetadata("vec_type_hint")) {
306 Attrs.mVecTypeHint = getTypeName(
307 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
308 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue());
310 if (Func.hasFnAttribute("runtime-handle")) {
311 Attrs.mRuntimeHandle =
312 Func.getFnAttribute("runtime-handle").getValueAsString().str();
316 void MetadataStreamerV2::emitKernelArgs(const Function &Func) {
317 for (auto &Arg : Func.args())
318 emitKernelArg(Arg);
320 emitHiddenKernelArgs(Func);
323 void MetadataStreamerV2::emitKernelArg(const Argument &Arg) {
324 auto Func = Arg.getParent();
325 auto ArgNo = Arg.getArgNo();
326 const MDNode *Node;
328 StringRef Name;
329 Node = Func->getMetadata("kernel_arg_name");
330 if (Node && ArgNo < Node->getNumOperands())
331 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
332 else if (Arg.hasName())
333 Name = Arg.getName();
335 StringRef TypeName;
336 Node = Func->getMetadata("kernel_arg_type");
337 if (Node && ArgNo < Node->getNumOperands())
338 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
340 StringRef BaseTypeName;
341 Node = Func->getMetadata("kernel_arg_base_type");
342 if (Node && ArgNo < Node->getNumOperands())
343 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
345 StringRef AccQual;
346 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
347 Arg.hasNoAliasAttr()) {
348 AccQual = "read_only";
349 } else {
350 Node = Func->getMetadata("kernel_arg_access_qual");
351 if (Node && ArgNo < Node->getNumOperands())
352 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
355 StringRef TypeQual;
356 Node = Func->getMetadata("kernel_arg_type_qual");
357 if (Node && ArgNo < Node->getNumOperands())
358 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
360 Type *Ty = Arg.getType();
361 const DataLayout &DL = Func->getParent()->getDataLayout();
363 unsigned PointeeAlign = 0;
364 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
365 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
366 PointeeAlign = Arg.getParamAlignment();
367 if (PointeeAlign == 0)
368 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
372 emitKernelArg(DL, Ty, getValueKind(Arg.getType(), TypeQual, BaseTypeName),
373 PointeeAlign, Name, TypeName, BaseTypeName, AccQual, TypeQual);
376 void MetadataStreamerV2::emitKernelArg(const DataLayout &DL, Type *Ty,
377 ValueKind ValueKind,
378 unsigned PointeeAlign, StringRef Name,
379 StringRef TypeName,
380 StringRef BaseTypeName,
381 StringRef AccQual, StringRef TypeQual) {
382 HSAMetadata.mKernels.back().mArgs.push_back(Kernel::Arg::Metadata());
383 auto &Arg = HSAMetadata.mKernels.back().mArgs.back();
385 Arg.mName = Name;
386 Arg.mTypeName = TypeName;
387 Arg.mSize = DL.getTypeAllocSize(Ty);
388 Arg.mAlign = DL.getABITypeAlignment(Ty);
389 Arg.mValueKind = ValueKind;
390 Arg.mValueType = getValueType(Ty, BaseTypeName);
391 Arg.mPointeeAlign = PointeeAlign;
393 if (auto PtrTy = dyn_cast<PointerType>(Ty))
394 Arg.mAddrSpaceQual = getAddressSpaceQualifier(PtrTy->getAddressSpace());
396 Arg.mAccQual = getAccessQualifier(AccQual);
398 // TODO: Emit Arg.mActualAccQual.
400 SmallVector<StringRef, 1> SplitTypeQuals;
401 TypeQual.split(SplitTypeQuals, " ", -1, false);
402 for (StringRef Key : SplitTypeQuals) {
403 auto P = StringSwitch<bool*>(Key)
404 .Case("const", &Arg.mIsConst)
405 .Case("restrict", &Arg.mIsRestrict)
406 .Case("volatile", &Arg.mIsVolatile)
407 .Case("pipe", &Arg.mIsPipe)
408 .Default(nullptr);
409 if (P)
410 *P = true;
414 void MetadataStreamerV2::emitHiddenKernelArgs(const Function &Func) {
415 int HiddenArgNumBytes =
416 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
418 if (!HiddenArgNumBytes)
419 return;
421 auto &DL = Func.getParent()->getDataLayout();
422 auto Int64Ty = Type::getInt64Ty(Func.getContext());
424 if (HiddenArgNumBytes >= 8)
425 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetX);
426 if (HiddenArgNumBytes >= 16)
427 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetY);
428 if (HiddenArgNumBytes >= 24)
429 emitKernelArg(DL, Int64Ty, ValueKind::HiddenGlobalOffsetZ);
431 auto Int8PtrTy = Type::getInt8PtrTy(Func.getContext(),
432 AMDGPUAS::GLOBAL_ADDRESS);
434 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
435 // "none" argument.
436 if (HiddenArgNumBytes >= 32) {
437 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
438 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenPrintfBuffer);
439 else
440 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
443 // Emit "default queue" and "completion action" arguments if enqueue kernel is
444 // used, otherwise emit dummy "none" arguments.
445 if (HiddenArgNumBytes >= 48) {
446 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
447 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenDefaultQueue);
448 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenCompletionAction);
449 } else {
450 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
451 emitKernelArg(DL, Int8PtrTy, ValueKind::HiddenNone);
456 bool MetadataStreamerV2::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
457 return TargetStreamer.EmitHSAMetadata(getHSAMetadata());
460 void MetadataStreamerV2::begin(const Module &Mod) {
461 emitVersion();
462 emitPrintf(Mod);
465 void MetadataStreamerV2::end() {
466 std::string HSAMetadataString;
467 if (toString(HSAMetadata, HSAMetadataString))
468 return;
470 if (DumpHSAMetadata)
471 dump(HSAMetadataString);
472 if (VerifyHSAMetadata)
473 verify(HSAMetadataString);
476 void MetadataStreamerV2::emitKernel(const MachineFunction &MF,
477 const SIProgramInfo &ProgramInfo) {
478 auto &Func = MF.getFunction();
479 if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL)
480 return;
482 auto CodeProps = getHSACodeProps(MF, ProgramInfo);
483 auto DebugProps = getHSADebugProps(MF, ProgramInfo);
485 HSAMetadata.mKernels.push_back(Kernel::Metadata());
486 auto &Kernel = HSAMetadata.mKernels.back();
488 Kernel.mName = Func.getName();
489 Kernel.mSymbolName = (Twine(Func.getName()) + Twine("@kd")).str();
490 emitKernelLanguage(Func);
491 emitKernelAttrs(Func);
492 emitKernelArgs(Func);
493 HSAMetadata.mKernels.back().mCodeProps = CodeProps;
494 HSAMetadata.mKernels.back().mDebugProps = DebugProps;
497 //===----------------------------------------------------------------------===//
498 // HSAMetadataStreamerV3
499 //===----------------------------------------------------------------------===//
501 void MetadataStreamerV3::dump(StringRef HSAMetadataString) const {
502 errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
505 void MetadataStreamerV3::verify(StringRef HSAMetadataString) const {
506 errs() << "AMDGPU HSA Metadata Parser Test: ";
508 std::shared_ptr<msgpack::Node> FromHSAMetadataString =
509 std::make_shared<msgpack::MapNode>();
511 yaml::Input YIn(HSAMetadataString);
512 YIn >> FromHSAMetadataString;
513 if (YIn.error()) {
514 errs() << "FAIL\n";
515 return;
518 std::string ToHSAMetadataString;
519 raw_string_ostream StrOS(ToHSAMetadataString);
520 yaml::Output YOut(StrOS);
521 YOut << FromHSAMetadataString;
523 errs() << (HSAMetadataString == StrOS.str() ? "PASS" : "FAIL") << '\n';
524 if (HSAMetadataString != ToHSAMetadataString) {
525 errs() << "Original input: " << HSAMetadataString << '\n'
526 << "Produced output: " << StrOS.str() << '\n';
530 Optional<StringRef>
531 MetadataStreamerV3::getAccessQualifier(StringRef AccQual) const {
532 return StringSwitch<Optional<StringRef>>(AccQual)
533 .Case("read_only", StringRef("read_only"))
534 .Case("write_only", StringRef("write_only"))
535 .Case("read_write", StringRef("read_write"))
536 .Default(None);
539 Optional<StringRef>
540 MetadataStreamerV3::getAddressSpaceQualifier(unsigned AddressSpace) const {
541 switch (AddressSpace) {
542 case AMDGPUAS::PRIVATE_ADDRESS:
543 return StringRef("private");
544 case AMDGPUAS::GLOBAL_ADDRESS:
545 return StringRef("global");
546 case AMDGPUAS::CONSTANT_ADDRESS:
547 return StringRef("constant");
548 case AMDGPUAS::LOCAL_ADDRESS:
549 return StringRef("local");
550 case AMDGPUAS::FLAT_ADDRESS:
551 return StringRef("generic");
552 case AMDGPUAS::REGION_ADDRESS:
553 return StringRef("region");
554 default:
555 return None;
559 StringRef MetadataStreamerV3::getValueKind(Type *Ty, StringRef TypeQual,
560 StringRef BaseTypeName) const {
561 if (TypeQual.find("pipe") != StringRef::npos)
562 return "pipe";
564 return StringSwitch<StringRef>(BaseTypeName)
565 .Case("image1d_t", "image")
566 .Case("image1d_array_t", "image")
567 .Case("image1d_buffer_t", "image")
568 .Case("image2d_t", "image")
569 .Case("image2d_array_t", "image")
570 .Case("image2d_array_depth_t", "image")
571 .Case("image2d_array_msaa_t", "image")
572 .Case("image2d_array_msaa_depth_t", "image")
573 .Case("image2d_depth_t", "image")
574 .Case("image2d_msaa_t", "image")
575 .Case("image2d_msaa_depth_t", "image")
576 .Case("image3d_t", "image")
577 .Case("sampler_t", "sampler")
578 .Case("queue_t", "queue")
579 .Default(isa<PointerType>(Ty)
580 ? (Ty->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS
581 ? "dynamic_shared_pointer"
582 : "global_buffer")
583 : "by_value");
586 StringRef MetadataStreamerV3::getValueType(Type *Ty, StringRef TypeName) const {
587 switch (Ty->getTypeID()) {
588 case Type::IntegerTyID: {
589 auto Signed = !TypeName.startswith("u");
590 switch (Ty->getIntegerBitWidth()) {
591 case 8:
592 return Signed ? "i8" : "u8";
593 case 16:
594 return Signed ? "i16" : "u16";
595 case 32:
596 return Signed ? "i32" : "u32";
597 case 64:
598 return Signed ? "i64" : "u64";
599 default:
600 return "struct";
603 case Type::HalfTyID:
604 return "f16";
605 case Type::FloatTyID:
606 return "f32";
607 case Type::DoubleTyID:
608 return "f64";
609 case Type::PointerTyID:
610 return getValueType(Ty->getPointerElementType(), TypeName);
611 case Type::VectorTyID:
612 return getValueType(Ty->getVectorElementType(), TypeName);
613 default:
614 return "struct";
618 std::string MetadataStreamerV3::getTypeName(Type *Ty, bool Signed) const {
619 switch (Ty->getTypeID()) {
620 case Type::IntegerTyID: {
621 if (!Signed)
622 return (Twine('u') + getTypeName(Ty, true)).str();
624 auto BitWidth = Ty->getIntegerBitWidth();
625 switch (BitWidth) {
626 case 8:
627 return "char";
628 case 16:
629 return "short";
630 case 32:
631 return "int";
632 case 64:
633 return "long";
634 default:
635 return (Twine('i') + Twine(BitWidth)).str();
638 case Type::HalfTyID:
639 return "half";
640 case Type::FloatTyID:
641 return "float";
642 case Type::DoubleTyID:
643 return "double";
644 case Type::VectorTyID: {
645 auto VecTy = cast<VectorType>(Ty);
646 auto ElTy = VecTy->getElementType();
647 auto NumElements = VecTy->getVectorNumElements();
648 return (Twine(getTypeName(ElTy, Signed)) + Twine(NumElements)).str();
650 default:
651 return "unknown";
655 std::shared_ptr<msgpack::ArrayNode>
656 MetadataStreamerV3::getWorkGroupDimensions(MDNode *Node) const {
657 auto Dims = std::make_shared<msgpack::ArrayNode>();
658 if (Node->getNumOperands() != 3)
659 return Dims;
661 for (auto &Op : Node->operands())
662 Dims->push_back(std::make_shared<msgpack::ScalarNode>(
663 mdconst::extract<ConstantInt>(Op)->getZExtValue()));
664 return Dims;
667 void MetadataStreamerV3::emitVersion() {
668 auto Version = std::make_shared<msgpack::ArrayNode>();
669 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMajor));
670 Version->push_back(std::make_shared<msgpack::ScalarNode>(V3::VersionMinor));
671 getRootMetadata("amdhsa.version") = std::move(Version);
674 void MetadataStreamerV3::emitPrintf(const Module &Mod) {
675 auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
676 if (!Node)
677 return;
679 auto Printf = std::make_shared<msgpack::ArrayNode>();
680 for (auto Op : Node->operands())
681 if (Op->getNumOperands())
682 Printf->push_back(std::make_shared<msgpack::ScalarNode>(
683 cast<MDString>(Op->getOperand(0))->getString()));
684 getRootMetadata("amdhsa.printf") = std::move(Printf);
687 void MetadataStreamerV3::emitKernelLanguage(const Function &Func,
688 msgpack::MapNode &Kern) {
689 // TODO: What about other languages?
690 auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
691 if (!Node || !Node->getNumOperands())
692 return;
693 auto Op0 = Node->getOperand(0);
694 if (Op0->getNumOperands() <= 1)
695 return;
697 Kern[".language"] = std::make_shared<msgpack::ScalarNode>("OpenCL C");
698 auto LanguageVersion = std::make_shared<msgpack::ArrayNode>();
699 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
700 mdconst::extract<ConstantInt>(Op0->getOperand(0))->getZExtValue()));
701 LanguageVersion->push_back(std::make_shared<msgpack::ScalarNode>(
702 mdconst::extract<ConstantInt>(Op0->getOperand(1))->getZExtValue()));
703 Kern[".language_version"] = std::move(LanguageVersion);
706 void MetadataStreamerV3::emitKernelAttrs(const Function &Func,
707 msgpack::MapNode &Kern) {
709 if (auto Node = Func.getMetadata("reqd_work_group_size"))
710 Kern[".reqd_workgroup_size"] = getWorkGroupDimensions(Node);
711 if (auto Node = Func.getMetadata("work_group_size_hint"))
712 Kern[".workgroup_size_hint"] = getWorkGroupDimensions(Node);
713 if (auto Node = Func.getMetadata("vec_type_hint")) {
714 Kern[".vec_type_hint"] = std::make_shared<msgpack::ScalarNode>(getTypeName(
715 cast<ValueAsMetadata>(Node->getOperand(0))->getType(),
716 mdconst::extract<ConstantInt>(Node->getOperand(1))->getZExtValue()));
718 if (Func.hasFnAttribute("runtime-handle")) {
719 Kern[".device_enqueue_symbol"] = std::make_shared<msgpack::ScalarNode>(
720 Func.getFnAttribute("runtime-handle").getValueAsString().str());
724 void MetadataStreamerV3::emitKernelArgs(const Function &Func,
725 msgpack::MapNode &Kern) {
726 unsigned Offset = 0;
727 auto Args = std::make_shared<msgpack::ArrayNode>();
728 for (auto &Arg : Func.args())
729 emitKernelArg(Arg, Offset, *Args);
731 emitHiddenKernelArgs(Func, Offset, *Args);
733 // TODO: What about other languages?
734 if (Func.getParent()->getNamedMetadata("opencl.ocl.version")) {
735 auto &DL = Func.getParent()->getDataLayout();
736 auto Int64Ty = Type::getInt64Ty(Func.getContext());
738 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, *Args);
739 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, *Args);
740 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, *Args);
742 auto Int8PtrTy =
743 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
745 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
746 // "none" argument.
747 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
748 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, *Args);
749 else
750 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
752 // Emit "default queue" and "completion action" arguments if enqueue kernel
753 // is used, otherwise emit dummy "none" arguments.
754 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
755 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, *Args);
756 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, *Args);
757 } else {
758 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
759 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, *Args);
763 Kern[".args"] = std::move(Args);
766 void MetadataStreamerV3::emitKernelArg(const Argument &Arg, unsigned &Offset,
767 msgpack::ArrayNode &Args) {
768 auto Func = Arg.getParent();
769 auto ArgNo = Arg.getArgNo();
770 const MDNode *Node;
772 StringRef Name;
773 Node = Func->getMetadata("kernel_arg_name");
774 if (Node && ArgNo < Node->getNumOperands())
775 Name = cast<MDString>(Node->getOperand(ArgNo))->getString();
776 else if (Arg.hasName())
777 Name = Arg.getName();
779 StringRef TypeName;
780 Node = Func->getMetadata("kernel_arg_type");
781 if (Node && ArgNo < Node->getNumOperands())
782 TypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
784 StringRef BaseTypeName;
785 Node = Func->getMetadata("kernel_arg_base_type");
786 if (Node && ArgNo < Node->getNumOperands())
787 BaseTypeName = cast<MDString>(Node->getOperand(ArgNo))->getString();
789 StringRef AccQual;
790 if (Arg.getType()->isPointerTy() && Arg.onlyReadsMemory() &&
791 Arg.hasNoAliasAttr()) {
792 AccQual = "read_only";
793 } else {
794 Node = Func->getMetadata("kernel_arg_access_qual");
795 if (Node && ArgNo < Node->getNumOperands())
796 AccQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
799 StringRef TypeQual;
800 Node = Func->getMetadata("kernel_arg_type_qual");
801 if (Node && ArgNo < Node->getNumOperands())
802 TypeQual = cast<MDString>(Node->getOperand(ArgNo))->getString();
804 Type *Ty = Arg.getType();
805 const DataLayout &DL = Func->getParent()->getDataLayout();
807 unsigned PointeeAlign = 0;
808 if (auto PtrTy = dyn_cast<PointerType>(Ty)) {
809 if (PtrTy->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) {
810 PointeeAlign = Arg.getParamAlignment();
811 if (PointeeAlign == 0)
812 PointeeAlign = DL.getABITypeAlignment(PtrTy->getElementType());
816 emitKernelArg(Func->getParent()->getDataLayout(), Arg.getType(),
817 getValueKind(Arg.getType(), TypeQual, BaseTypeName), Offset,
818 Args, PointeeAlign, Name, TypeName, BaseTypeName, AccQual,
819 TypeQual);
822 void MetadataStreamerV3::emitKernelArg(const DataLayout &DL, Type *Ty,
823 StringRef ValueKind, unsigned &Offset,
824 msgpack::ArrayNode &Args,
825 unsigned PointeeAlign, StringRef Name,
826 StringRef TypeName,
827 StringRef BaseTypeName,
828 StringRef AccQual, StringRef TypeQual) {
829 auto ArgPtr = std::make_shared<msgpack::MapNode>();
830 auto &Arg = *ArgPtr;
832 if (!Name.empty())
833 Arg[".name"] = std::make_shared<msgpack::ScalarNode>(Name);
834 if (!TypeName.empty())
835 Arg[".type_name"] = std::make_shared<msgpack::ScalarNode>(TypeName);
836 auto Size = DL.getTypeAllocSize(Ty);
837 auto Align = DL.getABITypeAlignment(Ty);
838 Arg[".size"] = std::make_shared<msgpack::ScalarNode>(Size);
839 Offset = alignTo(Offset, Align);
840 Arg[".offset"] = std::make_shared<msgpack::ScalarNode>(Offset);
841 Offset += Size;
842 Arg[".value_kind"] = std::make_shared<msgpack::ScalarNode>(ValueKind);
843 Arg[".value_type"] =
844 std::make_shared<msgpack::ScalarNode>(getValueType(Ty, BaseTypeName));
845 if (PointeeAlign)
846 Arg[".pointee_align"] = std::make_shared<msgpack::ScalarNode>(PointeeAlign);
848 if (auto PtrTy = dyn_cast<PointerType>(Ty))
849 if (auto Qualifier = getAddressSpaceQualifier(PtrTy->getAddressSpace()))
850 Arg[".address_space"] = std::make_shared<msgpack::ScalarNode>(*Qualifier);
852 if (auto AQ = getAccessQualifier(AccQual))
853 Arg[".access"] = std::make_shared<msgpack::ScalarNode>(*AQ);
855 // TODO: Emit Arg[".actual_access"].
857 SmallVector<StringRef, 1> SplitTypeQuals;
858 TypeQual.split(SplitTypeQuals, " ", -1, false);
859 for (StringRef Key : SplitTypeQuals) {
860 if (Key == "const")
861 Arg[".is_const"] = std::make_shared<msgpack::ScalarNode>(true);
862 else if (Key == "restrict")
863 Arg[".is_restrict"] = std::make_shared<msgpack::ScalarNode>(true);
864 else if (Key == "volatile")
865 Arg[".is_volatile"] = std::make_shared<msgpack::ScalarNode>(true);
866 else if (Key == "pipe")
867 Arg[".is_pipe"] = std::make_shared<msgpack::ScalarNode>(true);
870 Args.push_back(std::move(ArgPtr));
873 void MetadataStreamerV3::emitHiddenKernelArgs(const Function &Func,
874 unsigned &Offset,
875 msgpack::ArrayNode &Args) {
876 int HiddenArgNumBytes =
877 getIntegerAttribute(Func, "amdgpu-implicitarg-num-bytes", 0);
879 if (!HiddenArgNumBytes)
880 return;
882 auto &DL = Func.getParent()->getDataLayout();
883 auto Int64Ty = Type::getInt64Ty(Func.getContext());
885 if (HiddenArgNumBytes >= 8)
886 emitKernelArg(DL, Int64Ty, "hidden_global_offset_x", Offset, Args);
887 if (HiddenArgNumBytes >= 16)
888 emitKernelArg(DL, Int64Ty, "hidden_global_offset_y", Offset, Args);
889 if (HiddenArgNumBytes >= 24)
890 emitKernelArg(DL, Int64Ty, "hidden_global_offset_z", Offset, Args);
892 auto Int8PtrTy =
893 Type::getInt8PtrTy(Func.getContext(), AMDGPUAS::GLOBAL_ADDRESS);
895 // Emit "printf buffer" argument if printf is used, otherwise emit dummy
896 // "none" argument.
897 if (HiddenArgNumBytes >= 32) {
898 if (Func.getParent()->getNamedMetadata("llvm.printf.fmts"))
899 emitKernelArg(DL, Int8PtrTy, "hidden_printf_buffer", Offset, Args);
900 else
901 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
904 // Emit "default queue" and "completion action" arguments if enqueue kernel is
905 // used, otherwise emit dummy "none" arguments.
906 if (HiddenArgNumBytes >= 48) {
907 if (Func.hasFnAttribute("calls-enqueue-kernel")) {
908 emitKernelArg(DL, Int8PtrTy, "hidden_default_queue", Offset, Args);
909 emitKernelArg(DL, Int8PtrTy, "hidden_completion_action", Offset, Args);
910 } else {
911 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
912 emitKernelArg(DL, Int8PtrTy, "hidden_none", Offset, Args);
917 std::shared_ptr<msgpack::MapNode>
918 MetadataStreamerV3::getHSAKernelProps(const MachineFunction &MF,
919 const SIProgramInfo &ProgramInfo) const {
920 const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
921 const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
922 const Function &F = MF.getFunction();
924 auto HSAKernelProps = std::make_shared<msgpack::MapNode>();
925 auto &Kern = *HSAKernelProps;
927 unsigned MaxKernArgAlign;
928 Kern[".kernarg_segment_size"] = std::make_shared<msgpack::ScalarNode>(
929 STM.getKernArgSegmentSize(F, MaxKernArgAlign));
930 Kern[".group_segment_fixed_size"] =
931 std::make_shared<msgpack::ScalarNode>(ProgramInfo.LDSSize);
932 Kern[".private_segment_fixed_size"] =
933 std::make_shared<msgpack::ScalarNode>(ProgramInfo.ScratchSize);
934 Kern[".kernarg_segment_align"] =
935 std::make_shared<msgpack::ScalarNode>(std::max(uint32_t(4), MaxKernArgAlign));
936 Kern[".wavefront_size"] =
937 std::make_shared<msgpack::ScalarNode>(STM.getWavefrontSize());
938 Kern[".sgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumSGPR);
939 Kern[".vgpr_count"] = std::make_shared<msgpack::ScalarNode>(ProgramInfo.NumVGPR);
940 Kern[".max_flat_workgroup_size"] =
941 std::make_shared<msgpack::ScalarNode>(MFI.getMaxFlatWorkGroupSize());
942 Kern[".sgpr_spill_count"] =
943 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledSGPRs());
944 Kern[".vgpr_spill_count"] =
945 std::make_shared<msgpack::ScalarNode>(MFI.getNumSpilledVGPRs());
947 return HSAKernelProps;
950 bool MetadataStreamerV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
951 return TargetStreamer.EmitHSAMetadata(getHSAMetadataRoot(), true);
954 void MetadataStreamerV3::begin(const Module &Mod) {
955 emitVersion();
956 emitPrintf(Mod);
957 getRootMetadata("amdhsa.kernels").reset(new msgpack::ArrayNode());
960 void MetadataStreamerV3::end() {
961 std::string HSAMetadataString;
962 raw_string_ostream StrOS(HSAMetadataString);
963 yaml::Output YOut(StrOS);
964 YOut << HSAMetadataRoot;
966 if (DumpHSAMetadata)
967 dump(StrOS.str());
968 if (VerifyHSAMetadata)
969 verify(StrOS.str());
972 void MetadataStreamerV3::emitKernel(const MachineFunction &MF,
973 const SIProgramInfo &ProgramInfo) {
974 auto &Func = MF.getFunction();
975 auto KernelProps = getHSAKernelProps(MF, ProgramInfo);
977 assert(Func.getCallingConv() == CallingConv::AMDGPU_KERNEL ||
978 Func.getCallingConv() == CallingConv::SPIR_KERNEL);
980 auto &KernelsNode = getRootMetadata("amdhsa.kernels");
981 auto Kernels = cast<msgpack::ArrayNode>(KernelsNode.get());
984 auto &Kern = *KernelProps;
985 Kern[".name"] = std::make_shared<msgpack::ScalarNode>(Func.getName());
986 Kern[".symbol"] = std::make_shared<msgpack::ScalarNode>(
987 (Twine(Func.getName()) + Twine(".kd")).str());
988 emitKernelLanguage(Func, Kern);
989 emitKernelAttrs(Func, Kern);
990 emitKernelArgs(Func, Kern);
993 Kernels->push_back(std::move(KernelProps));
996 } // end namespace HSAMD
997 } // end namespace AMDGPU
998 } // end namespace llvm