//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }
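
  // On subtargets without aperture registers, an addrspacecast from local or
  // private to flat needs the aperture bases, which are loaded via the queue
  // ptr; that is why the ADDR_SPACE_CAST bit above forces "amdgpu-queue-ptr".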

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};
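
// ConstantStatus acts as a lookup cache for getConstantAccess; note that in
// this version the map is only ever read, so repeated queries over the same
// Constant redo the recursive operand walk rather than hitting the cache.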

struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the boolean value it wraps is
    // false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};
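
// Sketch of the propagation above: "uniform-work-group-size" flows from
// callers to callees. A device function reachable only from kernels carrying
// "uniform-work-group-size"="true" keeps the optimistic true value; any
// caller in the false state (or an unknown call site) clamps it to false,
// and manifest() then writes the result back as a function attribute.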

AAAMDWorkGroupSize &AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP,
                                                          Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than walking all instructions
    // ourselves, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, there is nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }
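
  // The queue-ptr deduction above proceeds from cheapest to most expensive
  // check: intrinsic callees first, then addrspacecast instructions (skipped
  // when aperture registers make the queue ptr unnecessary), and finally a
  // full walk over constant operands via AMDGPUInformationCache::needsQueuePtr.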

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

private:
  DenseSet<StringRef> Attributes;
};
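
// End-to-end, for a kernel @k calling a device function @f that uses
// @llvm.amdgcn.workitem.id.y: @f deduces "amdgpu-work-item-id-y" from the
// intrinsic, @k inherits it through the call edge and additionally gets
// "amdgpu-calls"; an alloca in either function adds "amdgpu-stack-objects".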

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }
  TargetMachine *TM;
  static char ID;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)