//===- AMDGPUAttributor.cpp -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass uses the Attributor framework to deduce AMDGPU attributes.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO/Attributor.h"

#define DEBUG_TYPE "amdgpu-attributor"

using namespace llvm;

static constexpr StringLiteral ImplicitAttrNames[] = {
    // X ids unnecessarily propagated to kernels.
    "amdgpu-work-item-id-x",  "amdgpu-work-item-id-y",
    "amdgpu-work-item-id-z",  "amdgpu-work-group-id-x",
    "amdgpu-work-group-id-y", "amdgpu-work-group-id-z",
    "amdgpu-dispatch-ptr",    "amdgpu-dispatch-id",
    "amdgpu-queue-ptr",       "amdgpu-implicitarg-ptr"};

// We do not need to note the x workitem or workgroup id because they are
// always initialized.
//
// TODO: We should not add the attributes if the known compile time workgroup
// size is 1 for y/z.
static StringRef intrinsicToAttrName(Intrinsic::ID ID, bool &NonKernelOnly,
                                     bool &IsQueuePtr) {
  switch (ID) {
  case Intrinsic::amdgcn_workitem_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-item-id-x";
  case Intrinsic::amdgcn_workgroup_id_x:
    NonKernelOnly = true;
    return "amdgpu-work-group-id-x";
  case Intrinsic::amdgcn_workitem_id_y:
  case Intrinsic::r600_read_tidig_y:
    return "amdgpu-work-item-id-y";
  case Intrinsic::amdgcn_workitem_id_z:
  case Intrinsic::r600_read_tidig_z:
    return "amdgpu-work-item-id-z";
  case Intrinsic::amdgcn_workgroup_id_y:
  case Intrinsic::r600_read_tgid_y:
    return "amdgpu-work-group-id-y";
  case Intrinsic::amdgcn_workgroup_id_z:
  case Intrinsic::r600_read_tgid_z:
    return "amdgpu-work-group-id-z";
  case Intrinsic::amdgcn_dispatch_ptr:
    return "amdgpu-dispatch-ptr";
  case Intrinsic::amdgcn_dispatch_id:
    return "amdgpu-dispatch-id";
  case Intrinsic::amdgcn_kernarg_segment_ptr:
    return "amdgpu-kernarg-segment-ptr";
  case Intrinsic::amdgcn_implicitarg_ptr:
    return "amdgpu-implicitarg-ptr";
  case Intrinsic::amdgcn_queue_ptr:
  case Intrinsic::amdgcn_is_shared:
  case Intrinsic::amdgcn_is_private:
    // TODO: Does not require queue ptr on gfx9+
  case Intrinsic::trap:
  case Intrinsic::debugtrap:
    IsQueuePtr = true;
    return "amdgpu-queue-ptr";
  default:
    return "";
  }
}
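
// Note on the out-parameters: NonKernelOnly marks the x ids, which are always
// initialized for kernels and so only need noting on non-kernel functions;
// IsQueuePtr is set for intrinsics that additionally require the queue
// pointer (wired to NeedsQueuePtr at the call site in updateImpl below).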

static bool castRequiresQueuePtr(unsigned SrcAS) {
  return SrcAS == AMDGPUAS::LOCAL_ADDRESS || SrcAS == AMDGPUAS::PRIVATE_ADDRESS;
}

static bool isDSAddress(const Constant *C) {
  const GlobalValue *GV = dyn_cast<GlobalValue>(C);
  if (!GV)
    return false;
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::REGION_ADDRESS;
}

class AMDGPUInformationCache : public InformationCache {
public:
  AMDGPUInformationCache(const Module &M, AnalysisGetter &AG,
                         BumpPtrAllocator &Allocator,
                         SetVector<Function *> *CGSCC, TargetMachine &TM)
      : InformationCache(M, AG, Allocator, CGSCC), TM(TM) {}
  TargetMachine &TM;

  enum ConstantStatus { DS_GLOBAL = 1 << 0, ADDR_SPACE_CAST = 1 << 1 };

  /// Check if the subtarget has aperture regs.
  bool hasApertureRegs(Function &F) {
    const GCNSubtarget &ST = TM.getSubtarget<GCNSubtarget>(F);
    return ST.hasApertureRegs();
  }

private:
  /// Check if the ConstantExpr \p CE requires the queue ptr attribute.
  static bool visitConstExpr(const ConstantExpr *CE) {
    if (CE->getOpcode() == Instruction::AddrSpaceCast) {
      unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace();
      return castRequiresQueuePtr(SrcAS);
    }
    return false;
  }

  /// Get the constant access bitmap for \p C.
  uint8_t getConstantAccess(const Constant *C) {
    auto It = ConstantStatus.find(C);
    if (It != ConstantStatus.end())
      return It->second;

    uint8_t Result = 0;
    if (isDSAddress(C))
      Result = DS_GLOBAL;

    if (const auto *CE = dyn_cast<ConstantExpr>(C))
      if (visitConstExpr(CE))
        Result |= ADDR_SPACE_CAST;

    // The bitmap of a constant is the union of its operands' bitmaps.
    for (const Use &U : C->operands()) {
      const auto *OpC = dyn_cast<Constant>(U);
      if (!OpC)
        continue;

      Result |= getConstantAccess(OpC);
    }
    return Result;
  }

public:
  /// Returns true if \p Fn needs a queue ptr attribute because of \p C.
  bool needsQueuePtr(const Constant *C, Function &Fn) {
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(Fn.getCallingConv());
    bool HasAperture = hasApertureRegs(Fn);

    // No need to explore the constants.
    if (!IsNonEntryFunc && HasAperture)
      return false;

    uint8_t Access = getConstantAccess(C);

    // We need to trap on DS globals in non-entry functions.
    if (IsNonEntryFunc && (Access & DS_GLOBAL))
      return true;

    return !HasAperture && (Access & ADDR_SPACE_CAST);
  }

private:
  /// Used to determine if the Constant needs a queue ptr attribute.
  DenseMap<const Constant *, uint8_t> ConstantStatus;
};

struct AAAMDAttributes : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDAttributes(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDAttributes &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDAttributes"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDAttributes.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  virtual const DenseSet<StringRef> &getAttributes() const = 0;

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDAttributes::ID = 0;

struct AAAMDWorkGroupSize
    : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAMDWorkGroupSize(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  /// Create an abstract attribute view for the position \p IRP.
  static AAAMDWorkGroupSize &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAAMDWorkGroupSize"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// This function should return true if the type of the \p AA is
  /// AAAMDWorkGroupSize.
  static bool classof(const AbstractAttribute *AA) {
    return (AA->getIdAddr() == &ID);
  }

  /// Unique ID (due to the unique address)
  static const char ID;
};
const char AAAMDWorkGroupSize::ID = 0;
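
// Propagates "uniform-work-group-size" from callers to callees: a function may
// only keep the attribute assumed "true" while every known caller has it too,
// and it falls to the pessimistic state when not all call sites are known.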

struct AAAMDWorkGroupSizeFunction : public AAAMDWorkGroupSize {
  AAAMDWorkGroupSizeFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDWorkGroupSize(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();

    if (CC != CallingConv::AMDGPU_KERNEL)
      return;

    bool InitialValue = false;
    if (F->hasFnAttribute("uniform-work-group-size"))
      InitialValue = F->getFnAttribute("uniform-work-group-size")
                         .getValueAsString()
                         .equals("true");

    if (InitialValue)
      indicateOptimisticFixpoint();
    else
      indicatePessimisticFixpoint();
  }

  ChangeStatus updateImpl(Attributor &A) override {
    ChangeStatus Change = ChangeStatus::UNCHANGED;

    auto CheckCallSite = [&](AbstractCallSite CS) {
      Function *Caller = CS.getInstruction()->getFunction();
      LLVM_DEBUG(dbgs() << "[AAAMDWorkGroupSize] Call " << Caller->getName()
                        << "->" << getAssociatedFunction()->getName() << "\n");

      const auto &CallerInfo = A.getAAFor<AAAMDWorkGroupSize>(
          *this, IRPosition::function(*Caller), DepClassTy::REQUIRED);

      Change = Change | clampStateAndIndicateChange(this->getState(),
                                                    CallerInfo.getState());

      return true;
    };

    bool AllCallSitesKnown = true;
    if (!A.checkForAllCallSites(CheckCallSite, *this, true, AllCallSitesKnown))
      indicatePessimisticFixpoint();

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    AttrList.push_back(Attribute::get(Ctx, "uniform-work-group-size",
                                      getAssumed() ? "true" : "false"));
    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  bool isValidState() const override {
    // This state is always valid, even when the assumed value is false.
    return true;
  }

  const std::string getAsStr() const override {
    return "AMDWorkGroupSize[" + std::to_string(getAssumed()) + "]";
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}
};

AAAMDWorkGroupSize &
AAAMDWorkGroupSize::createForPosition(const IRPosition &IRP, Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDWorkGroupSizeFunction(IRP, A);
  llvm_unreachable("AAAMDWorkGroupSize is only valid for function position");
}
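
// Deduces the implicit-argument attributes bottom-up: a function needs an
// implicit argument if it uses the corresponding intrinsic itself or may reach
// a callee (including an unknown one) that needs it.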

struct AAAMDAttributesFunction : public AAAMDAttributes {
  AAAMDAttributesFunction(const IRPosition &IRP, Attributor &A)
      : AAAMDAttributes(IRP, A) {}

  void initialize(Attributor &A) override {
    Function *F = getAssociatedFunction();
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);

    // Ignore functions with graphics calling conventions; these are currently
    // not allowed to have kernel arguments.
    if (AMDGPU::isGraphics(F->getCallingConv())) {
      indicatePessimisticFixpoint();
      return;
    }

    for (StringRef Attr : ImplicitAttrNames) {
      if (F->hasFnAttribute(Attr))
        Attributes.insert(Attr);
    }

    // Assume all implicit arguments are needed if the function's address is
    // taken, since it may be called indirectly.
    // TODO: We shouldn't need this in the future.
    if (CallingConvSupportsAllImplicits &&
        F->hasAddressTaken(nullptr, true, true, true)) {
      for (StringRef AttrName : ImplicitAttrNames) {
        Attributes.insert(AttrName);
      }
    }
  }

  ChangeStatus updateImpl(Attributor &A) override {
    Function *F = getAssociatedFunction();
    ChangeStatus Change = ChangeStatus::UNCHANGED;
    bool IsNonEntryFunc = !AMDGPU::isEntryFunctionCC(F->getCallingConv());
    CallingConv::ID CC = F->getCallingConv();
    bool CallingConvSupportsAllImplicits = (CC != CallingConv::AMDGPU_Gfx);
    auto &InfoCache = static_cast<AMDGPUInformationCache &>(A.getInfoCache());

    auto AddAttribute = [&](StringRef AttrName) {
      if (Attributes.insert(AttrName).second)
        Change = ChangeStatus::CHANGED;
    };

    // Check for Intrinsics and propagate attributes.
    const AACallEdges &AAEdges = A.getAAFor<AACallEdges>(
        *this, this->getIRPosition(), DepClassTy::REQUIRED);

    // We have to assume that we can reach a function with these attributes.
    // We do not consider inline assembly as an unknown callee.
    if (CallingConvSupportsAllImplicits && AAEdges.hasNonAsmUnknownCallee()) {
      for (StringRef AttrName : ImplicitAttrNames) {
        AddAttribute(AttrName);
      }
    }

    bool NeedsQueuePtr = false;
    bool HasCall = false;
    for (Function *Callee : AAEdges.getOptimisticEdges()) {
      Intrinsic::ID IID = Callee->getIntrinsicID();
      if (IID != Intrinsic::not_intrinsic) {
        if (!IsNonEntryFunc && IID == Intrinsic::amdgcn_kernarg_segment_ptr) {
          AddAttribute("amdgpu-kernarg-segment-ptr");
          continue;
        }

        bool NonKernelOnly = false;
        StringRef AttrName =
            intrinsicToAttrName(IID, NonKernelOnly, NeedsQueuePtr);

        if (!AttrName.empty() && (IsNonEntryFunc || !NonKernelOnly))
          AddAttribute(AttrName);

        continue;
      }

      HasCall = true;
      const AAAMDAttributes &AAAMD = A.getAAFor<AAAMDAttributes>(
          *this, IRPosition::function(*Callee), DepClassTy::REQUIRED);
      const DenseSet<StringRef> &CalleeAttributes = AAAMD.getAttributes();
      // Propagate implicit attributes from called function.
      for (StringRef AttrName : ImplicitAttrNames)
        if (CalleeAttributes.count(AttrName))
          AddAttribute(AttrName);
    }

    HasCall |= AAEdges.hasUnknownCallee();
    if (!IsNonEntryFunc && HasCall)
      AddAttribute("amdgpu-calls");

    // Check the function body.
    auto CheckAlloca = [&](Instruction &I) {
      AddAttribute("amdgpu-stack-objects");
      // One alloca is enough; stop iterating.
      return false;
    };

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CheckAlloca, *this, {Instruction::Alloca},
                              UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr || Attributes.count("amdgpu-queue-ptr")) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    auto CheckAddrSpaceCasts = [&](Instruction &I) {
      unsigned SrcAS = static_cast<AddrSpaceCastInst &>(I).getSrcAddressSpace();
      if (castRequiresQueuePtr(SrcAS)) {
        NeedsQueuePtr = true;
        return false;
      }
      return true;
    };

    bool HasApertureRegs = InfoCache.hasApertureRegs(*F);

    // `checkForAllInstructions` is much cheaper than going through all
    // instructions, so try it first.

    // amdgpu-queue-ptr is not needed if aperture regs are present.
    if (!HasApertureRegs)
      A.checkForAllInstructions(CheckAddrSpaceCasts, *this,
                                {Instruction::AddrSpaceCast},
                                UsedAssumedInformation);

    // If we found that we need amdgpu-queue-ptr, nothing else to do.
    if (NeedsQueuePtr) {
      AddAttribute("amdgpu-queue-ptr");
      return Change;
    }

    if (!IsNonEntryFunc && HasApertureRegs)
      return Change;

    // Fall back to scanning every constant operand in the body.
    for (BasicBlock &BB : *F) {
      for (Instruction &I : BB) {
        for (const Use &U : I.operands()) {
          if (const auto *C = dyn_cast<Constant>(U)) {
            if (InfoCache.needsQueuePtr(C, *F)) {
              AddAttribute("amdgpu-queue-ptr");
              return Change;
            }
          }
        }
      }
    }

    return Change;
  }

  ChangeStatus manifest(Attributor &A) override {
    SmallVector<Attribute, 8> AttrList;
    LLVMContext &Ctx = getAssociatedFunction()->getContext();

    for (StringRef AttrName : Attributes)
      AttrList.push_back(Attribute::get(Ctx, AttrName));

    return IRAttributeManifest::manifestAttrs(A, getIRPosition(), AttrList,
                                              /* ForceReplace */ true);
  }

  const std::string getAsStr() const override {
    return "AMDInfo[" + std::to_string(Attributes.size()) + "]";
  }

  const DenseSet<StringRef> &getAttributes() const override {
    return Attributes;
  }

  /// See AbstractAttribute::trackStatistics()
  void trackStatistics() const override {}

  DenseSet<StringRef> Attributes;
};

AAAMDAttributes &AAAMDAttributes::createForPosition(const IRPosition &IRP,
                                                    Attributor &A) {
  if (IRP.getPositionKind() == IRPosition::IRP_FUNCTION)
    return *new (A.Allocator) AAAMDAttributesFunction(IRP, A);
  llvm_unreachable("AAAMDAttributes is only valid for function position");
}

class AMDGPUAttributor : public ModulePass {
public:
  AMDGPUAttributor() : ModulePass(ID) {}

  /// doInitialization - Virtual method overridden by subclasses to do
  /// any necessary initialization before any pass is run.
  bool doInitialization(Module &) override {
    auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
    if (!TPC)
      report_fatal_error("TargetMachine is required");

    TM = &TPC->getTM<TargetMachine>();
    return false;
  }

  bool runOnModule(Module &M) override {
    SetVector<Function *> Functions;
    AnalysisGetter AG;
    for (Function &F : M) {
      if (!F.isIntrinsic())
        Functions.insert(&F);
    }

    CallGraphUpdater CGUpdater;
    BumpPtrAllocator Allocator;
    AMDGPUInformationCache InfoCache(M, AG, Allocator, nullptr, *TM);
    Attributor A(Functions, InfoCache, CGUpdater);

    for (Function &F : M) {
      if (!F.isIntrinsic()) {
        A.getOrCreateAAFor<AAAMDAttributes>(IRPosition::function(F));
        A.getOrCreateAAFor<AAAMDWorkGroupSize>(IRPosition::function(F));
      }
    }

    ChangeStatus Change = A.run();
    return Change == ChangeStatus::CHANGED;
  }

  StringRef getPassName() const override { return "AMDGPU Attributor"; }

  static char ID;
  TargetMachine *TM;
};

char AMDGPUAttributor::ID = 0;

Pass *llvm::createAMDGPUAttributorPass() { return new AMDGPUAttributor(); }
INITIALIZE_PASS(AMDGPUAttributor, DEBUG_TYPE, "AMDGPU Attributor", false, false)