1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This pass adds amdgpu.uniform metadata to IR values so this information
11 /// can be used during instruction selection.
13 //===----------------------------------------------------------------------===//
16 #include "llvm/ADT/SetVector.h"
17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
20 #include "llvm/IR/IRBuilder.h"
21 #include "llvm/IR/InstVisitor.h"
22 #include "llvm/Support/Debug.h"
23 #include "llvm/Support/raw_ostream.h"
25 #define DEBUG_TYPE "amdgpu-annotate-uniform"
/// Function pass, also an InstVisitor over itself, that declares visitors for
/// branch and load instructions (both defined out of line).
// NOTE(review): the embedded line numbers in this listing skip values, so
// some lines look to be missing; e.g. the LI and isKernelFunc names used by
// the out-of-line members have no declaration visible here - confirm
// against the full file.
31 class AMDGPUAnnotateUniformValues
: public FunctionPass
,
32 public InstVisitor
<AMDGPUAnnotateUniformValues
> {
// Divergence analysis; assigned in runOnFunction and queried through
// isUniform() by the visitors.
33 LegacyDivergenceAnalysis
*DA
;
// Memory dependence results; assigned in runOnFunction and queried by
// isClobberedInFunction.
34 MemoryDependenceResults
*MDR
;
// Maps a pointer Value to a GEP instruction. visitLoadInst looks pointers up
// here and runOnFunction clears the map.
36 DenseMap
<Value
*, GetElementPtrInst
*> noClobberClones
;
41 AMDGPUAnnotateUniformValues() :
43 bool doInitialization(Module
&M
) override
;
44 bool runOnFunction(Function
&F
) override
;
// Human-readable pass name.
45 StringRef
getPassName() const override
{
46 return "AMDGPU Annotate Uniform Values";
// Marks as required the three analyses that runOnFunction fetches with
// getAnalysis<>().
48 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
49 AU
.addRequired
<LegacyDivergenceAnalysis
>();
50 AU
.addRequired
<MemoryDependenceWrapperPass
>();
51 AU
.addRequired
<LoopInfoWrapperPass
>();
// Visitor callbacks and the clobber query they rely on; see the out-of-line
// definitions.
55 void visitBranchInst(BranchInst
&I
);
56 void visitLoadInst(LoadInst
&I
);
57 bool isClobberedInFunction(LoadInst
* Load
);
60 } // End anonymous namespace
// Pass registration under the DEBUG_TYPE argument string, with the description
// "Add AMDGPU uniform metadata". The dependencies listed are the same three
// analyses that getAnalysisUsage marks as required.
62 INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues
, DEBUG_TYPE
,
63 "Add AMDGPU uniform metadata", false, false)
64 INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis
)
65 INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass
)
66 INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass
)
67 INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues
, DEBUG_TYPE
,
68 "Add AMDGPU uniform metadata", false, false)
// Out-of-class definition of the pass's ID member.
70 char AMDGPUAnnotateUniformValues::ID
= 0;
/// Attaches a metadata node with no operands to \p I under the kind name
/// "amdgpu.uniform". The node is created in \p I's own context.
72 static void setUniformMetadata(Instruction
*I
) {
73 I
->setMetadata("amdgpu.uniform", MDNode::get(I
->getContext(), {}));
/// Attaches a metadata node with no operands to \p I under the kind name
/// "amdgpu.noclobber". The node is created in \p I's own context.
75 static void setNoClobberMetadata(Instruction
*I
) {
76 I
->setMetadata("amdgpu.noclobber", MDNode::get(I
->getContext(), {}));
/// Iterates over the predecessors of \p Root, with \p Set passed by reference
/// as the accumulator (the caller hands in its block checklist).
// NOTE(review): the loop body is not visible in this listing; presumably it
// inserts each not-yet-seen predecessor into Set and recurses - confirm.
79 static void DFS(BasicBlock
*Root
, SetVector
<BasicBlock
*> & Set
) {
80 for (auto I
: predecessors(Root
))
/// Builds a checklist of basic blocks around \p Load and queries the memory
/// dependence analysis in each of them for the location \p Load reads.
// NOTE(review): several statements of this function (the loop-climbing code,
// the tail of the dependency query and the return statements) are not
// visible in this listing; the notes below cover only the visible lines.
85 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst
* Load
) {
86 // 1. Get the loop (if any) that contains Load->getParent().
87 // 2. If there is one, collect all the blocks of the outermost enclosing
88 // loop and check them for writes. If not, start the DFS over all preds.
89 // 3. Start the DFS over all preds from the outermost loop header.
90 SetVector
<BasicBlock
*> Checklist
;
// The load's own block is always on the checklist.
91 BasicBlock
*Start
= Load
->getParent();
92 Checklist
.insert(Start
);
93 const Value
*Ptr
= Load
->getPointerOperand();
94 const Loop
*L
= LI
->getLoopFor(Start
);
// Step towards the enclosing loop. NOTE(review): the surrounding statements
// are not visible; presumably this climbs until L is the outermost loop.
99 P
= P
->getParentLoop();
// Add every block of loop L, then continue the predecessor walk from L's
// header rather than from the load's block.
101 Checklist
.insert(L
->block_begin(), L
->block_end());
102 Start
= L
->getHeader();
// Extend the checklist with the blocks DFS collects starting at Start.
105 DFS(Start
, Checklist
);
106 for (auto &BB
: Checklist
) {
// Start position for the query: the load itself when there is no loop and BB
// is the load's own block, otherwise the end of BB.
107 BasicBlock::iterator StartIt
= (!L
&& (BB
== Load
->getParent())) ?
108 BasicBlock::iterator(Load
) : BB
->end();
// NOTE(review): the remaining arguments of this call are not visible here.
109 auto Q
= MDR
->getPointerDependencyFrom(MemoryLocation(Ptr
), true,
// Clobber and Unknown results are tested together. The statement governed by
// this `if` is not visible; presumably it reports the load as clobbered.
111 if (Q
.isClobber() || Q
.isUnknown())
/// Branch visitor: when the divergence analysis reports \p I as uniform, tags
/// the terminator of \p I's parent block with "amdgpu.uniform" metadata.
117 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst
&I
) {
118 if (DA
->isUniform(&I
))
119 setUniformMetadata(I
.getParent()->getTerminator());
/// Load visitor: examines the pointer operand of \p I and tags the instruction
/// that produces it with "amdgpu.uniform" and "amdgpu.noclobber" metadata.
/// When the pointer is an Argument or GlobalValue rather than an instruction,
/// a GEP at the function entry stands in for it.
// NOTE(review): some control-flow lines (early return, else branch, the
// conditions guarding the two set*Metadata calls) are not visible in this
// listing; the notes below cover only the visible lines.
122 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst
&I
) {
123 Value
*Ptr
= I
.getPointerOperand();
// Test for a non-uniform address. The governed statement is not visible;
// presumably such loads are skipped.
124 if (!DA
->isUniform(Ptr
))
// True when the load's pointer is in the AMDGPU global address space.
126 auto isGlobalLoad
= [&](LoadInst
&Load
)->bool {
127 return Load
.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS
;
129 // The clobber search only goes up to the function boundary; a
130 // FunctionPass cannot look beyond it.
131 // So "not clobbered" can only be established for memory
132 // operations that live in a kernel.
133 bool NotClobbered
= isKernelFunc
&& !isClobberedInFunction(&I
);
// PtrI is null when the address is not produced by an instruction.
134 Instruction
*PtrI
= dyn_cast
<Instruction
>(Ptr
);
135 if (!PtrI
&& NotClobbered
&& isGlobalLoad(I
)) {
136 if (isa
<Argument
>(Ptr
) || isa
<GlobalValue
>(Ptr
)) {
137 // Look up an existing GEP for this pointer.
// NOTE(review): no insertion into noClobberClones is visible in this
// listing, so this lookup may never hit - confirm against the full file.
138 if (noClobberClones
.count(Ptr
)) {
139 PtrI
= noClobberClones
[Ptr
];
141 // Create a GEP of the Value with a single zero index.
142 Function
*F
= I
.getParent()->getParent();
// NOTE(review): an i32 type is requested here but the APInt is 64 bits wide;
// confirm the intended index width.
143 Value
*Idx
= Constant::getIntegerValue(
144 Type::getInt32Ty(Ptr
->getContext()), APInt(64, 0));
145 // Insert the GEP at the entry block so that it dominates all uses.
146 PtrI
= GetElementPtrInst::Create(
147 Ptr
->getType()->getPointerElementType(), Ptr
,
148 ArrayRef
<Value
*>(Idx
), Twine(""), F
->getEntryBlock().getFirstNonPHI());
// Make the load use the instruction in place of the raw pointer, so there is
// an Instruction to attach the metadata to.
150 I
.replaceUsesOfWith(Ptr
, PtrI
);
155 setUniformMetadata(PtrI
);
157 setNoClobberMetadata(PtrI
);
/// Out-of-line definition of the doInitialization override declared in the
/// class.
// NOTE(review): the body is not visible in this listing.
161 bool AMDGPUAnnotateUniformValues::doInitialization(Module
&M
) {
/// Per-function entry point: fetches the required analyses and records
/// whether \p F uses the AMDGPU_KERNEL calling convention, for use by the
/// visitors.
// NOTE(review): the instruction traversal and the return statement are not
// visible in this listing.
165 bool AMDGPUAnnotateUniformValues::runOnFunction(Function
&F
) {
169 DA
= &getAnalysis
<LegacyDivergenceAnalysis
>();
170 MDR
= &getAnalysis
<MemoryDependenceWrapperPass
>().getMemDep();
171 LI
= &getAnalysis
<LoopInfoWrapperPass
>().getLoopInfo();
// visitLoadInst only runs the clobber check when this flag is set.
172 isKernelFunc
= F
.getCallingConv() == CallingConv::AMDGPU_KERNEL
;
// Empty the pointer-to-GEP map before leaving this function.
175 noClobberClones
.clear();
// Factory: returns a newly allocated AMDGPUAnnotateUniformValues instance.
// NOTE(review): the return-type line is not visible in this listing.
180 llvm::createAMDGPUAnnotateUniformValues() {
181 return new AMDGPUAnnotateUniformValues();