lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp

   1 //===-- AMDGPUAnnotateUniformValues.cpp - ---------------------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This pass adds amdgpu.uniform metadata to IR values so this information
  11 /// can be used during instruction selection.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPU.h"
  16 #include "llvm/ADT/SetVector.h"
  17 #include "llvm/Analysis/LegacyDivergenceAnalysis.h"
  18 #include "llvm/Analysis/LoopInfo.h"
  19 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
  20 #include "llvm/IR/IRBuilder.h"
  21 #include "llvm/IR/InstVisitor.h"
  22 #include "llvm/Support/Debug.h"
  23 #include "llvm/Support/raw_ostream.h"
  24
  25 #define DEBUG_TYPE "amdgpu-annotate-uniform"
  26
  27 using namespace llvm;
  28
  29 namespace {
  30
  31 class AMDGPUAnnotateUniformValues : public FunctionPass,
  32                        public InstVisitor<AMDGPUAnnotateUniformValues> {
  33   LegacyDivergenceAnalysis *DA;
  34   MemoryDependenceResults *MDR;
  35   LoopInfo *LI;
  36   DenseMap<Value*, GetElementPtrInst*> noClobberClones;
  37   bool isKernelFunc;
  38
  39 public:
  40   static char ID;
  41   AMDGPUAnnotateUniformValues() :
  42     FunctionPass(ID) { }
  43   bool doInitialization(Module &M) override;
  44   bool runOnFunction(Function &F) override;
  45   StringRef getPassName() const override {
  46     return "AMDGPU Annotate Uniform Values";
  47   }
  48   void getAnalysisUsage(AnalysisUsage &AU) const override {
  49     AU.addRequired<LegacyDivergenceAnalysis>();
  50     AU.addRequired<MemoryDependenceWrapperPass>();
  51     AU.addRequired<LoopInfoWrapperPass>();
  52     AU.setPreservesAll();
  53  }
  54
  55   void visitBranchInst(BranchInst &I);
  56   void visitLoadInst(LoadInst &I);
  57   bool isClobberedInFunction(LoadInst * Load);
  58 };
  59
  60 } // End anonymous namespace
  61
// Pass registration under the DEBUG_TYPE name. The dependencies listed here
// are the same three analyses requested in getAnalysisUsage().
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                      "Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
                    "Add AMDGPU uniform metadata", false, false)

// Pass identifier handed to the FunctionPass constructor.
char AMDGPUAnnotateUniformValues::ID = 0;
  71
  72 static void setUniformMetadata(Instruction *I) {
  73   I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
  74 }
  75 static void setNoClobberMetadata(Instruction *I) {
  76   I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
  77 }
  78
  79 static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
  80   for (auto I : predecessors(Root))
  81     if (Set.insert(I))
  82       DFS(I, Set);
  83 }
  84
  85 bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
  86   // 1. get Loop for the Load->getparent();
  87   // 2. if it exists, collect all the BBs from the most outer
  88   // loop and check for the writes. If NOT - start DFS over all preds.
  89   // 3. Start DFS over all preds from the most outer loop header.
  90   SetVector<BasicBlock *> Checklist;
  91   BasicBlock *Start = Load->getParent();
  92   Checklist.insert(Start);
  93   const Value *Ptr = Load->getPointerOperand();
  94   const Loop *L = LI->getLoopFor(Start);
  95   if (L) {
  96     const Loop *P = L;
  97     do {
  98       L = P;
  99       P = P->getParentLoop();
 100     } while (P);
 101     Checklist.insert(L->block_begin(), L->block_end());
 102     Start = L->getHeader();
 103   }
 104
 105   DFS(Start, Checklist);
 106   for (auto &BB : Checklist) {
 107     BasicBlock::iterator StartIt = (!L && (BB == Load->getParent())) ?
 108       BasicBlock::iterator(Load) : BB->end();
 109     auto Q = MDR->getPointerDependencyFrom(MemoryLocation(Ptr), true,
 110                                            StartIt, BB, Load);
 111     if (Q.isClobber() || Q.isUnknown())
 112       return true;
 113   }
 114   return false;
 115 }
 116
 117 void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
 118   if (DA->isUniform(&I))
 119     setUniformMetadata(I.getParent()->getTerminator());
 120 }
 121
 122 void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
 123   Value *Ptr = I.getPointerOperand();
 124   if (!DA->isUniform(Ptr))
 125     return;
 126   auto isGlobalLoad = [&](LoadInst &Load)->bool {
 127     return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
 128   };
 129   // We're tracking up to the Function boundaries
 130   // We cannot go beyond because of FunctionPass restrictions
 131   // Thus we can ensure that memory not clobbered for memory
 132   // operations that live in kernel only.
 133   bool NotClobbered = isKernelFunc &&   !isClobberedInFunction(&I);
 134   Instruction *PtrI = dyn_cast<Instruction>(Ptr);
 135   if (!PtrI && NotClobbered && isGlobalLoad(I)) {
 136     if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
 137       // Lookup for the existing GEP
 138       if (noClobberClones.count(Ptr)) {
 139         PtrI = noClobberClones[Ptr];
 140       } else {
 141         // Create GEP of the Value
 142         Function *F = I.getParent()->getParent();
 143         Value *Idx = Constant::getIntegerValue(
 144           Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
 145         // Insert GEP at the entry to make it dominate all uses
 146         PtrI = GetElementPtrInst::Create(
 147           Ptr->getType()->getPointerElementType(), Ptr,
 148           ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
 149       }
 150       I.replaceUsesOfWith(Ptr, PtrI);
 151     }
 152   }
 153
 154   if (PtrI) {
 155     setUniformMetadata(PtrI);
 156     if (NotClobbered)
 157       setNoClobberMetadata(PtrI);
 158   }
 159 }
 160
/// Module-level initialization hook. Does nothing with \p M and always
/// returns false.
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
  return false;
}
 164
 165 bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
 166   if (skipFunction(F))
 167     return false;
 168
 169   DA  = &getAnalysis<LegacyDivergenceAnalysis>();
 170   MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
 171   LI  = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
 172   isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
 173
 174   visit(F);
 175   noClobberClones.clear();
 176   return true;
 177 }
 178
/// Factory for the pass: heap-allocates a new AMDGPUAnnotateUniformValues
/// and returns it as a FunctionPass pointer.
/// NOTE(review): the returned object is allocated with `new` and not freed
/// here; presumably the caller (pass manager) takes ownership - confirm.
FunctionPass *
llvm::createAMDGPUAnnotateUniformValues() {
  return new AMDGPUAnnotateUniformValues();
}