llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp

   1 //===-- AMDGPUAlwaysInlinePass.cpp - Promote Allocas ----------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 /// \file
  10 /// This pass marks all internal functions as always_inline and creates
  11 /// duplicates of all other functions and marks the duplicates as always_inline.
  12 //
  13 //===----------------------------------------------------------------------===//
  14
  15 #include "AMDGPU.h"
  16 #include "AMDGPUTargetMachine.h"
  17 #include "Utils/AMDGPUBaseInfo.h"
  18 #include "llvm/IR/Module.h"
  19 #include "llvm/Pass.h"
  20 #include "llvm/Support/CommandLine.h"
  21
  22 using namespace llvm;
  23
  24 namespace {
  25
  26 static cl::opt<bool> StressCalls(
  27   "amdgpu-stress-function-calls",
  28   cl::Hidden,
  29   cl::desc("Force all functions to be noinline"),
  30   cl::init(false));
  31
  32 class AMDGPUAlwaysInline : public ModulePass {
  33   bool GlobalOpt;
  34
  35 public:
  36   static char ID;
  37
  38   AMDGPUAlwaysInline(bool GlobalOpt = false) :
  39     ModulePass(ID), GlobalOpt(GlobalOpt) { }
  40   bool runOnModule(Module &M) override;
  41
  42   void getAnalysisUsage(AnalysisUsage &AU) const override {
  43     AU.setPreservesAll();
  44  }
  45 };
  46
  47 } // End anonymous namespace
  48
  49 INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
  50                 "AMDGPU Inline All Functions", false, false)
  51
  52 char AMDGPUAlwaysInline::ID = 0;
  53
  54 static void
  55 recursivelyVisitUsers(GlobalValue &GV,
  56                       SmallPtrSetImpl<Function *> &FuncsToAlwaysInline) {
  57   SmallVector<User *, 16> Stack(GV.users());
  58
  59   SmallPtrSet<const Value *, 8> Visited;
  60
  61   while (!Stack.empty()) {
  62     User *U = Stack.pop_back_val();
  63     if (!Visited.insert(U).second)
  64       continue;
  65
  66     if (Instruction *I = dyn_cast<Instruction>(U)) {
  67       Function *F = I->getParent()->getParent();
  68       if (!AMDGPU::isEntryFunctionCC(F->getCallingConv())) {
  69         // FIXME: This is a horrible hack. We should always respect noinline,
  70         // and just let us hit the error when we can't handle this.
  71         //
  72         // Unfortunately, clang adds noinline to all functions at -O0. We have
  73         // to override this here. until that's fixed.
  74         F->removeFnAttr(Attribute::NoInline);
  75
  76         FuncsToAlwaysInline.insert(F);
  77         Stack.push_back(F);
  78       }
  79
  80       // No need to look at further users, but we do need to inline any callers.
  81       continue;
  82     }
  83
  84     append_range(Stack, U->users());
  85   }
  86 }
  87
  88 static bool alwaysInlineImpl(Module &M, bool GlobalOpt) {
  89   std::vector<GlobalAlias*> AliasesToRemove;
  90
  91   SmallPtrSet<Function *, 8> FuncsToAlwaysInline;
  92   SmallPtrSet<Function *, 8> FuncsToNoInline;
  93
  94   for (GlobalAlias &A : M.aliases()) {
  95     if (Function* F = dyn_cast<Function>(A.getAliasee())) {
  96       A.replaceAllUsesWith(F);
  97       AliasesToRemove.push_back(&A);
  98     }
  99
 100     // FIXME: If the aliasee isn't a function, it's some kind of constant expr
 101     // cast that won't be inlined through.
 102   }
 103
 104   if (GlobalOpt) {
 105     for (GlobalAlias* A : AliasesToRemove) {
 106       A->eraseFromParent();
 107     }
 108   }
 109
 110   // Always force inlining of any function that uses an LDS global address. This
 111   // is something of a workaround because we don't have a way of supporting LDS
 112   // objects defined in functions. LDS is always allocated by a kernel, and it
 113   // is difficult to manage LDS usage if a function may be used by multiple
 114   // kernels.
 115   //
 116   // OpenCL doesn't allow declaring LDS in non-kernels, so in practice this
 117   // should only appear when IPO passes manages to move LDs defined in a kernel
 118   // into a single user function.
 119
 120   for (GlobalVariable &GV : M.globals()) {
 121     // TODO: Region address
 122     unsigned AS = GV.getAddressSpace();
 123     if ((AS == AMDGPUAS::REGION_ADDRESS) ||
 124         (AS == AMDGPUAS::LOCAL_ADDRESS &&
 125          !AMDGPUTargetMachine::EnableLowerModuleLDS))
 126       recursivelyVisitUsers(GV, FuncsToAlwaysInline);
 127   }
 128
 129   if (!AMDGPUTargetMachine::EnableFunctionCalls || StressCalls) {
 130     auto IncompatAttr
 131       = StressCalls ? Attribute::AlwaysInline : Attribute::NoInline;
 132
 133     for (Function &F : M) {
 134       if (!F.isDeclaration() && !F.use_empty() &&
 135           !F.hasFnAttribute(IncompatAttr)) {
 136         if (StressCalls) {
 137           if (!FuncsToAlwaysInline.count(&F))
 138             FuncsToNoInline.insert(&F);
 139         } else
 140           FuncsToAlwaysInline.insert(&F);
 141       }
 142     }
 143   }
 144
 145   for (Function *F : FuncsToAlwaysInline)
 146     F->addFnAttr(Attribute::AlwaysInline);
 147
 148   for (Function *F : FuncsToNoInline)
 149     F->addFnAttr(Attribute::NoInline);
 150
 151   return !FuncsToAlwaysInline.empty() || !FuncsToNoInline.empty();
 152 }
 153
 154 bool AMDGPUAlwaysInline::runOnModule(Module &M) {
 155   return alwaysInlineImpl(M, GlobalOpt);
 156 }
 157
 158 ModulePass *llvm::createAMDGPUAlwaysInlinePass(bool GlobalOpt) {
 159   return new AMDGPUAlwaysInline(GlobalOpt);
 160 }
 161
 162 PreservedAnalyses AMDGPUAlwaysInlinePass::run(Module &M,
 163                                               ModuleAnalysisManager &AM) {
 164   alwaysInlineImpl(M, GlobalOpt);
 165   return PreservedAnalyses::all();
 166 }