llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp

   1 //===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass hoists instructions to enable speculative execution on
  10 // targets where branches are expensive. This is aimed at GPUs. It
  11 // currently works on simple if-then and if-then-else
  12 // patterns.
  13 //
  14 // Removing branches is not the only motivation for this
  15 // pass. E.g. consider this code and assume that there is no
  16 // addressing mode for multiplying by sizeof(*a):
  17 //
  18 //   if (b > 0)
  19 //     c = a[i + 1]
  20 //   if (d > 0)
  21 //     e = a[i + 2]
  22 //
  23 // turns into
  24 //
  25 //   p = &a[i + 1];
  26 //   if (b > 0)
  27 //     c = *p;
  28 //   q = &a[i + 2];
  29 //   if (d > 0)
  30 //     e = *q;
  31 //
  32 // which could later be optimized to
  33 //
  34 //   r = &a[i];
  35 //   if (b > 0)
  36 //     c = r[1];
  37 //   if (d > 0)
  38 //     e = r[2];
  39 //
  40 // Later passes sink back much of the speculated code that did not enable
  41 // further optimization.
  42 //
   43 // This pass is more aggressive than the function SpeculativelyExecuteBB in
  44 // SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
  45 // it will speculate at most one instruction. It also will not speculate if
  46 // there is a value defined in the if-block that is only used in the then-block.
  47 // These restrictions make sense since the speculation in SimplifyCFG seems
  48 // aimed at introducing cheap selects, while this pass is intended to do more
  49 // aggressive speculation while counting on later passes to either capitalize on
  50 // that or clean it up.
  51 //
  52 // If the pass was created by calling
  53 // createSpeculativeExecutionIfHasBranchDivergencePass or the
  54 // -spec-exec-only-if-divergent-target option is present, this pass only has an
  55 // effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
  56 // on other targets, it is a nop.
  57 //
  58 // This lets you include this pass unconditionally in the IR pass pipeline, but
  59 // only enable it for relevant targets.
  60 //
  61 //===----------------------------------------------------------------------===//
  62
  63 #include "llvm/Transforms/Scalar/SpeculativeExecution.h"
  64 #include "llvm/ADT/SmallPtrSet.h"
  65 #include "llvm/Analysis/GlobalsModRef.h"
  66 #include "llvm/Analysis/TargetTransformInfo.h"
  67 #include "llvm/Analysis/ValueTracking.h"
  68 #include "llvm/IR/Instructions.h"
  69 #include "llvm/IR/IntrinsicInst.h"
  70 #include "llvm/IR/Operator.h"
  71 #include "llvm/InitializePasses.h"
  72 #include "llvm/Support/CommandLine.h"
  73 #include "llvm/Support/Debug.h"
  74
  75 using namespace llvm;
  76
  77 #define DEBUG_TYPE "speculative-execution"
  78
  79 // The risk that speculation will not pay off increases with the
  80 // number of instructions speculated, so we put a limit on that.
  81 static cl::opt<unsigned> SpecExecMaxSpeculationCost(
  82     "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden,
  83     cl::desc("Speculative execution is not applied to basic blocks where "
  84              "the cost of the instructions to speculatively execute "
  85              "exceeds this limit."));
  86
  87 // Speculating just a few instructions from a larger block tends not
  88 // to be profitable and this limit prevents that. A reason for that is
  89 // that small basic blocks are more likely to be candidates for
  90 // further optimization.
  91 static cl::opt<unsigned> SpecExecMaxNotHoisted(
  92     "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden,
  93     cl::desc("Speculative execution is not applied to basic blocks where the "
  94              "number of instructions that would not be speculatively executed "
  95              "exceeds this limit."));
  96
  97 static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
  98     "spec-exec-only-if-divergent-target", cl::init(false), cl::Hidden,
  99     cl::desc("Speculative execution is applied only to targets with divergent "
 100              "branches, even if the pass was configured to apply only to all "
 101              "targets."));
 102
 103 namespace {
 104
 105 class SpeculativeExecutionLegacyPass : public FunctionPass {
 106 public:
 107   static char ID;
 108   explicit SpeculativeExecutionLegacyPass(bool OnlyIfDivergentTarget = false)
 109       : FunctionPass(ID), OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
 110                                                 SpecExecOnlyIfDivergentTarget),
 111         Impl(OnlyIfDivergentTarget) {}
 112
 113   void getAnalysisUsage(AnalysisUsage &AU) const override;
 114   bool runOnFunction(Function &F) override;
 115
 116   StringRef getPassName() const override {
 117     if (OnlyIfDivergentTarget)
 118       return "Speculatively execute instructions if target has divergent "
 119              "branches";
 120     return "Speculatively execute instructions";
 121   }
 122
 123 private:
 124   // Variable preserved purely for correct name printing.
 125   const bool OnlyIfDivergentTarget;
 126
 127   SpeculativeExecutionPass Impl;
 128 };
 129 } // namespace
 130
 131 char SpeculativeExecutionLegacyPass::ID = 0;
 132 INITIALIZE_PASS_BEGIN(SpeculativeExecutionLegacyPass, "speculative-execution",
 133                       "Speculatively execute instructions", false, false)
 134 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
 135 INITIALIZE_PASS_END(SpeculativeExecutionLegacyPass, "speculative-execution",
 136                     "Speculatively execute instructions", false, false)
 137
 138 void SpeculativeExecutionLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
 139   AU.addRequired<TargetTransformInfoWrapperPass>();
 140   AU.addPreserved<GlobalsAAWrapperPass>();
 141   AU.setPreservesCFG();
 142 }
 143
 144 bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
 145   if (skipFunction(F))
 146     return false;
 147
 148   auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
 149   return Impl.runImpl(F, TTI);
 150 }
 151
 152 namespace llvm {
 153
 154 bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
 155   if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence(&F)) {
 156     LLVM_DEBUG(dbgs() << "Not running SpeculativeExecution because "
 157                          "TTI->hasBranchDivergence() is false.\n");
 158     return false;
 159   }
 160
 161   this->TTI = TTI;
 162   bool Changed = false;
 163   for (auto& B : F) {
 164     Changed |= runOnBasicBlock(B);
 165   }
 166   return Changed;
 167 }
 168
 169 bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) {
 170   BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
 171   if (BI == nullptr)
 172     return false;
 173
 174   if (BI->getNumSuccessors() != 2)
 175     return false;
 176   BasicBlock &Succ0 = *BI->getSuccessor(0);
 177   BasicBlock &Succ1 = *BI->getSuccessor(1);
 178
 179   if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) {
 180     return false;
 181   }
 182
 183   // Hoist from if-then (triangle).
 184   if (Succ0.getSinglePredecessor() != nullptr &&
 185       Succ0.getSingleSuccessor() == &Succ1) {
 186     return considerHoistingFromTo(Succ0, B);
 187   }
 188
 189   // Hoist from if-else (triangle).
 190   if (Succ1.getSinglePredecessor() != nullptr &&
 191       Succ1.getSingleSuccessor() == &Succ0) {
 192     return considerHoistingFromTo(Succ1, B);
 193   }
 194
 195   // Hoist from if-then-else (diamond), but only if it is equivalent to
 196   // an if-else or if-then due to one of the branches doing nothing.
 197   if (Succ0.getSinglePredecessor() != nullptr &&
 198       Succ1.getSinglePredecessor() != nullptr &&
 199       Succ1.getSingleSuccessor() != nullptr &&
 200       Succ1.getSingleSuccessor() != &B &&
 201       Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) {
 202     // If a block has only one instruction, then that is a terminator
 203     // instruction so that the block does nothing. This does happen.
 204     if (Succ1.size() == 1) // equivalent to if-then
 205       return considerHoistingFromTo(Succ0, B);
 206     if (Succ0.size() == 1) // equivalent to if-else
 207       return considerHoistingFromTo(Succ1, B);
 208   }
 209
 210   return false;
 211 }
 212
 213 static InstructionCost ComputeSpeculationCost(const Instruction *I,
 214                                               const TargetTransformInfo &TTI) {
 215   switch (Operator::getOpcode(I)) {
 216     case Instruction::GetElementPtr:
 217     case Instruction::Add:
 218     case Instruction::Mul:
 219     case Instruction::And:
 220     case Instruction::Or:
 221     case Instruction::Select:
 222     case Instruction::Shl:
 223     case Instruction::Sub:
 224     case Instruction::LShr:
 225     case Instruction::AShr:
 226     case Instruction::Xor:
 227     case Instruction::ZExt:
 228     case Instruction::SExt:
 229     case Instruction::Call:
 230     case Instruction::BitCast:
 231     case Instruction::PtrToInt:
 232     case Instruction::IntToPtr:
 233     case Instruction::AddrSpaceCast:
 234     case Instruction::FPToUI:
 235     case Instruction::FPToSI:
 236     case Instruction::UIToFP:
 237     case Instruction::SIToFP:
 238     case Instruction::FPExt:
 239     case Instruction::FPTrunc:
 240     case Instruction::FAdd:
 241     case Instruction::FSub:
 242     case Instruction::FMul:
 243     case Instruction::FDiv:
 244     case Instruction::FRem:
 245     case Instruction::FNeg:
 246     case Instruction::ICmp:
 247     case Instruction::FCmp:
 248     case Instruction::Trunc:
 249     case Instruction::Freeze:
 250     case Instruction::ExtractElement:
 251     case Instruction::InsertElement:
 252     case Instruction::ShuffleVector:
 253     case Instruction::ExtractValue:
 254     case Instruction::InsertValue:
 255       return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
 256
 257     default:
 258       return InstructionCost::getInvalid(); // Disallow anything not explicitly
 259                                             // listed.
 260   }
 261 }
 262
 263 bool SpeculativeExecutionPass::considerHoistingFromTo(
 264     BasicBlock &FromBlock, BasicBlock &ToBlock) {
 265   SmallPtrSet<const Instruction *, 8> NotHoisted;
 266   const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](const User *U) {
 267     // Debug variable has special operand to check it's not hoisted.
 268     if (const auto *DVI = dyn_cast<DbgVariableIntrinsic>(U)) {
 269       return all_of(DVI->location_ops(), [&NotHoisted](Value *V) {
 270         if (const auto *I = dyn_cast_or_null<Instruction>(V)) {
 271           if (!NotHoisted.contains(I))
 272             return true;
 273         }
 274         return false;
 275       });
 276     }
 277
 278     // Usially debug label intrinsic corresponds to label in LLVM IR. In these
 279     // cases we should not move it here.
 280     // TODO: Possible special processing needed to detect it is related to a
 281     // hoisted instruction.
 282     if (isa<DbgLabelInst>(U))
 283       return false;
 284
 285     for (const Value *V : U->operand_values()) {
 286       if (const Instruction *I = dyn_cast<Instruction>(V)) {
 287         if (NotHoisted.contains(I))
 288           return false;
 289       }
 290     }
 291     return true;
 292   };
 293
 294   InstructionCost TotalSpeculationCost = 0;
 295   unsigned NotHoistedInstCount = 0;
 296   for (const auto &I : FromBlock) {
 297     const InstructionCost Cost = ComputeSpeculationCost(&I, *TTI);
 298     if (Cost.isValid() && isSafeToSpeculativelyExecute(&I) &&
 299         AllPrecedingUsesFromBlockHoisted(&I)) {
 300       TotalSpeculationCost += Cost;
 301       if (TotalSpeculationCost > SpecExecMaxSpeculationCost)
 302         return false;  // too much to hoist
 303     } else {
 304       // Debug info intrinsics should not be counted for threshold.
 305       if (!isa<DbgInfoIntrinsic>(I))
 306         NotHoistedInstCount++;
 307       if (NotHoistedInstCount > SpecExecMaxNotHoisted)
 308         return false; // too much left behind
 309       NotHoisted.insert(&I);
 310     }
 311   }
 312
 313   for (auto I = FromBlock.begin(); I != FromBlock.end();) {
 314     // We have to increment I before moving Current as moving Current
 315     // changes the list that I is iterating through.
 316     auto Current = I;
 317     ++I;
 318     if (!NotHoisted.count(&*Current)) {
 319       Current->moveBeforePreserving(ToBlock.getTerminator());
 320     }
 321   }
 322   return true;
 323 }
 324
 325 FunctionPass *createSpeculativeExecutionPass() {
 326   return new SpeculativeExecutionLegacyPass();
 327 }
 328
 329 FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
 330   return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
 331 }
 332
 333 SpeculativeExecutionPass::SpeculativeExecutionPass(bool OnlyIfDivergentTarget)
 334     : OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
 335                             SpecExecOnlyIfDivergentTarget) {}
 336
 337 PreservedAnalyses SpeculativeExecutionPass::run(Function &F,
 338                                                 FunctionAnalysisManager &AM) {
 339   auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
 340
 341   bool Changed = runImpl(F, TTI);
 342
 343   if (!Changed)
 344     return PreservedAnalyses::all();
 345   PreservedAnalyses PA;
 346   PA.preserveSet<CFGAnalyses>();
 347   return PA;
 348 }
 349 }  // namespace llvm