lib/Analysis/SyncDependenceAnalysis.cpp

   1 //===- SyncDependenceAnalysis.cpp - Divergent Branch Dependence Calculation
   2 //--===//
   3 //
   4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   5 // See https://llvm.org/LICENSE.txt for license information.
   6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   7 //
   8 //===----------------------------------------------------------------------===//
   9 //
  10 // This file implements an algorithm that returns for a divergent branch
  11 // the set of basic blocks whose phi nodes become divergent due to divergent
  12 // control. These are the blocks that are reachable by two disjoint paths from
  13 // the branch or loop exits that have a reaching path that is disjoint from a
  14 // path to the loop latch.
  15 //
  16 // The SyncDependenceAnalysis is used in the DivergenceAnalysis to model
  17 // control-induced divergence in phi nodes.
  18 //
  19 // -- Summary --
  20 // The SyncDependenceAnalysis lazily computes sync dependences [3].
  21 // The analysis evaluates the disjoint path criterion [2] by a reduction
  22 // to SSA construction. The SSA construction algorithm is implemented as
  23 // a simple data-flow analysis [1].
  24 //
  25 // [1] "A Simple, Fast Dominance Algorithm", SPI '01, Cooper, Harvey and Kennedy
  26 // [2] "Efficiently Computing Static Single Assignment Form
  27 //     and the Control Dependence Graph", TOPLAS '91,
  28 //           Cytron, Ferrante, Rosen, Wegman and Zadeck
  29 // [3] "Improving Performance of OpenCL on CPUs", CC '12, Karrenberg and Hack
  30 // [4] "Divergence Analysis", TOPLAS '13, Sampaio, Souza, Collange and Pereira
  31 //
  32 // -- Sync dependence --
  33 // Sync dependence [4] characterizes the control flow aspect of the
  34 // propagation of branch divergence. For example,
  35 //
  36 //   %cond = icmp slt i32 %tid, 10
  37 //   br i1 %cond, label %then, label %else
  38 // then:
  39 //   br label %merge
  40 // else:
  41 //   br label %merge
  42 // merge:
  43 //   %a = phi i32 [ 0, %then ], [ 1, %else ]
  44 //
  45 // Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
  46 // because %tid is not on its use-def chains, %a is sync dependent on %tid
  47 // because the branch "br i1 %cond" depends on %tid and affects which value %a
  48 // is assigned to.
  49 //
  50 // -- Reduction to SSA construction --
  51 // There are two disjoint paths from A to X, if a certain variant of SSA
  52 // construction places a phi node in X under the following set-up scheme [2].
  53 //
  54 // This variant of SSA construction ignores incoming undef values.
  55 // That is paths from the entry without a definition do not result in
  56 // phi nodes.
  57 //
  58 //       entry
  59 //     /      \
  60 //    A        \
  61 //  /   \       Y
  62 // B     C     /
  63 //  \   /  \  /
  64 //    D     E
  65 //     \   /
  66 //       F
  67 // Assume that A contains a divergent branch. We are interested
  68 // in the set of all blocks where each block is reachable from A
  69 // via two disjoint paths. This would be the set {D, F} in this
  70 // case.
  71 // To generally reduce this query to SSA construction we introduce
  72 // a virtual variable x and assign to x different values in each
  73 // successor block of A.
  74 //           entry
  75 //         /      \
  76 //        A        \
  77 //      /   \       Y
  78 // x = 0   x = 1   /
  79 //      \  /   \  /
  80 //        D     E
  81 //         \   /
  82 //           F
  83 // Our flavor of SSA construction for x will construct the following
  84 //            entry
  85 //          /      \
  86 //         A        \
  87 //       /   \       Y
  88 // x0 = 0   x1 = 1  /
  89 //       \   /   \ /
  90 //      x2=phi    E
  91 //         \     /
  92 //          x3=phi
  93 // The blocks D and F contain phi nodes and are thus each reachable
  94 // by two disjoins paths from A.
  95 //
  96 // -- Remarks --
  97 // In case of loop exits we need to check the disjoint path criterion for loops
  98 // [2]. To this end, we check whether the definition of x differs between the
  99 // loop exit and the loop header (_after_ SSA construction).
 100 //
 101 //===----------------------------------------------------------------------===//
 102 #include "llvm/ADT/PostOrderIterator.h"
 103 #include "llvm/ADT/SmallPtrSet.h"
 104 #include "llvm/Analysis/PostDominators.h"
 105 #include "llvm/Analysis/SyncDependenceAnalysis.h"
 106 #include "llvm/IR/BasicBlock.h"
 107 #include "llvm/IR/CFG.h"
 108 #include "llvm/IR/Dominators.h"
 109 #include "llvm/IR/Function.h"
 110
 111 #include <stack>
 112 #include <unordered_set>
 113
 114 #define DEBUG_TYPE "sync-dependence"
 115
 116 namespace llvm {
 117
 118 ConstBlockSet SyncDependenceAnalysis::EmptyBlockSet;
 119
 120 SyncDependenceAnalysis::SyncDependenceAnalysis(const DominatorTree &DT,
 121                                                const PostDominatorTree &PDT,
 122                                                const LoopInfo &LI)
 123     : FuncRPOT(DT.getRoot()->getParent()), DT(DT), PDT(PDT), LI(LI) {}
 124
 125 SyncDependenceAnalysis::~SyncDependenceAnalysis() {}
 126
 127 using FunctionRPOT = ReversePostOrderTraversal<const Function *>;
 128
 129 // divergence propagator for reducible CFGs
 130 struct DivergencePropagator {
 131   const FunctionRPOT &FuncRPOT;
 132   const DominatorTree &DT;
 133   const PostDominatorTree &PDT;
 134   const LoopInfo &LI;
 135
 136   // identified join points
 137   std::unique_ptr<ConstBlockSet> JoinBlocks;
 138
 139   // reached loop exits (by a path disjoint to a path to the loop header)
 140   SmallPtrSet<const BasicBlock *, 4> ReachedLoopExits;
 141
 142   // if DefMap[B] == C then C is the dominating definition at block B
 143   // if DefMap[B] ~ undef then we haven't seen B yet
 144   // if DefMap[B] == B then B is a join point of disjoint paths from X or B is
 145   // an immediate successor of X (initial value).
 146   using DefiningBlockMap = std::map<const BasicBlock *, const BasicBlock *>;
 147   DefiningBlockMap DefMap;
 148
 149   // all blocks with pending visits
 150   std::unordered_set<const BasicBlock *> PendingUpdates;
 151
 152   DivergencePropagator(const FunctionRPOT &FuncRPOT, const DominatorTree &DT,
 153                        const PostDominatorTree &PDT, const LoopInfo &LI)
 154       : FuncRPOT(FuncRPOT), DT(DT), PDT(PDT), LI(LI),
 155         JoinBlocks(new ConstBlockSet) {}
 156
 157   // set the definition at @block and mark @block as pending for a visit
 158   void addPending(const BasicBlock &Block, const BasicBlock &DefBlock) {
 159     bool WasAdded = DefMap.emplace(&Block, &DefBlock).second;
 160     if (WasAdded)
 161       PendingUpdates.insert(&Block);
 162   }
 163
 164   void printDefs(raw_ostream &Out) {
 165     Out << "Propagator::DefMap {\n";
 166     for (const auto *Block : FuncRPOT) {
 167       auto It = DefMap.find(Block);
 168       Out << Block->getName() << " : ";
 169       if (It == DefMap.end()) {
 170         Out << "\n";
 171       } else {
 172         const auto *DefBlock = It->second;
 173         Out << (DefBlock ? DefBlock->getName() : "<null>") << "\n";
 174       }
 175     }
 176     Out << "}\n";
 177   }
 178
 179   // process @succBlock with reaching definition @defBlock
 180   // the original divergent branch was in @parentLoop (if any)
 181   void visitSuccessor(const BasicBlock &SuccBlock, const Loop *ParentLoop,
 182                       const BasicBlock &DefBlock) {
 183
 184     // @succBlock is a loop exit
 185     if (ParentLoop && !ParentLoop->contains(&SuccBlock)) {
 186       DefMap.emplace(&SuccBlock, &DefBlock);
 187       ReachedLoopExits.insert(&SuccBlock);
 188       return;
 189     }
 190
 191     // first reaching def?
 192     auto ItLastDef = DefMap.find(&SuccBlock);
 193     if (ItLastDef == DefMap.end()) {
 194       addPending(SuccBlock, DefBlock);
 195       return;
 196     }
 197
 198     // a join of at least two definitions
 199     if (ItLastDef->second != &DefBlock) {
 200       // do we know this join already?
 201       if (!JoinBlocks->insert(&SuccBlock).second)
 202         return;
 203
 204       // update the definition
 205       addPending(SuccBlock, SuccBlock);
 206     }
 207   }
 208
 209   // find all blocks reachable by two disjoint paths from @rootTerm.
 210   // This method works for both divergent terminators and loops with
 211   // divergent exits.
 212   // @rootBlock is either the block containing the branch or the header of the
 213   // divergent loop.
 214   // @nodeSuccessors is the set of successors of the node (Loop or Terminator)
 215   // headed by @rootBlock.
 216   // @parentLoop is the parent loop of the Loop or the loop that contains the
 217   // Terminator.
 218   template <typename SuccessorIterable>
 219   std::unique_ptr<ConstBlockSet>
 220   computeJoinPoints(const BasicBlock &RootBlock,
 221                     SuccessorIterable NodeSuccessors, const Loop *ParentLoop) {
 222     assert(JoinBlocks);
 223
 224     // immediate post dominator (no join block beyond that block)
 225     const auto *PdNode = PDT.getNode(const_cast<BasicBlock *>(&RootBlock));
 226     const auto *IpdNode = PdNode->getIDom();
 227     const auto *PdBoundBlock = IpdNode ? IpdNode->getBlock() : nullptr;
 228
 229     // bootstrap with branch targets
 230     for (const auto *SuccBlock : NodeSuccessors) {
 231       DefMap.emplace(SuccBlock, SuccBlock);
 232
 233       if (ParentLoop && !ParentLoop->contains(SuccBlock)) {
 234         // immediate loop exit from node.
 235         ReachedLoopExits.insert(SuccBlock);
 236         continue;
 237       } else {
 238         // regular successor
 239         PendingUpdates.insert(SuccBlock);
 240       }
 241     }
 242
 243     auto ItBeginRPO = FuncRPOT.begin();
 244
 245     // skip until term (TODO RPOT won't let us start at @term directly)
 246     for (; *ItBeginRPO != &RootBlock; ++ItBeginRPO) {}
 247
 248     auto ItEndRPO = FuncRPOT.end();
 249     assert(ItBeginRPO != ItEndRPO);
 250
 251     // propagate definitions at the immediate successors of the node in RPO
 252     auto ItBlockRPO = ItBeginRPO;
 253     while (++ItBlockRPO != ItEndRPO && *ItBlockRPO != PdBoundBlock) {
 254       const auto *Block = *ItBlockRPO;
 255
 256       // skip @block if not pending update
 257       auto ItPending = PendingUpdates.find(Block);
 258       if (ItPending == PendingUpdates.end())
 259         continue;
 260       PendingUpdates.erase(ItPending);
 261
 262       // propagate definition at @block to its successors
 263       auto ItDef = DefMap.find(Block);
 264       const auto *DefBlock = ItDef->second;
 265       assert(DefBlock);
 266
 267       auto *BlockLoop = LI.getLoopFor(Block);
 268       if (ParentLoop &&
 269           (ParentLoop != BlockLoop && ParentLoop->contains(BlockLoop))) {
 270         // if the successor is the header of a nested loop pretend its a
 271         // single node with the loop's exits as successors
 272         SmallVector<BasicBlock *, 4> BlockLoopExits;
 273         BlockLoop->getExitBlocks(BlockLoopExits);
 274         for (const auto *BlockLoopExit : BlockLoopExits) {
 275           visitSuccessor(*BlockLoopExit, ParentLoop, *DefBlock);
 276         }
 277
 278       } else {
 279         // the successors are either on the same loop level or loop exits
 280         for (const auto *SuccBlock : successors(Block)) {
 281           visitSuccessor(*SuccBlock, ParentLoop, *DefBlock);
 282         }
 283       }
 284     }
 285
 286     // We need to know the definition at the parent loop header to decide
 287     // whether the definition at the header is different from the definition at
 288     // the loop exits, which would indicate a divergent loop exits.
 289     //
 290     // A // loop header
 291     // |
 292     // B // nested loop header
 293     // |
 294     // C -> X (exit from B loop) -..-> (A latch)
 295     // |
 296     // D -> back to B (B latch)
 297     // |
 298     // proper exit from both loops
 299     //
 300     // D post-dominates B as it is the only proper exit from the "A loop".
 301     // If C has a divergent branch, propagation will therefore stop at D.
 302     // That implies that B will never receive a definition.
 303     // But that definition can only be the same as at D (D itself in thise case)
 304     // because all paths to anywhere have to pass through D.
 305     //
 306     const BasicBlock *ParentLoopHeader =
 307         ParentLoop ? ParentLoop->getHeader() : nullptr;
 308     if (ParentLoop && ParentLoop->contains(PdBoundBlock)) {
 309       DefMap[ParentLoopHeader] = DefMap[PdBoundBlock];
 310     }
 311
 312     // analyze reached loop exits
 313     if (!ReachedLoopExits.empty()) {
 314       assert(ParentLoop);
 315       const auto *HeaderDefBlock = DefMap[ParentLoopHeader];
 316       LLVM_DEBUG(printDefs(dbgs()));
 317       assert(HeaderDefBlock && "no definition in header of carrying loop");
 318
 319       for (const auto *ExitBlock : ReachedLoopExits) {
 320         auto ItExitDef = DefMap.find(ExitBlock);
 321         assert((ItExitDef != DefMap.end()) &&
 322                "no reaching def at reachable loop exit");
 323         if (ItExitDef->second != HeaderDefBlock) {
 324           JoinBlocks->insert(ExitBlock);
 325         }
 326       }
 327     }
 328
 329     return std::move(JoinBlocks);
 330   }
 331 };
 332
 333 const ConstBlockSet &SyncDependenceAnalysis::join_blocks(const Loop &Loop) {
 334   using LoopExitVec = SmallVector<BasicBlock *, 4>;
 335   LoopExitVec LoopExits;
 336   Loop.getExitBlocks(LoopExits);
 337   if (LoopExits.size() < 1) {
 338     return EmptyBlockSet;
 339   }
 340
 341   // already available in cache?
 342   auto ItCached = CachedLoopExitJoins.find(&Loop);
 343   if (ItCached != CachedLoopExitJoins.end())
 344     return *ItCached->second;
 345
 346   // compute all join points
 347   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
 348   auto JoinBlocks = Propagator.computeJoinPoints<const LoopExitVec &>(
 349       *Loop.getHeader(), LoopExits, Loop.getParentLoop());
 350
 351   auto ItInserted = CachedLoopExitJoins.emplace(&Loop, std::move(JoinBlocks));
 352   assert(ItInserted.second);
 353   return *ItInserted.first->second;
 354 }
 355
 356 const ConstBlockSet &
 357 SyncDependenceAnalysis::join_blocks(const Instruction &Term) {
 358   // trivial case
 359   if (Term.getNumSuccessors() < 1) {
 360     return EmptyBlockSet;
 361   }
 362
 363   // already available in cache?
 364   auto ItCached = CachedBranchJoins.find(&Term);
 365   if (ItCached != CachedBranchJoins.end())
 366     return *ItCached->second;
 367
 368   // compute all join points
 369   DivergencePropagator Propagator{FuncRPOT, DT, PDT, LI};
 370   const auto &TermBlock = *Term.getParent();
 371   auto JoinBlocks = Propagator.computeJoinPoints<succ_const_range>(
 372       TermBlock, successors(Term.getParent()), LI.getLoopFor(&TermBlock));
 373
 374   auto ItInserted = CachedBranchJoins.emplace(&Term, std::move(JoinBlocks));
 375   assert(ItInserted.second);
 376   return *ItInserted.first->second;
 377 }
 378
 379 } // namespace llvm