llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp

   1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements a pass that removes irreducible control flow.
  11 /// Irreducible control flow means multiple-entry loops, which this pass
  12 /// transforms to have a single entry.
  13 ///
  14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
  15 /// it linearizes control flow, turning diamonds into two triangles, which is
  16 /// both unnecessary and undesirable for WebAssembly.
  17 ///
  18 /// The big picture: We recursively process each "region", defined as a group
  19 /// of blocks with a single entry and no branches back to that entry. A region
  20 /// may be the entire function body, or the inner part of a loop, i.e., the
  21 /// loop's body without branches back to the loop entry. In each region we fix
  22 /// up multi-entry loops by adding a new block that can dispatch to each of the
  23 /// loop entries, based on the value of a label "helper" variable, and we
  24 /// replace direct branches to the entries with assignments to the label
  25 /// variable and a branch to the dispatch block. Then the dispatch block is the
  26 /// single entry in the loop containing the previous multiple entries. After
  27 /// ensuring all the loops in a region are reducible, we recurse into them. The
  28 /// total time complexity of this pass is:
  29 ///
  30 ///   O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
  31 ///     NumLoops * NumLoops)
  32 ///
  33 /// This pass is similar to what the Relooper [1] does. Both identify looping
  34 /// code that requires multiple entries, and resolve it in a similar way (in
  35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
  36 /// also that like the Relooper, we implement a "minimal" intervention: we only
  37 /// use the "label" helper for the blocks we absolutely must and no others. We
  38 /// also prioritize code size and do not duplicate code in order to resolve
  39 /// irreducibility. The graph algorithms for finding loops and entries and so
  40 /// forth are also similar to the Relooper. The main differences between this
  41 /// pass and the Relooper are:
  42 ///
  43 ///  * We just care about irreducibility, so we just look at loops.
  44 ///  * The Relooper emits structured control flow (with ifs etc.), while we
  45 ///    emit a CFG.
  46 ///
  47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
  48 /// Proceedings of the ACM international conference companion on Object oriented
  49 /// programming systems languages and applications companion (SPLASH '11). ACM,
  50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
  51 /// http://doi.acm.org/10.1145/2048147.2048224
  52 ///
  53 //===----------------------------------------------------------------------===//
  54
  55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
  56 #include "WebAssembly.h"
  57 #include "WebAssemblySubtarget.h"
  58 #include "llvm/CodeGen/MachineInstrBuilder.h"
  59 #include "llvm/Support/Debug.h"
  60 using namespace llvm;
  61
  62 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
  63
  64 namespace {
  65
// Containers of machine basic block pointers used throughout this pass, each
// with inline storage for four elements. BlockVector is used where a stable,
// explicit order is needed (see getSortedEntries); BlockSet is used for
// membership queries.
using BlockVector = SmallVector<MachineBasicBlock *, 4>;
using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
  68
  69 static BlockVector getSortedEntries(const BlockSet &Entries) {
  70   BlockVector SortedEntries(Entries.begin(), Entries.end());
  71   llvm::sort(SortedEntries,
  72              [](const MachineBasicBlock *A, const MachineBasicBlock *B) {
  73                auto ANum = A->getNumber();
  74                auto BNum = B->getNumber();
  75                return ANum < BNum;
  76              });
  77   return SortedEntries;
  78 }
  79
  80 // Calculates reachability in a region. Ignores branches to blocks outside of
  81 // the region, and ignores branches to the region entry (for the case where
  82 // the region is the inner part of a loop).
  83 class ReachabilityGraph {
  84 public:
  85   ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
  86       : Entry(Entry), Blocks(Blocks) {
  87 #ifndef NDEBUG
  88     // The region must have a single entry.
  89     for (auto *MBB : Blocks) {
  90       if (MBB != Entry) {
  91         for (auto *Pred : MBB->predecessors()) {
  92           assert(inRegion(Pred));
  93         }
  94       }
  95     }
  96 #endif
  97     calculate();
  98   }
  99
 100   bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
 101     assert(inRegion(From) && inRegion(To));
 102     auto I = Reachable.find(From);
 103     if (I == Reachable.end())
 104       return false;
 105     return I->second.count(To);
 106   }
 107
 108   // "Loopers" are blocks that are in a loop. We detect these by finding blocks
 109   // that can reach themselves.
 110   const BlockSet &getLoopers() const { return Loopers; }
 111
 112   // Get all blocks that are loop entries.
 113   const BlockSet &getLoopEntries() const { return LoopEntries; }
 114
 115   // Get all blocks that enter a particular loop from outside.
 116   const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
 117     assert(inRegion(LoopEntry));
 118     auto I = LoopEnterers.find(LoopEntry);
 119     assert(I != LoopEnterers.end());
 120     return I->second;
 121   }
 122
 123 private:
 124   MachineBasicBlock *Entry;
 125   const BlockSet &Blocks;
 126
 127   BlockSet Loopers, LoopEntries;
 128   DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
 129
 130   bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
 131
 132   // Maps a block to all the other blocks it can reach.
 133   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
 134
 135   void calculate() {
 136     // Reachability computation work list. Contains pairs of recent additions
 137     // (A, B) where we just added a link A => B.
 138     using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
 139     SmallVector<BlockPair, 4> WorkList;
 140
 141     // Add all relevant direct branches.
 142     for (auto *MBB : Blocks) {
 143       for (auto *Succ : MBB->successors()) {
 144         if (Succ != Entry && inRegion(Succ)) {
 145           Reachable[MBB].insert(Succ);
 146           WorkList.emplace_back(MBB, Succ);
 147         }
 148       }
 149     }
 150
 151     while (!WorkList.empty()) {
 152       MachineBasicBlock *MBB, *Succ;
 153       std::tie(MBB, Succ) = WorkList.pop_back_val();
 154       assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
 155       if (MBB != Entry) {
 156         // We recently added MBB => Succ, and that means we may have enabled
 157         // Pred => MBB => Succ.
 158         for (auto *Pred : MBB->predecessors()) {
 159           if (Reachable[Pred].insert(Succ).second) {
 160             WorkList.emplace_back(Pred, Succ);
 161           }
 162         }
 163       }
 164     }
 165
 166     // Blocks that can return to themselves are in a loop.
 167     for (auto *MBB : Blocks) {
 168       if (canReach(MBB, MBB)) {
 169         Loopers.insert(MBB);
 170       }
 171     }
 172     assert(!Loopers.count(Entry));
 173
 174     // Find the loop entries - loopers reachable from blocks not in that loop -
 175     // and those outside blocks that reach them, the "loop enterers".
 176     for (auto *Looper : Loopers) {
 177       for (auto *Pred : Looper->predecessors()) {
 178         // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
 179         // otherwise, it is a block that enters into the loop.
 180         if (!canReach(Looper, Pred)) {
 181           LoopEntries.insert(Looper);
 182           LoopEnterers[Looper].insert(Pred);
 183         }
 184       }
 185     }
 186   }
 187 };
 188
 189 // Finds the blocks in a single-entry loop, given the loop entry and the
 190 // list of blocks that enter the loop.
 191 class LoopBlocks {
 192 public:
 193   LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
 194       : Entry(Entry), Enterers(Enterers) {
 195     calculate();
 196   }
 197
 198   BlockSet &getBlocks() { return Blocks; }
 199
 200 private:
 201   MachineBasicBlock *Entry;
 202   const BlockSet &Enterers;
 203
 204   BlockSet Blocks;
 205
 206   void calculate() {
 207     // Going backwards from the loop entry, if we ignore the blocks entering
 208     // from outside, we will traverse all the blocks in the loop.
 209     BlockVector WorkList;
 210     BlockSet AddedToWorkList;
 211     Blocks.insert(Entry);
 212     for (auto *Pred : Entry->predecessors()) {
 213       if (!Enterers.count(Pred)) {
 214         WorkList.push_back(Pred);
 215         AddedToWorkList.insert(Pred);
 216       }
 217     }
 218
 219     while (!WorkList.empty()) {
 220       auto *MBB = WorkList.pop_back_val();
 221       assert(!Enterers.count(MBB));
 222       if (Blocks.insert(MBB).second) {
 223         for (auto *Pred : MBB->predecessors()) {
 224           if (!AddedToWorkList.count(Pred)) {
 225             WorkList.push_back(Pred);
 226             AddedToWorkList.insert(Pred);
 227           }
 228         }
 229       }
 230     }
 231   }
 232 };
 233
 234 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
 235   StringRef getPassName() const override {
 236     return "WebAssembly Fix Irreducible Control Flow";
 237   }
 238
 239   bool runOnMachineFunction(MachineFunction &MF) override;
 240
 241   bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
 242                      MachineFunction &MF);
 243
 244   void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
 245                            MachineFunction &MF, const ReachabilityGraph &Graph);
 246
 247 public:
 248   static char ID; // Pass identification, replacement for typeid
 249   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
 250 };
 251
 252 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
 253     MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
 254   bool Changed = false;
 255   // Remove irreducibility before processing child loops, which may take
 256   // multiple iterations.
 257   while (true) {
 258     ReachabilityGraph Graph(Entry, Blocks);
 259
 260     bool FoundIrreducibility = false;
 261
 262     for (auto *LoopEntry : getSortedEntries(Graph.getLoopEntries())) {
 263       // Find mutual entries - all entries which can reach this one, and
 264       // are reached by it (that always includes LoopEntry itself). All mutual
 265       // entries must be in the same loop, so if we have more than one, then we
 266       // have irreducible control flow.
 267       //
 268       // (Note that we need to sort the entries here, as otherwise the order can
 269       // matter: being mutual is a symmetric relationship, and each set of
 270       // mutuals will be handled properly no matter which we see first. However,
 271       // there can be multiple disjoint sets of mutuals, and which we process
 272       // first changes the output.)
 273       //
 274       // Note that irreducibility may involve inner loops, e.g. imagine A
 275       // starts one loop, and it has B inside it which starts an inner loop.
 276       // If we add a branch from all the way on the outside to B, then in a
 277       // sense B is no longer an "inner" loop, semantically speaking. We will
 278       // fix that irreducibility by adding a block that dispatches to either
 279       // either A or B, so B will no longer be an inner loop in our output.
 280       // (A fancier approach might try to keep it as such.)
 281       //
 282       // Note that we still need to recurse into inner loops later, to handle
 283       // the case where the irreducibility is entirely nested - we would not
 284       // be able to identify that at this point, since the enclosing loop is
 285       // a group of blocks all of whom can reach each other. (We'll see the
 286       // irreducibility after removing branches to the top of that enclosing
 287       // loop.)
 288       BlockSet MutualLoopEntries;
 289       MutualLoopEntries.insert(LoopEntry);
 290       for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
 291         if (OtherLoopEntry != LoopEntry &&
 292             Graph.canReach(LoopEntry, OtherLoopEntry) &&
 293             Graph.canReach(OtherLoopEntry, LoopEntry)) {
 294           MutualLoopEntries.insert(OtherLoopEntry);
 295         }
 296       }
 297
 298       if (MutualLoopEntries.size() > 1) {
 299         makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
 300         FoundIrreducibility = true;
 301         Changed = true;
 302         break;
 303       }
 304     }
 305     // Only go on to actually process the inner loops when we are done
 306     // removing irreducible control flow and changing the graph. Modifying
 307     // the graph as we go is possible, and that might let us avoid looking at
 308     // the already-fixed loops again if we are careful, but all that is
 309     // complex and bug-prone. Since irreducible loops are rare, just starting
 310     // another iteration is best.
 311     if (FoundIrreducibility) {
 312       continue;
 313     }
 314
 315     for (auto *LoopEntry : Graph.getLoopEntries()) {
 316       LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
 317       // Each of these calls to processRegion may change the graph, but are
 318       // guaranteed not to interfere with each other. The only changes we make
 319       // to the graph are to add blocks on the way to a loop entry. As the
 320       // loops are disjoint, that means we may only alter branches that exit
 321       // another loop, which are ignored when recursing into that other loop
 322       // anyhow.
 323       if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
 324         Changed = true;
 325       }
 326     }
 327
 328     return Changed;
 329   }
 330 }
 331
 332 // Given a set of entries to a single loop, create a single entry for that
 333 // loop by creating a dispatch block for them, routing control flow using
 334 // a helper variable. Also updates Blocks with any new blocks created, so
 335 // that we properly track all the blocks in the region. But this does not update
 336 // ReachabilityGraph; this will be updated in the caller of this function as
 337 // needed.
 338 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
 339     BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
 340     const ReachabilityGraph &Graph) {
 341   assert(Entries.size() >= 2);
 342
 343   // Sort the entries to ensure a deterministic build.
 344   BlockVector SortedEntries = getSortedEntries(Entries);
 345
 346 #ifndef NDEBUG
 347   for (auto Block : SortedEntries)
 348     assert(Block->getNumber() != -1);
 349   if (SortedEntries.size() > 1) {
 350     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
 351          ++I) {
 352       auto ANum = (*I)->getNumber();
 353       auto BNum = (*(std::next(I)))->getNumber();
 354       assert(ANum != BNum);
 355     }
 356   }
 357 #endif
 358
 359   // Create a dispatch block which will contain a jump table to the entries.
 360   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
 361   MF.insert(MF.end(), Dispatch);
 362   Blocks.insert(Dispatch);
 363
 364   // Add the jump table.
 365   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 366   MachineInstrBuilder MIB =
 367       BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
 368
 369   // Add the register which will be used to tell the jump table which block to
 370   // jump to.
 371   MachineRegisterInfo &MRI = MF.getRegInfo();
 372   Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 373   MIB.addReg(Reg);
 374
 375   // Compute the indices in the superheader, one for each bad block, and
 376   // add them as successors.
 377   DenseMap<MachineBasicBlock *, unsigned> Indices;
 378   for (auto *Entry : SortedEntries) {
 379     auto Pair = Indices.insert(std::make_pair(Entry, 0));
 380     assert(Pair.second);
 381
 382     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
 383     Pair.first->second = Index;
 384
 385     MIB.addMBB(Entry);
 386     Dispatch->addSuccessor(Entry);
 387   }
 388
 389   // Rewrite the problematic successors for every block that wants to reach
 390   // the bad blocks. For simplicity, we just introduce a new block for every
 391   // edge we need to rewrite. (Fancier things are possible.)
 392
 393   BlockVector AllPreds;
 394   for (auto *Entry : SortedEntries) {
 395     for (auto *Pred : Entry->predecessors()) {
 396       if (Pred != Dispatch) {
 397         AllPreds.push_back(Pred);
 398       }
 399     }
 400   }
 401
 402   // This set stores predecessors within this loop.
 403   DenseSet<MachineBasicBlock *> InLoop;
 404   for (auto *Pred : AllPreds) {
 405     for (auto *Entry : Pred->successors()) {
 406       if (!Entries.count(Entry))
 407         continue;
 408       if (Graph.canReach(Entry, Pred)) {
 409         InLoop.insert(Pred);
 410         break;
 411       }
 412     }
 413   }
 414
 415   // Record if each entry has a layout predecessor. This map stores
 416   // <<loop entry, Predecessor is within the loop?>, layout predecessor>
 417   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
 418       EntryToLayoutPred;
 419   for (auto *Pred : AllPreds) {
 420     bool PredInLoop = InLoop.count(Pred);
 421     for (auto *Entry : Pred->successors())
 422       if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
 423         EntryToLayoutPred[{Entry, PredInLoop}] = Pred;
 424   }
 425
 426   // We need to create at most two routing blocks per entry: one for
 427   // predecessors outside the loop and one for predecessors inside the loop.
 428   // This map stores
 429   // <<loop entry, Predecessor is within the loop?>, routing block>
 430   DenseMap<PointerIntPair<MachineBasicBlock *, 1, bool>, MachineBasicBlock *>
 431       Map;
 432   for (auto *Pred : AllPreds) {
 433     bool PredInLoop = InLoop.count(Pred);
 434     for (auto *Entry : Pred->successors()) {
 435       if (!Entries.count(Entry) || Map.count({Entry, PredInLoop}))
 436         continue;
 437       // If there exists a layout predecessor of this entry and this predecessor
 438       // is not that, we rather create a routing block after that layout
 439       // predecessor to save a branch.
 440       if (auto *OtherPred = EntryToLayoutPred.lookup({Entry, PredInLoop}))
 441         if (OtherPred != Pred)
 442           continue;
 443
 444       // This is a successor we need to rewrite.
 445       MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
 446       MF.insert(Pred->isLayoutSuccessor(Entry)
 447                     ? MachineFunction::iterator(Entry)
 448                     : MF.end(),
 449                 Routing);
 450       Blocks.insert(Routing);
 451
 452       // Set the jump table's register of the index of the block we wish to
 453       // jump to, and jump to the jump table.
 454       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
 455           .addImm(Indices[Entry]);
 456       BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
 457       Routing->addSuccessor(Dispatch);
 458       Map[{Entry, PredInLoop}] = Routing;
 459     }
 460   }
 461
 462   for (auto *Pred : AllPreds) {
 463     bool PredInLoop = InLoop.count(Pred);
 464     // Remap the terminator operands and the successor list.
 465     for (MachineInstr &Term : Pred->terminators())
 466       for (auto &Op : Term.explicit_uses())
 467         if (Op.isMBB() && Indices.count(Op.getMBB()))
 468           Op.setMBB(Map[{Op.getMBB(), PredInLoop}]);
 469
 470     for (auto *Succ : Pred->successors()) {
 471       if (!Entries.count(Succ))
 472         continue;
 473       auto *Routing = Map[{Succ, PredInLoop}];
 474       Pred->replaceSuccessor(Succ, Routing);
 475     }
 476   }
 477
 478   // Create a fake default label, because br_table requires one.
 479   MIB.addMBB(MIB.getInstr()
 480                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
 481                  .getMBB());
 482 }
 483
 484 } // end anonymous namespace
 485
// Definition of the pass identifier declared in the class; the constructor
// hands it to MachineFunctionPass.
char WebAssemblyFixIrreducibleControlFlow::ID = 0;
// Pass initialization boilerplate, using DEBUG_TYPE as the pass argument.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags - confirm against INITIALIZE_PASS.
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
                "Removes irreducible control flow", false, false)
 489
 490 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
 491   return new WebAssemblyFixIrreducibleControlFlow();
 492 }
 493
 494 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
 495     MachineFunction &MF) {
 496   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
 497                        "********** Function: "
 498                     << MF.getName() << '\n');
 499
 500   // Start the recursive process on the entire function body.
 501   BlockSet AllBlocks;
 502   for (auto &MBB : MF) {
 503     AllBlocks.insert(&MBB);
 504   }
 505
 506   if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
 507     // We rewrote part of the function; recompute relevant things.
 508     MF.getRegInfo().invalidateLiveness();
 509     MF.RenumberBlocks();
 510     return true;
 511   }
 512
 513   return false;
 514 }