lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp

   1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 ///
   9 /// \file
  10 /// This file implements a pass that transforms irreducible control flow into
  11 /// reducible control flow. Irreducible control flow means multiple-entry
  12 /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
  13 /// due to being unnatural.
  14 ///
  15 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
  16 /// it linearizes control flow, turning diamonds into two triangles, which is
  17 /// both unnecessary and undesirable for WebAssembly.
  18 ///
  19 /// The big picture: Ignoring natural loops (seeing them monolithically), we
  20 /// find all the blocks which can return to themselves ("loopers"). Loopers
  21 /// reachable from the non-loopers are loop entries: if there are 2 or more,
  22 /// then we have irreducible control flow. We fix that as follows: a new block
  23 /// is created that can dispatch to each of the loop entries, based on the
  24 /// value of a label "helper" variable, and we replace direct branches to the
  25 /// entries with assignments to the label variable and a branch to the dispatch
  26 /// block. Then the dispatch block is the single entry in a new natural loop.
  27 ///
  28 /// This is similar to what the Relooper [1] does, both identify looping code
  29 /// that requires multiple entries, and resolve it in a similar way. In
  30 /// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
  31 /// also that like the Relooper, we implement a "minimal" intervention: we only
  32 /// use the "label" helper for the blocks we absolutely must and no others. We
  33 /// also prioritize code size and do not perform node splitting (i.e. we don't
  34 /// duplicate code in order to resolve irreducibility).
  35 ///
  36 /// The difference between this code and the Relooper is that the Relooper also
  37 /// generates ifs and loops and works in a recursive manner, knowing at each
  38 /// point what the entries are, and recursively breaks down the problem. Here
  39 /// we just want to resolve irreducible control flow, and we also want to use
  40 /// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
  41 /// identify natural loops, etc., and we start with the whole CFG and must
  42 /// identify both the looping code and its entries.
  43 ///
  44 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
  45 /// Proceedings of the ACM international conference companion on Object oriented
  46 /// programming systems languages and applications companion (SPLASH '11). ACM,
  47 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
  48 /// http://doi.acm.org/10.1145/2048147.2048224
  49 ///
  50 //===----------------------------------------------------------------------===//
  51
  52 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
  53 #include "WebAssembly.h"
  54 #include "WebAssemblyMachineFunctionInfo.h"
  55 #include "WebAssemblySubtarget.h"
  56 #include "llvm/ADT/PriorityQueue.h"
  57 #include "llvm/ADT/SCCIterator.h"
  58 #include "llvm/ADT/SetVector.h"
  59 #include "llvm/CodeGen/MachineDominators.h"
  60 #include "llvm/CodeGen/MachineFunction.h"
  61 #include "llvm/CodeGen/MachineInstrBuilder.h"
  62 #include "llvm/CodeGen/MachineLoopInfo.h"
  63 #include "llvm/CodeGen/MachineRegisterInfo.h"
  64 #include "llvm/CodeGen/Passes.h"
  65 #include "llvm/Support/Debug.h"
  66 #include "llvm/Support/raw_ostream.h"
  67 using namespace llvm;
  68
  69 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
  70
  71 namespace {
  72
  73 class LoopFixer {
  74 public:
  75   LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
  76       : MF(MF), MLI(MLI), Loop(Loop) {}
  77
  78   // Run the fixer on the given inputs. Returns whether changes were made.
  79   bool run();
  80
  81 private:
  82   MachineFunction &MF;
  83   MachineLoopInfo &MLI;
  84   MachineLoop *Loop;
  85
  86   MachineBasicBlock *Header;
  87   SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;
  88
  89   using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
  90   DenseMap<MachineBasicBlock *, BlockSet> Reachable;
  91
  92   // The worklist contains pairs of recent additions, (a, b), where we just
  93   // added a link a => b.
  94   using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
  95   SmallVector<BlockPair, 4> WorkList;
  96
  97   // Get a canonical block to represent a block or a loop: the block, or if in
  98   // an inner loop, the loop header, of it in an outer loop scope, we can
  99   // ignore it. We need to call this on all blocks we work on.
 100   MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
 101     MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
 102     if (InnerLoop == Loop) {
 103       return MBB;
 104     } else {
 105       // This is either in an outer or an inner loop, and not in ours.
 106       if (!LoopBlocks.count(MBB)) {
 107         // It's in outer code, ignore it.
 108         return nullptr;
 109       }
 110       assert(InnerLoop);
 111       // It's in an inner loop, canonicalize it to the header of that loop.
 112       return InnerLoop->getHeader();
 113     }
 114   }
 115
 116   // For a successor we can additionally ignore it if it's a branch back to a
 117   // natural loop top, as when we are in the scope of a loop, we just care
 118   // about internal irreducibility, and can ignore the loop we are in. We need
 119   // to call this on all blocks in a context where they are a successor.
 120   MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
 121     if (Loop && MBB == Loop->getHeader()) {
 122       // Ignore branches going to the loop's natural header.
 123       return nullptr;
 124     }
 125     return canonicalize(MBB);
 126   }
 127
 128   // Potentially insert a new reachable edge, and if so, note it as further
 129   // work.
 130   void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
 131     assert(MBB == canonicalize(MBB));
 132     assert(Succ);
 133     // Succ may not be interesting as a sucessor.
 134     Succ = canonicalizeSuccessor(Succ);
 135     if (!Succ)
 136       return;
 137     if (Reachable[MBB].insert(Succ).second) {
 138       // For there to be further work, it means that we have
 139       //   X => MBB => Succ
 140       // for some other X, and in that case X => Succ would be a new edge for
 141       // us to discover later. However, if we don't care about MBB as a
 142       // successor, then we don't care about that anyhow.
 143       if (canonicalizeSuccessor(MBB)) {
 144         WorkList.emplace_back(MBB, Succ);
 145       }
 146     }
 147   }
 148 };
 149
 150 bool LoopFixer::run() {
 151   Header = Loop ? Loop->getHeader() : &*MF.begin();
 152
 153   // Identify all the blocks in this loop scope.
 154   if (Loop) {
 155     for (auto *MBB : Loop->getBlocks()) {
 156       LoopBlocks.insert(MBB);
 157     }
 158   } else {
 159     for (auto &MBB : MF) {
 160       LoopBlocks.insert(&MBB);
 161     }
 162   }
 163
 164   // Compute which (canonicalized) blocks each block can reach.
 165
 166   // Add all the initial work.
 167   for (auto *MBB : LoopBlocks) {
 168     MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
 169
 170     if (InnerLoop == Loop) {
 171       for (auto *Succ : MBB->successors()) {
 172         maybeInsert(MBB, Succ);
 173       }
 174     } else {
 175       // It can't be in an outer loop - we loop on LoopBlocks - and so it must
 176       // be an inner loop.
 177       assert(InnerLoop);
 178       // Check if we are the canonical block for this loop.
 179       if (canonicalize(MBB) != MBB) {
 180         continue;
 181       }
 182       // The successors are those of the loop.
 183       SmallVector<MachineBasicBlock *, 2> ExitBlocks;
 184       InnerLoop->getExitBlocks(ExitBlocks);
 185       for (auto *Succ : ExitBlocks) {
 186         maybeInsert(MBB, Succ);
 187       }
 188     }
 189   }
 190
 191   // Do work until we are all done.
 192   while (!WorkList.empty()) {
 193     MachineBasicBlock *MBB;
 194     MachineBasicBlock *Succ;
 195     std::tie(MBB, Succ) = WorkList.pop_back_val();
 196     // The worklist item is an edge we just added, so it must have valid blocks
 197     // (and not something canonicalized to nullptr).
 198     assert(MBB);
 199     assert(Succ);
 200     // The successor in that pair must also be a valid successor.
 201     assert(MBB == canonicalizeSuccessor(MBB));
 202     // We recently added MBB => Succ, and that means we may have enabled
 203     // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
 204     // is correct for both a block and a block representing a loop, as the loop
 205     // is natural and so the predecessors are all predecessors of the loop
 206     // header, which is the block we have here.
 207     for (auto *Pred : MBB->predecessors()) {
 208       // Canonicalize, make sure it's relevant, and check it's not the same
 209       // block (an update to the block itself doesn't help compute that same
 210       // block).
 211       Pred = canonicalize(Pred);
 212       if (Pred && Pred != MBB) {
 213         maybeInsert(Pred, Succ);
 214       }
 215     }
 216   }
 217
 218   // It's now trivial to identify the loopers.
 219   SmallPtrSet<MachineBasicBlock *, 4> Loopers;
 220   for (auto MBB : LoopBlocks) {
 221     if (Reachable[MBB].count(MBB)) {
 222       Loopers.insert(MBB);
 223     }
 224   }
 225   // The header cannot be a looper. At the toplevel, LLVM does not allow the
 226   // entry to be in a loop, and in a natural loop we should ignore the header.
 227   assert(Loopers.count(Header) == 0);
 228
 229   // Find the entries, loopers reachable from non-loopers.
 230   SmallPtrSet<MachineBasicBlock *, 4> Entries;
 231   SmallVector<MachineBasicBlock *, 4> SortedEntries;
 232   for (auto *Looper : Loopers) {
 233     for (auto *Pred : Looper->predecessors()) {
 234       Pred = canonicalize(Pred);
 235       if (Pred && !Loopers.count(Pred)) {
 236         Entries.insert(Looper);
 237         SortedEntries.push_back(Looper);
 238         break;
 239       }
 240     }
 241   }
 242
 243   // Check if we found irreducible control flow.
 244   if (LLVM_LIKELY(Entries.size() <= 1))
 245     return false;
 246
 247   // Sort the entries to ensure a deterministic build.
 248   llvm::sort(SortedEntries,
 249              [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
 250                auto ANum = A->getNumber();
 251                auto BNum = B->getNumber();
 252                return ANum < BNum;
 253              });
 254
 255 #ifndef NDEBUG
 256   for (auto Block : SortedEntries)
 257     assert(Block->getNumber() != -1);
 258   if (SortedEntries.size() > 1) {
 259     for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1;
 260          I != E; ++I) {
 261       auto ANum = (*I)->getNumber();
 262       auto BNum = (*(std::next(I)))->getNumber();
 263       assert(ANum != BNum);
 264     }
 265   }
 266 #endif
 267
 268   // Create a dispatch block which will contain a jump table to the entries.
 269   MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
 270   MF.insert(MF.end(), Dispatch);
 271   MLI.changeLoopFor(Dispatch, Loop);
 272
 273   // Add the jump table.
 274   const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
 275   MachineInstrBuilder MIB = BuildMI(*Dispatch, Dispatch->end(), DebugLoc(),
 276                                     TII.get(WebAssembly::BR_TABLE_I32));
 277
 278   // Add the register which will be used to tell the jump table which block to
 279   // jump to.
 280   MachineRegisterInfo &MRI = MF.getRegInfo();
 281   unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
 282   MIB.addReg(Reg);
 283
 284   // Compute the indices in the superheader, one for each bad block, and
 285   // add them as successors.
 286   DenseMap<MachineBasicBlock *, unsigned> Indices;
 287   for (auto *MBB : SortedEntries) {
 288     auto Pair = Indices.insert(std::make_pair(MBB, 0));
 289     if (!Pair.second) {
 290       continue;
 291     }
 292
 293     unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
 294     Pair.first->second = Index;
 295
 296     MIB.addMBB(MBB);
 297     Dispatch->addSuccessor(MBB);
 298   }
 299
 300   // Rewrite the problematic successors for every block that wants to reach the
 301   // bad blocks. For simplicity, we just introduce a new block for every edge
 302   // we need to rewrite. (Fancier things are possible.)
 303
 304   SmallVector<MachineBasicBlock *, 4> AllPreds;
 305   for (auto *MBB : SortedEntries) {
 306     for (auto *Pred : MBB->predecessors()) {
 307       if (Pred != Dispatch) {
 308         AllPreds.push_back(Pred);
 309       }
 310     }
 311   }
 312
 313   for (MachineBasicBlock *MBB : AllPreds) {
 314     DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
 315     for (auto *Succ : MBB->successors()) {
 316       if (!Entries.count(Succ)) {
 317         continue;
 318       }
 319
 320       // This is a successor we need to rewrite.
 321       MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
 322       MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
 323                                              : MF.end(),
 324                 Split);
 325       MLI.changeLoopFor(Split, Loop);
 326
 327       // Set the jump table's register of the index of the block we wish to
 328       // jump to, and jump to the jump table.
 329       BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::CONST_I32),
 330               Reg)
 331           .addImm(Indices[Succ]);
 332       BuildMI(*Split, Split->end(), DebugLoc(), TII.get(WebAssembly::BR))
 333           .addMBB(Dispatch);
 334       Split->addSuccessor(Dispatch);
 335       Map[Succ] = Split;
 336     }
 337     // Remap the terminator operands and the successor list.
 338     for (MachineInstr &Term : MBB->terminators())
 339       for (auto &Op : Term.explicit_uses())
 340         if (Op.isMBB() && Indices.count(Op.getMBB()))
 341           Op.setMBB(Map[Op.getMBB()]);
 342     for (auto Rewrite : Map)
 343       MBB->replaceSuccessor(Rewrite.first, Rewrite.second);
 344   }
 345
 346   // Create a fake default label, because br_table requires one.
 347   MIB.addMBB(MIB.getInstr()
 348                  ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
 349                  .getMBB());
 350
 351   return true;
 352 }
 353
 354 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
 355   StringRef getPassName() const override {
 356     return "WebAssembly Fix Irreducible Control Flow";
 357   }
 358
 359   void getAnalysisUsage(AnalysisUsage &AU) const override {
 360     AU.setPreservesCFG();
 361     AU.addRequired<MachineDominatorTree>();
 362     AU.addPreserved<MachineDominatorTree>();
 363     AU.addRequired<MachineLoopInfo>();
 364     AU.addPreserved<MachineLoopInfo>();
 365     MachineFunctionPass::getAnalysisUsage(AU);
 366   }
 367
 368   bool runOnMachineFunction(MachineFunction &MF) override;
 369
 370   bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
 371     // Visit the function body, which is identified as a null loop.
 372     if (LoopFixer(MF, MLI, nullptr).run()) {
 373       return true;
 374     }
 375
 376     // Visit all the loops.
 377     SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
 378     while (!Worklist.empty()) {
 379       MachineLoop *Loop = Worklist.pop_back_val();
 380       Worklist.append(Loop->begin(), Loop->end());
 381       if (LoopFixer(MF, MLI, Loop).run()) {
 382         return true;
 383       }
 384     }
 385
 386     return false;
 387   }
 388
 389 public:
 390   static char ID; // Pass identification, replacement for typeid
 391   WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
 392 };
 393 } // end anonymous namespace
 394
 395 char WebAssemblyFixIrreducibleControlFlow::ID = 0;
 396 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
 397                 "Removes irreducible control flow", false, false)
 398
 399 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
 400   return new WebAssemblyFixIrreducibleControlFlow();
 401 }
 402
 403 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
 404     MachineFunction &MF) {
 405   LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
 406                        "********** Function: "
 407                     << MF.getName() << '\n');
 408
 409   bool Changed = false;
 410   auto &MLI = getAnalysis<MachineLoopInfo>();
 411
 412   // When we modify something, bail out and recompute MLI, then start again, as
 413   // we create a new natural loop when we resolve irreducible control flow, and
 414   // other loops may become nested in it, etc. In practice this is not an issue
 415   // because irreducible control flow is rare, only very few cycles are needed
 416   // here.
 417   while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
 418     // We rewrote part of the function; recompute MLI and start again.
 419     LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
 420     MF.getRegInfo().invalidateLiveness();
 421     MF.RenumberBlocks();
 422     getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
 423     MLI.runOnMachineFunction(MF);
 424     Changed = true;
 425   }
 426
 427   return Changed;
 428 }