[InstCombine] Signed saturation patterns
[llvm-complete.git] / lib / Target / WebAssembly / WebAssemblyFixIrreducibleControlFlow.cpp
blob157ea9d525c96ee0391fe333042c2c2108b44726
1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
13 ///
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
17 ///
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
29 ///
30 /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31 /// NumLoops * NumLoops)
32 ///
33 /// This pass is similar to what the Relooper [1] does. Both identify looping
34 /// code that requires multiple entries, and resolve it in a similar way (in
35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36 /// also that like the Relooper, we implement a "minimal" intervention: we only
37 /// use the "label" helper for the blocks we absolutely must and no others. We
38 /// also prioritize code size and do not duplicate code in order to resolve
39 /// irreducibility. The graph algorithms for finding loops and entries and so
40 /// forth are also similar to the Relooper. The main differences between this
41 /// pass and the Relooper are:
42 ///
43 /// * We just care about irreducibility, so we just look at loops.
44 /// * The Relooper emits structured control flow (with ifs etc.), while we
45 /// emit a CFG.
46 ///
47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48 /// Proceedings of the ACM international conference companion on Object oriented
49 /// programming systems languages and applications companion (SPLASH '11). ACM,
50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51 /// http://doi.acm.org/10.1145/2048147.2048224
52 ///
53 //===----------------------------------------------------------------------===//
55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56 #include "WebAssembly.h"
57 #include "WebAssemblySubtarget.h"
58 #include "llvm/CodeGen/MachineInstrBuilder.h"
59 #include "llvm/Support/Debug.h"
60 using namespace llvm;
62 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
64 namespace {
66 using BlockVector = SmallVector<MachineBasicBlock *, 4>;
67 using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
69 // Calculates reachability in a region. Ignores branches to blocks outside of
70 // the region, and ignores branches to the region entry (for the case where
71 // the region is the inner part of a loop).
72 class ReachabilityGraph {
73 public:
74 ReachabilityGraph(MachineBasicBlock *Entry, const BlockSet &Blocks)
75 : Entry(Entry), Blocks(Blocks) {
76 #ifndef NDEBUG
77 // The region must have a single entry.
78 for (auto *MBB : Blocks) {
79 if (MBB != Entry) {
80 for (auto *Pred : MBB->predecessors()) {
81 assert(inRegion(Pred));
85 #endif
86 calculate();
89 bool canReach(MachineBasicBlock *From, MachineBasicBlock *To) const {
90 assert(inRegion(From) && inRegion(To));
91 auto I = Reachable.find(From);
92 if (I == Reachable.end())
93 return false;
94 return I->second.count(To);
97 // "Loopers" are blocks that are in a loop. We detect these by finding blocks
98 // that can reach themselves.
99 const BlockSet &getLoopers() const { return Loopers; }
101 // Get all blocks that are loop entries.
102 const BlockSet &getLoopEntries() const { return LoopEntries; }
104 // Get all blocks that enter a particular loop from outside.
105 const BlockSet &getLoopEnterers(MachineBasicBlock *LoopEntry) const {
106 assert(inRegion(LoopEntry));
107 auto I = LoopEnterers.find(LoopEntry);
108 assert(I != LoopEnterers.end());
109 return I->second;
112 private:
113 MachineBasicBlock *Entry;
114 const BlockSet &Blocks;
116 BlockSet Loopers, LoopEntries;
117 DenseMap<MachineBasicBlock *, BlockSet> LoopEnterers;
119 bool inRegion(MachineBasicBlock *MBB) const { return Blocks.count(MBB); }
121 // Maps a block to all the other blocks it can reach.
122 DenseMap<MachineBasicBlock *, BlockSet> Reachable;
124 void calculate() {
125 // Reachability computation work list. Contains pairs of recent additions
126 // (A, B) where we just added a link A => B.
127 using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
128 SmallVector<BlockPair, 4> WorkList;
130 // Add all relevant direct branches.
131 for (auto *MBB : Blocks) {
132 for (auto *Succ : MBB->successors()) {
133 if (Succ != Entry && inRegion(Succ)) {
134 Reachable[MBB].insert(Succ);
135 WorkList.emplace_back(MBB, Succ);
140 while (!WorkList.empty()) {
141 MachineBasicBlock *MBB, *Succ;
142 std::tie(MBB, Succ) = WorkList.pop_back_val();
143 assert(inRegion(MBB) && Succ != Entry && inRegion(Succ));
144 if (MBB != Entry) {
145 // We recently added MBB => Succ, and that means we may have enabled
146 // Pred => MBB => Succ.
147 for (auto *Pred : MBB->predecessors()) {
148 if (Reachable[Pred].insert(Succ).second) {
149 WorkList.emplace_back(Pred, Succ);
155 // Blocks that can return to themselves are in a loop.
156 for (auto *MBB : Blocks) {
157 if (canReach(MBB, MBB)) {
158 Loopers.insert(MBB);
161 assert(!Loopers.count(Entry));
163 // Find the loop entries - loopers reachable from blocks not in that loop -
164 // and those outside blocks that reach them, the "loop enterers".
165 for (auto *Looper : Loopers) {
166 for (auto *Pred : Looper->predecessors()) {
167 // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
168 // otherwise, it is a block that enters into the loop.
169 if (!canReach(Looper, Pred)) {
170 LoopEntries.insert(Looper);
171 LoopEnterers[Looper].insert(Pred);
178 // Finds the blocks in a single-entry loop, given the loop entry and the
179 // list of blocks that enter the loop.
180 class LoopBlocks {
181 public:
182 LoopBlocks(MachineBasicBlock *Entry, const BlockSet &Enterers)
183 : Entry(Entry), Enterers(Enterers) {
184 calculate();
187 BlockSet &getBlocks() { return Blocks; }
189 private:
190 MachineBasicBlock *Entry;
191 const BlockSet &Enterers;
193 BlockSet Blocks;
195 void calculate() {
196 // Going backwards from the loop entry, if we ignore the blocks entering
197 // from outside, we will traverse all the blocks in the loop.
198 BlockVector WorkList;
199 BlockSet AddedToWorkList;
200 Blocks.insert(Entry);
201 for (auto *Pred : Entry->predecessors()) {
202 if (!Enterers.count(Pred)) {
203 WorkList.push_back(Pred);
204 AddedToWorkList.insert(Pred);
208 while (!WorkList.empty()) {
209 auto *MBB = WorkList.pop_back_val();
210 assert(!Enterers.count(MBB));
211 if (Blocks.insert(MBB).second) {
212 for (auto *Pred : MBB->predecessors()) {
213 if (!AddedToWorkList.count(Pred)) {
214 WorkList.push_back(Pred);
215 AddedToWorkList.insert(Pred);
223 class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
224 StringRef getPassName() const override {
225 return "WebAssembly Fix Irreducible Control Flow";
228 bool runOnMachineFunction(MachineFunction &MF) override;
230 bool processRegion(MachineBasicBlock *Entry, BlockSet &Blocks,
231 MachineFunction &MF);
233 void makeSingleEntryLoop(BlockSet &Entries, BlockSet &Blocks,
234 MachineFunction &MF, const ReachabilityGraph &Graph);
236 public:
237 static char ID; // Pass identification, replacement for typeid
238 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
241 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
242 MachineBasicBlock *Entry, BlockSet &Blocks, MachineFunction &MF) {
243 bool Changed = false;
245 // Remove irreducibility before processing child loops, which may take
246 // multiple iterations.
247 while (true) {
248 ReachabilityGraph Graph(Entry, Blocks);
250 bool FoundIrreducibility = false;
252 for (auto *LoopEntry : Graph.getLoopEntries()) {
253 // Find mutual entries - all entries which can reach this one, and
254 // are reached by it (that always includes LoopEntry itself). All mutual
255 // entries must be in the same loop, so if we have more than one, then we
256 // have irreducible control flow.
258 // Note that irreducibility may involve inner loops, e.g. imagine A
259 // starts one loop, and it has B inside it which starts an inner loop.
260 // If we add a branch from all the way on the outside to B, then in a
261 // sense B is no longer an "inner" loop, semantically speaking. We will
262 // fix that irreducibility by adding a block that dispatches to either
263 // either A or B, so B will no longer be an inner loop in our output.
264 // (A fancier approach might try to keep it as such.)
266 // Note that we still need to recurse into inner loops later, to handle
267 // the case where the irreducibility is entirely nested - we would not
268 // be able to identify that at this point, since the enclosing loop is
269 // a group of blocks all of whom can reach each other. (We'll see the
270 // irreducibility after removing branches to the top of that enclosing
271 // loop.)
272 BlockSet MutualLoopEntries;
273 MutualLoopEntries.insert(LoopEntry);
274 for (auto *OtherLoopEntry : Graph.getLoopEntries()) {
275 if (OtherLoopEntry != LoopEntry &&
276 Graph.canReach(LoopEntry, OtherLoopEntry) &&
277 Graph.canReach(OtherLoopEntry, LoopEntry)) {
278 MutualLoopEntries.insert(OtherLoopEntry);
282 if (MutualLoopEntries.size() > 1) {
283 makeSingleEntryLoop(MutualLoopEntries, Blocks, MF, Graph);
284 FoundIrreducibility = true;
285 Changed = true;
286 break;
289 // Only go on to actually process the inner loops when we are done
290 // removing irreducible control flow and changing the graph. Modifying
291 // the graph as we go is possible, and that might let us avoid looking at
292 // the already-fixed loops again if we are careful, but all that is
293 // complex and bug-prone. Since irreducible loops are rare, just starting
294 // another iteration is best.
295 if (FoundIrreducibility) {
296 continue;
299 for (auto *LoopEntry : Graph.getLoopEntries()) {
300 LoopBlocks InnerBlocks(LoopEntry, Graph.getLoopEnterers(LoopEntry));
301 // Each of these calls to processRegion may change the graph, but are
302 // guaranteed not to interfere with each other. The only changes we make
303 // to the graph are to add blocks on the way to a loop entry. As the
304 // loops are disjoint, that means we may only alter branches that exit
305 // another loop, which are ignored when recursing into that other loop
306 // anyhow.
307 if (processRegion(LoopEntry, InnerBlocks.getBlocks(), MF)) {
308 Changed = true;
312 return Changed;
316 // Given a set of entries to a single loop, create a single entry for that
317 // loop by creating a dispatch block for them, routing control flow using
318 // a helper variable. Also updates Blocks with any new blocks created, so
319 // that we properly track all the blocks in the region. But this does not update
320 // ReachabilityGraph; this will be updated in the caller of this function as
321 // needed.
322 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
323 BlockSet &Entries, BlockSet &Blocks, MachineFunction &MF,
324 const ReachabilityGraph &Graph) {
325 assert(Entries.size() >= 2);
327 // Sort the entries to ensure a deterministic build.
328 BlockVector SortedEntries(Entries.begin(), Entries.end());
329 llvm::sort(SortedEntries,
330 [&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
331 auto ANum = A->getNumber();
332 auto BNum = B->getNumber();
333 return ANum < BNum;
336 #ifndef NDEBUG
337 for (auto Block : SortedEntries)
338 assert(Block->getNumber() != -1);
339 if (SortedEntries.size() > 1) {
340 for (auto I = SortedEntries.begin(), E = SortedEntries.end() - 1; I != E;
341 ++I) {
342 auto ANum = (*I)->getNumber();
343 auto BNum = (*(std::next(I)))->getNumber();
344 assert(ANum != BNum);
347 #endif
349 // Create a dispatch block which will contain a jump table to the entries.
350 MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
351 MF.insert(MF.end(), Dispatch);
352 Blocks.insert(Dispatch);
354 // Add the jump table.
355 const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
356 MachineInstrBuilder MIB =
357 BuildMI(Dispatch, DebugLoc(), TII.get(WebAssembly::BR_TABLE_I32));
359 // Add the register which will be used to tell the jump table which block to
360 // jump to.
361 MachineRegisterInfo &MRI = MF.getRegInfo();
362 Register Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
363 MIB.addReg(Reg);
365 // Compute the indices in the superheader, one for each bad block, and
366 // add them as successors.
367 DenseMap<MachineBasicBlock *, unsigned> Indices;
368 for (auto *Entry : SortedEntries) {
369 auto Pair = Indices.insert(std::make_pair(Entry, 0));
370 assert(Pair.second);
372 unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
373 Pair.first->second = Index;
375 MIB.addMBB(Entry);
376 Dispatch->addSuccessor(Entry);
379 // Rewrite the problematic successors for every block that wants to reach
380 // the bad blocks. For simplicity, we just introduce a new block for every
381 // edge we need to rewrite. (Fancier things are possible.)
383 BlockVector AllPreds;
384 for (auto *Entry : SortedEntries) {
385 for (auto *Pred : Entry->predecessors()) {
386 if (Pred != Dispatch) {
387 AllPreds.push_back(Pred);
392 // This set stores predecessors within this loop.
393 DenseSet<MachineBasicBlock *> InLoop;
394 for (auto *Pred : AllPreds) {
395 for (auto *Entry : Pred->successors()) {
396 if (!Entries.count(Entry))
397 continue;
398 if (Graph.canReach(Entry, Pred)) {
399 InLoop.insert(Pred);
400 break;
405 // Record if each entry has a layout predecessor. This map stores
406 // <<Predecessor is within the loop?, loop entry>, layout predecessor>
407 std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *>
408 EntryToLayoutPred;
409 for (auto *Pred : AllPreds)
410 for (auto *Entry : Pred->successors())
411 if (Entries.count(Entry) && Pred->isLayoutSuccessor(Entry))
412 EntryToLayoutPred[std::make_pair(InLoop.count(Pred), Entry)] = Pred;
414 // We need to create at most two routing blocks per entry: one for
415 // predecessors outside the loop and one for predecessors inside the loop.
416 // This map stores
417 // <<Predecessor is within the loop?, loop entry>, routing block>
418 std::map<std::pair<bool, MachineBasicBlock *>, MachineBasicBlock *> Map;
419 for (auto *Pred : AllPreds) {
420 bool PredInLoop = InLoop.count(Pred);
421 for (auto *Entry : Pred->successors()) {
422 if (!Entries.count(Entry) ||
423 Map.count(std::make_pair(InLoop.count(Pred), Entry)))
424 continue;
425 // If there exists a layout predecessor of this entry and this predecessor
426 // is not that, we rather create a routing block after that layout
427 // predecessor to save a branch.
428 if (EntryToLayoutPred.count(std::make_pair(PredInLoop, Entry)) &&
429 EntryToLayoutPred[std::make_pair(PredInLoop, Entry)] != Pred)
430 continue;
432 // This is a successor we need to rewrite.
433 MachineBasicBlock *Routing = MF.CreateMachineBasicBlock();
434 MF.insert(Pred->isLayoutSuccessor(Entry)
435 ? MachineFunction::iterator(Entry)
436 : MF.end(),
437 Routing);
438 Blocks.insert(Routing);
440 // Set the jump table's register of the index of the block we wish to
441 // jump to, and jump to the jump table.
442 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::CONST_I32), Reg)
443 .addImm(Indices[Entry]);
444 BuildMI(Routing, DebugLoc(), TII.get(WebAssembly::BR)).addMBB(Dispatch);
445 Routing->addSuccessor(Dispatch);
446 Map[std::make_pair(PredInLoop, Entry)] = Routing;
450 for (auto *Pred : AllPreds) {
451 bool PredInLoop = InLoop.count(Pred);
452 // Remap the terminator operands and the successor list.
453 for (MachineInstr &Term : Pred->terminators())
454 for (auto &Op : Term.explicit_uses())
455 if (Op.isMBB() && Indices.count(Op.getMBB()))
456 Op.setMBB(Map[std::make_pair(PredInLoop, Op.getMBB())]);
458 for (auto *Succ : Pred->successors()) {
459 if (!Entries.count(Succ))
460 continue;
461 auto *Routing = Map[std::make_pair(PredInLoop, Succ)];
462 Pred->replaceSuccessor(Succ, Routing);
466 // Create a fake default label, because br_table requires one.
467 MIB.addMBB(MIB.getInstr()
468 ->getOperand(MIB.getInstr()->getNumExplicitOperands() - 1)
469 .getMBB());
472 } // end anonymous namespace
474 char WebAssemblyFixIrreducibleControlFlow::ID = 0;
475 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
476 "Removes irreducible control flow", false, false)
478 FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
479 return new WebAssemblyFixIrreducibleControlFlow();
482 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
483 MachineFunction &MF) {
484 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
485 "********** Function: "
486 << MF.getName() << '\n');
488 // Start the recursive process on the entire function body.
489 BlockSet AllBlocks;
490 for (auto &MBB : MF) {
491 AllBlocks.insert(&MBB);
494 if (LLVM_UNLIKELY(processRegion(&*MF.begin(), AllBlocks, MF))) {
495 // We rewrote part of the function; recompute relevant things.
496 MF.getRegInfo().invalidateLiveness();
497 MF.RenumberBlocks();
498 return true;
501 return false;