1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a pass that removes irreducible control flow.
11 /// Irreducible control flow means multiple-entry loops, which this pass
12 /// transforms to have a single entry.
14 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
15 /// it linearizes control flow, turning diamonds into two triangles, which is
16 /// both unnecessary and undesirable for WebAssembly.
18 /// The big picture: We recursively process each "region", defined as a group
19 /// of blocks with a single entry and no branches back to that entry. A region
20 /// may be the entire function body, or the inner part of a loop, i.e., the
21 /// loop's body without branches back to the loop entry. In each region we fix
22 /// up multi-entry loops by adding a new block that can dispatch to each of the
23 /// loop entries, based on the value of a label "helper" variable, and we
24 /// replace direct branches to the entries with assignments to the label
25 /// variable and a branch to the dispatch block. Then the dispatch block is the
26 /// single entry in the loop containing the previous multiple entries. After
27 /// ensuring all the loops in a region are reducible, we recurse into them. The
28 /// total time complexity of this pass is:
30 /// O(NumBlocks * NumNestedLoops * NumIrreducibleLoops +
31 /// NumLoops * NumLoops)
33 /// This pass is similar to what the Relooper [1] does. Both identify looping
34 /// code that requires multiple entries, and resolve it in a similar way (in
35 /// Relooper terminology, we implement a Multiple shape in a Loop shape). Note
36 /// also that like the Relooper, we implement a "minimal" intervention: we only
37 /// use the "label" helper for the blocks we absolutely must and no others. We
38 /// also prioritize code size and do not duplicate code in order to resolve
39 /// irreducibility. The graph algorithms for finding loops and entries and so
40 /// forth are also similar to the Relooper. The main differences between this
41 /// pass and the Relooper are:
43 /// * We just care about irreducibility, so we just look at loops.
44 /// * The Relooper emits structured control flow (with ifs etc.), while we emit a CFG.
47 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
48 /// Proceedings of the ACM international conference companion on Object oriented
49 /// programming systems languages and applications companion (SPLASH '11). ACM,
50 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
51 /// http://doi.acm.org/10.1145/2048147.2048224
53 //===----------------------------------------------------------------------===//
55 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
56 #include "WebAssembly.h"
57 #include "WebAssemblySubtarget.h"
58 #include "llvm/CodeGen/MachineFunctionPass.h"
59 #include "llvm/CodeGen/MachineInstrBuilder.h"
60 #include "llvm/Support/Debug.h"
63 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
67 using BlockVector
= SmallVector
<MachineBasicBlock
*, 4>;
68 using BlockSet
= SmallPtrSet
<MachineBasicBlock
*, 4>;
70 static BlockVector
getSortedEntries(const BlockSet
&Entries
) {
71 BlockVector
SortedEntries(Entries
.begin(), Entries
.end());
72 llvm::sort(SortedEntries
,
73 [](const MachineBasicBlock
*A
, const MachineBasicBlock
*B
) {
74 auto ANum
= A
->getNumber();
75 auto BNum
= B
->getNumber();
81 // Calculates reachability in a region. Ignores branches to blocks outside of
82 // the region, and ignores branches to the region entry (for the case where
83 // the region is the inner part of a loop).
84 class ReachabilityGraph
{
86 ReachabilityGraph(MachineBasicBlock
*Entry
, const BlockSet
&Blocks
)
87 : Entry(Entry
), Blocks(Blocks
) {
89 // The region must have a single entry.
90 for (auto *MBB
: Blocks
) {
92 for (auto *Pred
: MBB
->predecessors()) {
93 assert(inRegion(Pred
));
101 bool canReach(MachineBasicBlock
*From
, MachineBasicBlock
*To
) const {
102 assert(inRegion(From
) && inRegion(To
));
103 auto I
= Reachable
.find(From
);
104 if (I
== Reachable
.end())
106 return I
->second
.count(To
);
109 // "Loopers" are blocks that are in a loop. We detect these by finding blocks
110 // that can reach themselves.
111 const BlockSet
&getLoopers() const { return Loopers
; }
113 // Get all blocks that are loop entries.
114 const BlockSet
&getLoopEntries() const { return LoopEntries
; }
116 // Get all blocks that enter a particular loop from outside.
117 const BlockSet
&getLoopEnterers(MachineBasicBlock
*LoopEntry
) const {
118 assert(inRegion(LoopEntry
));
119 auto I
= LoopEnterers
.find(LoopEntry
);
120 assert(I
!= LoopEnterers
.end());
125 MachineBasicBlock
*Entry
;
126 const BlockSet
&Blocks
;
128 BlockSet Loopers
, LoopEntries
;
129 DenseMap
<MachineBasicBlock
*, BlockSet
> LoopEnterers
;
131 bool inRegion(MachineBasicBlock
*MBB
) const { return Blocks
.count(MBB
); }
133 // Maps a block to all the other blocks it can reach.
134 DenseMap
<MachineBasicBlock
*, BlockSet
> Reachable
;
137 // Reachability computation work list. Contains pairs of recent additions
138 // (A, B) where we just added a link A => B.
139 using BlockPair
= std::pair
<MachineBasicBlock
*, MachineBasicBlock
*>;
140 SmallVector
<BlockPair
, 4> WorkList
;
142 // Add all relevant direct branches.
143 for (auto *MBB
: Blocks
) {
144 for (auto *Succ
: MBB
->successors()) {
145 if (Succ
!= Entry
&& inRegion(Succ
)) {
146 Reachable
[MBB
].insert(Succ
);
147 WorkList
.emplace_back(MBB
, Succ
);
152 while (!WorkList
.empty()) {
153 MachineBasicBlock
*MBB
, *Succ
;
154 std::tie(MBB
, Succ
) = WorkList
.pop_back_val();
155 assert(inRegion(MBB
) && Succ
!= Entry
&& inRegion(Succ
));
157 // We recently added MBB => Succ, and that means we may have enabled
158 // Pred => MBB => Succ.
159 for (auto *Pred
: MBB
->predecessors()) {
160 if (Reachable
[Pred
].insert(Succ
).second
) {
161 WorkList
.emplace_back(Pred
, Succ
);
167 // Blocks that can return to themselves are in a loop.
168 for (auto *MBB
: Blocks
) {
169 if (canReach(MBB
, MBB
)) {
173 assert(!Loopers
.count(Entry
));
175 // Find the loop entries - loopers reachable from blocks not in that loop -
176 // and those outside blocks that reach them, the "loop enterers".
177 for (auto *Looper
: Loopers
) {
178 for (auto *Pred
: Looper
->predecessors()) {
179 // Pred can reach Looper. If Looper can reach Pred, it is in the loop;
180 // otherwise, it is a block that enters into the loop.
181 if (!canReach(Looper
, Pred
)) {
182 LoopEntries
.insert(Looper
);
183 LoopEnterers
[Looper
].insert(Pred
);
190 // Finds the blocks in a single-entry loop, given the loop entry and the
191 // list of blocks that enter the loop.
194 LoopBlocks(MachineBasicBlock
*Entry
, const BlockSet
&Enterers
)
195 : Entry(Entry
), Enterers(Enterers
) {
199 BlockSet
&getBlocks() { return Blocks
; }
202 MachineBasicBlock
*Entry
;
203 const BlockSet
&Enterers
;
208 // Going backwards from the loop entry, if we ignore the blocks entering
209 // from outside, we will traverse all the blocks in the loop.
210 BlockVector WorkList
;
211 BlockSet AddedToWorkList
;
212 Blocks
.insert(Entry
);
213 for (auto *Pred
: Entry
->predecessors()) {
214 if (!Enterers
.count(Pred
)) {
215 WorkList
.push_back(Pred
);
216 AddedToWorkList
.insert(Pred
);
220 while (!WorkList
.empty()) {
221 auto *MBB
= WorkList
.pop_back_val();
222 assert(!Enterers
.count(MBB
));
223 if (Blocks
.insert(MBB
).second
) {
224 for (auto *Pred
: MBB
->predecessors()) {
225 if (AddedToWorkList
.insert(Pred
).second
)
226 WorkList
.push_back(Pred
);
233 class WebAssemblyFixIrreducibleControlFlow final
: public MachineFunctionPass
{
234 StringRef
getPassName() const override
{
235 return "WebAssembly Fix Irreducible Control Flow";
238 bool runOnMachineFunction(MachineFunction
&MF
) override
;
240 bool processRegion(MachineBasicBlock
*Entry
, BlockSet
&Blocks
,
241 MachineFunction
&MF
);
243 void makeSingleEntryLoop(BlockSet
&Entries
, BlockSet
&Blocks
,
244 MachineFunction
&MF
, const ReachabilityGraph
&Graph
);
247 static char ID
; // Pass identification, replacement for typeid
248 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID
) {}
251 bool WebAssemblyFixIrreducibleControlFlow::processRegion(
252 MachineBasicBlock
*Entry
, BlockSet
&Blocks
, MachineFunction
&MF
) {
253 bool Changed
= false;
254 // Remove irreducibility before processing child loops, which may take
255 // multiple iterations.
257 ReachabilityGraph
Graph(Entry
, Blocks
);
259 bool FoundIrreducibility
= false;
261 for (auto *LoopEntry
: getSortedEntries(Graph
.getLoopEntries())) {
262 // Find mutual entries - all entries which can reach this one, and
263 // are reached by it (that always includes LoopEntry itself). All mutual
264 // entries must be in the same loop, so if we have more than one, then we
265 // have irreducible control flow.
267 // (Note that we need to sort the entries here, as otherwise the order can
268 // matter: being mutual is a symmetric relationship, and each set of
269 // mutuals will be handled properly no matter which we see first. However,
270 // there can be multiple disjoint sets of mutuals, and which we process
271 // first changes the output.)
273 // Note that irreducibility may involve inner loops, e.g. imagine A
274 // starts one loop, and it has B inside it which starts an inner loop.
275 // If we add a branch from all the way on the outside to B, then in a
276 // sense B is no longer an "inner" loop, semantically speaking. We will
277 // fix that irreducibility by adding a block that dispatches to either
278 // either A or B, so B will no longer be an inner loop in our output.
279 // (A fancier approach might try to keep it as such.)
281 // Note that we still need to recurse into inner loops later, to handle
282 // the case where the irreducibility is entirely nested - we would not
283 // be able to identify that at this point, since the enclosing loop is
284 // a group of blocks all of whom can reach each other. (We'll see the
285 // irreducibility after removing branches to the top of that enclosing
287 BlockSet MutualLoopEntries
;
288 MutualLoopEntries
.insert(LoopEntry
);
289 for (auto *OtherLoopEntry
: Graph
.getLoopEntries()) {
290 if (OtherLoopEntry
!= LoopEntry
&&
291 Graph
.canReach(LoopEntry
, OtherLoopEntry
) &&
292 Graph
.canReach(OtherLoopEntry
, LoopEntry
)) {
293 MutualLoopEntries
.insert(OtherLoopEntry
);
297 if (MutualLoopEntries
.size() > 1) {
298 makeSingleEntryLoop(MutualLoopEntries
, Blocks
, MF
, Graph
);
299 FoundIrreducibility
= true;
304 // Only go on to actually process the inner loops when we are done
305 // removing irreducible control flow and changing the graph. Modifying
306 // the graph as we go is possible, and that might let us avoid looking at
307 // the already-fixed loops again if we are careful, but all that is
308 // complex and bug-prone. Since irreducible loops are rare, just starting
309 // another iteration is best.
310 if (FoundIrreducibility
) {
314 for (auto *LoopEntry
: Graph
.getLoopEntries()) {
315 LoopBlocks
InnerBlocks(LoopEntry
, Graph
.getLoopEnterers(LoopEntry
));
316 // Each of these calls to processRegion may change the graph, but are
317 // guaranteed not to interfere with each other. The only changes we make
318 // to the graph are to add blocks on the way to a loop entry. As the
319 // loops are disjoint, that means we may only alter branches that exit
320 // another loop, which are ignored when recursing into that other loop
322 if (processRegion(LoopEntry
, InnerBlocks
.getBlocks(), MF
)) {
331 // Given a set of entries to a single loop, create a single entry for that
332 // loop by creating a dispatch block for them, routing control flow using
333 // a helper variable. Also updates Blocks with any new blocks created, so
334 // that we properly track all the blocks in the region. But this does not update
335 // ReachabilityGraph; this will be updated in the caller of this function as
337 void WebAssemblyFixIrreducibleControlFlow::makeSingleEntryLoop(
338 BlockSet
&Entries
, BlockSet
&Blocks
, MachineFunction
&MF
,
339 const ReachabilityGraph
&Graph
) {
340 assert(Entries
.size() >= 2);
342 // Sort the entries to ensure a deterministic build.
343 BlockVector SortedEntries
= getSortedEntries(Entries
);
346 for (auto *Block
: SortedEntries
)
347 assert(Block
->getNumber() != -1);
348 if (SortedEntries
.size() > 1) {
349 for (auto I
= SortedEntries
.begin(), E
= SortedEntries
.end() - 1; I
!= E
;
351 auto ANum
= (*I
)->getNumber();
352 auto BNum
= (*(std::next(I
)))->getNumber();
353 assert(ANum
!= BNum
);
358 // Create a dispatch block which will contain a jump table to the entries.
359 MachineBasicBlock
*Dispatch
= MF
.CreateMachineBasicBlock();
360 MF
.insert(MF
.end(), Dispatch
);
361 Blocks
.insert(Dispatch
);
363 // Add the jump table.
364 const auto &TII
= *MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
365 MachineInstrBuilder MIB
=
366 BuildMI(Dispatch
, DebugLoc(), TII
.get(WebAssembly::BR_TABLE_I32
));
368 // Add the register which will be used to tell the jump table which block to
370 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
371 Register Reg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
374 // Compute the indices in the superheader, one for each bad block, and
375 // add them as successors.
376 DenseMap
<MachineBasicBlock
*, unsigned> Indices
;
377 for (auto *Entry
: SortedEntries
) {
378 auto Pair
= Indices
.insert(std::make_pair(Entry
, 0));
381 unsigned Index
= MIB
.getInstr()->getNumExplicitOperands() - 1;
382 Pair
.first
->second
= Index
;
385 Dispatch
->addSuccessor(Entry
);
388 // Rewrite the problematic successors for every block that wants to reach
389 // the bad blocks. For simplicity, we just introduce a new block for every
390 // edge we need to rewrite. (Fancier things are possible.)
392 BlockVector AllPreds
;
393 for (auto *Entry
: SortedEntries
) {
394 for (auto *Pred
: Entry
->predecessors()) {
395 if (Pred
!= Dispatch
) {
396 AllPreds
.push_back(Pred
);
401 // This set stores predecessors within this loop.
402 DenseSet
<MachineBasicBlock
*> InLoop
;
403 for (auto *Pred
: AllPreds
) {
404 for (auto *Entry
: Pred
->successors()) {
405 if (!Entries
.count(Entry
))
407 if (Graph
.canReach(Entry
, Pred
)) {
414 // Record if each entry has a layout predecessor. This map stores
415 // <<loop entry, Predecessor is within the loop?>, layout predecessor>
416 DenseMap
<PointerIntPair
<MachineBasicBlock
*, 1, bool>, MachineBasicBlock
*>
418 for (auto *Pred
: AllPreds
) {
419 bool PredInLoop
= InLoop
.count(Pred
);
420 for (auto *Entry
: Pred
->successors())
421 if (Entries
.count(Entry
) && Pred
->isLayoutSuccessor(Entry
))
422 EntryToLayoutPred
[{Entry
, PredInLoop
}] = Pred
;
425 // We need to create at most two routing blocks per entry: one for
426 // predecessors outside the loop and one for predecessors inside the loop.
428 // <<loop entry, Predecessor is within the loop?>, routing block>
429 DenseMap
<PointerIntPair
<MachineBasicBlock
*, 1, bool>, MachineBasicBlock
*>
431 for (auto *Pred
: AllPreds
) {
432 bool PredInLoop
= InLoop
.count(Pred
);
433 for (auto *Entry
: Pred
->successors()) {
434 if (!Entries
.count(Entry
) || Map
.count({Entry
, PredInLoop
}))
436 // If there exists a layout predecessor of this entry and this predecessor
437 // is not that, we rather create a routing block after that layout
438 // predecessor to save a branch.
439 if (auto *OtherPred
= EntryToLayoutPred
.lookup({Entry
, PredInLoop
}))
440 if (OtherPred
!= Pred
)
443 // This is a successor we need to rewrite.
444 MachineBasicBlock
*Routing
= MF
.CreateMachineBasicBlock();
445 MF
.insert(Pred
->isLayoutSuccessor(Entry
)
446 ? MachineFunction::iterator(Entry
)
449 Blocks
.insert(Routing
);
451 // Set the jump table's register of the index of the block we wish to
452 // jump to, and jump to the jump table.
453 BuildMI(Routing
, DebugLoc(), TII
.get(WebAssembly::CONST_I32
), Reg
)
454 .addImm(Indices
[Entry
]);
455 BuildMI(Routing
, DebugLoc(), TII
.get(WebAssembly::BR
)).addMBB(Dispatch
);
456 Routing
->addSuccessor(Dispatch
);
457 Map
[{Entry
, PredInLoop
}] = Routing
;
461 for (auto *Pred
: AllPreds
) {
462 bool PredInLoop
= InLoop
.count(Pred
);
463 // Remap the terminator operands and the successor list.
464 for (MachineInstr
&Term
: Pred
->terminators())
465 for (auto &Op
: Term
.explicit_uses())
466 if (Op
.isMBB() && Indices
.count(Op
.getMBB()))
467 Op
.setMBB(Map
[{Op
.getMBB(), PredInLoop
}]);
469 for (auto *Succ
: Pred
->successors()) {
470 if (!Entries
.count(Succ
))
472 auto *Routing
= Map
[{Succ
, PredInLoop
}];
473 Pred
->replaceSuccessor(Succ
, Routing
);
477 // Create a fake default label, because br_table requires one.
478 MIB
.addMBB(MIB
.getInstr()
479 ->getOperand(MIB
.getInstr()->getNumExplicitOperands() - 1)
483 } // end anonymous namespace
485 char WebAssemblyFixIrreducibleControlFlow::ID
= 0;
486 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow
, DEBUG_TYPE
,
487 "Removes irreducible control flow", false, false)
489 FunctionPass
*llvm::createWebAssemblyFixIrreducibleControlFlow() {
490 return new WebAssemblyFixIrreducibleControlFlow();
493 // Test whether the given register has an ARGUMENT def.
494 static bool hasArgumentDef(unsigned Reg
, const MachineRegisterInfo
&MRI
) {
495 for (const auto &Def
: MRI
.def_instructions(Reg
))
496 if (WebAssembly::isArgument(Def
.getOpcode()))
501 // Add a register definition with IMPLICIT_DEFs for every register to cover for
502 // register uses that don't have defs in every possible path.
503 // TODO: This is fairly heavy-handed; find a better approach.
504 static void addImplicitDefs(MachineFunction
&MF
) {
505 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
506 const auto &TII
= *MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
507 MachineBasicBlock
&Entry
= *MF
.begin();
508 for (unsigned I
= 0, E
= MRI
.getNumVirtRegs(); I
< E
; ++I
) {
509 Register Reg
= Register::index2VirtReg(I
);
511 // Skip unused registers.
512 if (MRI
.use_nodbg_empty(Reg
))
515 // Skip registers that have an ARGUMENT definition.
516 if (hasArgumentDef(Reg
, MRI
))
519 BuildMI(Entry
, Entry
.begin(), DebugLoc(),
520 TII
.get(WebAssembly::IMPLICIT_DEF
), Reg
);
523 // Move ARGUMENT_* instructions to the top of the entry block, so that their
524 // liveness reflects the fact that these really are live-in values.
525 for (MachineInstr
&MI
: llvm::make_early_inc_range(Entry
)) {
526 if (WebAssembly::isArgument(MI
.getOpcode())) {
527 MI
.removeFromParent();
528 Entry
.insert(Entry
.begin(), &MI
);
533 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
534 MachineFunction
&MF
) {
535 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
536 "********** Function: "
537 << MF
.getName() << '\n');
539 // Start the recursive process on the entire function body.
541 for (auto &MBB
: MF
) {
542 AllBlocks
.insert(&MBB
);
545 if (LLVM_UNLIKELY(processRegion(&*MF
.begin(), AllBlocks
, MF
))) {
546 // We rewrote part of the function; recompute relevant things.
548 // Now we've inserted dispatch blocks, some register uses can have incoming
549 // paths without a def. For example, before this pass register %a was
550 // defined in BB1 and used in BB2, and there was only one path from BB1 and
551 // BB2. But if this pass inserts a dispatch block having multiple
552 // predecessors between the two BBs, now there are paths to BB2 without
553 // visiting BB1, and %a's use in BB2 is not dominated by its def. Adding
554 // IMPLICIT_DEFs to all regs is one simple way to fix it.