1 //=- WebAssemblyFixIrreducibleControlFlow.cpp - Fix irreducible control flow -//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a pass that transforms irreducible control flow into
11 /// reducible control flow. Irreducible control flow means multiple-entry
12 /// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
13 /// due to being unnatural.
15 /// Note that LLVM has a generic pass that lowers irreducible control flow, but
16 /// it linearizes control flow, turning diamonds into two triangles, which is
17 /// both unnecessary and undesirable for WebAssembly.
19 /// The big picture: Ignoring natural loops (seeing them monolithically), we
20 /// find all the blocks which can return to themselves ("loopers"). Loopers
21 /// reachable from the non-loopers are loop entries: if there are 2 or more,
22 /// then we have irreducible control flow. We fix that as follows: a new block
23 /// is created that can dispatch to each of the loop entries, based on the
24 /// value of a label "helper" variable, and we replace direct branches to the
25 /// entries with assignments to the label variable and a branch to the dispatch
26 /// block. Then the dispatch block is the single entry in a new natural loop.
28 /// This is similar to what the Relooper [1] does, both identify looping code
29 /// that requires multiple entries, and resolve it in a similar way. In
30 /// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
31 /// also that like the Relooper, we implement a "minimal" intervention: we only
32 /// use the "label" helper for the blocks we absolutely must and no others. We
33 /// also prioritize code size and do not perform node splitting (i.e. we don't
34 /// duplicate code in order to resolve irreducibility).
36 /// The difference between this code and the Relooper is that the Relooper also
37 /// generates ifs and loops and works in a recursive manner, knowing at each
38 /// point what the entries are, and recursively breaks down the problem. Here
39 /// we just want to resolve irreducible control flow, and we also want to use
40 /// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
41 /// identify natural loops, etc., and we start with the whole CFG and must
42 /// identify both the looping code and its entries.
44 /// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
45 /// Proceedings of the ACM international conference companion on Object oriented
46 /// programming systems languages and applications companion (SPLASH '11). ACM,
47 /// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
48 /// http://doi.acm.org/10.1145/2048147.2048224
50 //===----------------------------------------------------------------------===//
52 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
53 #include "WebAssembly.h"
54 #include "WebAssemblyMachineFunctionInfo.h"
55 #include "WebAssemblySubtarget.h"
56 #include "llvm/ADT/PriorityQueue.h"
57 #include "llvm/ADT/SCCIterator.h"
58 #include "llvm/ADT/SetVector.h"
59 #include "llvm/CodeGen/MachineDominators.h"
60 #include "llvm/CodeGen/MachineFunction.h"
61 #include "llvm/CodeGen/MachineInstrBuilder.h"
62 #include "llvm/CodeGen/MachineLoopInfo.h"
63 #include "llvm/CodeGen/MachineRegisterInfo.h"
64 #include "llvm/CodeGen/Passes.h"
65 #include "llvm/Support/Debug.h"
66 #include "llvm/Support/raw_ostream.h"
69 #define DEBUG_TYPE "wasm-fix-irreducible-control-flow"
75 LoopFixer(MachineFunction
&MF
, MachineLoopInfo
&MLI
, MachineLoop
*Loop
)
76 : MF(MF
), MLI(MLI
), Loop(Loop
) {}
78 // Run the fixer on the given inputs. Returns whether changes were made.
86 MachineBasicBlock
*Header
;
87 SmallPtrSet
<MachineBasicBlock
*, 4> LoopBlocks
;
89 using BlockSet
= SmallPtrSet
<MachineBasicBlock
*, 4>;
90 DenseMap
<MachineBasicBlock
*, BlockSet
> Reachable
;
92 // The worklist contains pairs of recent additions, (a, b), where we just
93 // added a link a => b.
94 using BlockPair
= std::pair
<MachineBasicBlock
*, MachineBasicBlock
*>;
95 SmallVector
<BlockPair
, 4> WorkList
;
97 // Get a canonical block to represent a block or a loop: the block, or if in
98 // an inner loop, the loop header, of it in an outer loop scope, we can
99 // ignore it. We need to call this on all blocks we work on.
100 MachineBasicBlock
*canonicalize(MachineBasicBlock
*MBB
) {
101 MachineLoop
*InnerLoop
= MLI
.getLoopFor(MBB
);
102 if (InnerLoop
== Loop
) {
105 // This is either in an outer or an inner loop, and not in ours.
106 if (!LoopBlocks
.count(MBB
)) {
107 // It's in outer code, ignore it.
111 // It's in an inner loop, canonicalize it to the header of that loop.
112 return InnerLoop
->getHeader();
116 // For a successor we can additionally ignore it if it's a branch back to a
117 // natural loop top, as when we are in the scope of a loop, we just care
118 // about internal irreducibility, and can ignore the loop we are in. We need
119 // to call this on all blocks in a context where they are a successor.
120 MachineBasicBlock
*canonicalizeSuccessor(MachineBasicBlock
*MBB
) {
121 if (Loop
&& MBB
== Loop
->getHeader()) {
122 // Ignore branches going to the loop's natural header.
125 return canonicalize(MBB
);
128 // Potentially insert a new reachable edge, and if so, note it as further
130 void maybeInsert(MachineBasicBlock
*MBB
, MachineBasicBlock
*Succ
) {
131 assert(MBB
== canonicalize(MBB
));
133 // Succ may not be interesting as a sucessor.
134 Succ
= canonicalizeSuccessor(Succ
);
137 if (Reachable
[MBB
].insert(Succ
).second
) {
138 // For there to be further work, it means that we have
140 // for some other X, and in that case X => Succ would be a new edge for
141 // us to discover later. However, if we don't care about MBB as a
142 // successor, then we don't care about that anyhow.
143 if (canonicalizeSuccessor(MBB
)) {
144 WorkList
.emplace_back(MBB
, Succ
);
150 bool LoopFixer::run() {
151 Header
= Loop
? Loop
->getHeader() : &*MF
.begin();
153 // Identify all the blocks in this loop scope.
155 for (auto *MBB
: Loop
->getBlocks()) {
156 LoopBlocks
.insert(MBB
);
159 for (auto &MBB
: MF
) {
160 LoopBlocks
.insert(&MBB
);
164 // Compute which (canonicalized) blocks each block can reach.
166 // Add all the initial work.
167 for (auto *MBB
: LoopBlocks
) {
168 MachineLoop
*InnerLoop
= MLI
.getLoopFor(MBB
);
170 if (InnerLoop
== Loop
) {
171 for (auto *Succ
: MBB
->successors()) {
172 maybeInsert(MBB
, Succ
);
175 // It can't be in an outer loop - we loop on LoopBlocks - and so it must
178 // Check if we are the canonical block for this loop.
179 if (canonicalize(MBB
) != MBB
) {
182 // The successors are those of the loop.
183 SmallVector
<MachineBasicBlock
*, 2> ExitBlocks
;
184 InnerLoop
->getExitBlocks(ExitBlocks
);
185 for (auto *Succ
: ExitBlocks
) {
186 maybeInsert(MBB
, Succ
);
191 // Do work until we are all done.
192 while (!WorkList
.empty()) {
193 MachineBasicBlock
*MBB
;
194 MachineBasicBlock
*Succ
;
195 std::tie(MBB
, Succ
) = WorkList
.pop_back_val();
196 // The worklist item is an edge we just added, so it must have valid blocks
197 // (and not something canonicalized to nullptr).
200 // The successor in that pair must also be a valid successor.
201 assert(MBB
== canonicalizeSuccessor(MBB
));
202 // We recently added MBB => Succ, and that means we may have enabled
203 // Pred => MBB => Succ. Check all the predecessors. Note that our loop here
204 // is correct for both a block and a block representing a loop, as the loop
205 // is natural and so the predecessors are all predecessors of the loop
206 // header, which is the block we have here.
207 for (auto *Pred
: MBB
->predecessors()) {
208 // Canonicalize, make sure it's relevant, and check it's not the same
209 // block (an update to the block itself doesn't help compute that same
211 Pred
= canonicalize(Pred
);
212 if (Pred
&& Pred
!= MBB
) {
213 maybeInsert(Pred
, Succ
);
218 // It's now trivial to identify the loopers.
219 SmallPtrSet
<MachineBasicBlock
*, 4> Loopers
;
220 for (auto MBB
: LoopBlocks
) {
221 if (Reachable
[MBB
].count(MBB
)) {
225 // The header cannot be a looper. At the toplevel, LLVM does not allow the
226 // entry to be in a loop, and in a natural loop we should ignore the header.
227 assert(Loopers
.count(Header
) == 0);
229 // Find the entries, loopers reachable from non-loopers.
230 SmallPtrSet
<MachineBasicBlock
*, 4> Entries
;
231 SmallVector
<MachineBasicBlock
*, 4> SortedEntries
;
232 for (auto *Looper
: Loopers
) {
233 for (auto *Pred
: Looper
->predecessors()) {
234 Pred
= canonicalize(Pred
);
235 if (Pred
&& !Loopers
.count(Pred
)) {
236 Entries
.insert(Looper
);
237 SortedEntries
.push_back(Looper
);
243 // Check if we found irreducible control flow.
244 if (LLVM_LIKELY(Entries
.size() <= 1))
247 // Sort the entries to ensure a deterministic build.
248 llvm::sort(SortedEntries
,
249 [&](const MachineBasicBlock
*A
, const MachineBasicBlock
*B
) {
250 auto ANum
= A
->getNumber();
251 auto BNum
= B
->getNumber();
256 for (auto Block
: SortedEntries
)
257 assert(Block
->getNumber() != -1);
258 if (SortedEntries
.size() > 1) {
259 for (auto I
= SortedEntries
.begin(), E
= SortedEntries
.end() - 1;
261 auto ANum
= (*I
)->getNumber();
262 auto BNum
= (*(std::next(I
)))->getNumber();
263 assert(ANum
!= BNum
);
268 // Create a dispatch block which will contain a jump table to the entries.
269 MachineBasicBlock
*Dispatch
= MF
.CreateMachineBasicBlock();
270 MF
.insert(MF
.end(), Dispatch
);
271 MLI
.changeLoopFor(Dispatch
, Loop
);
273 // Add the jump table.
274 const auto &TII
= *MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
275 MachineInstrBuilder MIB
= BuildMI(*Dispatch
, Dispatch
->end(), DebugLoc(),
276 TII
.get(WebAssembly::BR_TABLE_I32
));
278 // Add the register which will be used to tell the jump table which block to
280 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
281 unsigned Reg
= MRI
.createVirtualRegister(&WebAssembly::I32RegClass
);
284 // Compute the indices in the superheader, one for each bad block, and
285 // add them as successors.
286 DenseMap
<MachineBasicBlock
*, unsigned> Indices
;
287 for (auto *MBB
: SortedEntries
) {
288 auto Pair
= Indices
.insert(std::make_pair(MBB
, 0));
293 unsigned Index
= MIB
.getInstr()->getNumExplicitOperands() - 1;
294 Pair
.first
->second
= Index
;
297 Dispatch
->addSuccessor(MBB
);
300 // Rewrite the problematic successors for every block that wants to reach the
301 // bad blocks. For simplicity, we just introduce a new block for every edge
302 // we need to rewrite. (Fancier things are possible.)
304 SmallVector
<MachineBasicBlock
*, 4> AllPreds
;
305 for (auto *MBB
: SortedEntries
) {
306 for (auto *Pred
: MBB
->predecessors()) {
307 if (Pred
!= Dispatch
) {
308 AllPreds
.push_back(Pred
);
313 for (MachineBasicBlock
*MBB
: AllPreds
) {
314 DenseMap
<MachineBasicBlock
*, MachineBasicBlock
*> Map
;
315 for (auto *Succ
: MBB
->successors()) {
316 if (!Entries
.count(Succ
)) {
320 // This is a successor we need to rewrite.
321 MachineBasicBlock
*Split
= MF
.CreateMachineBasicBlock();
322 MF
.insert(MBB
->isLayoutSuccessor(Succ
) ? MachineFunction::iterator(Succ
)
325 MLI
.changeLoopFor(Split
, Loop
);
327 // Set the jump table's register of the index of the block we wish to
328 // jump to, and jump to the jump table.
329 BuildMI(*Split
, Split
->end(), DebugLoc(), TII
.get(WebAssembly::CONST_I32
),
331 .addImm(Indices
[Succ
]);
332 BuildMI(*Split
, Split
->end(), DebugLoc(), TII
.get(WebAssembly::BR
))
334 Split
->addSuccessor(Dispatch
);
337 // Remap the terminator operands and the successor list.
338 for (MachineInstr
&Term
: MBB
->terminators())
339 for (auto &Op
: Term
.explicit_uses())
340 if (Op
.isMBB() && Indices
.count(Op
.getMBB()))
341 Op
.setMBB(Map
[Op
.getMBB()]);
342 for (auto Rewrite
: Map
)
343 MBB
->replaceSuccessor(Rewrite
.first
, Rewrite
.second
);
346 // Create a fake default label, because br_table requires one.
347 MIB
.addMBB(MIB
.getInstr()
348 ->getOperand(MIB
.getInstr()->getNumExplicitOperands() - 1)
354 class WebAssemblyFixIrreducibleControlFlow final
: public MachineFunctionPass
{
355 StringRef
getPassName() const override
{
356 return "WebAssembly Fix Irreducible Control Flow";
359 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
360 AU
.setPreservesCFG();
361 AU
.addRequired
<MachineDominatorTree
>();
362 AU
.addPreserved
<MachineDominatorTree
>();
363 AU
.addRequired
<MachineLoopInfo
>();
364 AU
.addPreserved
<MachineLoopInfo
>();
365 MachineFunctionPass::getAnalysisUsage(AU
);
368 bool runOnMachineFunction(MachineFunction
&MF
) override
;
370 bool runIteration(MachineFunction
&MF
, MachineLoopInfo
&MLI
) {
371 // Visit the function body, which is identified as a null loop.
372 if (LoopFixer(MF
, MLI
, nullptr).run()) {
376 // Visit all the loops.
377 SmallVector
<MachineLoop
*, 8> Worklist(MLI
.begin(), MLI
.end());
378 while (!Worklist
.empty()) {
379 MachineLoop
*Loop
= Worklist
.pop_back_val();
380 Worklist
.append(Loop
->begin(), Loop
->end());
381 if (LoopFixer(MF
, MLI
, Loop
).run()) {
390 static char ID
; // Pass identification, replacement for typeid
391 WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID
) {}
393 } // end anonymous namespace
395 char WebAssemblyFixIrreducibleControlFlow::ID
= 0;
396 INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow
, DEBUG_TYPE
,
397 "Removes irreducible control flow", false, false)
399 FunctionPass
*llvm::createWebAssemblyFixIrreducibleControlFlow() {
400 return new WebAssemblyFixIrreducibleControlFlow();
403 bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
404 MachineFunction
&MF
) {
405 LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
406 "********** Function: "
407 << MF
.getName() << '\n');
409 bool Changed
= false;
410 auto &MLI
= getAnalysis
<MachineLoopInfo
>();
412 // When we modify something, bail out and recompute MLI, then start again, as
413 // we create a new natural loop when we resolve irreducible control flow, and
414 // other loops may become nested in it, etc. In practice this is not an issue
415 // because irreducible control flow is rare, only very few cycles are needed
417 while (LLVM_UNLIKELY(runIteration(MF
, MLI
))) {
418 // We rewrote part of the function; recompute MLI and start again.
419 LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
420 MF
.getRegInfo().invalidateLiveness();
422 getAnalysis
<MachineDominatorTree
>().runOnMachineFunction(MF
);
423 MLI
.runOnMachineFunction(MF
);