1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a register stacking pass.
12 /// This pass reorders instructions to put register uses and defs in an order
13 /// such that they form single-use expression trees. Registers fitting this form
14 /// are then marked as "stackified", meaning references to them are replaced by
15 /// "push" and "pop" from the value stack.
17 /// This is primarily a code size optimization, since temporary values on the
18 /// value stack don't need to be named.
20 //===----------------------------------------------------------------------===//
22 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23 #include "Utils/WebAssemblyUtilities.h"
24 #include "WebAssembly.h"
25 #include "WebAssemblyDebugValueManager.h"
26 #include "WebAssemblyMachineFunctionInfo.h"
27 #include "WebAssemblySubtarget.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32 #include "llvm/CodeGen/MachineDominators.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "wasm-reg-stackify"
45 class WebAssemblyRegStackify final
: public MachineFunctionPass
{
46 StringRef
getPassName() const override
{
47 return "WebAssembly Register Stackify";
50 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
52 AU
.addRequired
<AAResultsWrapperPass
>();
53 AU
.addRequired
<MachineDominatorTree
>();
54 AU
.addRequired
<LiveIntervals
>();
55 AU
.addPreserved
<MachineBlockFrequencyInfo
>();
56 AU
.addPreserved
<SlotIndexes
>();
57 AU
.addPreserved
<LiveIntervals
>();
58 AU
.addPreservedID(LiveVariablesID
);
59 AU
.addPreserved
<MachineDominatorTree
>();
60 MachineFunctionPass::getAnalysisUsage(AU
);
63 bool runOnMachineFunction(MachineFunction
&MF
) override
;
66 static char ID
; // Pass identification, replacement for typeid
67 WebAssemblyRegStackify() : MachineFunctionPass(ID
) {}
69 } // end anonymous namespace
71 char WebAssemblyRegStackify::ID
= 0;
72 INITIALIZE_PASS(WebAssemblyRegStackify
, DEBUG_TYPE
,
73 "Reorder instructions to use the WebAssembly value stack",
76 FunctionPass
*llvm::createWebAssemblyRegStackify() {
77 return new WebAssemblyRegStackify();
80 // Decorate the given instruction with implicit operands that enforce the
81 // expression stack ordering constraints for an instruction which is on
82 // the expression stack.
83 static void imposeStackOrdering(MachineInstr
*MI
) {
84 // Write the opaque VALUE_STACK register.
85 if (!MI
->definesRegister(WebAssembly::VALUE_STACK
))
86 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
90 // Also read the opaque VALUE_STACK register.
91 if (!MI
->readsRegister(WebAssembly::VALUE_STACK
))
92 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
97 // Convert an IMPLICIT_DEF instruction into an instruction which defines
98 // a constant zero value.
99 static void convertImplicitDefToConstZero(MachineInstr
*MI
,
100 MachineRegisterInfo
&MRI
,
101 const TargetInstrInfo
*TII
,
103 LiveIntervals
&LIS
) {
104 assert(MI
->getOpcode() == TargetOpcode::IMPLICIT_DEF
);
106 const auto *RegClass
= MRI
.getRegClass(MI
->getOperand(0).getReg());
107 if (RegClass
== &WebAssembly::I32RegClass
) {
108 MI
->setDesc(TII
->get(WebAssembly::CONST_I32
));
109 MI
->addOperand(MachineOperand::CreateImm(0));
110 } else if (RegClass
== &WebAssembly::I64RegClass
) {
111 MI
->setDesc(TII
->get(WebAssembly::CONST_I64
));
112 MI
->addOperand(MachineOperand::CreateImm(0));
113 } else if (RegClass
== &WebAssembly::F32RegClass
) {
114 MI
->setDesc(TII
->get(WebAssembly::CONST_F32
));
115 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
116 Type::getFloatTy(MF
.getFunction().getContext())));
117 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
118 } else if (RegClass
== &WebAssembly::F64RegClass
) {
119 MI
->setDesc(TII
->get(WebAssembly::CONST_F64
));
120 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
121 Type::getDoubleTy(MF
.getFunction().getContext())));
122 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
123 } else if (RegClass
== &WebAssembly::V128RegClass
) {
124 MI
->setDesc(TII
->get(WebAssembly::CONST_V128_I64x2
));
125 MI
->addOperand(MachineOperand::CreateImm(0));
126 MI
->addOperand(MachineOperand::CreateImm(0));
128 llvm_unreachable("Unexpected reg class");
132 // Determine whether a call to the callee referenced by
133 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
135 static void queryCallee(const MachineInstr
&MI
, bool &Read
, bool &Write
,
136 bool &Effects
, bool &StackPointer
) {
137 // All calls can use the stack pointer.
140 const MachineOperand
&MO
= WebAssembly::getCalleeOp(MI
);
142 const Constant
*GV
= MO
.getGlobal();
143 if (const auto *GA
= dyn_cast
<GlobalAlias
>(GV
))
144 if (!GA
->isInterposable())
145 GV
= GA
->getAliasee();
147 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
148 if (!F
->doesNotThrow())
150 if (F
->doesNotAccessMemory())
152 if (F
->onlyReadsMemory()) {
165 // Determine whether MI reads memory, writes memory, has side effects,
166 // and/or uses the stack pointer value.
167 static void query(const MachineInstr
&MI
, AliasAnalysis
&AA
, bool &Read
,
168 bool &Write
, bool &Effects
, bool &StackPointer
) {
169 assert(!MI
.isTerminator());
171 if (MI
.isDebugInstr() || MI
.isPosition())
175 if (MI
.mayLoad() && !MI
.isDereferenceableInvariantLoad(&AA
))
181 } else if (MI
.hasOrderedMemoryRef()) {
182 switch (MI
.getOpcode()) {
183 case WebAssembly::DIV_S_I32
:
184 case WebAssembly::DIV_S_I64
:
185 case WebAssembly::REM_S_I32
:
186 case WebAssembly::REM_S_I64
:
187 case WebAssembly::DIV_U_I32
:
188 case WebAssembly::DIV_U_I64
:
189 case WebAssembly::REM_U_I32
:
190 case WebAssembly::REM_U_I64
:
191 case WebAssembly::I32_TRUNC_S_F32
:
192 case WebAssembly::I64_TRUNC_S_F32
:
193 case WebAssembly::I32_TRUNC_S_F64
:
194 case WebAssembly::I64_TRUNC_S_F64
:
195 case WebAssembly::I32_TRUNC_U_F32
:
196 case WebAssembly::I64_TRUNC_U_F32
:
197 case WebAssembly::I32_TRUNC_U_F64
:
198 case WebAssembly::I64_TRUNC_U_F64
:
199 // These instruction have hasUnmodeledSideEffects() returning true
200 // because they trap on overflow and invalid so they can't be arbitrarily
201 // moved, however hasOrderedMemoryRef() interprets this plus their lack
202 // of memoperands as having a potential unknown memory reference.
205 // Record volatile accesses, unless it's a call, as calls are handled
215 // Check for side effects.
216 if (MI
.hasUnmodeledSideEffects()) {
217 switch (MI
.getOpcode()) {
218 case WebAssembly::DIV_S_I32
:
219 case WebAssembly::DIV_S_I64
:
220 case WebAssembly::REM_S_I32
:
221 case WebAssembly::REM_S_I64
:
222 case WebAssembly::DIV_U_I32
:
223 case WebAssembly::DIV_U_I64
:
224 case WebAssembly::REM_U_I32
:
225 case WebAssembly::REM_U_I64
:
226 case WebAssembly::I32_TRUNC_S_F32
:
227 case WebAssembly::I64_TRUNC_S_F32
:
228 case WebAssembly::I32_TRUNC_S_F64
:
229 case WebAssembly::I64_TRUNC_S_F64
:
230 case WebAssembly::I32_TRUNC_U_F32
:
231 case WebAssembly::I64_TRUNC_U_F32
:
232 case WebAssembly::I32_TRUNC_U_F64
:
233 case WebAssembly::I64_TRUNC_U_F64
:
234 // These instructions have hasUnmodeledSideEffects() returning true
235 // because they trap on overflow and invalid so they can't be arbitrarily
236 // moved, however in the specific case of register stackifying, it is safe
237 // to move them because overflow and invalid are Undefined Behavior.
245 // Check for writes to __stack_pointer global.
246 if ((MI
.getOpcode() == WebAssembly::GLOBAL_SET_I32
||
247 MI
.getOpcode() == WebAssembly::GLOBAL_SET_I64
) &&
248 strcmp(MI
.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
253 queryCallee(MI
, Read
, Write
, Effects
, StackPointer
);
257 // Test whether Def is safe and profitable to rematerialize.
258 static bool shouldRematerialize(const MachineInstr
&Def
, AliasAnalysis
&AA
,
259 const WebAssemblyInstrInfo
*TII
) {
260 return Def
.isAsCheapAsAMove() && TII
->isTriviallyReMaterializable(Def
, &AA
);
263 // Identify the definition for this register at this point. This is a
264 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
265 // LiveIntervals to handle complex cases.
266 static MachineInstr
*getVRegDef(unsigned Reg
, const MachineInstr
*Insert
,
267 const MachineRegisterInfo
&MRI
,
268 const LiveIntervals
&LIS
) {
269 // Most registers are in SSA form here so we try a quick MRI query first.
270 if (MachineInstr
*Def
= MRI
.getUniqueVRegDef(Reg
))
273 // MRI doesn't know what the Def is. Try asking LIS.
274 if (const VNInfo
*ValNo
= LIS
.getInterval(Reg
).getVNInfoBefore(
275 LIS
.getInstructionIndex(*Insert
)))
276 return LIS
.getInstructionFromIndex(ValNo
->def
);
281 // Test whether Reg, as defined at Def, has exactly one use. This is a
282 // generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
283 // to handle complex cases.
284 static bool hasOneUse(unsigned Reg
, MachineInstr
*Def
, MachineRegisterInfo
&MRI
,
285 MachineDominatorTree
&MDT
, LiveIntervals
&LIS
) {
286 // Most registers are in SSA form here so we try a quick MRI query first.
287 if (MRI
.hasOneUse(Reg
))
291 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
292 const VNInfo
*DefVNI
=
293 LI
.getVNInfoAt(LIS
.getInstructionIndex(*Def
).getRegSlot());
295 for (auto &I
: MRI
.use_nodbg_operands(Reg
)) {
296 const auto &Result
= LI
.Query(LIS
.getInstructionIndex(*I
.getParent()));
297 if (Result
.valueIn() == DefVNI
) {
298 if (!Result
.isKill())
308 // Test whether it's safe to move Def to just before Insert.
309 // TODO: Compute memory dependencies in a way that doesn't require always
310 // walking the block.
311 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
313 static bool isSafeToMove(const MachineOperand
*Def
, const MachineOperand
*Use
,
314 const MachineInstr
*Insert
, AliasAnalysis
&AA
,
315 const WebAssemblyFunctionInfo
&MFI
,
316 const MachineRegisterInfo
&MRI
) {
317 const MachineInstr
*DefI
= Def
->getParent();
318 const MachineInstr
*UseI
= Use
->getParent();
319 assert(DefI
->getParent() == Insert
->getParent());
320 assert(UseI
->getParent() == Insert
->getParent());
322 // The first def of a multivalue instruction can be stackified by moving,
323 // since the later defs can always be placed into locals if necessary. Later
324 // defs can only be stackified if all previous defs are already stackified
325 // since ExplicitLocals will not know how to place a def in a local if a
326 // subsequent def is stackified. But only one def can be stackified by moving
327 // the instruction, so it must be the first one.
329 // TODO: This could be loosened to be the first *live* def, but care would
330 // have to be taken to ensure the drops of the initial dead defs can be
331 // placed. This would require checking that no previous defs are used in the
332 // same instruction as subsequent defs.
333 if (Def
!= DefI
->defs().begin())
336 // If any subsequent def is used prior to the current value by the same
337 // instruction in which the current value is used, we cannot
338 // stackify. Stackifying in this case would require that def moving below the
339 // current def in the stack, which cannot be achieved, even with locals.
340 for (const auto &SubsequentDef
: drop_begin(DefI
->defs())) {
341 for (const auto &PriorUse
: UseI
->uses()) {
342 if (&PriorUse
== Use
)
344 if (PriorUse
.isReg() && SubsequentDef
.getReg() == PriorUse
.getReg())
349 // If moving is a semantic nop, it is always allowed
350 const MachineBasicBlock
*MBB
= DefI
->getParent();
351 auto NextI
= std::next(MachineBasicBlock::const_iterator(DefI
));
352 for (auto E
= MBB
->end(); NextI
!= E
&& NextI
->isDebugInstr(); ++NextI
)
357 // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
359 if (WebAssembly::isCatch(DefI
->getOpcode()))
362 // Check for register dependencies.
363 SmallVector
<unsigned, 4> MutableRegisters
;
364 for (const MachineOperand
&MO
: DefI
->operands()) {
365 if (!MO
.isReg() || MO
.isUndef())
367 Register Reg
= MO
.getReg();
369 // If the register is dead here and at Insert, ignore it.
370 if (MO
.isDead() && Insert
->definesRegister(Reg
) &&
371 !Insert
->readsRegister(Reg
))
374 if (Register::isPhysicalRegister(Reg
)) {
375 // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
376 // from moving down, and we've already checked for that.
377 if (Reg
== WebAssembly::ARGUMENTS
)
379 // If the physical register is never modified, ignore it.
380 if (!MRI
.isPhysRegModified(Reg
))
382 // Otherwise, it's a physical register with unknown liveness.
386 // If one of the operands isn't in SSA form, it has different values at
387 // different times, and we need to make sure we don't move our use across
389 if (!MO
.isDef() && !MRI
.hasOneDef(Reg
))
390 MutableRegisters
.push_back(Reg
);
393 bool Read
= false, Write
= false, Effects
= false, StackPointer
= false;
394 query(*DefI
, AA
, Read
, Write
, Effects
, StackPointer
);
396 // If the instruction does not access memory and has no side effects, it has
397 // no additional dependencies.
398 bool HasMutableRegisters
= !MutableRegisters
.empty();
399 if (!Read
&& !Write
&& !Effects
&& !StackPointer
&& !HasMutableRegisters
)
402 // Scan through the intervening instructions between DefI and Insert.
403 MachineBasicBlock::const_iterator
D(DefI
), I(Insert
);
404 for (--I
; I
!= D
; --I
) {
405 bool InterveningRead
= false;
406 bool InterveningWrite
= false;
407 bool InterveningEffects
= false;
408 bool InterveningStackPointer
= false;
409 query(*I
, AA
, InterveningRead
, InterveningWrite
, InterveningEffects
,
410 InterveningStackPointer
);
411 if (Effects
&& InterveningEffects
)
413 if (Read
&& InterveningWrite
)
415 if (Write
&& (InterveningRead
|| InterveningWrite
))
417 if (StackPointer
&& InterveningStackPointer
)
420 for (unsigned Reg
: MutableRegisters
)
421 for (const MachineOperand
&MO
: I
->operands())
422 if (MO
.isReg() && MO
.isDef() && MO
.getReg() == Reg
)
429 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
430 static bool oneUseDominatesOtherUses(unsigned Reg
, const MachineOperand
&OneUse
,
431 const MachineBasicBlock
&MBB
,
432 const MachineRegisterInfo
&MRI
,
433 const MachineDominatorTree
&MDT
,
435 WebAssemblyFunctionInfo
&MFI
) {
436 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
438 const MachineInstr
*OneUseInst
= OneUse
.getParent();
439 VNInfo
*OneUseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*OneUseInst
));
441 for (const MachineOperand
&Use
: MRI
.use_nodbg_operands(Reg
)) {
445 const MachineInstr
*UseInst
= Use
.getParent();
446 VNInfo
*UseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*UseInst
));
448 if (UseVNI
!= OneUseVNI
)
451 if (UseInst
== OneUseInst
) {
452 // Another use in the same instruction. We need to ensure that the one
453 // selected use happens "before" it.
457 // Test that the use is dominated by the one selected use.
458 while (!MDT
.dominates(OneUseInst
, UseInst
)) {
459 // Actually, dominating is over-conservative. Test that the use would
460 // happen after the one selected use in the stack evaluation order.
462 // This is needed as a consequence of using implicit local.gets for
463 // uses and implicit local.sets for defs.
464 if (UseInst
->getDesc().getNumDefs() == 0)
466 const MachineOperand
&MO
= UseInst
->getOperand(0);
469 Register DefReg
= MO
.getReg();
470 if (!Register::isVirtualRegister(DefReg
) ||
471 !MFI
.isVRegStackified(DefReg
))
473 assert(MRI
.hasOneNonDBGUse(DefReg
));
474 const MachineOperand
&NewUse
= *MRI
.use_nodbg_begin(DefReg
);
475 const MachineInstr
*NewUseInst
= NewUse
.getParent();
476 if (NewUseInst
== OneUseInst
) {
477 if (&OneUse
> &NewUse
)
481 UseInst
= NewUseInst
;
488 /// Get the appropriate tee opcode for the given register class.
489 static unsigned getTeeOpcode(const TargetRegisterClass
*RC
) {
490 if (RC
== &WebAssembly::I32RegClass
)
491 return WebAssembly::TEE_I32
;
492 if (RC
== &WebAssembly::I64RegClass
)
493 return WebAssembly::TEE_I64
;
494 if (RC
== &WebAssembly::F32RegClass
)
495 return WebAssembly::TEE_F32
;
496 if (RC
== &WebAssembly::F64RegClass
)
497 return WebAssembly::TEE_F64
;
498 if (RC
== &WebAssembly::V128RegClass
)
499 return WebAssembly::TEE_V128
;
500 llvm_unreachable("Unexpected register class");
503 // Shrink LI to its uses, cleaning up LI.
504 static void shrinkToUses(LiveInterval
&LI
, LiveIntervals
&LIS
) {
505 if (LIS
.shrinkToUses(&LI
)) {
506 SmallVector
<LiveInterval
*, 4> SplitLIs
;
507 LIS
.splitSeparateComponents(LI
, SplitLIs
);
511 /// A single-use def in the same block with no intervening memory or register
512 /// dependencies; move the def down and nest it with the current instruction.
513 static MachineInstr
*moveForSingleUse(unsigned Reg
, MachineOperand
&Op
,
514 MachineInstr
*Def
, MachineBasicBlock
&MBB
,
515 MachineInstr
*Insert
, LiveIntervals
&LIS
,
516 WebAssemblyFunctionInfo
&MFI
,
517 MachineRegisterInfo
&MRI
) {
518 LLVM_DEBUG(dbgs() << "Move for single use: "; Def
->dump());
520 WebAssemblyDebugValueManager
DefDIs(Def
);
521 MBB
.splice(Insert
, &MBB
, Def
);
523 LIS
.handleMove(*Def
);
525 if (MRI
.hasOneDef(Reg
) && MRI
.hasOneUse(Reg
)) {
526 // No one else is using this register for anything so we can just stackify
528 MFI
.stackifyVReg(MRI
, Reg
);
530 // The register may have unrelated uses or defs; create a new register for
531 // just our one def and use so that we can stackify it.
532 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
533 Def
->getOperand(0).setReg(NewReg
);
536 // Tell LiveIntervals about the new register.
537 LIS
.createAndComputeVirtRegInterval(NewReg
);
539 // Tell LiveIntervals about the changes to the old register.
540 LiveInterval
&LI
= LIS
.getInterval(Reg
);
541 LI
.removeSegment(LIS
.getInstructionIndex(*Def
).getRegSlot(),
542 LIS
.getInstructionIndex(*Op
.getParent()).getRegSlot(),
543 /*RemoveDeadValNo=*/true);
545 MFI
.stackifyVReg(MRI
, NewReg
);
547 DefDIs
.updateReg(NewReg
);
549 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
552 imposeStackOrdering(Def
);
556 /// A trivially cloneable instruction; clone it and nest the new copy with the
557 /// current instruction.
558 static MachineInstr
*rematerializeCheapDef(
559 unsigned Reg
, MachineOperand
&Op
, MachineInstr
&Def
, MachineBasicBlock
&MBB
,
560 MachineBasicBlock::instr_iterator Insert
, LiveIntervals
&LIS
,
561 WebAssemblyFunctionInfo
&MFI
, MachineRegisterInfo
&MRI
,
562 const WebAssemblyInstrInfo
*TII
, const WebAssemblyRegisterInfo
*TRI
) {
563 LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def
.dump());
564 LLVM_DEBUG(dbgs() << " - for use in "; Op
.getParent()->dump());
566 WebAssemblyDebugValueManager
DefDIs(&Def
);
568 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
569 TII
->reMaterialize(MBB
, Insert
, NewReg
, 0, Def
, *TRI
);
571 MachineInstr
*Clone
= &*std::prev(Insert
);
572 LIS
.InsertMachineInstrInMaps(*Clone
);
573 LIS
.createAndComputeVirtRegInterval(NewReg
);
574 MFI
.stackifyVReg(MRI
, NewReg
);
575 imposeStackOrdering(Clone
);
577 LLVM_DEBUG(dbgs() << " - Cloned to "; Clone
->dump());
579 // Shrink the interval.
580 bool IsDead
= MRI
.use_empty(Reg
);
582 LiveInterval
&LI
= LIS
.getInterval(Reg
);
583 shrinkToUses(LI
, LIS
);
584 IsDead
= !LI
.liveAt(LIS
.getInstructionIndex(Def
).getDeadSlot());
587 // If that was the last use of the original, delete the original.
588 // Move or clone corresponding DBG_VALUEs to the 'Insert' location.
590 LLVM_DEBUG(dbgs() << " - Deleting original\n");
591 SlotIndex Idx
= LIS
.getInstructionIndex(Def
).getRegSlot();
592 LIS
.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS
), Idx
);
593 LIS
.removeInterval(Reg
);
594 LIS
.RemoveMachineInstrFromMaps(Def
);
595 Def
.eraseFromParent();
597 DefDIs
.move(&*Insert
);
598 DefDIs
.updateReg(NewReg
);
600 DefDIs
.clone(&*Insert
, NewReg
);
606 /// A multiple-use def in the same block with no intervening memory or register
607 /// dependencies; move the def down, nest it with the current instruction, and
608 /// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
611 /// Reg = INST ... // Def
612 /// INST ..., Reg, ... // Insert
613 /// INST ..., Reg, ...
614 /// INST ..., Reg, ...
618 /// DefReg = INST ... // Def (to become the new Insert)
619 /// TeeReg, Reg = TEE_... DefReg
620 /// INST ..., TeeReg, ... // Insert
621 /// INST ..., Reg, ...
622 /// INST ..., Reg, ...
624 /// with DefReg and TeeReg stackified. This eliminates a local.get from the
626 static MachineInstr
*moveAndTeeForMultiUse(
627 unsigned Reg
, MachineOperand
&Op
, MachineInstr
*Def
, MachineBasicBlock
&MBB
,
628 MachineInstr
*Insert
, LiveIntervals
&LIS
, WebAssemblyFunctionInfo
&MFI
,
629 MachineRegisterInfo
&MRI
, const WebAssemblyInstrInfo
*TII
) {
630 LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def
->dump());
632 WebAssemblyDebugValueManager
DefDIs(Def
);
634 // Move Def into place.
635 MBB
.splice(Insert
, &MBB
, Def
);
636 LIS
.handleMove(*Def
);
638 // Create the Tee and attach the registers.
639 const auto *RegClass
= MRI
.getRegClass(Reg
);
640 Register TeeReg
= MRI
.createVirtualRegister(RegClass
);
641 Register DefReg
= MRI
.createVirtualRegister(RegClass
);
642 MachineOperand
&DefMO
= Def
->getOperand(0);
643 MachineInstr
*Tee
= BuildMI(MBB
, Insert
, Insert
->getDebugLoc(),
644 TII
->get(getTeeOpcode(RegClass
)), TeeReg
)
645 .addReg(Reg
, RegState::Define
)
646 .addReg(DefReg
, getUndefRegState(DefMO
.isDead()));
648 DefMO
.setReg(DefReg
);
649 SlotIndex TeeIdx
= LIS
.InsertMachineInstrInMaps(*Tee
).getRegSlot();
650 SlotIndex DefIdx
= LIS
.getInstructionIndex(*Def
).getRegSlot();
654 // Tell LiveIntervals we moved the original vreg def from Def to Tee.
655 LiveInterval
&LI
= LIS
.getInterval(Reg
);
656 LiveInterval::iterator I
= LI
.FindSegmentContaining(DefIdx
);
657 VNInfo
*ValNo
= LI
.getVNInfoAt(DefIdx
);
660 shrinkToUses(LI
, LIS
);
662 // Finish stackifying the new regs.
663 LIS
.createAndComputeVirtRegInterval(TeeReg
);
664 LIS
.createAndComputeVirtRegInterval(DefReg
);
665 MFI
.stackifyVReg(MRI
, DefReg
);
666 MFI
.stackifyVReg(MRI
, TeeReg
);
667 imposeStackOrdering(Def
);
668 imposeStackOrdering(Tee
);
670 DefDIs
.clone(Tee
, DefReg
);
671 DefDIs
.clone(Insert
, TeeReg
);
673 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
674 LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee
->dump());
679 /// A stack for walking the tree of instructions being built, visiting the
680 /// MachineOperands in DFS order.
681 class TreeWalkerState
{
682 using mop_iterator
= MachineInstr::mop_iterator
;
683 using mop_reverse_iterator
= std::reverse_iterator
<mop_iterator
>;
684 using RangeTy
= iterator_range
<mop_reverse_iterator
>;
685 SmallVector
<RangeTy
, 4> Worklist
;
688 explicit TreeWalkerState(MachineInstr
*Insert
) {
689 const iterator_range
<mop_iterator
> &Range
= Insert
->explicit_uses();
691 Worklist
.push_back(reverse(Range
));
694 bool done() const { return Worklist
.empty(); }
696 MachineOperand
&pop() {
697 RangeTy
&Range
= Worklist
.back();
698 MachineOperand
&Op
= *Range
.begin();
699 Range
= drop_begin(Range
);
702 assert((Worklist
.empty() || !Worklist
.back().empty()) &&
703 "Empty ranges shouldn't remain in the worklist");
707 /// Push Instr's operands onto the stack to be visited.
708 void pushOperands(MachineInstr
*Instr
) {
709 const iterator_range
<mop_iterator
> &Range(Instr
->explicit_uses());
711 Worklist
.push_back(reverse(Range
));
714 /// Some of Instr's operands are on the top of the stack; remove them and
715 /// re-insert them starting from the beginning (because we've commuted them).
716 void resetTopOperands(MachineInstr
*Instr
) {
717 assert(hasRemainingOperands(Instr
) &&
718 "Reseting operands should only be done when the instruction has "
719 "an operand still on the stack");
720 Worklist
.back() = reverse(Instr
->explicit_uses());
723 /// Test whether Instr has operands remaining to be visited at the top of
725 bool hasRemainingOperands(const MachineInstr
*Instr
) const {
726 if (Worklist
.empty())
728 const RangeTy
&Range
= Worklist
.back();
729 return !Range
.empty() && Range
.begin()->getParent() == Instr
;
732 /// Test whether the given register is present on the stack, indicating an
733 /// operand in the tree that we haven't visited yet. Moving a definition of
734 /// Reg to a point in the tree after that would change its value.
736 /// This is needed as a consequence of using implicit local.gets for
737 /// uses and implicit local.sets for defs.
738 bool isOnStack(unsigned Reg
) const {
739 for (const RangeTy
&Range
: Worklist
)
740 for (const MachineOperand
&MO
: Range
)
741 if (MO
.isReg() && MO
.getReg() == Reg
)
747 /// State to keep track of whether commuting is in flight or whether it's been
748 /// tried for the current instruction and didn't work.
749 class CommutingState
{
750 /// There are effectively three states: the initial state where we haven't
751 /// started commuting anything and we don't know anything yet, the tentative
752 /// state where we've commuted the operands of the current instruction and are
753 /// revisiting it, and the declined state where we've reverted the operands
754 /// back to their original order and will no longer commute it further.
755 bool TentativelyCommuting
= false;
756 bool Declined
= false;
758 /// During the tentative state, these hold the operand indices of the commuted
760 unsigned Operand0
, Operand1
;
763 /// Stackification for an operand was not successful due to ordering
764 /// constraints. If possible, and if we haven't already tried it and declined
765 /// it, commute Insert's operands and prepare to revisit it.
766 void maybeCommute(MachineInstr
*Insert
, TreeWalkerState
&TreeWalker
,
767 const WebAssemblyInstrInfo
*TII
) {
768 if (TentativelyCommuting
) {
770 "Don't decline commuting until you've finished trying it");
771 // Commuting didn't help. Revert it.
772 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
773 TentativelyCommuting
= false;
775 } else if (!Declined
&& TreeWalker
.hasRemainingOperands(Insert
)) {
776 Operand0
= TargetInstrInfo::CommuteAnyOperandIndex
;
777 Operand1
= TargetInstrInfo::CommuteAnyOperandIndex
;
778 if (TII
->findCommutedOpIndices(*Insert
, Operand0
, Operand1
)) {
779 // Tentatively commute the operands and try again.
780 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
781 TreeWalker
.resetTopOperands(Insert
);
782 TentativelyCommuting
= true;
788 /// Stackification for some operand was successful. Reset to the default
791 TentativelyCommuting
= false;
795 } // end anonymous namespace
797 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction
&MF
) {
798 LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
799 "********** Function: "
800 << MF
.getName() << '\n');
802 bool Changed
= false;
803 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
804 WebAssemblyFunctionInfo
&MFI
= *MF
.getInfo
<WebAssemblyFunctionInfo
>();
805 const auto *TII
= MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
806 const auto *TRI
= MF
.getSubtarget
<WebAssemblySubtarget
>().getRegisterInfo();
807 AliasAnalysis
&AA
= getAnalysis
<AAResultsWrapperPass
>().getAAResults();
808 auto &MDT
= getAnalysis
<MachineDominatorTree
>();
809 auto &LIS
= getAnalysis
<LiveIntervals
>();
811 // Walk the instructions from the bottom up. Currently we don't look past
812 // block boundaries, and the blocks aren't ordered so the block visitation
813 // order isn't significant, but we may want to change this in the future.
814 for (MachineBasicBlock
&MBB
: MF
) {
815 // Don't use a range-based for loop, because we modify the list as we're
816 // iterating over it and the end iterator may change.
817 for (auto MII
= MBB
.rbegin(); MII
!= MBB
.rend(); ++MII
) {
818 MachineInstr
*Insert
= &*MII
;
819 // Don't nest anything inside an inline asm, because we don't have
820 // constraints for $push inputs.
821 if (Insert
->isInlineAsm())
824 // Ignore debugging intrinsics.
825 if (Insert
->isDebugValue())
828 // Iterate through the inputs in reverse order, since we'll be pulling
829 // operands off the stack in LIFO order.
830 CommutingState Commuting
;
831 TreeWalkerState
TreeWalker(Insert
);
832 while (!TreeWalker
.done()) {
833 MachineOperand
&Use
= TreeWalker
.pop();
835 // We're only interested in explicit virtual register operands.
839 Register Reg
= Use
.getReg();
840 assert(Use
.isUse() && "explicit_uses() should only iterate over uses");
841 assert(!Use
.isImplicit() &&
842 "explicit_uses() should only iterate over explicit operands");
843 if (Register::isPhysicalRegister(Reg
))
846 // Identify the definition for this register at this point.
847 MachineInstr
*DefI
= getVRegDef(Reg
, Insert
, MRI
, LIS
);
851 // Don't nest an INLINE_ASM def into anything, because we don't have
852 // constraints for $pop outputs.
853 if (DefI
->isInlineAsm())
856 // Argument instructions represent live-in registers and not real
858 if (WebAssembly::isArgument(DefI
->getOpcode()))
861 MachineOperand
*Def
= DefI
->findRegisterDefOperand(Reg
);
862 assert(Def
!= nullptr);
864 // Decide which strategy to take. Prefer to move a single-use value
865 // over cloning it, and prefer cloning over introducing a tee.
866 // For moving, we require the def to be in the same block as the use;
867 // this makes things simpler (LiveIntervals' handleMove function only
868 // supports intra-block moves) and it's MachineSink's job to catch all
869 // the sinking opportunities anyway.
870 bool SameBlock
= DefI
->getParent() == &MBB
;
871 bool CanMove
= SameBlock
&&
872 isSafeToMove(Def
, &Use
, Insert
, AA
, MFI
, MRI
) &&
873 !TreeWalker
.isOnStack(Reg
);
874 if (CanMove
&& hasOneUse(Reg
, DefI
, MRI
, MDT
, LIS
)) {
875 Insert
= moveForSingleUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
, MRI
);
877 // If we are removing the frame base reg completely, remove the debug
879 // TODO: Encode this properly as a stackified value.
880 if (MFI
.isFrameBaseVirtual() && MFI
.getFrameBaseVreg() == Reg
)
881 MFI
.clearFrameBaseVreg();
882 } else if (shouldRematerialize(*DefI
, AA
, TII
)) {
884 rematerializeCheapDef(Reg
, Use
, *DefI
, MBB
, Insert
->getIterator(),
885 LIS
, MFI
, MRI
, TII
, TRI
);
886 } else if (CanMove
&& oneUseDominatesOtherUses(Reg
, Use
, MBB
, MRI
, MDT
,
888 Insert
= moveAndTeeForMultiUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
,
891 // We failed to stackify the operand. If the problem was ordering
892 // constraints, Commuting may be able to help.
893 if (!CanMove
&& SameBlock
)
894 Commuting
.maybeCommute(Insert
, TreeWalker
, TII
);
895 // Proceed to the next operand.
899 // Stackifying a multivalue def may unlock in-place stackification of
900 // subsequent defs. TODO: Handle the case where the consecutive uses are
901 // not all in the same instruction.
902 auto *SubsequentDef
= Insert
->defs().begin();
903 auto *SubsequentUse
= &Use
;
904 while (SubsequentDef
!= Insert
->defs().end() &&
905 SubsequentUse
!= Use
.getParent()->uses().end()) {
906 if (!SubsequentDef
->isReg() || !SubsequentUse
->isReg())
908 unsigned DefReg
= SubsequentDef
->getReg();
909 unsigned UseReg
= SubsequentUse
->getReg();
910 // TODO: This single-use restriction could be relaxed by using tees
911 if (DefReg
!= UseReg
|| !MRI
.hasOneUse(DefReg
))
913 MFI
.stackifyVReg(MRI
, DefReg
);
918 // If the instruction we just stackified is an IMPLICIT_DEF, convert it
919 // to a constant 0 so that the def is explicit, and the push/pop
920 // correspondence is maintained.
921 if (Insert
->getOpcode() == TargetOpcode::IMPLICIT_DEF
)
922 convertImplicitDefToConstZero(Insert
, MRI
, TII
, MF
, LIS
);
924 // We stackified an operand. Add the defining instruction's operands to
925 // the worklist stack now to continue to build an ever deeper tree.
927 TreeWalker
.pushOperands(Insert
);
930 // If we stackified any operands, skip over the tree to start looking for
931 // the next instruction we can build a tree on.
932 if (Insert
!= &*MII
) {
933 imposeStackOrdering(&*MII
);
934 MII
= MachineBasicBlock::iterator(Insert
).getReverse();
940 // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
941 // that it never looks like a use-before-def.
943 MF
.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK
);
944 for (MachineBasicBlock
&MBB
: MF
)
945 MBB
.addLiveIn(WebAssembly::VALUE_STACK
);
949 // Verify that pushes and pops are performed in LIFO order.
950 SmallVector
<unsigned, 0> Stack
;
951 for (MachineBasicBlock
&MBB
: MF
) {
952 for (MachineInstr
&MI
: MBB
) {
953 if (MI
.isDebugInstr())
955 for (MachineOperand
&MO
: reverse(MI
.explicit_uses())) {
958 Register Reg
= MO
.getReg();
959 if (MFI
.isVRegStackified(Reg
))
960 assert(Stack
.pop_back_val() == Reg
&&
961 "Register stack pop should be paired with a push");
963 for (MachineOperand
&MO
: MI
.defs()) {
966 Register Reg
= MO
.getReg();
967 if (MFI
.isVRegStackified(Reg
))
968 Stack
.push_back(MO
.getReg());
971 // TODO: Generalize this code to support keeping values on the stack across
972 // basic block boundaries.
973 assert(Stack
.empty() &&
974 "Register stack pushes and pops should be balanced");