1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a register stacking pass.
12 /// This pass reorders instructions to put register uses and defs in an order
13 /// such that they form single-use expression trees. Registers fitting this form
14 /// are then marked as "stackified", meaning references to them are replaced by
15 /// "push" and "pop" from the value stack.
17 /// This is primarily a code size optimization, since temporary values on the
18 /// value stack don't need to be named.
20 //===----------------------------------------------------------------------===//
22 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23 #include "WebAssembly.h"
24 #include "WebAssemblyDebugValueManager.h"
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "WebAssemblyUtilities.h"
28 #include "llvm/Analysis/AliasAnalysis.h"
29 #include "llvm/CodeGen/LiveIntervals.h"
30 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
31 #include "llvm/CodeGen/MachineDominators.h"
32 #include "llvm/CodeGen/MachineInstrBuilder.h"
33 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
34 #include "llvm/CodeGen/MachineRegisterInfo.h"
35 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/IR/GlobalAlias.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "wasm-reg-stackify"
45 class WebAssemblyRegStackify final
: public MachineFunctionPass
{
46 StringRef
getPassName() const override
{
47 return "WebAssembly Register Stackify";
50 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
52 AU
.addRequired
<MachineDominatorTreeWrapperPass
>();
53 AU
.addRequired
<LiveIntervalsWrapperPass
>();
54 AU
.addPreserved
<MachineBlockFrequencyInfoWrapperPass
>();
55 AU
.addPreserved
<SlotIndexesWrapperPass
>();
56 AU
.addPreserved
<LiveIntervalsWrapperPass
>();
57 AU
.addPreservedID(LiveVariablesID
);
58 AU
.addPreserved
<MachineDominatorTreeWrapperPass
>();
59 MachineFunctionPass::getAnalysisUsage(AU
);
62 bool runOnMachineFunction(MachineFunction
&MF
) override
;
65 static char ID
; // Pass identification, replacement for typeid
66 WebAssemblyRegStackify() : MachineFunctionPass(ID
) {}
68 } // end anonymous namespace
70 char WebAssemblyRegStackify::ID
= 0;
71 INITIALIZE_PASS(WebAssemblyRegStackify
, DEBUG_TYPE
,
72 "Reorder instructions to use the WebAssembly value stack",
75 FunctionPass
*llvm::createWebAssemblyRegStackify() {
76 return new WebAssemblyRegStackify();
79 // Decorate the given instruction with implicit operands that enforce the
80 // expression stack ordering constraints for an instruction which is on
81 // the expression stack.
82 static void imposeStackOrdering(MachineInstr
*MI
) {
83 // Write the opaque VALUE_STACK register.
84 if (!MI
->definesRegister(WebAssembly::VALUE_STACK
, /*TRI=*/nullptr))
85 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
89 // Also read the opaque VALUE_STACK register.
90 if (!MI
->readsRegister(WebAssembly::VALUE_STACK
, /*TRI=*/nullptr))
91 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
96 // Convert an IMPLICIT_DEF instruction into an instruction which defines
97 // a constant zero value.
98 static void convertImplicitDefToConstZero(MachineInstr
*MI
,
99 MachineRegisterInfo
&MRI
,
100 const TargetInstrInfo
*TII
,
102 LiveIntervals
&LIS
) {
103 assert(MI
->getOpcode() == TargetOpcode::IMPLICIT_DEF
);
105 const auto *RegClass
= MRI
.getRegClass(MI
->getOperand(0).getReg());
106 if (RegClass
== &WebAssembly::I32RegClass
) {
107 MI
->setDesc(TII
->get(WebAssembly::CONST_I32
));
108 MI
->addOperand(MachineOperand::CreateImm(0));
109 } else if (RegClass
== &WebAssembly::I64RegClass
) {
110 MI
->setDesc(TII
->get(WebAssembly::CONST_I64
));
111 MI
->addOperand(MachineOperand::CreateImm(0));
112 } else if (RegClass
== &WebAssembly::F32RegClass
) {
113 MI
->setDesc(TII
->get(WebAssembly::CONST_F32
));
114 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
115 Type::getFloatTy(MF
.getFunction().getContext())));
116 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
117 } else if (RegClass
== &WebAssembly::F64RegClass
) {
118 MI
->setDesc(TII
->get(WebAssembly::CONST_F64
));
119 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
120 Type::getDoubleTy(MF
.getFunction().getContext())));
121 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
122 } else if (RegClass
== &WebAssembly::V128RegClass
) {
123 MI
->setDesc(TII
->get(WebAssembly::CONST_V128_I64x2
));
124 MI
->addOperand(MachineOperand::CreateImm(0));
125 MI
->addOperand(MachineOperand::CreateImm(0));
127 llvm_unreachable("Unexpected reg class");
131 // Determine whether a call to the callee referenced by
132 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
134 static void queryCallee(const MachineInstr
&MI
, bool &Read
, bool &Write
,
135 bool &Effects
, bool &StackPointer
) {
136 // All calls can use the stack pointer.
139 const MachineOperand
&MO
= WebAssembly::getCalleeOp(MI
);
141 const Constant
*GV
= MO
.getGlobal();
142 if (const auto *GA
= dyn_cast
<GlobalAlias
>(GV
))
143 if (!GA
->isInterposable())
144 GV
= GA
->getAliasee();
146 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
147 if (!F
->doesNotThrow())
149 if (F
->doesNotAccessMemory())
151 if (F
->onlyReadsMemory()) {
164 // Determine whether MI reads memory, writes memory, has side effects,
165 // and/or uses the stack pointer value.
166 static void query(const MachineInstr
&MI
, bool &Read
, bool &Write
,
167 bool &Effects
, bool &StackPointer
) {
168 assert(!MI
.isTerminator());
170 if (MI
.isDebugInstr() || MI
.isPosition())
174 if (MI
.mayLoad() && !MI
.isDereferenceableInvariantLoad())
180 } else if (MI
.hasOrderedMemoryRef()) {
181 switch (MI
.getOpcode()) {
182 case WebAssembly::DIV_S_I32
:
183 case WebAssembly::DIV_S_I64
:
184 case WebAssembly::REM_S_I32
:
185 case WebAssembly::REM_S_I64
:
186 case WebAssembly::DIV_U_I32
:
187 case WebAssembly::DIV_U_I64
:
188 case WebAssembly::REM_U_I32
:
189 case WebAssembly::REM_U_I64
:
190 case WebAssembly::I32_TRUNC_S_F32
:
191 case WebAssembly::I64_TRUNC_S_F32
:
192 case WebAssembly::I32_TRUNC_S_F64
:
193 case WebAssembly::I64_TRUNC_S_F64
:
194 case WebAssembly::I32_TRUNC_U_F32
:
195 case WebAssembly::I64_TRUNC_U_F32
:
196 case WebAssembly::I32_TRUNC_U_F64
:
197 case WebAssembly::I64_TRUNC_U_F64
:
198 // These instruction have hasUnmodeledSideEffects() returning true
199 // because they trap on overflow and invalid so they can't be arbitrarily
200 // moved, however hasOrderedMemoryRef() interprets this plus their lack
201 // of memoperands as having a potential unknown memory reference.
204 // Record volatile accesses, unless it's a call, as calls are handled
214 // Check for side effects.
215 if (MI
.hasUnmodeledSideEffects()) {
216 switch (MI
.getOpcode()) {
217 case WebAssembly::DIV_S_I32
:
218 case WebAssembly::DIV_S_I64
:
219 case WebAssembly::REM_S_I32
:
220 case WebAssembly::REM_S_I64
:
221 case WebAssembly::DIV_U_I32
:
222 case WebAssembly::DIV_U_I64
:
223 case WebAssembly::REM_U_I32
:
224 case WebAssembly::REM_U_I64
:
225 case WebAssembly::I32_TRUNC_S_F32
:
226 case WebAssembly::I64_TRUNC_S_F32
:
227 case WebAssembly::I32_TRUNC_S_F64
:
228 case WebAssembly::I64_TRUNC_S_F64
:
229 case WebAssembly::I32_TRUNC_U_F32
:
230 case WebAssembly::I64_TRUNC_U_F32
:
231 case WebAssembly::I32_TRUNC_U_F64
:
232 case WebAssembly::I64_TRUNC_U_F64
:
233 // These instructions have hasUnmodeledSideEffects() returning true
234 // because they trap on overflow and invalid so they can't be arbitrarily
235 // moved, however in the specific case of register stackifying, it is safe
236 // to move them because overflow and invalid are Undefined Behavior.
244 // Check for writes to __stack_pointer global.
245 if ((MI
.getOpcode() == WebAssembly::GLOBAL_SET_I32
||
246 MI
.getOpcode() == WebAssembly::GLOBAL_SET_I64
) &&
247 strcmp(MI
.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
252 queryCallee(MI
, Read
, Write
, Effects
, StackPointer
);
256 // Test whether Def is safe and profitable to rematerialize.
257 static bool shouldRematerialize(const MachineInstr
&Def
,
258 const WebAssemblyInstrInfo
*TII
) {
259 return Def
.isAsCheapAsAMove() && TII
->isTriviallyReMaterializable(Def
);
262 // Identify the definition for this register at this point. This is a
263 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
264 // LiveIntervals to handle complex cases.
265 static MachineInstr
*getVRegDef(unsigned Reg
, const MachineInstr
*Insert
,
266 const MachineRegisterInfo
&MRI
,
267 const LiveIntervals
&LIS
) {
268 // Most registers are in SSA form here so we try a quick MRI query first.
269 if (MachineInstr
*Def
= MRI
.getUniqueVRegDef(Reg
))
272 // MRI doesn't know what the Def is. Try asking LIS.
273 if (const VNInfo
*ValNo
= LIS
.getInterval(Reg
).getVNInfoBefore(
274 LIS
.getInstructionIndex(*Insert
)))
275 return LIS
.getInstructionFromIndex(ValNo
->def
);
280 // Test whether Reg, as defined at Def, has exactly one use. This is a
281 // generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
282 // LiveIntervals to handle complex cases.
283 static bool hasOneNonDBGUse(unsigned Reg
, MachineInstr
*Def
,
284 MachineRegisterInfo
&MRI
, MachineDominatorTree
&MDT
,
285 LiveIntervals
&LIS
) {
286 // Most registers are in SSA form here so we try a quick MRI query first.
287 if (MRI
.hasOneNonDBGUse(Reg
))
291 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
292 const VNInfo
*DefVNI
=
293 LI
.getVNInfoAt(LIS
.getInstructionIndex(*Def
).getRegSlot());
295 for (auto &I
: MRI
.use_nodbg_operands(Reg
)) {
296 const auto &Result
= LI
.Query(LIS
.getInstructionIndex(*I
.getParent()));
297 if (Result
.valueIn() == DefVNI
) {
298 if (!Result
.isKill())
308 // Test whether it's safe to move Def to just before Insert.
309 // TODO: Compute memory dependencies in a way that doesn't require always
310 // walking the block.
311 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
313 static bool isSafeToMove(const MachineOperand
*Def
, const MachineOperand
*Use
,
314 const MachineInstr
*Insert
,
315 const WebAssemblyFunctionInfo
&MFI
,
316 const MachineRegisterInfo
&MRI
) {
317 const MachineInstr
*DefI
= Def
->getParent();
318 const MachineInstr
*UseI
= Use
->getParent();
319 assert(DefI
->getParent() == Insert
->getParent());
320 assert(UseI
->getParent() == Insert
->getParent());
322 // The first def of a multivalue instruction can be stackified by moving,
323 // since the later defs can always be placed into locals if necessary. Later
324 // defs can only be stackified if all previous defs are already stackified
325 // since ExplicitLocals will not know how to place a def in a local if a
326 // subsequent def is stackified. But only one def can be stackified by moving
327 // the instruction, so it must be the first one.
329 // TODO: This could be loosened to be the first *live* def, but care would
330 // have to be taken to ensure the drops of the initial dead defs can be
331 // placed. This would require checking that no previous defs are used in the
332 // same instruction as subsequent defs.
333 if (Def
!= DefI
->defs().begin())
336 // If any subsequent def is used prior to the current value by the same
337 // instruction in which the current value is used, we cannot
338 // stackify. Stackifying in this case would require that def moving below the
339 // current def in the stack, which cannot be achieved, even with locals.
340 // Also ensure we don't sink the def past any other prior uses.
341 for (const auto &SubsequentDef
: drop_begin(DefI
->defs())) {
342 auto I
= std::next(MachineBasicBlock::const_iterator(DefI
));
343 auto E
= std::next(MachineBasicBlock::const_iterator(UseI
));
344 for (; I
!= E
; ++I
) {
345 for (const auto &PriorUse
: I
->uses()) {
346 if (&PriorUse
== Use
)
348 if (PriorUse
.isReg() && SubsequentDef
.getReg() == PriorUse
.getReg())
354 // If moving is a semantic nop, it is always allowed
355 const MachineBasicBlock
*MBB
= DefI
->getParent();
356 auto NextI
= std::next(MachineBasicBlock::const_iterator(DefI
));
357 for (auto E
= MBB
->end(); NextI
!= E
&& NextI
->isDebugInstr(); ++NextI
)
362 // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
364 if (WebAssembly::isCatch(DefI
->getOpcode()))
367 // Check for register dependencies.
368 SmallVector
<unsigned, 4> MutableRegisters
;
369 for (const MachineOperand
&MO
: DefI
->operands()) {
370 if (!MO
.isReg() || MO
.isUndef())
372 Register Reg
= MO
.getReg();
374 // If the register is dead here and at Insert, ignore it.
375 if (MO
.isDead() && Insert
->definesRegister(Reg
, /*TRI=*/nullptr) &&
376 !Insert
->readsRegister(Reg
, /*TRI=*/nullptr))
379 if (Reg
.isPhysical()) {
380 // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
381 // from moving down, and we've already checked for that.
382 if (Reg
== WebAssembly::ARGUMENTS
)
384 // If the physical register is never modified, ignore it.
385 if (!MRI
.isPhysRegModified(Reg
))
387 // Otherwise, it's a physical register with unknown liveness.
391 // If one of the operands isn't in SSA form, it has different values at
392 // different times, and we need to make sure we don't move our use across
394 if (!MO
.isDef() && !MRI
.hasOneDef(Reg
))
395 MutableRegisters
.push_back(Reg
);
398 bool Read
= false, Write
= false, Effects
= false, StackPointer
= false;
399 query(*DefI
, Read
, Write
, Effects
, StackPointer
);
401 // If the instruction does not access memory and has no side effects, it has
402 // no additional dependencies.
403 bool HasMutableRegisters
= !MutableRegisters
.empty();
404 if (!Read
&& !Write
&& !Effects
&& !StackPointer
&& !HasMutableRegisters
)
407 // Scan through the intervening instructions between DefI and Insert.
408 MachineBasicBlock::const_iterator
D(DefI
), I(Insert
);
409 for (--I
; I
!= D
; --I
) {
410 bool InterveningRead
= false;
411 bool InterveningWrite
= false;
412 bool InterveningEffects
= false;
413 bool InterveningStackPointer
= false;
414 query(*I
, InterveningRead
, InterveningWrite
, InterveningEffects
,
415 InterveningStackPointer
);
416 if (Effects
&& InterveningEffects
)
418 if (Read
&& InterveningWrite
)
420 if (Write
&& (InterveningRead
|| InterveningWrite
))
422 if (StackPointer
&& InterveningStackPointer
)
425 for (unsigned Reg
: MutableRegisters
)
426 for (const MachineOperand
&MO
: I
->operands())
427 if (MO
.isReg() && MO
.isDef() && MO
.getReg() == Reg
)
434 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
435 static bool oneUseDominatesOtherUses(unsigned Reg
, const MachineOperand
&OneUse
,
436 const MachineBasicBlock
&MBB
,
437 const MachineRegisterInfo
&MRI
,
438 const MachineDominatorTree
&MDT
,
440 WebAssemblyFunctionInfo
&MFI
) {
441 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
443 const MachineInstr
*OneUseInst
= OneUse
.getParent();
444 VNInfo
*OneUseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*OneUseInst
));
446 for (const MachineOperand
&Use
: MRI
.use_nodbg_operands(Reg
)) {
450 const MachineInstr
*UseInst
= Use
.getParent();
451 VNInfo
*UseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*UseInst
));
453 if (UseVNI
!= OneUseVNI
)
456 if (UseInst
== OneUseInst
) {
457 // Another use in the same instruction. We need to ensure that the one
458 // selected use happens "before" it.
462 // Test that the use is dominated by the one selected use.
463 while (!MDT
.dominates(OneUseInst
, UseInst
)) {
464 // Actually, dominating is over-conservative. Test that the use would
465 // happen after the one selected use in the stack evaluation order.
467 // This is needed as a consequence of using implicit local.gets for
468 // uses and implicit local.sets for defs.
469 if (UseInst
->getDesc().getNumDefs() == 0)
471 const MachineOperand
&MO
= UseInst
->getOperand(0);
474 Register DefReg
= MO
.getReg();
475 if (!DefReg
.isVirtual() || !MFI
.isVRegStackified(DefReg
))
477 assert(MRI
.hasOneNonDBGUse(DefReg
));
478 const MachineOperand
&NewUse
= *MRI
.use_nodbg_begin(DefReg
);
479 const MachineInstr
*NewUseInst
= NewUse
.getParent();
480 if (NewUseInst
== OneUseInst
) {
481 if (&OneUse
> &NewUse
)
485 UseInst
= NewUseInst
;
492 /// Get the appropriate tee opcode for the given register class.
493 static unsigned getTeeOpcode(const TargetRegisterClass
*RC
) {
494 if (RC
== &WebAssembly::I32RegClass
)
495 return WebAssembly::TEE_I32
;
496 if (RC
== &WebAssembly::I64RegClass
)
497 return WebAssembly::TEE_I64
;
498 if (RC
== &WebAssembly::F32RegClass
)
499 return WebAssembly::TEE_F32
;
500 if (RC
== &WebAssembly::F64RegClass
)
501 return WebAssembly::TEE_F64
;
502 if (RC
== &WebAssembly::V128RegClass
)
503 return WebAssembly::TEE_V128
;
504 if (RC
== &WebAssembly::EXTERNREFRegClass
)
505 return WebAssembly::TEE_EXTERNREF
;
506 if (RC
== &WebAssembly::FUNCREFRegClass
)
507 return WebAssembly::TEE_FUNCREF
;
508 if (RC
== &WebAssembly::EXNREFRegClass
)
509 return WebAssembly::TEE_EXNREF
;
510 llvm_unreachable("Unexpected register class");
513 // Shrink LI to its uses, cleaning up LI.
514 static void shrinkToUses(LiveInterval
&LI
, LiveIntervals
&LIS
) {
515 if (LIS
.shrinkToUses(&LI
)) {
516 SmallVector
<LiveInterval
*, 4> SplitLIs
;
517 LIS
.splitSeparateComponents(LI
, SplitLIs
);
521 /// A single-use def in the same block with no intervening memory or register
522 /// dependencies; move the def down and nest it with the current instruction.
523 static MachineInstr
*moveForSingleUse(unsigned Reg
, MachineOperand
&Op
,
524 MachineInstr
*Def
, MachineBasicBlock
&MBB
,
525 MachineInstr
*Insert
, LiveIntervals
&LIS
,
526 WebAssemblyFunctionInfo
&MFI
,
527 MachineRegisterInfo
&MRI
) {
528 LLVM_DEBUG(dbgs() << "Move for single use: "; Def
->dump());
530 WebAssemblyDebugValueManager
DefDIs(Def
);
532 LIS
.handleMove(*Def
);
534 if (MRI
.hasOneDef(Reg
) && MRI
.hasOneNonDBGUse(Reg
)) {
535 // No one else is using this register for anything so we can just stackify
537 MFI
.stackifyVReg(MRI
, Reg
);
539 // The register may have unrelated uses or defs; create a new register for
540 // just our one def and use so that we can stackify it.
541 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
543 DefDIs
.updateReg(NewReg
);
545 // Tell LiveIntervals about the new register.
546 LIS
.createAndComputeVirtRegInterval(NewReg
);
548 // Tell LiveIntervals about the changes to the old register.
549 LiveInterval
&LI
= LIS
.getInterval(Reg
);
550 LI
.removeSegment(LIS
.getInstructionIndex(*Def
).getRegSlot(),
551 LIS
.getInstructionIndex(*Op
.getParent()).getRegSlot(),
552 /*RemoveDeadValNo=*/true);
554 MFI
.stackifyVReg(MRI
, NewReg
);
556 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
559 imposeStackOrdering(Def
);
563 static MachineInstr
*getPrevNonDebugInst(MachineInstr
*MI
) {
564 for (auto *I
= MI
->getPrevNode(); I
; I
= I
->getPrevNode())
565 if (!I
->isDebugInstr())
570 /// A trivially cloneable instruction; clone it and nest the new copy with the
571 /// current instruction.
572 static MachineInstr
*rematerializeCheapDef(
573 unsigned Reg
, MachineOperand
&Op
, MachineInstr
&Def
, MachineBasicBlock
&MBB
,
574 MachineBasicBlock::instr_iterator Insert
, LiveIntervals
&LIS
,
575 WebAssemblyFunctionInfo
&MFI
, MachineRegisterInfo
&MRI
,
576 const WebAssemblyInstrInfo
*TII
, const WebAssemblyRegisterInfo
*TRI
) {
577 LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def
.dump());
578 LLVM_DEBUG(dbgs() << " - for use in "; Op
.getParent()->dump());
580 WebAssemblyDebugValueManager
DefDIs(&Def
);
582 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
583 DefDIs
.cloneSink(&*Insert
, NewReg
);
585 MachineInstr
*Clone
= getPrevNonDebugInst(&*Insert
);
587 LIS
.InsertMachineInstrInMaps(*Clone
);
588 LIS
.createAndComputeVirtRegInterval(NewReg
);
589 MFI
.stackifyVReg(MRI
, NewReg
);
590 imposeStackOrdering(Clone
);
592 LLVM_DEBUG(dbgs() << " - Cloned to "; Clone
->dump());
594 // Shrink the interval.
595 bool IsDead
= MRI
.use_empty(Reg
);
597 LiveInterval
&LI
= LIS
.getInterval(Reg
);
598 shrinkToUses(LI
, LIS
);
599 IsDead
= !LI
.liveAt(LIS
.getInstructionIndex(Def
).getDeadSlot());
602 // If that was the last use of the original, delete the original.
604 LLVM_DEBUG(dbgs() << " - Deleting original\n");
605 SlotIndex Idx
= LIS
.getInstructionIndex(Def
).getRegSlot();
606 LIS
.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS
), Idx
);
607 LIS
.removeInterval(Reg
);
608 LIS
.RemoveMachineInstrFromMaps(Def
);
615 /// A multiple-use def in the same block with no intervening memory or register
616 /// dependencies; move the def down, nest it with the current instruction, and
617 /// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
620 /// Reg = INST ... // Def
621 /// INST ..., Reg, ... // Insert
622 /// INST ..., Reg, ...
623 /// INST ..., Reg, ...
627 /// DefReg = INST ... // Def (to become the new Insert)
628 /// TeeReg, Reg = TEE_... DefReg
629 /// INST ..., TeeReg, ... // Insert
630 /// INST ..., Reg, ...
631 /// INST ..., Reg, ...
633 /// with DefReg and TeeReg stackified. This eliminates a local.get from the
635 static MachineInstr
*moveAndTeeForMultiUse(
636 unsigned Reg
, MachineOperand
&Op
, MachineInstr
*Def
, MachineBasicBlock
&MBB
,
637 MachineInstr
*Insert
, LiveIntervals
&LIS
, WebAssemblyFunctionInfo
&MFI
,
638 MachineRegisterInfo
&MRI
, const WebAssemblyInstrInfo
*TII
) {
639 LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def
->dump());
641 const auto *RegClass
= MRI
.getRegClass(Reg
);
642 Register TeeReg
= MRI
.createVirtualRegister(RegClass
);
643 Register DefReg
= MRI
.createVirtualRegister(RegClass
);
645 // Move Def into place.
646 WebAssemblyDebugValueManager
DefDIs(Def
);
648 LIS
.handleMove(*Def
);
650 // Create the Tee and attach the registers.
651 MachineOperand
&DefMO
= Def
->getOperand(0);
652 MachineInstr
*Tee
= BuildMI(MBB
, Insert
, Insert
->getDebugLoc(),
653 TII
->get(getTeeOpcode(RegClass
)), TeeReg
)
654 .addReg(Reg
, RegState::Define
)
655 .addReg(DefReg
, getUndefRegState(DefMO
.isDead()));
657 DefDIs
.updateReg(DefReg
);
658 SlotIndex TeeIdx
= LIS
.InsertMachineInstrInMaps(*Tee
).getRegSlot();
659 SlotIndex DefIdx
= LIS
.getInstructionIndex(*Def
).getRegSlot();
661 // Tell LiveIntervals we moved the original vreg def from Def to Tee.
662 LiveInterval
&LI
= LIS
.getInterval(Reg
);
663 LiveInterval::iterator I
= LI
.FindSegmentContaining(DefIdx
);
664 VNInfo
*ValNo
= LI
.getVNInfoAt(DefIdx
);
667 shrinkToUses(LI
, LIS
);
669 // Finish stackifying the new regs.
670 LIS
.createAndComputeVirtRegInterval(TeeReg
);
671 LIS
.createAndComputeVirtRegInterval(DefReg
);
672 MFI
.stackifyVReg(MRI
, DefReg
);
673 MFI
.stackifyVReg(MRI
, TeeReg
);
674 imposeStackOrdering(Def
);
675 imposeStackOrdering(Tee
);
677 // Even though 'TeeReg, Reg = TEE ...', has two defs, we don't need to clone
678 // DBG_VALUEs for both of them, given that the latter will cancel the former
679 // anyway. Here we only clone DBG_VALUEs for TeeReg, which will be converted
680 // to a local index in ExplicitLocals pass.
681 DefDIs
.cloneSink(Insert
, TeeReg
, /* CloneDef */ false);
683 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
684 LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee
->dump());
689 /// A stack for walking the tree of instructions being built, visiting the
690 /// MachineOperands in DFS order.
691 class TreeWalkerState
{
692 using mop_iterator
= MachineInstr::mop_iterator
;
693 using mop_reverse_iterator
= std::reverse_iterator
<mop_iterator
>;
694 using RangeTy
= iterator_range
<mop_reverse_iterator
>;
695 SmallVector
<RangeTy
, 4> Worklist
;
698 explicit TreeWalkerState(MachineInstr
*Insert
) {
699 const iterator_range
<mop_iterator
> &Range
= Insert
->explicit_uses();
701 Worklist
.push_back(reverse(Range
));
704 bool done() const { return Worklist
.empty(); }
706 MachineOperand
&pop() {
707 RangeTy
&Range
= Worklist
.back();
708 MachineOperand
&Op
= *Range
.begin();
709 Range
= drop_begin(Range
);
712 assert((Worklist
.empty() || !Worklist
.back().empty()) &&
713 "Empty ranges shouldn't remain in the worklist");
717 /// Push Instr's operands onto the stack to be visited.
718 void pushOperands(MachineInstr
*Instr
) {
719 const iterator_range
<mop_iterator
> &Range(Instr
->explicit_uses());
721 Worklist
.push_back(reverse(Range
));
724 /// Some of Instr's operands are on the top of the stack; remove them and
725 /// re-insert them starting from the beginning (because we've commuted them).
726 void resetTopOperands(MachineInstr
*Instr
) {
727 assert(hasRemainingOperands(Instr
) &&
728 "Reseting operands should only be done when the instruction has "
729 "an operand still on the stack");
730 Worklist
.back() = reverse(Instr
->explicit_uses());
733 /// Test whether Instr has operands remaining to be visited at the top of
735 bool hasRemainingOperands(const MachineInstr
*Instr
) const {
736 if (Worklist
.empty())
738 const RangeTy
&Range
= Worklist
.back();
739 return !Range
.empty() && Range
.begin()->getParent() == Instr
;
742 /// Test whether the given register is present on the stack, indicating an
743 /// operand in the tree that we haven't visited yet. Moving a definition of
744 /// Reg to a point in the tree after that would change its value.
746 /// This is needed as a consequence of using implicit local.gets for
747 /// uses and implicit local.sets for defs.
748 bool isOnStack(unsigned Reg
) const {
749 for (const RangeTy
&Range
: Worklist
)
750 for (const MachineOperand
&MO
: Range
)
751 if (MO
.isReg() && MO
.getReg() == Reg
)
757 /// State to keep track of whether commuting is in flight or whether it's been
758 /// tried for the current instruction and didn't work.
759 class CommutingState
{
760 /// There are effectively three states: the initial state where we haven't
761 /// started commuting anything and we don't know anything yet, the tentative
762 /// state where we've commuted the operands of the current instruction and are
763 /// revisiting it, and the declined state where we've reverted the operands
764 /// back to their original order and will no longer commute it further.
765 bool TentativelyCommuting
= false;
766 bool Declined
= false;
768 /// During the tentative state, these hold the operand indices of the commuted
770 unsigned Operand0
, Operand1
;
773 /// Stackification for an operand was not successful due to ordering
774 /// constraints. If possible, and if we haven't already tried it and declined
775 /// it, commute Insert's operands and prepare to revisit it.
776 void maybeCommute(MachineInstr
*Insert
, TreeWalkerState
&TreeWalker
,
777 const WebAssemblyInstrInfo
*TII
) {
778 if (TentativelyCommuting
) {
780 "Don't decline commuting until you've finished trying it");
781 // Commuting didn't help. Revert it.
782 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
783 TentativelyCommuting
= false;
785 } else if (!Declined
&& TreeWalker
.hasRemainingOperands(Insert
)) {
786 Operand0
= TargetInstrInfo::CommuteAnyOperandIndex
;
787 Operand1
= TargetInstrInfo::CommuteAnyOperandIndex
;
788 if (TII
->findCommutedOpIndices(*Insert
, Operand0
, Operand1
)) {
789 // Tentatively commute the operands and try again.
790 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
791 TreeWalker
.resetTopOperands(Insert
);
792 TentativelyCommuting
= true;
798 /// Stackification for some operand was successful. Reset to the default
801 TentativelyCommuting
= false;
805 } // end anonymous namespace
807 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction
&MF
) {
808 LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
809 "********** Function: "
810 << MF
.getName() << '\n');
812 bool Changed
= false;
813 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
814 WebAssemblyFunctionInfo
&MFI
= *MF
.getInfo
<WebAssemblyFunctionInfo
>();
815 const auto *TII
= MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
816 const auto *TRI
= MF
.getSubtarget
<WebAssemblySubtarget
>().getRegisterInfo();
817 auto &MDT
= getAnalysis
<MachineDominatorTreeWrapperPass
>().getDomTree();
818 auto &LIS
= getAnalysis
<LiveIntervalsWrapperPass
>().getLIS();
820 // Walk the instructions from the bottom up. Currently we don't look past
821 // block boundaries, and the blocks aren't ordered so the block visitation
822 // order isn't significant, but we may want to change this in the future.
823 for (MachineBasicBlock
&MBB
: MF
) {
824 // Don't use a range-based for loop, because we modify the list as we're
825 // iterating over it and the end iterator may change.
826 for (auto MII
= MBB
.rbegin(); MII
!= MBB
.rend(); ++MII
) {
827 MachineInstr
*Insert
= &*MII
;
828 // Don't nest anything inside an inline asm, because we don't have
829 // constraints for $push inputs.
830 if (Insert
->isInlineAsm())
833 // Ignore debugging intrinsics.
834 if (Insert
->isDebugValue())
837 // Iterate through the inputs in reverse order, since we'll be pulling
838 // operands off the stack in LIFO order.
839 CommutingState Commuting
;
840 TreeWalkerState
TreeWalker(Insert
);
841 while (!TreeWalker
.done()) {
842 MachineOperand
&Use
= TreeWalker
.pop();
844 // We're only interested in explicit virtual register operands.
848 Register Reg
= Use
.getReg();
849 assert(Use
.isUse() && "explicit_uses() should only iterate over uses");
850 assert(!Use
.isImplicit() &&
851 "explicit_uses() should only iterate over explicit operands");
852 if (Reg
.isPhysical())
855 // Identify the definition for this register at this point.
856 MachineInstr
*DefI
= getVRegDef(Reg
, Insert
, MRI
, LIS
);
860 // Don't nest an INLINE_ASM def into anything, because we don't have
861 // constraints for $pop outputs.
862 if (DefI
->isInlineAsm())
865 // Argument instructions represent live-in registers and not real
867 if (WebAssembly::isArgument(DefI
->getOpcode()))
870 MachineOperand
*Def
=
871 DefI
->findRegisterDefOperand(Reg
, /*TRI=*/nullptr);
872 assert(Def
!= nullptr);
874 // Decide which strategy to take. Prefer to move a single-use value
875 // over cloning it, and prefer cloning over introducing a tee.
876 // For moving, we require the def to be in the same block as the use;
877 // this makes things simpler (LiveIntervals' handleMove function only
878 // supports intra-block moves) and it's MachineSink's job to catch all
879 // the sinking opportunities anyway.
880 bool SameBlock
= DefI
->getParent() == &MBB
;
881 bool CanMove
= SameBlock
&& isSafeToMove(Def
, &Use
, Insert
, MFI
, MRI
) &&
882 !TreeWalker
.isOnStack(Reg
);
883 if (CanMove
&& hasOneNonDBGUse(Reg
, DefI
, MRI
, MDT
, LIS
)) {
884 Insert
= moveForSingleUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
, MRI
);
886 // If we are removing the frame base reg completely, remove the debug
888 // TODO: Encode this properly as a stackified value.
889 if (MFI
.isFrameBaseVirtual() && MFI
.getFrameBaseVreg() == Reg
)
890 MFI
.clearFrameBaseVreg();
891 } else if (shouldRematerialize(*DefI
, TII
)) {
893 rematerializeCheapDef(Reg
, Use
, *DefI
, MBB
, Insert
->getIterator(),
894 LIS
, MFI
, MRI
, TII
, TRI
);
895 } else if (CanMove
&& oneUseDominatesOtherUses(Reg
, Use
, MBB
, MRI
, MDT
,
897 Insert
= moveAndTeeForMultiUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
,
900 // We failed to stackify the operand. If the problem was ordering
901 // constraints, Commuting may be able to help.
902 if (!CanMove
&& SameBlock
)
903 Commuting
.maybeCommute(Insert
, TreeWalker
, TII
);
904 // Proceed to the next operand.
908 // Stackifying a multivalue def may unlock in-place stackification of
909 // subsequent defs. TODO: Handle the case where the consecutive uses are
910 // not all in the same instruction.
911 auto *SubsequentDef
= Insert
->defs().begin();
912 auto *SubsequentUse
= &Use
;
913 while (SubsequentDef
!= Insert
->defs().end() &&
914 SubsequentUse
!= Use
.getParent()->uses().end()) {
915 if (!SubsequentDef
->isReg() || !SubsequentUse
->isReg())
917 Register DefReg
= SubsequentDef
->getReg();
918 Register UseReg
= SubsequentUse
->getReg();
919 // TODO: This single-use restriction could be relaxed by using tees
920 if (DefReg
!= UseReg
|| !MRI
.hasOneNonDBGUse(DefReg
))
922 MFI
.stackifyVReg(MRI
, DefReg
);
927 // If the instruction we just stackified is an IMPLICIT_DEF, convert it
928 // to a constant 0 so that the def is explicit, and the push/pop
929 // correspondence is maintained.
930 if (Insert
->getOpcode() == TargetOpcode::IMPLICIT_DEF
)
931 convertImplicitDefToConstZero(Insert
, MRI
, TII
, MF
, LIS
);
933 // We stackified an operand. Add the defining instruction's operands to
934 // the worklist stack now to continue to build an ever deeper tree.
936 TreeWalker
.pushOperands(Insert
);
939 // If we stackified any operands, skip over the tree to start looking for
940 // the next instruction we can build a tree on.
941 if (Insert
!= &*MII
) {
942 imposeStackOrdering(&*MII
);
943 MII
= MachineBasicBlock::iterator(Insert
).getReverse();
949 // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
950 // that it never looks like a use-before-def.
952 MF
.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK
);
953 for (MachineBasicBlock
&MBB
: MF
)
954 MBB
.addLiveIn(WebAssembly::VALUE_STACK
);
958 // Verify that pushes and pops are performed in LIFO order.
959 SmallVector
<unsigned, 0> Stack
;
960 for (MachineBasicBlock
&MBB
: MF
) {
961 for (MachineInstr
&MI
: MBB
) {
962 if (MI
.isDebugInstr())
964 for (MachineOperand
&MO
: reverse(MI
.explicit_uses())) {
967 Register Reg
= MO
.getReg();
968 if (MFI
.isVRegStackified(Reg
))
969 assert(Stack
.pop_back_val() == Reg
&&
970 "Register stack pop should be paired with a push");
972 for (MachineOperand
&MO
: MI
.defs()) {
975 Register Reg
= MO
.getReg();
976 if (MFI
.isVRegStackified(Reg
))
977 Stack
.push_back(MO
.getReg());
980 // TODO: Generalize this code to support keeping values on the stack across
981 // basic block boundaries.
982 assert(Stack
.empty() &&
983 "Register stack pushes and pops should be balanced");