1 //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements a register stacking pass.
12 /// This pass reorders instructions to put register uses and defs in an order
13 /// such that they form single-use expression trees. Registers fitting this form
14 /// are then marked as "stackified", meaning references to them are replaced by
15 /// "push" and "pop" from the value stack.
17 /// This is primarily a code size optimization, since temporary values on the
18 /// value stack don't need to be named.
20 //===----------------------------------------------------------------------===//
22 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
23 #include "WebAssembly.h"
24 #include "WebAssemblyDebugValueManager.h"
25 #include "WebAssemblyMachineFunctionInfo.h"
26 #include "WebAssemblySubtarget.h"
27 #include "WebAssemblyUtilities.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/Analysis/AliasAnalysis.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32 #include "llvm/CodeGen/MachineDominators.h"
33 #include "llvm/CodeGen/MachineInstrBuilder.h"
34 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
35 #include "llvm/CodeGen/MachineRegisterInfo.h"
36 #include "llvm/CodeGen/Passes.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/raw_ostream.h"
42 #define DEBUG_TYPE "wasm-reg-stackify"
45 class WebAssemblyRegStackify final
: public MachineFunctionPass
{
46 StringRef
getPassName() const override
{
47 return "WebAssembly Register Stackify";
50 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
52 AU
.addRequired
<MachineDominatorTree
>();
53 AU
.addRequired
<LiveIntervals
>();
54 AU
.addPreserved
<MachineBlockFrequencyInfo
>();
55 AU
.addPreserved
<SlotIndexes
>();
56 AU
.addPreserved
<LiveIntervals
>();
57 AU
.addPreservedID(LiveVariablesID
);
58 AU
.addPreserved
<MachineDominatorTree
>();
59 MachineFunctionPass::getAnalysisUsage(AU
);
62 bool runOnMachineFunction(MachineFunction
&MF
) override
;
65 static char ID
; // Pass identification, replacement for typeid
66 WebAssemblyRegStackify() : MachineFunctionPass(ID
) {}
68 } // end anonymous namespace
70 char WebAssemblyRegStackify::ID
= 0;
71 INITIALIZE_PASS(WebAssemblyRegStackify
, DEBUG_TYPE
,
72 "Reorder instructions to use the WebAssembly value stack",
75 FunctionPass
*llvm::createWebAssemblyRegStackify() {
76 return new WebAssemblyRegStackify();
79 // Decorate the given instruction with implicit operands that enforce the
80 // expression stack ordering constraints for an instruction which is on
81 // the expression stack.
82 static void imposeStackOrdering(MachineInstr
*MI
) {
83 // Write the opaque VALUE_STACK register.
84 if (!MI
->definesRegister(WebAssembly::VALUE_STACK
))
85 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
89 // Also read the opaque VALUE_STACK register.
90 if (!MI
->readsRegister(WebAssembly::VALUE_STACK
))
91 MI
->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK
,
96 // Convert an IMPLICIT_DEF instruction into an instruction which defines
97 // a constant zero value.
98 static void convertImplicitDefToConstZero(MachineInstr
*MI
,
99 MachineRegisterInfo
&MRI
,
100 const TargetInstrInfo
*TII
,
102 LiveIntervals
&LIS
) {
103 assert(MI
->getOpcode() == TargetOpcode::IMPLICIT_DEF
);
105 const auto *RegClass
= MRI
.getRegClass(MI
->getOperand(0).getReg());
106 if (RegClass
== &WebAssembly::I32RegClass
) {
107 MI
->setDesc(TII
->get(WebAssembly::CONST_I32
));
108 MI
->addOperand(MachineOperand::CreateImm(0));
109 } else if (RegClass
== &WebAssembly::I64RegClass
) {
110 MI
->setDesc(TII
->get(WebAssembly::CONST_I64
));
111 MI
->addOperand(MachineOperand::CreateImm(0));
112 } else if (RegClass
== &WebAssembly::F32RegClass
) {
113 MI
->setDesc(TII
->get(WebAssembly::CONST_F32
));
114 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
115 Type::getFloatTy(MF
.getFunction().getContext())));
116 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
117 } else if (RegClass
== &WebAssembly::F64RegClass
) {
118 MI
->setDesc(TII
->get(WebAssembly::CONST_F64
));
119 auto *Val
= cast
<ConstantFP
>(Constant::getNullValue(
120 Type::getDoubleTy(MF
.getFunction().getContext())));
121 MI
->addOperand(MachineOperand::CreateFPImm(Val
));
122 } else if (RegClass
== &WebAssembly::V128RegClass
) {
123 MI
->setDesc(TII
->get(WebAssembly::CONST_V128_I64x2
));
124 MI
->addOperand(MachineOperand::CreateImm(0));
125 MI
->addOperand(MachineOperand::CreateImm(0));
127 llvm_unreachable("Unexpected reg class");
131 // Determine whether a call to the callee referenced by
132 // MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
134 static void queryCallee(const MachineInstr
&MI
, bool &Read
, bool &Write
,
135 bool &Effects
, bool &StackPointer
) {
136 // All calls can use the stack pointer.
139 const MachineOperand
&MO
= WebAssembly::getCalleeOp(MI
);
141 const Constant
*GV
= MO
.getGlobal();
142 if (const auto *GA
= dyn_cast
<GlobalAlias
>(GV
))
143 if (!GA
->isInterposable())
144 GV
= GA
->getAliasee();
146 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
147 if (!F
->doesNotThrow())
149 if (F
->doesNotAccessMemory())
151 if (F
->onlyReadsMemory()) {
164 // Determine whether MI reads memory, writes memory, has side effects,
165 // and/or uses the stack pointer value.
166 static void query(const MachineInstr
&MI
, bool &Read
, bool &Write
,
167 bool &Effects
, bool &StackPointer
) {
168 assert(!MI
.isTerminator());
170 if (MI
.isDebugInstr() || MI
.isPosition())
174 if (MI
.mayLoad() && !MI
.isDereferenceableInvariantLoad())
180 } else if (MI
.hasOrderedMemoryRef()) {
181 switch (MI
.getOpcode()) {
182 case WebAssembly::DIV_S_I32
:
183 case WebAssembly::DIV_S_I64
:
184 case WebAssembly::REM_S_I32
:
185 case WebAssembly::REM_S_I64
:
186 case WebAssembly::DIV_U_I32
:
187 case WebAssembly::DIV_U_I64
:
188 case WebAssembly::REM_U_I32
:
189 case WebAssembly::REM_U_I64
:
190 case WebAssembly::I32_TRUNC_S_F32
:
191 case WebAssembly::I64_TRUNC_S_F32
:
192 case WebAssembly::I32_TRUNC_S_F64
:
193 case WebAssembly::I64_TRUNC_S_F64
:
194 case WebAssembly::I32_TRUNC_U_F32
:
195 case WebAssembly::I64_TRUNC_U_F32
:
196 case WebAssembly::I32_TRUNC_U_F64
:
197 case WebAssembly::I64_TRUNC_U_F64
:
198 // These instruction have hasUnmodeledSideEffects() returning true
199 // because they trap on overflow and invalid so they can't be arbitrarily
200 // moved, however hasOrderedMemoryRef() interprets this plus their lack
201 // of memoperands as having a potential unknown memory reference.
204 // Record volatile accesses, unless it's a call, as calls are handled
214 // Check for side effects.
215 if (MI
.hasUnmodeledSideEffects()) {
216 switch (MI
.getOpcode()) {
217 case WebAssembly::DIV_S_I32
:
218 case WebAssembly::DIV_S_I64
:
219 case WebAssembly::REM_S_I32
:
220 case WebAssembly::REM_S_I64
:
221 case WebAssembly::DIV_U_I32
:
222 case WebAssembly::DIV_U_I64
:
223 case WebAssembly::REM_U_I32
:
224 case WebAssembly::REM_U_I64
:
225 case WebAssembly::I32_TRUNC_S_F32
:
226 case WebAssembly::I64_TRUNC_S_F32
:
227 case WebAssembly::I32_TRUNC_S_F64
:
228 case WebAssembly::I64_TRUNC_S_F64
:
229 case WebAssembly::I32_TRUNC_U_F32
:
230 case WebAssembly::I64_TRUNC_U_F32
:
231 case WebAssembly::I32_TRUNC_U_F64
:
232 case WebAssembly::I64_TRUNC_U_F64
:
233 // These instructions have hasUnmodeledSideEffects() returning true
234 // because they trap on overflow and invalid so they can't be arbitrarily
235 // moved, however in the specific case of register stackifying, it is safe
236 // to move them because overflow and invalid are Undefined Behavior.
244 // Check for writes to __stack_pointer global.
245 if ((MI
.getOpcode() == WebAssembly::GLOBAL_SET_I32
||
246 MI
.getOpcode() == WebAssembly::GLOBAL_SET_I64
) &&
247 strcmp(MI
.getOperand(0).getSymbolName(), "__stack_pointer") == 0)
252 queryCallee(MI
, Read
, Write
, Effects
, StackPointer
);
256 // Test whether Def is safe and profitable to rematerialize.
257 static bool shouldRematerialize(const MachineInstr
&Def
,
258 const WebAssemblyInstrInfo
*TII
) {
259 return Def
.isAsCheapAsAMove() && TII
->isTriviallyReMaterializable(Def
);
262 // Identify the definition for this register at this point. This is a
263 // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
264 // LiveIntervals to handle complex cases.
265 static MachineInstr
*getVRegDef(unsigned Reg
, const MachineInstr
*Insert
,
266 const MachineRegisterInfo
&MRI
,
267 const LiveIntervals
&LIS
) {
268 // Most registers are in SSA form here so we try a quick MRI query first.
269 if (MachineInstr
*Def
= MRI
.getUniqueVRegDef(Reg
))
272 // MRI doesn't know what the Def is. Try asking LIS.
273 if (const VNInfo
*ValNo
= LIS
.getInterval(Reg
).getVNInfoBefore(
274 LIS
.getInstructionIndex(*Insert
)))
275 return LIS
.getInstructionFromIndex(ValNo
->def
);
280 // Test whether Reg, as defined at Def, has exactly one use. This is a
281 // generalization of MachineRegisterInfo::hasOneNonDBGUse that uses
282 // LiveIntervals to handle complex cases.
283 static bool hasOneNonDBGUse(unsigned Reg
, MachineInstr
*Def
,
284 MachineRegisterInfo
&MRI
, MachineDominatorTree
&MDT
,
285 LiveIntervals
&LIS
) {
286 // Most registers are in SSA form here so we try a quick MRI query first.
287 if (MRI
.hasOneNonDBGUse(Reg
))
291 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
292 const VNInfo
*DefVNI
=
293 LI
.getVNInfoAt(LIS
.getInstructionIndex(*Def
).getRegSlot());
295 for (auto &I
: MRI
.use_nodbg_operands(Reg
)) {
296 const auto &Result
= LI
.Query(LIS
.getInstructionIndex(*I
.getParent()));
297 if (Result
.valueIn() == DefVNI
) {
298 if (!Result
.isKill())
308 // Test whether it's safe to move Def to just before Insert.
309 // TODO: Compute memory dependencies in a way that doesn't require always
310 // walking the block.
311 // TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
313 static bool isSafeToMove(const MachineOperand
*Def
, const MachineOperand
*Use
,
314 const MachineInstr
*Insert
,
315 const WebAssemblyFunctionInfo
&MFI
,
316 const MachineRegisterInfo
&MRI
) {
317 const MachineInstr
*DefI
= Def
->getParent();
318 const MachineInstr
*UseI
= Use
->getParent();
319 assert(DefI
->getParent() == Insert
->getParent());
320 assert(UseI
->getParent() == Insert
->getParent());
322 // The first def of a multivalue instruction can be stackified by moving,
323 // since the later defs can always be placed into locals if necessary. Later
324 // defs can only be stackified if all previous defs are already stackified
325 // since ExplicitLocals will not know how to place a def in a local if a
326 // subsequent def is stackified. But only one def can be stackified by moving
327 // the instruction, so it must be the first one.
329 // TODO: This could be loosened to be the first *live* def, but care would
330 // have to be taken to ensure the drops of the initial dead defs can be
331 // placed. This would require checking that no previous defs are used in the
332 // same instruction as subsequent defs.
333 if (Def
!= DefI
->defs().begin())
336 // If any subsequent def is used prior to the current value by the same
337 // instruction in which the current value is used, we cannot
338 // stackify. Stackifying in this case would require that def moving below the
339 // current def in the stack, which cannot be achieved, even with locals.
340 // Also ensure we don't sink the def past any other prior uses.
341 for (const auto &SubsequentDef
: drop_begin(DefI
->defs())) {
342 auto I
= std::next(MachineBasicBlock::const_iterator(DefI
));
343 auto E
= std::next(MachineBasicBlock::const_iterator(UseI
));
344 for (; I
!= E
; ++I
) {
345 for (const auto &PriorUse
: I
->uses()) {
346 if (&PriorUse
== Use
)
348 if (PriorUse
.isReg() && SubsequentDef
.getReg() == PriorUse
.getReg())
354 // If moving is a semantic nop, it is always allowed
355 const MachineBasicBlock
*MBB
= DefI
->getParent();
356 auto NextI
= std::next(MachineBasicBlock::const_iterator(DefI
));
357 for (auto E
= MBB
->end(); NextI
!= E
&& NextI
->isDebugInstr(); ++NextI
)
362 // 'catch' and 'catch_all' should be the first instruction of a BB and cannot
364 if (WebAssembly::isCatch(DefI
->getOpcode()))
367 // Check for register dependencies.
368 SmallVector
<unsigned, 4> MutableRegisters
;
369 for (const MachineOperand
&MO
: DefI
->operands()) {
370 if (!MO
.isReg() || MO
.isUndef())
372 Register Reg
= MO
.getReg();
374 // If the register is dead here and at Insert, ignore it.
375 if (MO
.isDead() && Insert
->definesRegister(Reg
) &&
376 !Insert
->readsRegister(Reg
))
379 if (Reg
.isPhysical()) {
380 // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
381 // from moving down, and we've already checked for that.
382 if (Reg
== WebAssembly::ARGUMENTS
)
384 // If the physical register is never modified, ignore it.
385 if (!MRI
.isPhysRegModified(Reg
))
387 // Otherwise, it's a physical register with unknown liveness.
391 // If one of the operands isn't in SSA form, it has different values at
392 // different times, and we need to make sure we don't move our use across
394 if (!MO
.isDef() && !MRI
.hasOneDef(Reg
))
395 MutableRegisters
.push_back(Reg
);
398 bool Read
= false, Write
= false, Effects
= false, StackPointer
= false;
399 query(*DefI
, Read
, Write
, Effects
, StackPointer
);
401 // If the instruction does not access memory and has no side effects, it has
402 // no additional dependencies.
403 bool HasMutableRegisters
= !MutableRegisters
.empty();
404 if (!Read
&& !Write
&& !Effects
&& !StackPointer
&& !HasMutableRegisters
)
407 // Scan through the intervening instructions between DefI and Insert.
408 MachineBasicBlock::const_iterator
D(DefI
), I(Insert
);
409 for (--I
; I
!= D
; --I
) {
410 bool InterveningRead
= false;
411 bool InterveningWrite
= false;
412 bool InterveningEffects
= false;
413 bool InterveningStackPointer
= false;
414 query(*I
, InterveningRead
, InterveningWrite
, InterveningEffects
,
415 InterveningStackPointer
);
416 if (Effects
&& InterveningEffects
)
418 if (Read
&& InterveningWrite
)
420 if (Write
&& (InterveningRead
|| InterveningWrite
))
422 if (StackPointer
&& InterveningStackPointer
)
425 for (unsigned Reg
: MutableRegisters
)
426 for (const MachineOperand
&MO
: I
->operands())
427 if (MO
.isReg() && MO
.isDef() && MO
.getReg() == Reg
)
434 /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
435 static bool oneUseDominatesOtherUses(unsigned Reg
, const MachineOperand
&OneUse
,
436 const MachineBasicBlock
&MBB
,
437 const MachineRegisterInfo
&MRI
,
438 const MachineDominatorTree
&MDT
,
440 WebAssemblyFunctionInfo
&MFI
) {
441 const LiveInterval
&LI
= LIS
.getInterval(Reg
);
443 const MachineInstr
*OneUseInst
= OneUse
.getParent();
444 VNInfo
*OneUseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*OneUseInst
));
446 for (const MachineOperand
&Use
: MRI
.use_nodbg_operands(Reg
)) {
450 const MachineInstr
*UseInst
= Use
.getParent();
451 VNInfo
*UseVNI
= LI
.getVNInfoBefore(LIS
.getInstructionIndex(*UseInst
));
453 if (UseVNI
!= OneUseVNI
)
456 if (UseInst
== OneUseInst
) {
457 // Another use in the same instruction. We need to ensure that the one
458 // selected use happens "before" it.
462 // Test that the use is dominated by the one selected use.
463 while (!MDT
.dominates(OneUseInst
, UseInst
)) {
464 // Actually, dominating is over-conservative. Test that the use would
465 // happen after the one selected use in the stack evaluation order.
467 // This is needed as a consequence of using implicit local.gets for
468 // uses and implicit local.sets for defs.
469 if (UseInst
->getDesc().getNumDefs() == 0)
471 const MachineOperand
&MO
= UseInst
->getOperand(0);
474 Register DefReg
= MO
.getReg();
475 if (!DefReg
.isVirtual() || !MFI
.isVRegStackified(DefReg
))
477 assert(MRI
.hasOneNonDBGUse(DefReg
));
478 const MachineOperand
&NewUse
= *MRI
.use_nodbg_begin(DefReg
);
479 const MachineInstr
*NewUseInst
= NewUse
.getParent();
480 if (NewUseInst
== OneUseInst
) {
481 if (&OneUse
> &NewUse
)
485 UseInst
= NewUseInst
;
492 /// Get the appropriate tee opcode for the given register class.
493 static unsigned getTeeOpcode(const TargetRegisterClass
*RC
) {
494 if (RC
== &WebAssembly::I32RegClass
)
495 return WebAssembly::TEE_I32
;
496 if (RC
== &WebAssembly::I64RegClass
)
497 return WebAssembly::TEE_I64
;
498 if (RC
== &WebAssembly::F32RegClass
)
499 return WebAssembly::TEE_F32
;
500 if (RC
== &WebAssembly::F64RegClass
)
501 return WebAssembly::TEE_F64
;
502 if (RC
== &WebAssembly::V128RegClass
)
503 return WebAssembly::TEE_V128
;
504 if (RC
== &WebAssembly::EXTERNREFRegClass
)
505 return WebAssembly::TEE_EXTERNREF
;
506 if (RC
== &WebAssembly::FUNCREFRegClass
)
507 return WebAssembly::TEE_FUNCREF
;
508 llvm_unreachable("Unexpected register class");
511 // Shrink LI to its uses, cleaning up LI.
512 static void shrinkToUses(LiveInterval
&LI
, LiveIntervals
&LIS
) {
513 if (LIS
.shrinkToUses(&LI
)) {
514 SmallVector
<LiveInterval
*, 4> SplitLIs
;
515 LIS
.splitSeparateComponents(LI
, SplitLIs
);
519 /// A single-use def in the same block with no intervening memory or register
520 /// dependencies; move the def down and nest it with the current instruction.
521 static MachineInstr
*moveForSingleUse(unsigned Reg
, MachineOperand
&Op
,
522 MachineInstr
*Def
, MachineBasicBlock
&MBB
,
523 MachineInstr
*Insert
, LiveIntervals
&LIS
,
524 WebAssemblyFunctionInfo
&MFI
,
525 MachineRegisterInfo
&MRI
) {
526 LLVM_DEBUG(dbgs() << "Move for single use: "; Def
->dump());
528 WebAssemblyDebugValueManager
DefDIs(Def
);
530 LIS
.handleMove(*Def
);
532 if (MRI
.hasOneDef(Reg
) && MRI
.hasOneNonDBGUse(Reg
)) {
533 // No one else is using this register for anything so we can just stackify
535 MFI
.stackifyVReg(MRI
, Reg
);
537 // The register may have unrelated uses or defs; create a new register for
538 // just our one def and use so that we can stackify it.
539 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
541 DefDIs
.updateReg(NewReg
);
543 // Tell LiveIntervals about the new register.
544 LIS
.createAndComputeVirtRegInterval(NewReg
);
546 // Tell LiveIntervals about the changes to the old register.
547 LiveInterval
&LI
= LIS
.getInterval(Reg
);
548 LI
.removeSegment(LIS
.getInstructionIndex(*Def
).getRegSlot(),
549 LIS
.getInstructionIndex(*Op
.getParent()).getRegSlot(),
550 /*RemoveDeadValNo=*/true);
552 MFI
.stackifyVReg(MRI
, NewReg
);
554 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
557 imposeStackOrdering(Def
);
561 static MachineInstr
*getPrevNonDebugInst(MachineInstr
*MI
) {
562 for (auto *I
= MI
->getPrevNode(); I
; I
= I
->getPrevNode())
563 if (!I
->isDebugInstr())
568 /// A trivially cloneable instruction; clone it and nest the new copy with the
569 /// current instruction.
570 static MachineInstr
*rematerializeCheapDef(
571 unsigned Reg
, MachineOperand
&Op
, MachineInstr
&Def
, MachineBasicBlock
&MBB
,
572 MachineBasicBlock::instr_iterator Insert
, LiveIntervals
&LIS
,
573 WebAssemblyFunctionInfo
&MFI
, MachineRegisterInfo
&MRI
,
574 const WebAssemblyInstrInfo
*TII
, const WebAssemblyRegisterInfo
*TRI
) {
575 LLVM_DEBUG(dbgs() << "Rematerializing cheap def: "; Def
.dump());
576 LLVM_DEBUG(dbgs() << " - for use in "; Op
.getParent()->dump());
578 WebAssemblyDebugValueManager
DefDIs(&Def
);
580 Register NewReg
= MRI
.createVirtualRegister(MRI
.getRegClass(Reg
));
581 DefDIs
.cloneSink(&*Insert
, NewReg
);
583 MachineInstr
*Clone
= getPrevNonDebugInst(&*Insert
);
585 LIS
.InsertMachineInstrInMaps(*Clone
);
586 LIS
.createAndComputeVirtRegInterval(NewReg
);
587 MFI
.stackifyVReg(MRI
, NewReg
);
588 imposeStackOrdering(Clone
);
590 LLVM_DEBUG(dbgs() << " - Cloned to "; Clone
->dump());
592 // Shrink the interval.
593 bool IsDead
= MRI
.use_empty(Reg
);
595 LiveInterval
&LI
= LIS
.getInterval(Reg
);
596 shrinkToUses(LI
, LIS
);
597 IsDead
= !LI
.liveAt(LIS
.getInstructionIndex(Def
).getDeadSlot());
600 // If that was the last use of the original, delete the original.
602 LLVM_DEBUG(dbgs() << " - Deleting original\n");
603 SlotIndex Idx
= LIS
.getInstructionIndex(Def
).getRegSlot();
604 LIS
.removePhysRegDefAt(MCRegister::from(WebAssembly::ARGUMENTS
), Idx
);
605 LIS
.removeInterval(Reg
);
606 LIS
.RemoveMachineInstrFromMaps(Def
);
613 /// A multiple-use def in the same block with no intervening memory or register
614 /// dependencies; move the def down, nest it with the current instruction, and
615 /// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
618 /// Reg = INST ... // Def
619 /// INST ..., Reg, ... // Insert
620 /// INST ..., Reg, ...
621 /// INST ..., Reg, ...
625 /// DefReg = INST ... // Def (to become the new Insert)
626 /// TeeReg, Reg = TEE_... DefReg
627 /// INST ..., TeeReg, ... // Insert
628 /// INST ..., Reg, ...
629 /// INST ..., Reg, ...
631 /// with DefReg and TeeReg stackified. This eliminates a local.get from the
633 static MachineInstr
*moveAndTeeForMultiUse(
634 unsigned Reg
, MachineOperand
&Op
, MachineInstr
*Def
, MachineBasicBlock
&MBB
,
635 MachineInstr
*Insert
, LiveIntervals
&LIS
, WebAssemblyFunctionInfo
&MFI
,
636 MachineRegisterInfo
&MRI
, const WebAssemblyInstrInfo
*TII
) {
637 LLVM_DEBUG(dbgs() << "Move and tee for multi-use:"; Def
->dump());
639 const auto *RegClass
= MRI
.getRegClass(Reg
);
640 Register TeeReg
= MRI
.createVirtualRegister(RegClass
);
641 Register DefReg
= MRI
.createVirtualRegister(RegClass
);
643 // Move Def into place.
644 WebAssemblyDebugValueManager
DefDIs(Def
);
646 LIS
.handleMove(*Def
);
648 // Create the Tee and attach the registers.
649 MachineOperand
&DefMO
= Def
->getOperand(0);
650 MachineInstr
*Tee
= BuildMI(MBB
, Insert
, Insert
->getDebugLoc(),
651 TII
->get(getTeeOpcode(RegClass
)), TeeReg
)
652 .addReg(Reg
, RegState::Define
)
653 .addReg(DefReg
, getUndefRegState(DefMO
.isDead()));
655 DefDIs
.updateReg(DefReg
);
656 SlotIndex TeeIdx
= LIS
.InsertMachineInstrInMaps(*Tee
).getRegSlot();
657 SlotIndex DefIdx
= LIS
.getInstructionIndex(*Def
).getRegSlot();
659 // Tell LiveIntervals we moved the original vreg def from Def to Tee.
660 LiveInterval
&LI
= LIS
.getInterval(Reg
);
661 LiveInterval::iterator I
= LI
.FindSegmentContaining(DefIdx
);
662 VNInfo
*ValNo
= LI
.getVNInfoAt(DefIdx
);
665 shrinkToUses(LI
, LIS
);
667 // Finish stackifying the new regs.
668 LIS
.createAndComputeVirtRegInterval(TeeReg
);
669 LIS
.createAndComputeVirtRegInterval(DefReg
);
670 MFI
.stackifyVReg(MRI
, DefReg
);
671 MFI
.stackifyVReg(MRI
, TeeReg
);
672 imposeStackOrdering(Def
);
673 imposeStackOrdering(Tee
);
675 // Even though 'TeeReg, Reg = TEE ...', has two defs, we don't need to clone
676 // DBG_VALUEs for both of them, given that the latter will cancel the former
677 // anyway. Here we only clone DBG_VALUEs for TeeReg, which will be converted
678 // to a local index in ExplicitLocals pass.
679 DefDIs
.cloneSink(Insert
, TeeReg
, /* CloneDef */ false);
681 LLVM_DEBUG(dbgs() << " - Replaced register: "; Def
->dump());
682 LLVM_DEBUG(dbgs() << " - Tee instruction: "; Tee
->dump());
687 /// A stack for walking the tree of instructions being built, visiting the
688 /// MachineOperands in DFS order.
689 class TreeWalkerState
{
690 using mop_iterator
= MachineInstr::mop_iterator
;
691 using mop_reverse_iterator
= std::reverse_iterator
<mop_iterator
>;
692 using RangeTy
= iterator_range
<mop_reverse_iterator
>;
693 SmallVector
<RangeTy
, 4> Worklist
;
696 explicit TreeWalkerState(MachineInstr
*Insert
) {
697 const iterator_range
<mop_iterator
> &Range
= Insert
->explicit_uses();
699 Worklist
.push_back(reverse(Range
));
702 bool done() const { return Worklist
.empty(); }
704 MachineOperand
&pop() {
705 RangeTy
&Range
= Worklist
.back();
706 MachineOperand
&Op
= *Range
.begin();
707 Range
= drop_begin(Range
);
710 assert((Worklist
.empty() || !Worklist
.back().empty()) &&
711 "Empty ranges shouldn't remain in the worklist");
715 /// Push Instr's operands onto the stack to be visited.
716 void pushOperands(MachineInstr
*Instr
) {
717 const iterator_range
<mop_iterator
> &Range(Instr
->explicit_uses());
719 Worklist
.push_back(reverse(Range
));
722 /// Some of Instr's operands are on the top of the stack; remove them and
723 /// re-insert them starting from the beginning (because we've commuted them).
724 void resetTopOperands(MachineInstr
*Instr
) {
725 assert(hasRemainingOperands(Instr
) &&
726 "Reseting operands should only be done when the instruction has "
727 "an operand still on the stack");
728 Worklist
.back() = reverse(Instr
->explicit_uses());
731 /// Test whether Instr has operands remaining to be visited at the top of
733 bool hasRemainingOperands(const MachineInstr
*Instr
) const {
734 if (Worklist
.empty())
736 const RangeTy
&Range
= Worklist
.back();
737 return !Range
.empty() && Range
.begin()->getParent() == Instr
;
740 /// Test whether the given register is present on the stack, indicating an
741 /// operand in the tree that we haven't visited yet. Moving a definition of
742 /// Reg to a point in the tree after that would change its value.
744 /// This is needed as a consequence of using implicit local.gets for
745 /// uses and implicit local.sets for defs.
746 bool isOnStack(unsigned Reg
) const {
747 for (const RangeTy
&Range
: Worklist
)
748 for (const MachineOperand
&MO
: Range
)
749 if (MO
.isReg() && MO
.getReg() == Reg
)
755 /// State to keep track of whether commuting is in flight or whether it's been
756 /// tried for the current instruction and didn't work.
757 class CommutingState
{
758 /// There are effectively three states: the initial state where we haven't
759 /// started commuting anything and we don't know anything yet, the tentative
760 /// state where we've commuted the operands of the current instruction and are
761 /// revisiting it, and the declined state where we've reverted the operands
762 /// back to their original order and will no longer commute it further.
763 bool TentativelyCommuting
= false;
764 bool Declined
= false;
766 /// During the tentative state, these hold the operand indices of the commuted
768 unsigned Operand0
, Operand1
;
771 /// Stackification for an operand was not successful due to ordering
772 /// constraints. If possible, and if we haven't already tried it and declined
773 /// it, commute Insert's operands and prepare to revisit it.
774 void maybeCommute(MachineInstr
*Insert
, TreeWalkerState
&TreeWalker
,
775 const WebAssemblyInstrInfo
*TII
) {
776 if (TentativelyCommuting
) {
778 "Don't decline commuting until you've finished trying it");
779 // Commuting didn't help. Revert it.
780 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
781 TentativelyCommuting
= false;
783 } else if (!Declined
&& TreeWalker
.hasRemainingOperands(Insert
)) {
784 Operand0
= TargetInstrInfo::CommuteAnyOperandIndex
;
785 Operand1
= TargetInstrInfo::CommuteAnyOperandIndex
;
786 if (TII
->findCommutedOpIndices(*Insert
, Operand0
, Operand1
)) {
787 // Tentatively commute the operands and try again.
788 TII
->commuteInstruction(*Insert
, /*NewMI=*/false, Operand0
, Operand1
);
789 TreeWalker
.resetTopOperands(Insert
);
790 TentativelyCommuting
= true;
796 /// Stackification for some operand was successful. Reset to the default
799 TentativelyCommuting
= false;
803 } // end anonymous namespace
805 bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction
&MF
) {
806 LLVM_DEBUG(dbgs() << "********** Register Stackifying **********\n"
807 "********** Function: "
808 << MF
.getName() << '\n');
810 bool Changed
= false;
811 MachineRegisterInfo
&MRI
= MF
.getRegInfo();
812 WebAssemblyFunctionInfo
&MFI
= *MF
.getInfo
<WebAssemblyFunctionInfo
>();
813 const auto *TII
= MF
.getSubtarget
<WebAssemblySubtarget
>().getInstrInfo();
814 const auto *TRI
= MF
.getSubtarget
<WebAssemblySubtarget
>().getRegisterInfo();
815 auto &MDT
= getAnalysis
<MachineDominatorTree
>();
816 auto &LIS
= getAnalysis
<LiveIntervals
>();
818 // Walk the instructions from the bottom up. Currently we don't look past
819 // block boundaries, and the blocks aren't ordered so the block visitation
820 // order isn't significant, but we may want to change this in the future.
821 for (MachineBasicBlock
&MBB
: MF
) {
822 // Don't use a range-based for loop, because we modify the list as we're
823 // iterating over it and the end iterator may change.
824 for (auto MII
= MBB
.rbegin(); MII
!= MBB
.rend(); ++MII
) {
825 MachineInstr
*Insert
= &*MII
;
826 // Don't nest anything inside an inline asm, because we don't have
827 // constraints for $push inputs.
828 if (Insert
->isInlineAsm())
831 // Ignore debugging intrinsics.
832 if (Insert
->isDebugValue())
835 // Iterate through the inputs in reverse order, since we'll be pulling
836 // operands off the stack in LIFO order.
837 CommutingState Commuting
;
838 TreeWalkerState
TreeWalker(Insert
);
839 while (!TreeWalker
.done()) {
840 MachineOperand
&Use
= TreeWalker
.pop();
842 // We're only interested in explicit virtual register operands.
846 Register Reg
= Use
.getReg();
847 assert(Use
.isUse() && "explicit_uses() should only iterate over uses");
848 assert(!Use
.isImplicit() &&
849 "explicit_uses() should only iterate over explicit operands");
850 if (Reg
.isPhysical())
853 // Identify the definition for this register at this point.
854 MachineInstr
*DefI
= getVRegDef(Reg
, Insert
, MRI
, LIS
);
858 // Don't nest an INLINE_ASM def into anything, because we don't have
859 // constraints for $pop outputs.
860 if (DefI
->isInlineAsm())
863 // Argument instructions represent live-in registers and not real
865 if (WebAssembly::isArgument(DefI
->getOpcode()))
868 MachineOperand
*Def
= DefI
->findRegisterDefOperand(Reg
);
869 assert(Def
!= nullptr);
871 // Decide which strategy to take. Prefer to move a single-use value
872 // over cloning it, and prefer cloning over introducing a tee.
873 // For moving, we require the def to be in the same block as the use;
874 // this makes things simpler (LiveIntervals' handleMove function only
875 // supports intra-block moves) and it's MachineSink's job to catch all
876 // the sinking opportunities anyway.
877 bool SameBlock
= DefI
->getParent() == &MBB
;
878 bool CanMove
= SameBlock
&& isSafeToMove(Def
, &Use
, Insert
, MFI
, MRI
) &&
879 !TreeWalker
.isOnStack(Reg
);
880 if (CanMove
&& hasOneNonDBGUse(Reg
, DefI
, MRI
, MDT
, LIS
)) {
881 Insert
= moveForSingleUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
, MRI
);
883 // If we are removing the frame base reg completely, remove the debug
885 // TODO: Encode this properly as a stackified value.
886 if (MFI
.isFrameBaseVirtual() && MFI
.getFrameBaseVreg() == Reg
)
887 MFI
.clearFrameBaseVreg();
888 } else if (shouldRematerialize(*DefI
, TII
)) {
890 rematerializeCheapDef(Reg
, Use
, *DefI
, MBB
, Insert
->getIterator(),
891 LIS
, MFI
, MRI
, TII
, TRI
);
892 } else if (CanMove
&& oneUseDominatesOtherUses(Reg
, Use
, MBB
, MRI
, MDT
,
894 Insert
= moveAndTeeForMultiUse(Reg
, Use
, DefI
, MBB
, Insert
, LIS
, MFI
,
897 // We failed to stackify the operand. If the problem was ordering
898 // constraints, Commuting may be able to help.
899 if (!CanMove
&& SameBlock
)
900 Commuting
.maybeCommute(Insert
, TreeWalker
, TII
);
901 // Proceed to the next operand.
905 // Stackifying a multivalue def may unlock in-place stackification of
906 // subsequent defs. TODO: Handle the case where the consecutive uses are
907 // not all in the same instruction.
908 auto *SubsequentDef
= Insert
->defs().begin();
909 auto *SubsequentUse
= &Use
;
910 while (SubsequentDef
!= Insert
->defs().end() &&
911 SubsequentUse
!= Use
.getParent()->uses().end()) {
912 if (!SubsequentDef
->isReg() || !SubsequentUse
->isReg())
914 Register DefReg
= SubsequentDef
->getReg();
915 Register UseReg
= SubsequentUse
->getReg();
916 // TODO: This single-use restriction could be relaxed by using tees
917 if (DefReg
!= UseReg
|| !MRI
.hasOneNonDBGUse(DefReg
))
919 MFI
.stackifyVReg(MRI
, DefReg
);
924 // If the instruction we just stackified is an IMPLICIT_DEF, convert it
925 // to a constant 0 so that the def is explicit, and the push/pop
926 // correspondence is maintained.
927 if (Insert
->getOpcode() == TargetOpcode::IMPLICIT_DEF
)
928 convertImplicitDefToConstZero(Insert
, MRI
, TII
, MF
, LIS
);
930 // We stackified an operand. Add the defining instruction's operands to
931 // the worklist stack now to continue to build an ever deeper tree.
933 TreeWalker
.pushOperands(Insert
);
936 // If we stackified any operands, skip over the tree to start looking for
937 // the next instruction we can build a tree on.
938 if (Insert
!= &*MII
) {
939 imposeStackOrdering(&*MII
);
940 MII
= MachineBasicBlock::iterator(Insert
).getReverse();
946 // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
947 // that it never looks like a use-before-def.
949 MF
.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK
);
950 for (MachineBasicBlock
&MBB
: MF
)
951 MBB
.addLiveIn(WebAssembly::VALUE_STACK
);
955 // Verify that pushes and pops are performed in LIFO order.
956 SmallVector
<unsigned, 0> Stack
;
957 for (MachineBasicBlock
&MBB
: MF
) {
958 for (MachineInstr
&MI
: MBB
) {
959 if (MI
.isDebugInstr())
961 for (MachineOperand
&MO
: reverse(MI
.explicit_uses())) {
964 Register Reg
= MO
.getReg();
965 if (MFI
.isVRegStackified(Reg
))
966 assert(Stack
.pop_back_val() == Reg
&&
967 "Register stack pop should be paired with a push");
969 for (MachineOperand
&MO
: MI
.defs()) {
972 Register Reg
= MO
.getReg();
973 if (MFI
.isVRegStackified(Reg
))
974 Stack
.push_back(MO
.getReg());
977 // TODO: Generalize this code to support keeping values on the stack across
978 // basic block boundaries.
979 assert(Stack
.empty() &&
980 "Register stack pushes and pops should be balanced");