1 //===----- X86DynAllocaExpander.cpp - Expand DynAlloca pseudo instruction -===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines a pass that expands DynAlloca pseudo-instructions.
11 // It performs a conservative analysis to determine whether each allocation
12 // falls within a region of the stack that is safe to use, or whether stack
13 // probes must be emitted.
15 //===----------------------------------------------------------------------===//
18 #include "X86InstrBuilder.h"
19 #include "X86InstrInfo.h"
20 #include "X86MachineFunctionInfo.h"
21 #include "X86Subtarget.h"
22 #include "llvm/ADT/MapVector.h"
23 #include "llvm/ADT/PostOrderIterator.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstrBuilder.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/Passes.h"
28 #include "llvm/CodeGen/TargetInstrInfo.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/Support/raw_ostream.h"
36 class X86DynAllocaExpander
: public MachineFunctionPass
{
38 X86DynAllocaExpander() : MachineFunctionPass(ID
) {}
40 bool runOnMachineFunction(MachineFunction
&MF
) override
;
43 /// Strategies for lowering a DynAlloca.
44 enum Lowering
{ TouchAndSub
, Sub
, Probe
};
46 /// Deterministic-order map from DynAlloca instruction to desired lowering.
47 typedef MapVector
<MachineInstr
*, Lowering
> LoweringMap
;
49 /// Compute which lowering to use for each DynAlloca instruction.
50 void computeLowerings(MachineFunction
&MF
, LoweringMap
& Lowerings
);
52 /// Get the appropriate lowering based on current offset and amount.
53 Lowering
getLowering(int64_t CurrentOffset
, int64_t AllocaAmount
);
55 /// Lower a DynAlloca instruction.
56 void lower(MachineInstr
* MI
, Lowering L
);
58 MachineRegisterInfo
*MRI
= nullptr;
59 const X86Subtarget
*STI
= nullptr;
60 const TargetInstrInfo
*TII
= nullptr;
61 const X86RegisterInfo
*TRI
= nullptr;
62 unsigned StackPtr
= 0;
63 unsigned SlotSize
= 0;
64 int64_t StackProbeSize
= 0;
65 bool NoStackArgProbe
= false;
67 StringRef
getPassName() const override
{ return "X86 DynAlloca Expander"; }
71 char X86DynAllocaExpander::ID
= 0;
73 } // end anonymous namespace
75 FunctionPass
*llvm::createX86DynAllocaExpander() {
76 return new X86DynAllocaExpander();
79 /// Return the allocation amount for a DynAlloca instruction, or -1 if unknown.
80 static int64_t getDynAllocaAmount(MachineInstr
*MI
, MachineRegisterInfo
*MRI
) {
81 assert(MI
->getOpcode() == X86::DYN_ALLOCA_32
||
82 MI
->getOpcode() == X86::DYN_ALLOCA_64
);
83 assert(MI
->getOperand(0).isReg());
85 Register AmountReg
= MI
->getOperand(0).getReg();
86 MachineInstr
*Def
= MRI
->getUniqueVRegDef(AmountReg
);
89 (Def
->getOpcode() != X86::MOV32ri
&& Def
->getOpcode() != X86::MOV64ri
) ||
90 !Def
->getOperand(1).isImm())
93 return Def
->getOperand(1).getImm();
96 X86DynAllocaExpander::Lowering
97 X86DynAllocaExpander::getLowering(int64_t CurrentOffset
,
98 int64_t AllocaAmount
) {
99 // For a non-constant amount or a large amount, we have to probe.
100 if (AllocaAmount
< 0 || AllocaAmount
> StackProbeSize
)
103 // If it fits within the safe region of the stack, just subtract.
104 if (CurrentOffset
+ AllocaAmount
<= StackProbeSize
)
107 // Otherwise, touch the current tip of the stack, then subtract.
111 static bool isPushPop(const MachineInstr
&MI
) {
112 switch (MI
.getOpcode()) {
129 void X86DynAllocaExpander::computeLowerings(MachineFunction
&MF
,
130 LoweringMap
&Lowerings
) {
131 // Do a one-pass reverse post-order walk of the CFG to conservatively estimate
132 // the offset between the stack pointer and the lowest touched part of the
133 // stack, and use that to decide how to lower each DynAlloca instruction.
135 // Initialize OutOffset[B], the stack offset at exit from B, to something big.
136 DenseMap
<MachineBasicBlock
*, int64_t> OutOffset
;
137 for (MachineBasicBlock
&MBB
: MF
)
138 OutOffset
[&MBB
] = INT32_MAX
;
140 // Note: we don't know the offset at the start of the entry block since the
141 // prologue hasn't been inserted yet, and how much that will adjust the stack
142 // pointer depends on register spills, which have not been computed yet.
144 // Compute the reverse post-order.
145 ReversePostOrderTraversal
<MachineFunction
*> RPO(&MF
);
147 for (MachineBasicBlock
*MBB
: RPO
) {
149 for (MachineBasicBlock
*Pred
: MBB
->predecessors())
150 Offset
= std::max(Offset
, OutOffset
[Pred
]);
151 if (Offset
== -1) Offset
= INT32_MAX
;
153 for (MachineInstr
&MI
: *MBB
) {
154 if (MI
.getOpcode() == X86::DYN_ALLOCA_32
||
155 MI
.getOpcode() == X86::DYN_ALLOCA_64
) {
156 // A DynAlloca moves StackPtr, and potentially touches it.
157 int64_t Amount
= getDynAllocaAmount(&MI
, MRI
);
158 Lowering L
= getLowering(Offset
, Amount
);
171 } else if (MI
.isCall() || isPushPop(MI
)) {
172 // Calls, pushes and pops touch the tip of the stack.
174 } else if (MI
.getOpcode() == X86::ADJCALLSTACKUP32
||
175 MI
.getOpcode() == X86::ADJCALLSTACKUP64
) {
176 Offset
-= MI
.getOperand(0).getImm();
177 } else if (MI
.getOpcode() == X86::ADJCALLSTACKDOWN32
||
178 MI
.getOpcode() == X86::ADJCALLSTACKDOWN64
) {
179 Offset
+= MI
.getOperand(0).getImm();
180 } else if (MI
.modifiesRegister(StackPtr
, TRI
)) {
181 // Any other modification of SP means we've lost track of it.
186 OutOffset
[MBB
] = Offset
;
190 static unsigned getSubOpcode(bool Is64Bit
) {
192 return X86::SUB64ri32
;
196 void X86DynAllocaExpander::lower(MachineInstr
*MI
, Lowering L
) {
197 const DebugLoc
&DL
= MI
->getDebugLoc();
198 MachineBasicBlock
*MBB
= MI
->getParent();
199 MachineBasicBlock::iterator I
= *MI
;
201 int64_t Amount
= getDynAllocaAmount(MI
, MRI
);
203 MI
->eraseFromParent();
207 // These two variables differ on x32, which is a 64-bit target with a
209 bool Is64Bit
= STI
->is64Bit();
210 bool Is64BitAlloca
= MI
->getOpcode() == X86::DYN_ALLOCA_64
;
211 assert(SlotSize
== 4 || SlotSize
== 8);
213 std::optional
<MachineFunction::DebugInstrOperandPair
> InstrNum
;
214 if (unsigned Num
= MI
->peekDebugInstrNum()) {
215 // Operand 2 of DYN_ALLOCAs contains the stack def.
221 assert(Amount
>= SlotSize
);
223 // Use a push to touch the top of the stack.
224 unsigned RegA
= Is64Bit
? X86::RAX
: X86::EAX
;
225 BuildMI(*MBB
, I
, DL
, TII
->get(Is64Bit
? X86::PUSH64r
: X86::PUSH32r
))
226 .addReg(RegA
, RegState::Undef
);
231 // Fall through to make any remaining adjustment.
236 if (Amount
== SlotSize
) {
237 // Use push to save size.
238 unsigned RegA
= Is64Bit
? X86::RAX
: X86::EAX
;
239 BuildMI(*MBB
, I
, DL
, TII
->get(Is64Bit
? X86::PUSH64r
: X86::PUSH32r
))
240 .addReg(RegA
, RegState::Undef
);
243 BuildMI(*MBB
, I
, DL
, TII
->get(getSubOpcode(Is64BitAlloca
)), StackPtr
)
249 if (!NoStackArgProbe
) {
250 // The probe lowering expects the amount in RAX/EAX.
251 unsigned RegA
= Is64BitAlloca
? X86::RAX
: X86::EAX
;
252 BuildMI(*MBB
, MI
, DL
, TII
->get(TargetOpcode::COPY
), RegA
)
253 .addReg(MI
->getOperand(0).getReg());
256 STI
->getFrameLowering()->emitStackProbe(*MBB
->getParent(), *MBB
, MI
, DL
,
257 /*InProlog=*/false, InstrNum
);
261 TII
->get(Is64BitAlloca
? X86::SUB64rr
: X86::SUB32rr
), StackPtr
)
263 .addReg(MI
->getOperand(0).getReg());
268 Register AmountReg
= MI
->getOperand(0).getReg();
269 MI
->eraseFromParent();
271 // Delete the definition of AmountReg.
272 if (MRI
->use_empty(AmountReg
))
273 if (MachineInstr
*AmountDef
= MRI
->getUniqueVRegDef(AmountReg
))
274 AmountDef
->eraseFromParent();
277 bool X86DynAllocaExpander::runOnMachineFunction(MachineFunction
&MF
) {
278 if (!MF
.getInfo
<X86MachineFunctionInfo
>()->hasDynAlloca())
281 MRI
= &MF
.getRegInfo();
282 STI
= &MF
.getSubtarget
<X86Subtarget
>();
283 TII
= STI
->getInstrInfo();
284 TRI
= STI
->getRegisterInfo();
285 StackPtr
= TRI
->getStackRegister();
286 SlotSize
= TRI
->getSlotSize();
287 StackProbeSize
= STI
->getTargetLowering()->getStackProbeSize(MF
);
288 NoStackArgProbe
= MF
.getFunction().hasFnAttribute("no-stack-arg-probe");
290 StackProbeSize
= INT64_MAX
;
292 LoweringMap Lowerings
;
293 computeLowerings(MF
, Lowerings
);
294 for (auto &P
: Lowerings
)
295 lower(P
.first
, P
.second
);