//===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "MCTargetDesc/X86MCTargetDesc.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Target/TargetOptions.h"
#define DEBUG_TYPE "x86-fl"

STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue");
STATISTIC(NumFrameExtraProbe,
          "Number of extra stack probes generated in prologue");
STATISTIC(NumFunctionUsingPush2Pop2, "Number of functions using push2/pop2");
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()) {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // Standard x86_64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
}
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects() &&
         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
/// call frame pseudos can be simplified. Having a FP, as in the default
/// implementation, is not sufficient here since we can't always use it.
/// Use a more nuanced condition.
bool X86FrameLowering::canSimplifyCallFramePseudos(
    const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) ||
         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
         (hasFP(MF) && !TRI->hasStackRealignment(MF)) ||
         TRI->hasBasePointer(MF);
}
// needsFrameIndexResolution - Do we need to perform FI resolution for
// this function. Normally, this is required only when the function
// has any stack objects. However, FI resolution actually has another job,
// not apparent from the title - it resolves callframesetup/destroy
// that were not simplified earlier.
// So, this is required for x86 functions that have push sequences even
// when there are no stack objects.
bool X86FrameLowering::needsFrameIndexResolution(
    const MachineFunction &MF) const {
  return MF.getFrameInfo().hasStackObjects() ||
         MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
/// hasFPImpl - Return true if the specified function should have a dedicated
/// frame pointer register. This is true if the function has variable sized
/// allocas or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFPImpl(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
          TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
          MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
          MFI.hasStackMap() || MFI.hasPatchPoint() ||
          (isWin64Prologue(MF) && MFI.hasCopyImplyingStackAdjustment()));
}
static unsigned getSUBriOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64ri32 : X86::SUB32ri;
}

static unsigned getADDriOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64ri32 : X86::ADD32ri;
}

static unsigned getSUBrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::SUB64rr : X86::SUB32rr;
}

static unsigned getADDrrOpcode(bool IsLP64) {
  return IsLP64 ? X86::ADD64rr : X86::ADD32rr;
}

static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
  return IsLP64 ? X86::AND64ri32 : X86::AND32ri;
}

static unsigned getLEArOpcode(bool IsLP64) {
  return IsLP64 ? X86::LEA64r : X86::LEA32r;
}
static unsigned getMOVriOpcode(bool Use64BitReg, int64_t Imm) {
  if (Use64BitReg) {
    if (isUInt<32>(Imm))
      return X86::MOV32ri64;
    if (isInt<32>(Imm))
      return X86::MOV64ri32;
    return X86::MOV64ri;
  }
  return X86::MOV32ri;
}
// Push-Pop Acceleration (PPX) hint is used to indicate that the POP reads the
// value written by the PUSH from the stack. The processor tracks these marked
// instructions internally and fast-forwards register data between matching PUSH
// and POP instructions, without going through memory or through the training
// loop of the Fast Store Forwarding Predictor (FSFP). Instead, a more efficient
// memory-renaming optimization can be used.
//
// The PPX hint is purely a performance hint. Instructions with this hint have
// the same functional semantics as those without. PPX hints set by the
// compiler that violate the balancing rule may turn off the PPX optimization,
// but they will not affect program semantics.
//
// Hence, PPX is used for balanced spill/reloads (Exceptions and setjmp/longjmp
// are not considered).
//
// PUSH2 and POP2 are instructions for (respectively) pushing/popping 2
// GPRs at a time to/from the stack.
static unsigned getPUSHOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::PUSHP64r : X86::PUSH64r)
                      : X86::PUSH32r;
}
static unsigned getPOPOpcode(const X86Subtarget &ST) {
  return ST.is64Bit() ? (ST.hasPPX() ? X86::POPP64r : X86::POP64r)
                      : X86::POP32r;
}
static unsigned getPUSH2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::PUSH2P : X86::PUSH2;
}
static unsigned getPOP2Opcode(const X86Subtarget &ST) {
  return ST.hasPPX() ? X86::POP2P : X86::POP2;
}
static bool isEAXLiveIn(MachineBasicBlock &MBB) {
  for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) {
    unsigned Reg = RegMask.PhysReg;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}
/// Check if the flags need to be preserved before the terminators.
/// This would be the case if EFLAGS is live-in of the region composed of the
/// terminators, or live-out of that region without being defined by a
/// terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL, int64_t NumBytes,
                                    bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {
    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add.
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   Rax, false, 0);
      return;
    }
  }

  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                           : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                             : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
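// Worked example (illustrative, not from the original source): a 5 GiB
// (0x140000000-byte) prologue allocation exceeds Chunk (2^31 - 1), so instead
// of emitting three immediate SUBs the code above materializes the offset in
// a scratch register and performs a single register-register subtract:
//   movabsq $0x140000000, %rax
//   subq    %rax, %rsp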
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // an ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr)
                               : getADDriOpcode(Uses64BitFramePtr);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}
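// Illustrative example (not from the original source): on a subtarget where
// useLeaForSP() is set, or when EFLAGS must survive, a -16 adjustment is
// emitted as
//   leaq -16(%rsp), %rsp
// which leaves EFLAGS untouched, whereas the default form
//   subq $16, %rsp
// defines EFLAGS (marked dead above when it is known to be unused).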
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
     below the ADD/SUB/LEA, e.g.:
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
      PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                const DebugLoc &DL,
                                const MCCFIInstruction &CFIInst,
                                MachineInstr::MIFlag Flag) const {
  MachineFunction &MF = *MBB.getParent();
  unsigned CFIIndex = MF.addFrameInst(CFIInst);

  if (CFIInst.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
    MF.getInfo<X86MachineFunctionInfo>()->setHasCFIAdjustCfa(true);

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlag(Flag);
}
/// Emits Dwarf Info specifying offsets of callee saved registers and
/// frame pointer. This is called only when basic block sections are enabled.
void X86FrameLowering::emitCalleeSavedFrameMovesFullCFA(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  if (!hasFP(MF)) {
    emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
    return;
  }
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  const Register FramePtr = TRI->getFrameRegister(MF);
  const Register MachineFramePtr =
      STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64))
                               : FramePtr;
  unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true);
  // Offset = space for return address + size of the frame pointer itself.
  int64_t Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4);
  BuildCFI(MBB, MBBI, DebugLoc{},
           MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset));
  emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true);
}
void X86FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool IsPrologue) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // Calculate offsets.
  for (const CalleeSavedInfo &I : CSI) {
    int64_t Offset = MFI.getObjectOffset(I.getFrameIdx());
    Register Reg = I.getReg();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

    if (IsPrologue) {
      if (X86FI->getStackPtrSaveMI()) {
        // +2*SlotSize because there is return address and ebp at the bottom
        // of the stack.
        Offset += 2 * SlotSize;
        SmallString<64> CfaExpr;
        CfaExpr.push_back(dwarf::DW_CFA_expression);
        uint8_t buffer[16];
        CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
        CfaExpr.push_back(2);
        Register FramePtr = TRI->getFrameRegister(MF);
        const Register MachineFramePtr =
            STI.isTarget64BitILP32()
                ? Register(getX86SubSuperRegister(FramePtr, 64))
                : FramePtr;
        unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
        CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
        CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                 MachineInstr::FrameSetup);
      } else {
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
      }
    } else {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createRestore(nullptr, DwarfReg));
    }
  }
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    int FI = MI->getOperand(1).getIndex();
    int64_t Offset = MFI.getObjectOffset(FI) + 2 * SlotSize;
    SmallString<64> CfaExpr;
    Register FramePtr = TRI->getFrameRegister(MF);
    const Register MachineFramePtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(FramePtr, 64))
            : FramePtr;
    unsigned DwarfFramePtr = MRI->getDwarfRegNum(MachineFramePtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfFramePtr));
    uint8_t buffer[16];
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    // DW_CFA_def_cfa_expression: DW_OP_breg5 offset, DW_OP_deref
    BuildCFI(MBB, MBBI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}
void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
                                            MachineBasicBlock &MBB) const {
  const MachineFunction &MF = *MBB.getParent();

  // Insertion point.
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // Fake a debug loc.
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Zero out FP stack if referenced. Do this outside of the loop below so that
  // it's done only once.
  const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
  for (MCRegister Reg : RegsToZero.set_bits()) {
    if (!X86::RFP80RegClass.contains(Reg))
      continue;

    unsigned NumFPRegs = ST.is64Bit() ? 8 : 7;
    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0));

    for (unsigned i = 0; i != NumFPRegs; ++i)
      BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0);
    break;
  }

  // For GPRs, we only care to clear out the 32-bit register.
  BitVector GPRsToZero(TRI->getNumRegs());
  for (MCRegister Reg : RegsToZero.set_bits())
    if (TRI->isGeneralPurposeRegister(MF, Reg)) {
      GPRsToZero.set(getX86SubSuperRegister(Reg, 32));
      RegsToZero.reset(Reg);
    }

  // Zero out the GPRs first.
  for (MCRegister Reg : GPRsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);

  // Zero out the remaining registers.
  for (MCRegister Reg : RegsToZero.set_bits())
    TII.buildClearRegister(Reg, MBB, MBBI, DL);
}
void X86FrameLowering::emitStackProbe(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR()) {
    if (InProlog) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING))
          .addImm(0 /* no explicit stack size */);
    } else {
      emitStackProbeInline(MF, MBB, MBBI, DL, false);
    }
  } else {
    emitStackProbeCall(MF, MBB, MBBI, DL, InProlog, InstrNum);
  }
}
bool X86FrameLowering::stackProbeFunctionModifiesSP() const {
  return STI.isOSWindows() && !STI.isTargetWin64();
}
&MF
,
624 MachineBasicBlock
&PrologMBB
) const {
625 auto Where
= llvm::find_if(PrologMBB
, [](MachineInstr
&MI
) {
626 return MI
.getOpcode() == X86::STACKALLOC_W_PROBING
;
628 if (Where
!= PrologMBB
.end()) {
629 DebugLoc DL
= PrologMBB
.findDebugLoc(Where
);
630 emitStackProbeInline(MF
, PrologMBB
, Where
, DL
, true);
631 Where
->eraseFromParent();
void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
                                            MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator MBBI,
                                            const DebugLoc &DL,
                                            bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  if (STI.isTargetWindowsCoreCLR() && STI.is64Bit())
    emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog);
  else
    emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog);
}
void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes
  // are left between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}
677 MachineFunction
&MF
, MachineBasicBlock
&MBB
,
678 MachineBasicBlock::iterator MBBI
, const DebugLoc
&DL
, uint64_t Offset
,
679 uint64_t AlignOffset
) const {
681 const bool NeedsDwarfCFI
= needsDwarfCFI(MF
);
682 const bool HasFP
= hasFP(MF
);
683 const X86Subtarget
&STI
= MF
.getSubtarget
<X86Subtarget
>();
684 const X86TargetLowering
&TLI
= *STI
.getTargetLowering();
685 const unsigned MovMIOpc
= Is64Bit
? X86::MOV64mi32
: X86::MOV32mi
;
686 const uint64_t StackProbeSize
= TLI
.getStackProbeSize(MF
);
688 uint64_t CurrentOffset
= 0;
690 assert(AlignOffset
< StackProbeSize
);
692 // If the offset is so small it fits within a page, there's nothing to do.
693 if (StackProbeSize
< Offset
+ AlignOffset
) {
695 uint64_t StackAdjustment
= StackProbeSize
- AlignOffset
;
696 BuildStackAdjustment(MBB
, MBBI
, DL
, -StackAdjustment
, /*InEpilogue=*/false)
697 .setMIFlag(MachineInstr::FrameSetup
);
698 if (!HasFP
&& NeedsDwarfCFI
) {
701 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackAdjustment
));
704 addRegOffset(BuildMI(MBB
, MBBI
, DL
, TII
.get(MovMIOpc
))
705 .setMIFlag(MachineInstr::FrameSetup
),
708 .setMIFlag(MachineInstr::FrameSetup
);
709 NumFrameExtraProbe
++;
710 CurrentOffset
= StackProbeSize
- AlignOffset
;
713 // For the next N - 1 pages, just probe. I tried to take advantage of
714 // natural probes but it implies much more logic and there was very few
715 // interesting natural probes to interleave.
716 while (CurrentOffset
+ StackProbeSize
< Offset
) {
717 BuildStackAdjustment(MBB
, MBBI
, DL
, -StackProbeSize
, /*InEpilogue=*/false)
718 .setMIFlag(MachineInstr::FrameSetup
);
720 if (!HasFP
&& NeedsDwarfCFI
) {
723 MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize
));
725 addRegOffset(BuildMI(MBB
, MBBI
, DL
, TII
.get(MovMIOpc
))
726 .setMIFlag(MachineInstr::FrameSetup
),
729 .setMIFlag(MachineInstr::FrameSetup
);
730 NumFrameExtraProbe
++;
731 CurrentOffset
+= StackProbeSize
;
734 // No need to probe the tail, it is smaller than a Page.
735 uint64_t ChunkSize
= Offset
- CurrentOffset
;
736 if (ChunkSize
== SlotSize
) {
737 // Use push for slot sized adjustments as a size optimization,
738 // like emitSPUpdate does when not probing.
739 unsigned Reg
= Is64Bit
? X86::RAX
: X86::EAX
;
740 unsigned Opc
= Is64Bit
? X86::PUSH64r
: X86::PUSH32r
;
741 BuildMI(MBB
, MBBI
, DL
, TII
.get(Opc
))
742 .addReg(Reg
, RegState::Undef
)
743 .setMIFlag(MachineInstr::FrameSetup
);
745 BuildStackAdjustment(MBB
, MBBI
, DL
, -ChunkSize
, /*InEpilogue=*/false)
746 .setMIFlag(MachineInstr::FrameSetup
);
748 // No need to adjust Dwarf CFA offset here, the last position of the stack has
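// Worked example (illustrative, not from the original source): with a
// 4096-byte probe size, an 0x2010-byte allocation that is already
// page-aligned (AlignOffset == 0) expands roughly to
//   subq $4096, %rsp ; movl $0, (%rsp)   ; first page
//   subq $4096, %rsp ; movl $0, (%rsp)   ; second page
//   subq $0x10, %rsp                     ; unprobed tail (< one page)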
void X86FrameLowering::emitStackProbeInlineGenericLoop(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {
  assert(Offset && "null offset");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  if (AlignOffset) {
    if (AlignOffset < StackProbeSize) {
      // Perform a first smaller allocation followed by a probe.
      BuildStackAdjustment(MBB, MBBI, DL, -AlignOffset, /*InEpilogue=*/false)
          .setMIFlag(MachineInstr::FrameSetup);

      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      NumFrameExtraProbe++;
      Offset -= AlignOffset;
    }
  }

  // Synthesize a loop
  NumFrameLoopProbe++;
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = ++MBB.getIterator();
  MF.insert(MBBIter, testMBB);
  MF.insert(MBBIter, tailMBB);

  Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                              : Is64Bit         ? X86::R11D

  BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

  // save loop bound
  {
    const unsigned BoundOffset = alignDown(Offset, StackProbeSize);
    const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
    BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed)
        .addReg(FinalStackProbed)
        .addImm(BoundOffset)
        .setMIFlag(MachineInstr::FrameSetup);

    // while in the loop, use loop-invariant reg for CFI,
    // instead of the stack pointer, which changes during the loop
    if (!HasFP && NeedsDwarfCFI) {
      // x32 uses the same DWARF register numbers as x86-64,
      // so there isn't a register number for r11d, we must use r11 instead
      const Register DwarfFinalStackProbed =
          STI.isTarget64BitILP32()
              ? Register(getX86SubSuperRegister(FinalStackProbed, 64))
              : FinalStackProbed;

      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createDefCfaRegister(
                   nullptr, TRI->getDwarfRegNum(DwarfFinalStackProbed, true)));
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, BoundOffset));
    }
  }

  // allocate a page
  BuildStackAdjustment(*testMBB, testMBB->end(), DL, -StackProbeSize,
                       /*InEpilogue=*/false)
      .setMIFlag(MachineInstr::FrameSetup);

  // touch the page
  addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc))
                   .setMIFlag(MachineInstr::FrameSetup),
               StackPtr, false, 0)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);

  // cmp with stack pointer bound
  BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(StackPtr)
      .addReg(FinalStackProbed)
      .setMIFlag(MachineInstr::FrameSetup);

  // jump
  BuildMI(testMBB, DL, TII.get(X86::JCC_1))
      .addMBB(testMBB)
      .addImm(X86::COND_NE)
      .setMIFlag(MachineInstr::FrameSetup);
  testMBB->addSuccessor(testMBB);
  testMBB->addSuccessor(tailMBB);

  // BB management
  tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end());
  tailMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(testMBB);

  // handle tail
  const uint64_t TailOffset = Offset % StackProbeSize;
  MachineBasicBlock::iterator TailMBBIter = tailMBB->begin();
  if (TailOffset) {
    BuildStackAdjustment(*tailMBB, TailMBBIter, DL, -TailOffset,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // after the loop, switch back to stack pointer for CFI
  if (!HasFP && NeedsDwarfCFI) {
    // x32 uses the same DWARF register numbers as x86-64,
    // so there isn't a register number for esp, we must use rsp instead
    const Register DwarfStackPtr =
        STI.isTarget64BitILP32()
            ? Register(getX86SubSuperRegister(StackPtr, 64))
            : Register(StackPtr);

    BuildCFI(*tailMBB, TailMBBIter, DL,
             MCCFIInstruction::createDefCfaRegister(
                 nullptr, TRI->getDwarfRegNum(DwarfStackPtr, true)));
  }

  // Update Live In information
  fullyRecomputeLiveIns({tailMBB, testMBB});
}
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Inline stack probe loop will clobber live EFLAGS.");

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1))
      .addMBB(ContinueMBB)
      .addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  if (InProlog)
    RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  if (InProlog)
    LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  if (InProlog)
    LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1))
      .addMBB(LoopMBB)
      .addImm(X86::COND_NE);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  if (!InProlog) {
    LivePhysRegs LiveRegs;
    computeAndAddLiveIns(LiveRegs, *ContinueMBB);
  }

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI :
         llvm::make_range(ContinueMBB->begin(), ContinueMBBI)) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
  }
}
void X86FrameLowering::emitStackProbeCall(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog,
    std::optional<MachineFunction::DebugInstrOperandPair> InstrNum) const {
  bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;

  // FIXME: Add indirect thunk support and remove this.
  if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls())
    report_fatal_error("Emitting stack probe calls on 64-bit with the large "
                       "code model and indirect thunks not yet implemented.");

  assert(MBB.computeRegisterLiveness(TRI, X86::EFLAGS, MBBI) !=
             MachineBasicBlock::LQR_Live &&
         "Stack probe calls will clobber live EFLAGS.");

  unsigned CallOp;
  if (Is64Bit)
    CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32;
  else
    CallOp = X86::CALLpcrel32;

  StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF);

  MachineInstrBuilder CI;
  MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI);

  // All current stack probes take AX and SP as input, clobber flags, and
  // preserve all registers. x86_64 probes leave RSP unmodified.
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // For the large code model, we have to call through a register. Use R11,
    // as it is scratch in all supported calling conventions.
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11)
        .addExternalSymbol(MF.createExternalSymbolName(Symbol));
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11);
  } else {
    CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp))
             .addExternalSymbol(MF.createExternalSymbolName(Symbol));
  }

  unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX;
  unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP;
  CI.addReg(AX, RegState::Implicit)
      .addReg(SP, RegState::Implicit)
      .addReg(AX, RegState::Define | RegState::Implicit)
      .addReg(SP, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

  MachineInstr *ModInst = CI;
  if (STI.isTargetWin64() || !STI.isOSWindows()) {
    // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves.
    // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp
    // themselves. They also do not clobber %rax so we can reuse it when
    // adjusting %rsp.
    // All other platforms do not specify a particular ABI for the stack probe
    // function, so we arbitrarily define it to not adjust %esp/%rsp itself.
    ModInst =
        BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP)
            .addReg(SP)
            .addReg(AX);
  }

  // DebugInfo variable locations -- if there's an instruction number for the
  // allocation (i.e., DYN_ALLOC_*), substitute it for the instruction that
  // modifies SP.
  if (InstrNum) {
    if (STI.isTargetWin64() || !STI.isOSWindows()) {
      // Label destination operand of the subtract.
      MF.makeDebugValueSubstitution(*InstrNum,
                                    {ModInst->getDebugInstrNum(), 0});
    } else {
      // Label the call. The operand number is the penultimate operand, zero
      // based.
      unsigned SPDefOperand = ModInst->getNumOperands() - 2;
      MF.makeDebugValueSubstitution(
          *InstrNum, {ModInst->getDebugInstrNum(), SPDefOperand});
    }
  }

  if (InProlog) {
    // Apply the frame setup flag to all inserted instrs.
    for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
      ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
  }
}
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  const uint64_t Win64MaxSEHOffset = 128;
  uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset);
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return SEHFrameOffset & -16;
}
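// Worked example (illustrative, not from the original source): for an SP
// adjustment of 200 bytes, min(200, 128) = 128 and 128 & -16 = 128, so the
// frame register is established 128 bytes above the new RSP; for a 40-byte
// adjustment the result is 40 & -16 = 32, keeping the UWOP_SET_FPREG offset
// 16-byte aligned as the Win64 unwinder requires.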
// If we're forcing a stack realignment we can't rely on just the frame
// info, we need to know the ABI stack alignment as well in case we
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t
X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment.
  Align StackAlign = getStackAlign();
  bool HasRealign = MF.getFunction().hasFnAttribute("stackrealign");
  if (HasRealign) {
    if (MFI.hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = Align(SlotSize);
  }

  if (!Is64Bit && MF.getFunction().getCallingConv() == CallingConv::X86_INTR) {
    if (MFI.hasCalls())
      MaxAlign = (MaxAlign > 16) ? MaxAlign : Align(16);
    else if (MaxAlign < 16)
      MaxAlign = Align(16);
  }
  return MaxAlign.value();
}
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in the worst case) less than StackProbeSize
  // bytes are left unprobed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    NumFrameLoopProbe++;
    MachineBasicBlock *entryMBB =
        MF.CreateMachineBasicBlock(MBB.getBasicBlock());
    MachineBasicBlock *headMBB =
        MF.CreateMachineBasicBlock(MBB.getBasicBlock());
    MachineBasicBlock *bodyMBB =
        MF.CreateMachineBasicBlock(MBB.getBasicBlock());
    MachineBasicBlock *footMBB =
        MF.CreateMachineBasicBlock(MBB.getBasicBlock());

    MachineFunction::iterator MBBIter = MBB.getIterator();
    MF.insert(MBBIter, entryMBB);
    MF.insert(MBBIter, headMBB);
    MF.insert(MBBIter, bodyMBB);
    MF.insert(MBBIter, footMBB);
    const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
    Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                : Is64Bit         ? X86::R11D

    // Setup entry block
    {
      entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
      BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      MachineInstr *MI =
          BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
              .addReg(FinalStackProbed)
              .addImm(Val)
              .setMIFlag(MachineInstr::FrameSetup);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();

      BuildMI(entryMBB, DL,
              TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
          .addMBB(&MBB)
          .addImm(X86::COND_E)
          .setMIFlag(MachineInstr::FrameSetup);
      entryMBB->addSuccessor(headMBB);
      entryMBB->addSuccessor(&MBB);
    }

    // Loop entry block
    {
      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
      BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
          .addReg(StackPtr)
          .addImm(StackProbeSize)
          .setMIFlag(MachineInstr::FrameSetup);

      BuildMI(headMBB, DL,
              TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
          .addReg(StackPtr)
          .addReg(FinalStackProbed)
          .setMIFlag(MachineInstr::FrameSetup);

      // jump to the footer if StackPtr < FinalStackProbed
      BuildMI(headMBB, DL, TII.get(X86::JCC_1))
          .addMBB(footMBB)
          .addImm(X86::COND_B)
          .setMIFlag(MachineInstr::FrameSetup);

      headMBB->addSuccessor(bodyMBB);
      headMBB->addSuccessor(footMBB);
    }

    // Setup loop body
    {
      addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);

      const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr);
      BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
          .addReg(StackPtr)
          .addImm(StackProbeSize)
          .setMIFlag(MachineInstr::FrameSetup);

      // cmp with stack pointer bound
      BuildMI(bodyMBB, DL,
              TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
          .addReg(FinalStackProbed)
          .addReg(StackPtr)
          .setMIFlag(MachineInstr::FrameSetup);

      // jump back while FinalStackProbed < StackPtr
      BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
          .addMBB(bodyMBB)
          .addImm(X86::COND_B)
          .setMIFlag(MachineInstr::FrameSetup);
      bodyMBB->addSuccessor(bodyMBB);
      bodyMBB->addSuccessor(footMBB);
    }

    // setup loop footer
    {
      BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
          .addReg(FinalStackProbed)
          .setMIFlag(MachineInstr::FrameSetup);
      addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                       .setMIFlag(MachineInstr::FrameSetup),
                   StackPtr, false, 0)
          .addImm(0)
          .setMIFlag(MachineInstr::FrameSetup);
      footMBB->addSuccessor(&MBB);
    }

    fullyRecomputeLiveIns({footMBB, bodyMBB, headMBB, &MBB});
  } else {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}
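// Illustrative example (not from the original source): without inline probing,
// realigning the stack to 64 bytes simply emits
//   andq $-64, %rsp
// (with the EFLAGS def marked dead); the probed variant above replaces that
// single AND with the entry/head/body/foot block structure so that no page in
// the newly exposed region below the old stack pointer is skipped.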
bool X86FrameLowering::has128ByteRedZone(const MachineFunction &MF) const {
  // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be
  // clobbered by any interrupt handler.
  assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
         "MF used frame lowering for wrong subtarget");
  const Function &Fn = MF.getFunction();
  const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv());
  return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone);
}
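// Illustrative example (not from the original source): in a leaf function on
// SysV x86-64, a small local can live at -8(%rsp) without any prologue
// adjustment, because the 128 bytes below %rsp (the red zone) are guaranteed
// not to be clobbered asynchronously; Win64 calling conventions and functions
// marked noredzone cannot rely on this, which is what this predicate checks.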
/// Return true if we need to use the restricted Windows x64 prologue and
/// epilogue code patterns that can be described with WinCFI (.seh_*
/// directives).
bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const {
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
}

bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const {
  return !isWin64Prologue(MF) && MF.needsFrameMoves();
}
/// Return true if an opcode is part of the REP group of instructions.
static bool isOpcodeRep(unsigned Opcode) {
  switch (Opcode) {
  case X86::REPNE_PREFIX:
  case X86::REP_MOVSB_32:
  case X86::REP_MOVSB_64:
  case X86::REP_MOVSD_32:
  case X86::REP_MOVSD_64:
  case X86::REP_MOVSQ_32:
  case X86::REP_MOVSQ_64:
  case X86::REP_MOVSW_32:
  case X86::REP_MOVSW_64:
  case X86::REP_PREFIX:
  case X86::REP_STOSB_32:
  case X86::REP_STOSB_64:
  case X86::REP_STOSD_32:
  case X86::REP_STOSD_64:
  case X86::REP_STOSQ_32:
  case X86::REP_STOSQ_64:
  case X86::REP_STOSW_32:
  case X86::REP_STOSW_64:
    return true;
  default:
    break;
  }
  return false;
}
/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.

/*
  Here's a gist of what gets emitted:

  ; Establish frame pointer, if needed
  [if needs FP]
      push  %rbp
      .cfi_def_cfa_offset 16
      .cfi_offset %rbp, -16
      .seh_pushreg %rbp
      mov  %rsp, %rbp
      .cfi_def_cfa_register %rbp

  ; Spill general-purpose registers
  [for all callee-saved GPRs]
      pushq %<reg>
      [if not needs FP]
         .cfi_def_cfa_offset (offset from RETADDR)
      .seh_pushreg %<reg>

  ; If the required stack alignment > default stack alignment
  ; rsp needs to be re-aligned. This creates a "re-alignment gap"
  ; of unknown size in the stack frame.
  [if stack needs re-alignment]
      and  $MASK, %rsp

  ; Allocate space for locals
  [if target is Windows and allocated space > 4096 bytes]
      ; Windows needs special care for allocations larger
      ; than one page.
      mov $NNN, %rax
      call ___chkstk_ms/___chkstk
      sub  %rax, %rsp
  [else]
      sub  $NNN, %rsp

  [if needs FP]
      .seh_stackalloc (size of XMM spill slots)
      .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots
  [else]
      .seh_stackalloc NNN

  ; Spill XMMs
  ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved,
  ; they may get spilled on any platform, if the current function
  ; calls @llvm.eh.unwind.init
  [if needs FP]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, -MMM(%rbp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset)
              ; i.e. the offset relative to (%rbp - SEHFrameOffset)
  [else]
      [for all callee-saved XMM registers]
          movaps  %<xmm reg>, KKK(%rsp)
      [for all callee-saved XMM registers]
          .seh_savexmm %<xmm reg>, KKK

  .seh_endprologue

  [if needs base pointer]
      mov  %rsp, %rbx
      [if needs to restore base pointer]
          mov %rsp, -MMM(%rbp)

  ; Emit CFI info
  [if needs FP]
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rbp)
  [else]
      .cfi_def_cfa_offset (offset from RETADDR)
      [for all callee-saved registers]
          .cfi_offset %<reg>, (offset from %rsp)

  Notes:
  - .seh directives are emitted only for Windows 64 ABI
  - .cv_fpo directives are emitted on win32 when emitting CodeView
  - .cfi directives are emitted for all other ABIs
  - for 32-bit code, substitute %e?? registers for %r??
*/
1532 void X86FrameLowering::emitPrologue(MachineFunction
&MF
,
1533 MachineBasicBlock
&MBB
) const {
1534 assert(&STI
== &MF
.getSubtarget
<X86Subtarget
>() &&
1535 "MF used frame lowering for wrong subtarget");
1536 MachineBasicBlock::iterator MBBI
= MBB
.begin();
1537 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1538 const Function
&Fn
= MF
.getFunction();
1539 X86MachineFunctionInfo
*X86FI
= MF
.getInfo
<X86MachineFunctionInfo
>();
1540 uint64_t MaxAlign
= calculateMaxStackAlign(MF
); // Desired stack alignment.
1541 uint64_t StackSize
= MFI
.getStackSize(); // Number of bytes to allocate.
1542 bool IsFunclet
= MBB
.isEHFuncletEntry();
1543 EHPersonality Personality
= EHPersonality::Unknown
;
1544 if (Fn
.hasPersonalityFn())
1545 Personality
= classifyEHPersonality(Fn
.getPersonalityFn());
1546 bool FnHasClrFunclet
=
1547 MF
.hasEHFunclets() && Personality
== EHPersonality::CoreCLR
;
1548 bool IsClrFunclet
= IsFunclet
&& FnHasClrFunclet
;
1549 bool HasFP
= hasFP(MF
);
1550 bool IsWin64Prologue
= isWin64Prologue(MF
);
1551 bool NeedsWin64CFI
= IsWin64Prologue
&& Fn
.needsUnwindTableEntry();
1552 // FIXME: Emit FPO data for EH funclets.
1553 bool NeedsWinFPO
= !IsFunclet
&& STI
.isTargetWin32() &&
1554 MF
.getFunction().getParent()->getCodeViewFlag();
1555 bool NeedsWinCFI
= NeedsWin64CFI
|| NeedsWinFPO
;
1556 bool NeedsDwarfCFI
= needsDwarfCFI(MF
);
1557 Register FramePtr
= TRI
->getFrameRegister(MF
);
1558 const Register MachineFramePtr
=
1559 STI
.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr
, 64))
1561 Register BasePtr
= TRI
->getBaseRegister();
1562 bool HasWinCFI
= false;
1564 // Debug location must be unknown since the first debug location is used
1565 // to determine the end of the prologue.
1567 Register ArgBaseReg
;
1569 // Emit extra prolog for argument stack slot reference.
1570 if (auto *MI
= X86FI
->getStackPtrSaveMI()) {
1571 // MI is lea instruction that created in X86ArgumentStackSlotPass.
1572 // Creat extra prolog for stack realignment.
1573 ArgBaseReg
= MI
->getOperand(0).getReg();
1574 // leal 4(%esp), %basereg
1575 // .cfi_def_cfa %basereg, 0
1577 // pushl -4(%basereg)
1578 BuildMI(MBB
, MBBI
, DL
, TII
.get(Is64Bit
? X86::LEA64r
: X86::LEA32r
),
1582 .addUse(X86::NoRegister
)
1584 .addUse(X86::NoRegister
)
1585 .setMIFlag(MachineInstr::FrameSetup
);
1586 if (NeedsDwarfCFI
) {
1587 // .cfi_def_cfa %basereg, 0
1588 unsigned DwarfStackPtr
= TRI
->getDwarfRegNum(ArgBaseReg
, true);
1589 BuildCFI(MBB
, MBBI
, DL
,
1590 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr
, 0),
1591 MachineInstr::FrameSetup
);
1593 BuildStackAlignAND(MBB
, MBBI
, DL
, StackPtr
, MaxAlign
);
1594 int64_t Offset
= -(int64_t)SlotSize
;
1595 BuildMI(MBB
, MBBI
, DL
, TII
.get(Is64Bit
? X86::PUSH64rmm
: X86::PUSH32rmm
))
1598 .addReg(X86::NoRegister
)
1600 .addReg(X86::NoRegister
)
1601 .setMIFlag(MachineInstr::FrameSetup
);
1604 // Space reserved for stack-based arguments when making a (ABI-guaranteed)
1606 unsigned TailCallArgReserveSize
= -X86FI
->getTCReturnAddrDelta();
1607 if (TailCallArgReserveSize
&& IsWin64Prologue
)
1608 report_fatal_error("Can't handle guaranteed tail call under win64 yet");
1610 const bool EmitStackProbeCall
=
1611 STI
.getTargetLowering()->hasStackProbeSymbol(MF
);
1612 unsigned StackProbeSize
= STI
.getTargetLowering()->getStackProbeSize(MF
);
  if (HasFP && X86FI->hasSwiftAsyncContext()) {
    switch (MF.getTarget().Options.SwiftAsyncFramePointer) {
    case SwiftAsyncFramePointerMode::DeploymentBased:
      if (STI.swiftAsyncContextIsDynamicallySet()) {
        // The special symbol below is absolute and has a *value* suitable to be
        // combined with the frame pointer directly.
        BuildMI(MBB, MBBI, DL, TII.get(X86::OR64rm), MachineFramePtr)
            .addUse(MachineFramePtr)
            .addUse(X86::RIP)
            .addImm(1)
            .addUse(X86::NoRegister)
            .addExternalSymbol("swift_async_extendedFramePointerFlags",
                               X86II::MO_GOTPCREL)
            .addUse(X86::NoRegister);
        break;
      }
      [[fallthrough]];

    case SwiftAsyncFramePointerMode::Always:
      assert(!IsWin64Prologue &&
             "win64 prologue does not set the bit 60 in the saved frame pointer");
      BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), MachineFramePtr)
          .addUse(MachineFramePtr)
          .addImm(60)
          .setMIFlag(MachineInstr::FrameSetup);
      break;

    case SwiftAsyncFramePointerMode::Never:
      break;
    }
  }
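  // Illustrative note (an assumed example, not code emitted verbatim here):
  // tagging the saved frame pointer for an extended Swift async frame amounts
  // to something like
  //   btsq $60, %rbp        ; set bit 60 in the prologue (BTS64ri8 above)
  //   ...
  //   btrq $60, %rbp        ; clear it again in the epilogue (BTR64ri8)
  // so unwinders can tell extended frame records apart from plain ones.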
  // Re-align the stack on 64-bit if the x86-interrupt calling convention is
  // used and an error code was pushed, since the x86-64 ABI requires a 16-byte
  // stack alignment.
  if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit &&
      Fn.arg_size() == 2) {
    StackSize += 8;
    MFI.setStackSize(StackSize);

    // Update the stack pointer by pushing a register. This is the instruction
    // that would end up being emitted by a call to `emitSPUpdate`.
    // Hard-coding the update to a push avoids emitting a second
    // `STACKALLOC_W_PROBING` instruction in the save block: We know that stack
    // probing isn't needed anyway for an 8-byte update.
    // Pushing a register leaves us in a similar situation to a regular
    // function call where we know that the address at (rsp-8) is writeable.
    // That way we avoid any off-by-ones with stack probing for additional
    // stack pointer updates later on.
    BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
        .addReg(X86::RAX, RegState::Undef)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone). We also check that we don't
  // push and pop from the stack.
  if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) &&
      !MFI.hasVarSizedObjects() &&             // No dynamic alloca.
      !MFI.adjustsStack() &&                   // No calls.
      !EmitStackProbeCall &&                   // No stack probes.
      !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
      !MF.shouldSplitStack()) {                // Regular stack
    uint64_t MinSize =
        X86FI->getCalleeSavedFrameSize() - X86FI->getTCReturnAddrDelta();
    if (HasFP)
      MinSize += SlotSize;
    X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI.setStackSize(StackSize);
  }
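  // Worked example (illustrative numbers only, assuming no callee-saved
  // spills and no tail-call delta): a leaf function with StackSize == 40
  // passes all of the checks above, so StackSize becomes
  // std::max(0, 40 > 128 ? 40 - 128 : 0) == 0 and no SP adjustment is
  // emitted; the 40 bytes of locals simply live in the red zone below %rsp.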
  // Insert stack pointer adjustment for later moving of return addr.  Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallArgReserveSize != 0) {
    BuildStackAdjustment(MBB, MBBI, DL, -(int)TailCallArgReserveSize,
                         /*InEpilogue=*/false)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended

  uint64_t NumBytes = 0;
  int stackGrowth = -SlotSize;
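  // For illustration (an assumed, typical x86-64 frame; not tied to any
  // particular function): with SlotSize == 8 the CFA rules above usually show
  // up in the assembler output as
  //   pushq %rbp
  //   .cfi_def_cfa_offset 16
  //   .cfi_offset %rbp, -16
  //   movq  %rsp, %rbp
  //   .cfi_def_cfa_register %rbp
  // i.e. the DW_CFA_* mapping expressed as .cfi_* directives.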
  // Find the funclet establisher parameter
  Register Establisher = X86::NoRegister;
  if (IsClrFunclet)
    Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX;
  else if (IsFunclet)
    Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX;

  if (IsWin64Prologue && IsFunclet && !IsClrFunclet) {
    // Immediately spill establisher into the home slot.
    // The runtime cares about this.
    // MOV64mr %rdx, 16(%rsp)
    unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16)
        .addReg(Establisher)
        .setMIFlag(MachineInstr::FrameSetup);
    MBB.addLiveIn(Establisher);
  }
  if (HasFP) {
    assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved");

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes =
        FrameSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);

    // Callee-saved registers are pushed on stack before the stack is realigned.
    if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = alignTo(NumBytes, MaxAlign);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL,
            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
        .addReg(MachineFramePtr, RegState::Kill)
        .setMIFlag(MachineInstr::FrameSetup);

    if (NeedsDwarfCFI && !ArgBaseReg.isValid()) {
      // Mark the place where EBP/RBP was saved.
      // Define the current CFA rule to use the provided offset.
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfaOffset(
                   nullptr, -2 * stackGrowth + (int)TailCallArgReserveSize),
               MachineInstr::FrameSetup);

      // Change the rule for the FramePtr to be an "offset" rule.
      unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createOffset(nullptr, DwarfFramePtr,
                                              2 * stackGrowth -
                                                  (int)TailCallArgReserveSize),
               MachineInstr::FrameSetup);
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    if (X86FI->hasSwiftAsyncContext()) {
      assert(!IsWin64Prologue &&
             "win64 prologue does not store async context right below rbp");
      const auto &Attrs = MF.getFunction().getAttributes();

      // Before we update the live frame pointer we have to ensure there's a
      // valid (or null) asynchronous context in its slot just before FP in
      // the frame record, so store it now.
      if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) {
        // We have an initial context in r14, store it just before the frame
        // pointer.
        MBB.addLiveIn(X86::R14);
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
            .addReg(X86::R14)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        // No initial context, store null so that there's no pointer that
        // could be misused.
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i32))
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
            .addImm(X86::R14)
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr)
          .addUse(X86::RSP)
          .addImm(1)
          .addUse(X86::NoRegister)
          .addImm(8)
          .addUse(X86::NoRegister)
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri32), X86::RSP)
          .addUse(X86::RSP)
          .addImm(8)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    if (!IsWin64Prologue && !IsFunclet) {
      // Update EBP with the new base value.
      if (!X86FI->hasSwiftAsyncContext())
        BuildMI(MBB, MBBI, DL,
                TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr),
                FramePtr)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

      if (NeedsDwarfCFI) {
        if (ArgBaseReg.isValid()) {
          SmallString<64> CfaExpr;
          CfaExpr.push_back(dwarf::DW_CFA_expression);
          uint8_t buffer[16];
          unsigned DwarfReg = TRI->getDwarfRegNum(MachineFramePtr, true);
          CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
          CfaExpr.push_back(2);
          CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
          CfaExpr.push_back(0);
          // DW_CFA_expression: reg5 DW_OP_breg5 +0
          BuildCFI(MBB, MBBI, DL,
                   MCCFIInstruction::createEscape(nullptr, CfaExpr.str()),
                   MachineInstr::FrameSetup);
        } else {
          // Mark effective beginning of when frame pointer becomes valid.
          // Define the current CFA to use the EBP/RBP register.
          unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
          BuildCFI(
              MBB, MBBI, DL,
              MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr),
              MachineInstr::FrameSetup);
        }
      }

      if (NeedsWinFPO) {
        // .cv_fpo_setframe $FramePtr
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
            .addImm(FramePtr)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  } else {
    assert(!IsFunclet && "funclets without FPs not yet implemented");
    NumBytes =
        StackSize - (X86FI->getCalleeSavedFrameSize() + TailCallArgReserveSize);
  }
  // Update the offset adjustment, which is mainly used by codeview to translate
  // from ESP to VFRAME relative local variable offsets.
  if (!IsFunclet) {
    if (HasFP && TRI->hasStackRealignment(MF))
      MFI.setOffsetAdjustment(-NumBytes);
    else
      MFI.setOffsetAdjustment(-StackSize);
  }

  // For EH funclets, only allocate enough space for outgoing calls. Save the
  // NumBytes value that we would've used for the parent frame.
  unsigned ParentFrameNumBytes = NumBytes;
  if (IsFunclet)
    NumBytes = getWinEHFuncletFrameSize(MF);
  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;
  MachineBasicBlock::const_iterator LastCSPush = MBBI;
  auto IsCSPush = [&](const MachineBasicBlock::iterator &MBBI) {
    if (MBBI == MBB.end() || !MBBI->getFlag(MachineInstr::FrameSetup))
      return false;
    unsigned Opc = MBBI->getOpcode();
    return Opc == X86::PUSH32r || Opc == X86::PUSH64r || Opc == X86::PUSHP64r ||
           Opc == X86::PUSH2 || Opc == X86::PUSH2P;
  };

  while (IsCSPush(MBBI)) {
    PushedRegs = true;
    Register Reg = MBBI->getOperand(0).getReg();
    LastCSPush = MBBI;
    ++MBBI;
    unsigned Opc = LastCSPush->getOpcode();

    if (!HasFP && NeedsDwarfCFI) {
      // Mark callee-saved push instruction.
      // Define the current CFA rule to use the provided offset.
      // Compared to push, push2 introduces more stack offset (one more
      // register).
      if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
        StackOffset += stackGrowth;
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset),
               MachineInstr::FrameSetup);
      StackOffset += stackGrowth;
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
          .addImm(Reg)
          .setMIFlag(MachineInstr::FrameSetup);
      if (Opc == X86::PUSH2 || Opc == X86::PUSH2P)
        BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
            .addImm(LastCSPush->getOperand(1).getReg())
            .setMIFlag(MachineInstr::FrameSetup);
    }
  }
  // Realign stack after we pushed callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Don't do this for Win64, it needs to realign the stack after the prologue.
  if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF) &&
      !ArgBaseReg.isValid()) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign);

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign))
          .addImm(MaxAlign)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }
  // If there is an SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, true);

  // Adjust stack pointer: ESP -= numbytes.
  //
  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack.  Windows uses __chkstk and cygwin/mingw
  // uses __alloca.  __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go.  The 64-bit version of
  // __chkstk is only responsible for probing the stack.  The 64-bit prologue is
  // responsible for adjusting the stack pointer.  Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
  uint64_t AlignedNumBytes = NumBytes;
  if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF))
    AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign);
  if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) {
    assert(!X86FI->getUsesRedZone() &&
           "The Red Zone is not accounted for in stack probes");

    // Check whether EAX is livein for this block.
    bool isEAXAlive = isEAXLiveIn(MBB);

    if (isEAXAlive) {
      if (Is64Bit) {
        // Save RAX
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
            .addReg(X86::RAX, RegState::Kill)
            .setMIFlag(MachineInstr::FrameSetup);
      } else {
        // Save EAX
        BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
            .addReg(X86::EAX, RegState::Kill)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes;
      BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Alloc)), X86::RAX)
          .addImm(Alloc)
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
          .addImm(isEAXAlive ? NumBytes - 4 : NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // Call __chkstk, __chkstk_ms, or __alloca.
    emitStackProbe(MF, MBB, MBBI, DL, true);

    if (isEAXAlive) {
      // Restore RAX/EAX.
      MachineInstr *MI;
      if (Is64Bit)
        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX),
                          StackPtr, false, NumBytes - 8);
      else
        MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX),
                          StackPtr, false, NumBytes - 4);
      MI->setFlag(MachineInstr::FrameSetup);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false);
  }
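  // Illustrative sequence (assumed shape; the exact registers and symbol
  // depend on the target): for a large Win64 frame the probing path above
  // typically expands to
  //   movq  $NumBytes, %rax
  //   callq __chkstk          ; probes the stack one page at a time
  //   subq  %rax, %rsp        ; the 64-bit prologue adjusts RSP itself
  // whereas the 32-bit __chkstk/__alloca helpers both probe and adjust ESP.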
  if (NeedsWinCFI && NumBytes) {
    HasWinCFI = true;
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
        .addImm(NumBytes)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  int SEHFrameOffset = 0;
  unsigned SPOrEstablisher;
  if (IsFunclet) {
    if (IsClrFunclet) {
      // The establisher parameter passed to a CLR funclet is actually a pointer
      // to the (mostly empty) frame of its nearest enclosing funclet; we have
      // to find the root function establisher frame by loading the PSPSym from
      // the intermediate frame.
      unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
      MachinePointerInfo NoInfo;
      MBB.addLiveIn(Establisher);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher),
                   Establisher, false, PSPSlotOffset)
          .addMemOperand(MF.getMachineMemOperand(
              NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize)));
      // Save the root establisher back into the current funclet's (mostly
      // empty) frame, in case a sub-funclet or the GC needs it.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr,
                   false, PSPSlotOffset)
          .addReg(Establisher)
          .addMemOperand(MF.getMachineMemOperand(
              NoInfo,
              MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
              SlotSize, Align(SlotSize)));
    }
    SPOrEstablisher = Establisher;
  } else {
    SPOrEstablisher = StackPtr;
  }
  if (IsWin64Prologue && HasFP) {
    // Set RBP to a small fixed offset from RSP. In the funclet case, we base
    // this calculation on the incoming establisher, which holds the value of
    // RSP from the parent frame at the end of the prologue.
    SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes);
    if (SEHFrameOffset)
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr),
                   SPOrEstablisher, false, SEHFrameOffset);
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr)
          .addReg(SPOrEstablisher);

    // If this is not a funclet, emit the CFI describing our frame pointer.
    if (NeedsWinCFI && !IsFunclet) {
      assert(!NeedsWinFPO && "this setframe incompatible with FPO data");
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
          .addImm(FramePtr)
          .addImm(SEHFrameOffset)
          .setMIFlag(MachineInstr::FrameSetup);
      if (isAsynchronousEHPersonality(Personality))
        MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset;
    }
  } else if (IsFunclet && STI.is32Bit()) {
    // Reset EBP / ESI to something good for funclets.
    MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL);
    // If we're a catch funclet, we can be returned to via catchret. Save ESP
    // into the registration node so that the runtime will restore it for us.
    if (!MBB.isCleanupFuncletEntry()) {
      assert(Personality == EHPersonality::MSVC_CXX);
      Register FrameReg;
      int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex;
      int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed();
      // ESP is the first field, so no extra displacement is needed.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg,
                   false, EHRegOffset)
          .addReg(X86::ESP);
    }
  }

  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) {
    const MachineInstr &FrameInstr = *MBBI;
    ++MBBI;

    if (NeedsWinCFI) {
      int FI;
      if (Register Reg = TII.isStoreToStackSlot(FrameInstr, FI)) {
        if (X86::FR64RegClass.contains(Reg)) {
          int Offset;
          Register IgnoredFrameReg;
          if (IsWin64Prologue && IsFunclet)
            Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg);
          else
            Offset =
                getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() +
                SEHFrameOffset;

          HasWinCFI = true;
          assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data");
          BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
              .addImm(Reg)
              .addImm(Offset)
              .setMIFlag(MachineInstr::FrameSetup);
        }
      }
    }
  }

  if (NeedsWinCFI && HasWinCFI)
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
        .setMIFlag(MachineInstr::FrameSetup);
  if (FnHasClrFunclet && !IsFunclet) {
    // Save the so-called Initial-SP (i.e. the value of the stack pointer
    // immediately after the prolog) into the PSPSlot so that funclets
    // and the GC can recover it.
    unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF);
    auto PSPInfo = MachinePointerInfo::getFixedStack(
        MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx);
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false,
                 PSPSlotOffset)
        .addReg(StackPtr)
        .addMemOperand(MF.getMachineMemOperand(
            PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
            SlotSize, Align(SlotSize)));
  }

  // Realign stack after we spilled callee-saved registers (so that we'll be
  // able to calculate their offsets from the frame pointer).
  // Win64 requires aligning the stack after the prologue.
  if (IsWin64Prologue && TRI->hasStackRealignment(MF)) {
    assert(HasFP && "There should be a frame pointer if stack is realigned.");
    BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign);
  }

  // We already dealt with stack realignment and funclets above.
  if (IsFunclet && STI.is32Bit())
    return;
  // If we need a base pointer, set it up here. It's whatever the value
  // of the stack pointer is at this point. Any variable size objects
  // will be allocated after this, so we can still use the base pointer
  // to reference locals.
  if (TRI->hasBasePointer(MF)) {
    // Update the base pointer with the current stack pointer.
    unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr;
    BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
        .addReg(SPOrEstablisher)
        .setMIFlag(MachineInstr::FrameSetup);
    if (X86FI->getRestoreBasePointer()) {
      // Stash value of base pointer.  Saving RSP instead of EBP shortens
      // dependence chain. Used by SjLj EH.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), FramePtr, true,
                   X86FI->getRestoreBasePointerOffset())
          .addReg(SPOrEstablisher)
          .setMIFlag(MachineInstr::FrameSetup);
    }

    if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) {
      // Stash the value of the frame pointer relative to the base pointer for
      // Win32 EH. This supports Win32 EH, which does the inverse of the above:
      // it recovers the frame pointer from the base pointer rather than the
      // other way around.
      unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr;
      Register UsedReg;
      int Offset =
          getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
              .getFixed();
      assert(UsedReg == BasePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset)
          .addReg(FramePtr)
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (ArgBaseReg.isValid()) {
    // Save argument base pointer.
    auto *MI = X86FI->getStackPtrSaveMI();
    int FI = MI->getOperand(1).getIndex();
    unsigned MOVmr = Is64Bit ? X86::MOV64mr : X86::MOV32mr;
    // movl %basereg, offset(%ebp)
    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), FI)
        .addReg(ArgBaseReg)
        .setMIFlag(MachineInstr::FrameSetup);
  }
  if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) {
    // Mark end of stack pointer adjustment.
    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth),
          MachineInstr::FrameSetup);
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    emitCalleeSavedFrameMoves(MBB, MBBI, DL, true);
  }

  // X86 Interrupt handling function cannot assume anything about the direction
  // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction
  // in each prologue of interrupt handler function.
  //
  // Create "cld" instruction only in these cases:
  // 1. The interrupt handling function uses any of the "rep" instructions.
  // 2. Interrupt handling function calls another function.
  // 3. If there are any inline asm blocks, as we do not know what they do.
  //
  // TODO: We should also emit cld if we detect the use of std, but as of now,
  // the compiler does not even emit that instruction or even define it, so in
  // practice, this would only happen with inline asm, which we cover anyway.
  if (Fn.getCallingConv() == CallingConv::X86_INTR) {
    bool NeedsCLD = false;

    for (const MachineBasicBlock &B : MF) {
      for (const MachineInstr &MI : B) {
        if (MI.isCall()) {
          NeedsCLD = true;
          break;
        }

        if (isOpcodeRep(MI.getOpcode())) {
          NeedsCLD = true;
          break;
        }

        if (MI.isInlineAsm()) {
          // TODO: Parse asm for rep instructions or call sites?
          // For now, let's play it safe and emit a cld instruction
          // just in case.
          NeedsCLD = true;
          break;
        }
      }
    }

    if (NeedsCLD)
      BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
          .setMIFlag(MachineInstr::FrameSetup);
  }

  // At this point we know if the function has WinCFI or not.
  MF.setHasWinCFI(HasWinCFI);
}
bool X86FrameLowering::canUseLEAForSPInEpilogue(
    const MachineFunction &MF) const {
  // We can't use LEA instructions for adjusting the stack pointer if we don't
  // have a frame pointer in the Win64 ABI.  Only ADD instructions may be used
  // to deallocate the stack.
  // This means that we can use LEA for SP in two situations:
  // 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
  // 2. We *have* a frame pointer which means we are permitted to use LEA.
  return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF);
}

static bool isFuncletReturnInstr(MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case X86::CATCHRET:
  case X86::CLEANUPRET:
    return true;
  default:
    return false;
  }
  llvm_unreachable("impossible");
}
// CLR funclets use a special "Previous Stack Pointer Symbol" slot on the
// stack. It holds a pointer to the bottom of the root function frame. The
// establisher frame pointer passed to a nested funclet may point to the
// (mostly empty) frame of its parent funclet, but it will need to find
// the frame of the root function to access locals. To facilitate this,
// every funclet copies the pointer to the bottom of the root function
// frame into a PSPSym slot in its own (mostly empty) stack frame. Using the
// same offset for the PSPSym in the root function frame that's used in the
// funclets' frames allows each funclet to dynamically accept any ancestor
// frame as its establisher argument (the runtime doesn't guarantee the
// immediate parent for some reason lost to history), and also allows the GC,
// which uses the PSPSym for some bookkeeping, to find it in any funclet's
// frame with only a single offset reported for the entire method.
unsigned
X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const {
  const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo();
  Register SPReg;
  int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg,
                                              /*IgnoreSPUpdates*/ true)
                   .getFixed();
  assert(Offset >= 0 && SPReg == TRI->getStackRegister());
  return static_cast<unsigned>(Offset);
}
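// Illustrative layout (assumed numbers only): if the root function's prologue
// leaves a 40-byte outgoing-argument area, the PSPSym is stored at
// [rsp + 40] immediately after the prolog, and every CLR funclet reserves the
// same 40-byte offset in its own frame; the GC and nested funclets can then
// always recover the root frame pointer at that one fixed offset.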
unsigned
X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  // This is the size of the pushed CSRs.
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  // This is the size of callee saved XMMs.
  const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  unsigned XMMSize =
      WinEHXMMSlotInfo.size() * TRI->getSpillSize(X86::VR128RegClass);
  // This is the amount of stack a funclet needs to allocate.
  unsigned UsedSize;
  EHPersonality Personality =
      classifyEHPersonality(MF.getFunction().getPersonalityFn());
  if (Personality == EHPersonality::CoreCLR) {
    // CLR funclets need to hold enough space to include the PSPSym, at the
    // same offset from the stack pointer (immediately after the prolog) as it
    // resides at in the main function.
    UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
  } else {
    // Other funclets just need enough stack for outgoing call arguments.
    UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
  }
  // RBP is not included in the callee saved register block. After pushing RBP,
  // everything is 16 byte aligned. Everything we allocate before an outgoing
  // call must also be 16 byte aligned.
  unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign());
  // Subtract out the size of the callee saved registers. This is how much stack
  // each funclet will allocate.
  return FrameSizeMinusRBP + XMMSize - CSSize;
}
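// Worked example (illustrative numbers only): with CSSize == 40, UsedSize == 32
// (outgoing arguments) and one XMM spill slot (XMMSize == 16), a funclet
// allocates alignTo(40 + 32, 16) + 16 - 40 == 80 + 16 - 40 == 56 bytes.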
static bool isTailCallOpcode(unsigned Opc) {
  return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi ||
         Opc == X86::TCRETURNmi || Opc == X86::TCRETURNri64 ||
         Opc == X86::TCRETURNdi64 || Opc == X86::TCRETURNmi64;
}
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
  MachineBasicBlock::iterator MBBI = Terminator;
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  Register FramePtr = TRI->getFrameRegister(MF);
  Register MachineFramePtr =
      Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;

  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWin64CFI =
      IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
  bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI.getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  unsigned TailCallArgReserveSize = -X86FI->getTCReturnAddrDelta();
  bool HasFP = hasFP(MF);
  uint64_t NumBytes = 0;

  bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
                        !MF.getTarget().getTargetTriple().isOSWindows()) &&
                       MF.needsFrameMoves();

  Register ArgBaseReg;
  if (auto *MI = X86FI->getStackPtrSaveMI()) {
    unsigned Opc = X86::LEA32r;
    Register StackReg = X86::ESP;
    ArgBaseReg = MI->getOperand(0).getReg();
    if (STI.is64Bit()) {
      Opc = X86::LEA64r;
      StackReg = X86::RSP;
    }
    // leal -4(%basereg), %esp
    // .cfi_def_cfa %esp, 4
    BuildMI(MBB, MBBI, DL, TII.get(Opc), StackReg)
        .addUse(ArgBaseReg)
        .addImm(1)
        .addUse(X86::NoRegister)
        .addImm(-(int64_t)SlotSize)
        .addUse(X86::NoRegister)
        .setMIFlag(MachineInstr::FrameDestroy);
    if (NeedsDwarfCFI) {
      unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackReg, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
               MachineInstr::FrameDestroy);
    }
  }
  if (IsFunclet) {
    assert(HasFP && "EH funclets without FP not yet implemented");
    NumBytes = getWinEHFuncletFrameSize(MF);
  } else if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize - TailCallArgReserveSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = alignTo(FrameSize, MaxAlign);
  } else {
    NumBytes = StackSize - CSSize - TailCallArgReserveSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // AfterPop is the position to insert .cfi_restore.
  MachineBasicBlock::iterator AfterPop = MBBI;
  if (HasFP) {
    if (X86FI->hasSwiftAsyncContext()) {
      // Discard the context.
      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
    }

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())),
            MachineFramePtr)
        .setMIFlag(MachineInstr::FrameDestroy);

    // We need to reset FP to its untagged state on return. Bit 60 is currently
    // used to show the presence of an extended frame.
    if (X86FI->hasSwiftAsyncContext()) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), MachineFramePtr)
          .addUse(MachineFramePtr)
          .addImm(60)
          .setMIFlag(MachineInstr::FrameDestroy);
    }

    if (NeedsDwarfCFI) {
      if (!ArgBaseReg.isValid()) {
        unsigned DwarfStackPtr =
            TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize),
                 MachineInstr::FrameDestroy);
      }
      if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
        BuildCFI(MBB, AfterPop, DL,
                 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr),
                 MachineInstr::FrameDestroy);
      }
    }
  }
  MachineBasicBlock::iterator FirstCSPop = MBBI;
  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
      if (!PI->getFlag(MachineInstr::FrameDestroy) ||
          (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::BTR64ri8 &&
           Opc != X86::ADD64ri32 && Opc != X86::POPP64r && Opc != X86::POP2 &&
           Opc != X86::POP2P && Opc != X86::LEA64r))
        break;
      FirstCSPop = PI;
    }

    --MBBI;
  }
  if (ArgBaseReg.isValid()) {
    // Restore argument base pointer.
    auto *MI = X86FI->getStackPtrSaveMI();
    int FI = MI->getOperand(1).getIndex();
    unsigned MOVrm = Is64Bit ? X86::MOV64rm : X86::MOV32rm;
    // movl offset(%ebp), %basereg
    addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(MOVrm), ArgBaseReg), FI)
        .setMIFlag(MachineInstr::FrameDestroy);
  }
  MBBI = FirstCSPop;

  if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
    emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI.hasVarSizedObjects())
    NumBytes += mergeSPUpdates(MBB, MBBI, true);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned. Don't do this if this was a funclet epilogue, since the funclets
  // will not do realignment or dynamic stack allocation.
  if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
      !IsFunclet) {
    if (TRI->hasStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount =
        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    if (X86FI->hasSwiftAsyncContext())
      LEAAmount -= 16;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), FramePtr,
                   false, LEAAmount);
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr).addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
    if (!HasFP && NeedsDwarfCFI) {
      // Define the current CFA rule to use the provided offset.
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfaOffset(
                   nullptr, CSSize + TailCallArgReserveSize + SlotSize),
               MachineInstr::FrameDestroy);
    }
  }
  // Windows unwinder will not invoke function's exception handler if IP is
  // either in prologue or in epilogue.  This behavior causes a problem when a
  // call immediately precedes an epilogue, because the return address points
  // into the epilogue.  To cope with that, we insert an epilogue marker here,
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
  // final emitted code.
  if (NeedsWin64CFI && MF.hasWinCFI())
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  if (!HasFP && NeedsDwarfCFI) {
    MBBI = FirstCSPop;
    int64_t Offset = -(int64_t)CSSize - SlotSize;
    // Mark callee-saved pop instruction.
    // Define the current CFA rule to use the provided offset.
    while (MBBI != MBB.end()) {
      MachineBasicBlock::iterator PI = MBBI;
      unsigned Opc = PI->getOpcode();
      ++MBBI;
      if (Opc == X86::POP32r || Opc == X86::POP64r || Opc == X86::POPP64r ||
          Opc == X86::POP2 || Opc == X86::POP2P) {
        Offset += SlotSize;
        // Compared to pop, pop2 introduces more stack offset (one more
        // register).
        if (Opc == X86::POP2 || Opc == X86::POP2P)
          Offset += SlotSize;
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset),
                 MachineInstr::FrameDestroy);
      }
    }
  }
  // Emit DWARF info specifying the restores of the callee-saved registers.
  // For epilogue with return inside or being other block without successor,
  // no need to generate .cfi_restore for callee-saved registers.
  if (NeedsDwarfCFI && !MBB.succ_empty())
    emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);

  if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
    // Add the return addr area delta back since we are not tail calling.
    int Offset = -1 * X86FI->getTCReturnAddrDelta();
    assert(Offset >= 0 && "TCDelta should never be positive");
    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, Terminator, true);
      emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
    }
  }

  // Emit tilerelease for AMX kernel.
  if (X86FI->getAMXProgModel() == AMXProgModelEnum::ManagedRA)
    BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
}
StackOffset
X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                         Register &FrameReg) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  bool IsFixed = MFI.isFixedObjectIndex(FI);
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer.  The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (TRI->hasBasePointer(MF))
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
  else if (TRI->hasStackRealignment(MF))
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
  else
    FrameReg = TRI->getFrameRegister(MF);

  // Offset will hold the offset from the stack pointer at function entry to
  // the object.
  // We need to factor in additional offsets applied during the prologue to the
  // frame, base, and stack pointer depending on which is used.
  int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI.getStackSize();
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  // In an x86 interrupt, remove the offset we added to account for the return
  // address from any stack object allocated in the caller's frame. Interrupts
  // do not have a standard return address. Fixed objects in the current frame,
  // such as SSE register spills, should not get this treatment.
  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
      Offset >= 0)
    Offset += getOffsetOfLocalArea();

  if (IsWin64Prologue) {
    assert(!MFI.hasCalls() || (StackSize % 16) == 8);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base
    // pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return StackOffset::getFixed(-SEHFrameOffset);

    // FPDelta is the offset from the "traditional" FP location of the old base
    // pointer followed by return address and the location required by the
    // restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
           "FPDelta isn't aligned per the Win64 ABI!");
  }

  if (FrameReg == TRI->getFramePtr()) {
    // Skip saved EBP/RBP
    Offset += SlotSize;

    // Account for restricted Windows prologue.
    Offset += FPDelta;

    // Skip the RETADDR move area
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;

    return StackOffset::getFixed(Offset);
  }

  // FrameReg is either the stack pointer or a base pointer. But the base is
  // located at the end of the statically known StackSize so the distinction
  // doesn't really matter.
  if (TRI->hasStackRealignment(MF) || TRI->hasBasePointer(MF))
    assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)));
  return StackOffset::getFixed(Offset + StackSize);
}
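// Illustrative Win64 case (assumed numbers only): with StackSize == 88,
// SlotSize == 8 and CSSize == 16, FrameSize is 80 and NumBytes is 64, so
// calculateSetFPREG yields a small, 16-byte-aligned SEHFrameOffset (64 here)
// and FPDelta == 80 - 64 == 16; every frame-pointer-relative offset above is
// then biased by that delta.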
int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI,
                                              Register &FrameReg) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  const auto it = WinEHXMMSlotInfo.find(FI);

  if (it == WinEHXMMSlotInfo.end())
    return getFrameIndexReference(MF, FI, FrameReg).getFixed();

  FrameReg = TRI->getStackRegister();
  return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) +
         it->second;
}
StackOffset
X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI,
                                           Register &FrameReg,
                                           int Adjustment) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  FrameReg = TRI->getStackRegister();
  return StackOffset::getFixed(MFI.getObjectOffset(FI) -
                               getOffsetOfLocalArea() + Adjustment);
}
StackOffset
X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 bool IgnoreSPUpdates) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI.getStackSize();
  // LLVM arranges the stack as follows:
  //   ...
  //   ARG2
  //   ARG1
  //   RETADDR
  //   PUSH RBP   <-- RBP points here
  //   PUSH CSRs
  //   ~~~~~~~    <-- possible stack realignment (non-win64)
  //   ...
  //   STACK OBJECTS
  //   ...        <-- RSP after prologue points here
  //   ~~~~~~~    <-- possible stack realignment (win64)
  //   ARGS FOR OUTGOING CALLS
  //
  //   if (hasVarSizedObjects()):
  //     ...      <-- "base pointer" (ESI/RBX) points here
  //     DYNAMIC ALLOCAS
  //     ...      <-- RSP points here
  //
  // Case 1: In the simple case of no stack realignment and no dynamic
  // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
  // with fixed offsets from RSP.
  //
  // Case 2: In the case of stack realignment with no dynamic allocas, fixed
  // stack objects are addressed with RBP and regular stack objects with RSP.
  //
  // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
  // to address stack arguments for outgoing calls and nothing else. The "base
  // pointer" points to local variables, and RBP points to fixed objects.
  //
  // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
  // answer we give is relative to the SP after the prologue, and not the
  // SP in the middle of the function.

  if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
      !STI.isTargetWin64())
    return getFrameIndexReference(MF, FI, FrameReg);

  // If !hasReservedCallFrame the function might have SP adjustment in the
  // body.  So, even though the offset is statically known, it depends on where
  // we are in the function.
  if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
    return getFrameIndexReference(MF, FI, FrameReg);

  // We don't handle tail calls, and shouldn't be seeing them either.
  assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&
         "we don't handle this case!");

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes).  Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^                   ^
  //    A    B      C                   E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive).  For a
  // stack that grows down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  //    (C - E) == (C - A) - (B - A) + (B - E)
  //            { Using [1], [2] and [3] above }
  //            == getObjectOffset - LocalAreaOffset + StackSize
  return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
}
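// Worked example (illustrative numbers only): on x86-64 the local area offset
// is -8, so an object with MFI.getObjectOffset(FI) == -24 in a frame with
// StackSize == 64 is addressed at
//   getObjectOffset - LocalAreaOffset + StackSize == -24 - (-8) + 64 == 48
// bytes above the post-prologue %rsp.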
bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  unsigned XMMCalleeSavedFrameSize = 0;
  auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
                          TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (this->TRI->hasBasePointer(MF)) {
    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
    if (MF.hasEHFunclets()) {
      int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
      X86FI->setHasSEHFramePtrSave(true);
      X86FI->setSEHFramePtrSaveIndex(FI);
    }
  }

  if (hasFP(MF)) {
    // emitPrologue always spills frame register the first thing.
    SpillSlotOffset -= SlotSize;
    MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // The async context lives directly before the frame pointer, and we
    // allocate a second slot to preserve stack alignment.
    if (X86FI->hasSwiftAsyncContext()) {
      SpillSlotOffset -= SlotSize;
      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
      SpillSlotOffset -= SlotSize;
    }

    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from CSI list and not have to worry
    // about avoiding it later.
    Register FPReg = TRI->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(), FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }
  // Strategy:
  // 1. Use push2 when
  //       a) number of CSR > 1 if no need padding
  //       b) number of CSR > 2 if need padding
  // 2. When the number of CSR push is odd
  //    a. Start to use push2 from the 1st push if stack is 16B aligned.
  //    b. Start to use push2 from the 2nd push if stack is not 16B aligned.
  // 3. When the number of CSR push is even, start to use push2 from the 1st
  //    push and make the stack 16B aligned before the push
  unsigned NumRegsForPush2 = 0;
  if (STI.hasPush2Pop2()) {
    unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
      return X86::GR64RegClass.contains(I.getReg());
    });
    bool NeedPadding = (SpillSlotOffset % 16 != 0) && (NumCSGPR % 2 == 0);
    bool UsePush2Pop2 = NeedPadding ? NumCSGPR > 2 : NumCSGPR > 1;
    X86FI->setPadForPush2Pop2(NeedPadding && UsePush2Pop2);
    NumRegsForPush2 = UsePush2Pop2 ? alignDown(NumCSGPR, 2) : 0;
    if (X86FI->padForPush2Pop2()) {
      SpillSlotOffset -= SlotSize;
      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    }
  }
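  // Illustrative example (an assumed CSR set, not taken from a real function):
  // for three GPR CSRs {rbx, r14, r15} the count is odd, so with a
  // 16B-aligned first slot the first pair is fused and the remainder uses a
  // plain push, roughly
  //   push2 %rbx, %r14
  //   push  %r15
  // With an even count and a misaligned slot, one padding slot is created
  // first (padForPush2Pop2) so the push2 pairs stay 16B aligned.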
  // Assign slots for GPRs. It increases frame size.
  for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
    Register Reg = I.getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    // A CSR is a candidate for push2/pop2 when its slot offset is 16B aligned
    // or only an odd number of registers is in the candidates.
    if (X86FI->getNumCandidatesForPush2Pop2() < NumRegsForPush2 &&
        (SpillSlotOffset % 16 == 0 ||
         X86FI->getNumCandidatesForPush2Pop2() % 2))
      X86FI->addCandidateForPush2Pop2(Reg);

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    I.setFrameIdx(SlotIndex);
  }

  // Adjust the offset of spill slot as we know the accurate callee saved frame
  // size.
  if (X86FI->getRestoreBasePointer()) {
    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    // TODO: saving the slot index is better?
    X86FI->setRestoreBasePointer(CalleeSavedFrameSize);
  }

  assert(X86FI->getNumCandidatesForPush2Pop2() % 2 == 0 &&
         "Expect even candidates for push2/pop2");
  if (X86FI->getNumCandidatesForPush2Pop2())
    ++NumFunctionUsingPush2Pop2;
  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
  MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);
  // Assign slots for XMMs.
  for (CalleeSavedInfo &I : llvm::reverse(CSI)) {
    Register Reg = I.getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
    unsigned Size = TRI->getSpillSize(*RC);
    Align Alignment = TRI->getSpillAlign(*RC);

    // Ensure alignment.
    assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86");
    SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);

    // Spill into slot.
    SpillSlotOffset -= Size;
    int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
    I.setFrameIdx(SlotIndex);
    MFI.ensureMaxAlignment(Alignment);

    // Save the start offset and size of XMM in stack frame for funclets.
    if (X86::VR128RegClass.contains(Reg)) {
      WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
      XMMCalleeSavedFrameSize += Size;
    }
  }

  return true;
}
bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
  // for us, and there are no XMM CSRs on Win32.
  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
    return true;

  // Push GPRs. It increases frame size.
  const MachineFunction &MF = *MBB.getParent();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  if (X86FI->padForPush2Pop2())
    emitSPUpdate(MBB, MI, DL, -(int64_t)SlotSize, /*InEpilogue=*/false);

  // Update LiveIn of the basic block and decide whether we can add a kill flag
  // to the use.
  auto UpdateLiveInCheckCanKill = [&](Register Reg) {
    const MachineRegisterInfo &MRI = MF.getRegInfo();
    // Do not set a kill flag on values that are also marked as live-in. This
    // happens with the @llvm-returnaddress intrinsic and with arguments
    // passed in callee saved registers.
    // Omitting the kill flags is conservatively correct even if the live-in
    // is not used after all.
    if (MRI.isLiveIn(Reg))
      return false;
    MBB.addLiveIn(Reg);
    // Check if any subregister is live-in
    for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg)
      if (MRI.isLiveIn(*AReg))
        return false;
    return true;
  };
  auto UpdateLiveInGetKillRegState = [&](Register Reg) {
    return getKillRegState(UpdateLiveInCheckCanKill(Reg));
  };

  for (auto RI = CSI.rbegin(), RE = CSI.rend(); RI != RE; ++RI) {
    Register Reg = RI->getReg();
    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    if (X86FI->isCandidateForPush2Pop2(Reg)) {
      Register Reg2 = (++RI)->getReg();
      BuildMI(MBB, MI, DL, TII.get(getPUSH2Opcode(STI)))
          .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
          .addReg(Reg2, UpdateLiveInGetKillRegState(Reg2))
          .setMIFlag(MachineInstr::FrameSetup);
    } else {
      BuildMI(MBB, MI, DL, TII.get(getPUSHOpcode(STI)))
          .addReg(Reg, UpdateLiveInGetKillRegState(Reg))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  if (X86FI->getRestoreBasePointer()) {
    unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
    Register BaseReg = this->TRI->getBaseRegister();
    BuildMI(MBB, MI, DL, TII.get(Opc))
        .addReg(BaseReg, getKillRegState(true))
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // Spill XMM regs; X86 has no push/pop for XMM registers, so this is done
  // by storing them to their stack frame slots.
  for (const CalleeSavedInfo &I : llvm::reverse(CSI)) {
    Register Reg = I.getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, I.getFrameIdx(), RC, TRI,
                            Register());
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}
void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               MachineInstr *CatchRet) const {
  // SEH shouldn't use catchret.
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(
             MBB.getParent()->getFunction().getPersonalityFn())) &&
         "SEH should not use CATCHRET");
  const DebugLoc &DL = CatchRet->getDebugLoc();
  MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();

  // Fill EAX/RAX with the address of the target block.
  if (STI.is64Bit()) {
    // LEA64r CatchRetTarget(%rip), %rax
    BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addMBB(CatchRetTarget)
        .addReg(0);
  } else {
    // MOV32ri $CatchRetTarget, %eax
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addMBB(CatchRetTarget);
  }

  // Record that we've taken the address of CatchRetTarget and no longer just
  // reference it in a terminator.
  CatchRetTarget->setMachineBlockAddressTaken();
}
bool X86FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
    // Don't restore CSRs in 32-bit EH funclets. Matches
    // spillCalleeSavedRegisters.
    if (STI.is32Bit())
      return true;
    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
    // funclets. emitEpilogue transforms these to normal jumps.
    if (MI->getOpcode() == X86::CATCHRET) {
      const Function &F = MBB.getParent()->getFunction();
      bool IsSEH = isAsynchronousEHPersonality(
          classifyEHPersonality(F.getPersonalityFn()));
      if (IsSEH)
        return true;
    }
  }

  DebugLoc DL = MBB.findDebugLoc(MI);

  // Reload XMMs from stack frame.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
    TII.loadRegFromStackSlot(MBB, MI, Reg, I.getFrameIdx(), RC, TRI,
                             Register());
  }

  // Clear the stack slot for spill base pointer register.
  MachineFunction &MF = *MBB.getParent();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  if (X86FI->getRestoreBasePointer()) {
    unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
    Register BaseReg = this->TRI->getBaseRegister();
    BuildMI(MBB, MI, DL, TII.get(Opc), BaseReg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }

  // POP GPRs.
  for (auto I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    Register Reg = I->getReg();
    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    if (X86FI->isCandidateForPush2Pop2(Reg))
      BuildMI(MBB, MI, DL, TII.get(getPOP2Opcode(STI)), Reg)
          .addReg((++I)->getReg(), RegState::Define)
          .setMIFlag(MachineInstr::FrameDestroy);
    else
      BuildMI(MBB, MI, DL, TII.get(getPOPOpcode(STI)), Reg)
          .setMIFlag(MachineInstr::FrameDestroy);
  }
  if (X86FI->padForPush2Pop2())
    emitSPUpdate(MBB, MI, DL, SlotSize, /*InEpilogue=*/true);

  return true;
}
void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  // Spill the BasePtr if it's used.
  if (TRI->hasBasePointer(MF)) {
    Register BasePtr = TRI->getBaseRegister();
    if (STI.isTarget64BitILP32())
      BasePtr = getX86SubSuperRegister(BasePtr, 64);
    SavedRegs.set(BasePtr);
  }
}
static bool HasNestArgument(const MachineFunction *MF) {
  const Function &F = MF->getFunction();
  for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
       ++I) {
    if (I->hasNestAttr() && !I->use_empty())
      return true;
  }
  return false;
}
/// GetScratchRegister - Get a temp register for performing work in the
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
/// and the properties of the function either one or two registers will be
/// needed. Set primary to true for the first register, false for the second.
static unsigned GetScratchRegister(bool Is64Bit, bool IsLP64,
                                   const MachineFunction &MF, bool Primary) {
  CallingConv::ID CallingConvention = MF.getFunction().getCallingConv();

  // Erlang stuff.
  if (CallingConvention == CallingConv::HiPE) {
    if (Is64Bit)
      return Primary ? X86::R14 : X86::R13;
    else
      return Primary ? X86::EBX : X86::EDI;
  }

  if (Is64Bit) {
    if (IsLP64)
      return Primary ? X86::R11 : X86::R12;
    else
      return Primary ? X86::R11D : X86::R12D;
  }

  bool IsNested = HasNestArgument(&MF);

  if (CallingConvention == CallingConv::X86_FastCall ||
      CallingConvention == CallingConv::Fast ||
      CallingConvention == CallingConv::Tail) {
    if (IsNested)
      report_fatal_error("Segmented stacks does not support fastcall with "
                         "nested function.");
    return Primary ? X86::EAX : X86::ECX;
  }
  if (IsNested)
    return Primary ? X86::EDX : X86::EAX;
  return Primary ? X86::ECX : X86::EAX;
}
// The stack limit in the TCB is set to this many bytes above the actual stack
// limit.
static const uint64_t kSplitStackAvailable = 256;
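
// When the frame fits within this slack, adjustForSegmentedStacks below
// compares the stack pointer itself against the TLS stack limit and skips the
// LEA that would otherwise materialize SP - StackSize in a scratch register
// (see CompareStackPointer).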
void X86FrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize;
  unsigned TlsReg, TlsOffset;
  DebugLoc DL;

  // To support shrink-wrapping we would need to insert the new blocks
  // at the right place and update the branches to PrologueMBB.
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");

  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
         "Scratch register is live-in");

  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
      !STI.isTargetDragonFly())
    report_fatal_error("Segmented stacks not supported on this platform.");

  // Eventually StackSize will be calculated by a link-time pass; which will
  // also decide whether checking code needs to be injected into this particular
  // prologue.
  StackSize = MFI.getStackSize();

  if (!MFI.needsSplitStackProlog())
    return;

  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be last (terminating) instruction.

  for (const auto &LI : PrologueMBB.liveins()) {
    allocMBB->addLiveIn(LI);
    checkMBB->addLiveIn(LI);
  }

  if (IsNested)
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);

  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit off the current stacklet off the stack_guard location.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = IsLP64 ? 0x70 : 0x40;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90 * 8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetWin64()) {
      TlsReg = X86::GS;
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r),
              ScratchReg)
          .addReg(X86::RSP)
          .addImm(1)
          .addReg(0)
          .addImm(-StackSize)
          .addReg(0);

    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(0)
        .addImm(1)
        .addReg(0)
        .addImm(TlsOffset)
        .addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90 * 4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg)
          .addReg(X86::ESP)
          .addImm(1)
          .addReg(0)
          .addImm(-StackSize)
          .addReg(0);

    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
        STI.isTargetDragonFly()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
          .addReg(ScratchReg)
          .addReg(0)
          .addImm(1)
          .addReg(0)
          .addImm(TlsOffset)
          .addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {

      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an
        // argument.
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved.
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&
             "Scratch register is live-in and not saved");

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
            .addReg(ScratchReg2, RegState::Kill);

      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
          .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
          .addReg(ScratchReg)
          .addReg(ScratchReg2)
          .addImm(1)
          .addReg(0)
          .addImm(0)
          .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  BuildMI(checkMBB, DL, TII.get(X86::JCC_1))
      .addMBB(&PrologueMBB)
      .addImm(X86::COND_A);

  // On 32 bit we first push the arguments size and then the frame size. On 64
  // bit, we pass the stack frame size in r10 and the argument size in r11.
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to _morestack

    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);

    BuildMI(allocMBB, DL, TII.get(getMOVriOpcode(IsLP64, StackSize)), Reg10)
        .addImm(StackSize);
    BuildMI(allocMBB, DL,
            TII.get(getMOVriOpcode(IsLP64, X86FI->getArgumentStackSize())),
            Reg11)
        .addImm(X86FI->getArgumentStackSize());
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSH32i))
        .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSH32i)).addImm(StackSize);
  }

  // __morestack is in libgcc
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // Under the large code model, we cannot assume that __morestack lives
    // within 2^31 bytes of the call site, so we cannot use pc-relative
    // addressing. We cannot perform the call via a temporary register,
    // as the rax register may be used to store the static chain, and all
    // other suitable registers may be either callee-save or used for
    // parameter passing. We cannot use the stack at this point either
    // because __morestack manipulates the stack directly.
    //
    // To avoid these issues, perform an indirect call via a read-only memory
    // location containing the address.
    //
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
    // FIXME: Add retpoline support and remove the error here..
    if (STI.useIndirectThunkCalls())
      report_fatal_error("Emitting morestack calls on 64-bit with the large "
                         "code model and thunks not yet implemented.");
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addExternalSymbol("__morestack_addr")
        .addReg(0);
  } else {
    if (Is64Bit)
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
          .addExternalSymbol("__morestack");
    else
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
          .addExternalSymbol("__morestack");
  }

  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&PrologueMBB);

  checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
  checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
/// Lookup an ERTS parameter in the !hipe.literals named metadata node.
/// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets
/// to fields it needs, through a named metadata node "hipe.literals" containing
/// name-value pairs.
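///
/// For illustration only, the module-level metadata is expected to look
/// roughly like the following (the names and values here are placeholders,
/// not actual ERTS constants):
///   !hipe.literals = !{!1, !2}
///   !1 = !{!"P_NSP_LIMIT", i32 152}
///   !2 = !{!"AMD64_LEAF_WORDS", i32 24}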
static unsigned getHiPELiteral(NamedMDNode *HiPELiteralsMD,
                               const StringRef LiteralName) {
  for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) {
    MDNode *Node = HiPELiteralsMD->getOperand(i);
    if (Node->getNumOperands() != 2)
      continue;
    MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0));
    ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1));
    if (!NodeName || !NodeVal)
      continue;
    ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue());
    if (ValConst && NodeName->getString() == LiteralName)
      return ValConst->getZExtValue();
  }

  report_fatal_error("HiPE literal " + LiteralName +
                     " required but not provided");
}
// Return true if there are no non-ehpad successors to MBB and there are no
// non-meta instructions between MBBI and MBB.end().
static bool blockEndIsUnreachable(const MachineBasicBlock &MBB,
                                  MachineBasicBlock::const_iterator MBBI) {
  return llvm::all_of(
             MBB.successors(),
             [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) &&
         std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) {
           return MI.isMetaInstruction();
         });
}
/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///       ...
/// IncStack:
///       call inc_stack   # doubles the stack space
///       temp0 = sp - MaxStack
///       if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  DebugLoc DL;

  // To support shrink-wrapping we would need to insert the new blocks
  // at the right place and update the branches to PrologueMBB.
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet");

  // HiPE-specific values
  NamedMDNode *HiPELiteralsMD =
      MF.getFunction().getParent()->getNamedMetadata("hipe.literals");
  if (!HiPELiteralsMD)
    report_fatal_error(
        "Can't generate HiPE prologue without runtime parameters");
  const unsigned HipeLeafWords = getHiPELiteral(
      HiPELiteralsMD, Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs
                                ? MF.getFunction().arg_size() - CCRegisteredArgs
                                : 0;
  unsigned MaxStack = MFI.getStackSize() + CallerStkArity * SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&
         "HiPE prologue is only supported on Linux operating systems.");

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
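  //
  // As a purely illustrative example (the leaf-words value comes from the
  // runtime metadata, so the exact number below is a placeholder): with
  // SlotSize = 8 and AMD64_LEAF_WORDS = 24, Guaranteed is 192 bytes, and a
  // 120-byte frame with 4 on-stack caller arguments gives
  // MaxStack = 120 + 4 * 8 + 8 = 160, so no stack check would be emitted.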
  if (MFI.hasCalls()) {
    unsigned MoreStackForCalls = 0;

    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (!MI.isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI.getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        if (F->getName().contains("erlang.") || F->getName().contains("bif_") ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        unsigned CalleeStkArity = F->arg_size() > CCRegisteredArgs
                                      ? F->arg_size() - CCRegisteredArgs
                                      : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls =
              std::max(MoreStackForCalls,
                       (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed then runtime checks
  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    for (const auto &LI : PrologueMBB.liveins()) {
      stackCheckMBB->addLiveIn(LI);
      incStackMBB->addLiveIn(LI);
    }

    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
    } else {
      SPReg = X86::ESP;
      PReg = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
           "HiPE prologue scratch register is live-in");

    // Create new MBB for StackCheck:
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
                 false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
                 PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1))
        .addMBB(&PrologueMBB)
        .addImm(X86::COND_AE);

    // Create new MBB for IncStack:
    BuildMI(incStackMBB, DL, TII.get(CALLop)).addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), SPReg,
                 false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)).addReg(ScratchReg),
                 PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JCC_1))
        .addMBB(incStackMBB)
        .addImm(X86::COND_LE);

    stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
    stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
    incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
    incStackMBB->addSuccessor(incStackMBB, {1, 100});
  }
#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MBBI,
                                           const DebugLoc &DL,
                                           int Offset) const {
  if (Offset <= 0)
    return false;

  if (Offset % SlotSize)
    return false;

  int NumPops = Offset / SlotSize;
  // This is only worth it if we have at most 2 pops.
  if (NumPops != 1 && NumPops != 2)
    return false;
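
  // For example, an 8-byte stack cleanup right after a call ("addl $8, %esp"
  // on 32-bit) can instead be emitted as two pops into registers the call
  // left dead, which is usually smaller when optimizing for size.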
  // Handle only the trivial case where the adjustment directly follows
  // a call. This is the most common one, anyway.
  if (MBBI == MBB.begin())
    return false;
  MachineBasicBlock::iterator Prev = std::prev(MBBI);
  if (!Prev->isCall() || !Prev->getOperand(1).isRegMask())
    return false;

  unsigned Regs[2];
  unsigned FoundRegs = 0;

  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const MachineOperand &RegMask = Prev->getOperand(1);

  auto &RegClass =
      Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass;
  // Try to find up to NumPops free registers.
  for (auto Candidate : RegClass) {
    // Poor man's liveness:
    // Since we're immediately after a call, any register that is clobbered
    // by the call and not defined by it can be considered dead.
    if (!RegMask.clobbersPhysReg(Candidate))
      continue;

    // Don't clobber reserved registers
    if (MRI.isReserved(Candidate))
      continue;

    // Don't use a register that the call defines (e.g. the return value).
    bool IsDef = false;
    for (const MachineOperand &MO : Prev->implicit_operands()) {
      if (MO.isReg() && MO.isDef() &&
          TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) {
        IsDef = true;
        break;
      }
    }

    if (IsDef)
      continue;

    Regs[FoundRegs++] = Candidate;
    if (FoundRegs == (unsigned)NumPops)
      break;
  }

  if (FoundRegs == 0)
    return false;

  // If we found only one free register, but need two, reuse the same one twice.
  while (FoundRegs < (unsigned)NumPops)
    Regs[FoundRegs++] = Regs[0];

  for (int i = 0; i < NumPops; ++i)
    BuildMI(MBB, MBBI, DL, TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r),
            Regs[i]);

  return true;
}
MachineBasicBlock::iterator X86FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  bool reserveCallFrame = hasReservedCallFrame(MF);
  unsigned Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
  uint64_t Amount = TII.getFrameSize(*I);
  uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
  I = MBB.erase(I);
  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

  // Try to avoid emitting dead SP adjustments if the block end is unreachable,
  // typically because the function is marked noreturn (abort, throw,
  // assert_fail, etc).
  if (isDestroy && blockEndIsUnreachable(MBB, I))
    return I;

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'

    // We need to keep the stack aligned properly. To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    Amount = alignTo(Amount, getStackAlign());

    const Function &F = MF.getFunction();
    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
    bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();

    // If we have any exception handlers in this function, and we adjust
    // the SP before calls, we may need to indicate this to the unwinder
    // using GNU_ARGS_SIZE. Note that this may be necessary even when
    // Amount == 0, because the preceding function may have set a non-0
    // GNU_ARGS_SIZE.
    // TODO: We don't need to reset this between subsequent functions,
    // if it didn't change.
    bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();

    if (HasDwarfEHHandlers && !isDestroy &&
        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

    // Factor out the amount that gets handled inside the sequence
    // (Pushes of argument for frame setup, callee pops for frame destroy)
    Amount -= InternalAmt;

    // TODO: This is needed only if we require precise CFA.
    // If this is a callee-pop calling convention, emit a CFA adjust for
    // the amount the callee popped.
    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

    // Add Amount to SP to destroy a frame, or subtract to setup.
    int64_t StackAdjustment = isDestroy ? Amount : -Amount;

    if (StackAdjustment) {
      // Merge with any previous or following adjustment instruction. Note: the
      // instructions merged with here do not have CFI, so their stack
      // adjustments do not feed into CfaAdjustment.
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

      if (StackAdjustment) {
        if (!(F.hasMinSize() &&
              adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
          BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                               /*InEpilogue=*/false);
      }
    }

    if (DwarfCFI && !hasFP(MF)) {
      // If we don't have FP, but need to generate unwind information,
      // we need to set the correct CFA offset after the stack adjustment.
      // How much we adjust the CFA offset depends on whether we're emitting
      // CFI only for EH purposes or for debugging. EH only requires the CFA
      // offset to be correct at each call site, while for debugging we want
      // it to be more precise.

      int64_t CfaAdjustment = -StackAdjustment;
      // TODO: When not using precise CFA, we also need to adjust for the
      // InternalAmt here.
      if (CfaAdjustment) {
        BuildCFI(
            MBB, InsertPos, DL,
            MCCFIInstruction::createAdjustCfaOffset(nullptr, CfaAdjustment));
      }
    }

    return I;
  }

  if (isDestroy && InternalAmt && !blockEndIsUnreachable(MBB, I)) {
    // The callee pops its own arguments (callee-cleanup convention), so
    // restore the stack pointer right after the call; spill code may have
    // been inserted between the CALL and the frame-destroy pseudo.
    MachineBasicBlock::iterator CI = I;
    MachineBasicBlock::iterator B = MBB.begin();
    while (CI != B && !std::prev(CI)->isCall())
      --CI;
    BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
  }

  return I;
}
bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  assert(MBB.getParent() && "Block is not attached to a function!");
  const MachineFunction &MF = *MBB.getParent();
  if (!MBB.isLiveIn(X86::EFLAGS))
    return true;

  // If stack probes have to loop inline or call, that will clobber EFLAGS.
  // FIXME: we could allow cases that will use emitStackProbeInlineGenericBlock.
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  if (TLI.hasInlineStackProbe(MF) || TLI.hasStackProbeSymbol(MF))
    return false;

  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext();
}
bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  assert(MBB.getParent() && "Block is not attached to a function!");

  // Win64 has strict requirements in terms of epilogue and we are
  // not taking a chance at messing with them.
  // I.e., unless this block is already an exit block, we can't use
  // it as an epilogue.
  if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock())
    return false;

  // Swift async context epilogue has a BTR instruction that clobbers parts of
  // EFLAGS.
  const MachineFunction &MF = *MBB.getParent();
  if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext())
    return !flagsNeedToBePreservedBeforeTheTerminators(MBB);

  if (canUseLEAForSPInEpilogue(*MBB.getParent()))
    return true;

  // If we cannot use LEA to adjust SP, we may need to use ADD, which
  // clobbers the EFLAGS. Check that we do not need to preserve it,
  // otherwise, conservatively assume this is not
  // safe to insert the epilogue here.
  return !flagsNeedToBePreservedBeforeTheTerminators(MBB);
}
bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // If we may need to emit frameless compact unwind information, give
  // up as this is currently broken: PR25614.
  bool CompactUnwind =
      MF.getContext().getObjectFileInfo()->getCompactUnwindSection() != nullptr;
  return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) ||
          !CompactUnwind) &&
         // The lowering of segmented stack and HiPE only support entry
         // blocks as prologue blocks: PR26107. This limitation may be
         // lifted if we fix:
         // - adjustForSegmentedStacks
         // - adjustForHiPEPrologue
         MF.getFunction().getCallingConv() != CallingConv::HiPE &&
         !MF.shouldSplitStack();
}
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool RestoreSP) const {
  assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env");
  assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32");
  assert(STI.is32Bit() && !Uses64BitFramePtr &&
         "restoring EBP/ESI on non-32-bit target");

  MachineFunction &MF = *MBB.getParent();
  Register FramePtr = TRI->getFrameRegister(MF);
  Register BasePtr = TRI->getBaseRegister();
  WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // FIXME: Don't set FrameSetup flag in catchret case.

  int FI = FuncInfo.EHRegNodeFrameIndex;
  int EHRegSize = MFI.getObjectSize(FI);

  if (RestoreSP) {
    // MOV32rm -EHRegSize(%ebp), %esp
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
                 X86::EBP, true, -EHRegSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  Register UsedReg;
  int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
  int EndOffset = -EHRegOffset - EHRegSize;
  FuncInfo.EHRegNodeEndOffset = EndOffset;

  if (UsedReg == FramePtr) {
    // ADD $offset, %ebp
    unsigned ADDri = getADDriOpcode(false);
    BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
        .addReg(FramePtr)
        .addImm(EndOffset)
        .setMIFlag(MachineInstr::FrameSetup)
        ->getOperand(3)
        .setIsDead();
    assert(EndOffset >= 0 &&
           "end of registration object above normal EBP position!");
  } else if (UsedReg == BasePtr) {
    // LEA offset(%ebp), %esi
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
                 FramePtr, false, EndOffset)
        .setMIFlag(MachineInstr::FrameSetup);
    // MOV32rm SavedEBPOffset(%esi), %ebp
    assert(X86FI->getHasSEHFramePtrSave());
    int Offset =
        getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
            .getFixed();
    assert(UsedReg == BasePtr);
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
                 UsedReg, true, Offset)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr");
  }
  return MBBI;
}
int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  return TRI->getSlotSize();
}

Register
X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
  return StackPtr;
}
TargetFrameLowering::DwarfFrameBase
X86FrameLowering::getDwarfFrameBase(const MachineFunction &MF) const {
  const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
  Register FrameRegister = RI->getFrameRegister(MF);
  if (getInitialCFARegister(MF) == FrameRegister &&
      MF.getInfo<X86MachineFunctionInfo>()->hasCFIAdjustCfa()) {
    DwarfFrameBase FrameBase;
    FrameBase.Kind = DwarfFrameBase::CFA;
    FrameBase.Location.Offset =
        -MF.getFrameInfo().getStackSize() - getInitialCFAOffset(MF);
    return FrameBase;
  }

  return DwarfFrameBase{DwarfFrameBase::Register, {FrameRegister}};
}
// Struct used by orderFrameObjects to help sort the stack objects.
struct X86FrameSortingObject {
  bool IsValid = false;             // true if we care about this Object.
  unsigned ObjectIndex = 0;         // Index of Object into MFI list.
  unsigned ObjectSize = 0;          // Size of Object in bytes.
  Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
  unsigned ObjectNumUses = 0;       // Object static number of uses.
};
// The comparison function we use for std::sort to order our local
// stack symbols. The current algorithm is to use an estimated
// "density". This takes into consideration the size and number of
// uses each object has in order to roughly minimize code size.
// So, for example, an object of size 16B that is referenced 5 times
// will get higher priority than 4 4B objects referenced 1 time each.
// It's not perfect and we may be able to squeeze a few more bytes out of
// it (for example : 0(esp) requires fewer bytes, symbols allocated at the
// fringe end can have special consideration, given their size is less
// important, etc.), but the algorithmic complexity grows too much to be
// worth the extra gains we get. This gets us pretty close.
// The final order leaves us with objects with highest priority going
// at the end of our list.
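//
// Worked example of the scaled comparison used below: for A = {NumUses 5,
// Size 16} and B = {NumUses 1, Size 4}, DensityAScaled = 5 * 4 = 20 and
// DensityBScaled = 1 * 16 = 16, so A compares greater than B and is placed
// later in the sorted list, i.e. it gets the higher priority.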
struct X86FrameSortingComparator {
  inline bool operator()(const X86FrameSortingObject &A,
                         const X86FrameSortingObject &B) const {
    uint64_t DensityAScaled, DensityBScaled;

    // For consistency in our comparison, all invalid objects are placed
    // at the end. This also allows us to stop walking when we hit the
    // first invalid item after it's all sorted.
    if (!A.IsValid)
      return false;
    if (!B.IsValid)
      return true;

    // The density is calculated by doing :
    //     (double)DensityA = A.ObjectNumUses / A.ObjectSize
    //     (double)DensityB = B.ObjectNumUses / B.ObjectSize
    // Since this approach may cause inconsistencies in
    // the floating point <, >, == comparisons, depending on the floating
    // point model with which the compiler was built, we're going
    // to scale both sides by multiplying with
    // A.ObjectSize * B.ObjectSize. This ends up factoring away
    // the division and, with it, the need for any floating point
    // arithmetic.
    DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) *
                     static_cast<uint64_t>(B.ObjectSize);
    DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) *
                     static_cast<uint64_t>(A.ObjectSize);

    // If the two densities are equal, prioritize highest alignment
    // objects. This allows for similar alignment objects
    // to be packed together (given the same density).
    // There's room for improvement here, also, since we can pack
    // similar alignment (different density) objects next to each
    // other to save padding. This will also require further
    // complexity/iterations, and the overall gain isn't worth it,
    // in general. Something to keep in mind, though.
    if (DensityAScaled == DensityBScaled)
      return A.ObjectAlignment < B.ObjectAlignment;

    return DensityAScaled < DensityBScaled;
  }
};
// Order the symbols in the local stack.
// We want to place the local stack objects in some sort of sensible order.
// The heuristic we use is to try and pack them according to static number
// of uses and size of object in order to minimize code size.
void X86FrameLowering::orderFrameObjects(
    const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Don't waste time if there's nothing to do.
  if (ObjectsToAllocate.empty())
    return;

  // Create an array of all MFI objects. We won't need all of these
  // objects, but we're going to create a full array of them to make
  // it easier to index into when we're counting "uses" down below.
  // We want to be able to easily/cheaply access an object by simply
  // indexing into it, instead of having to search for it every time.
  std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());

  // Walk the objects we care about and mark them as such in our working
  // array.
  for (auto &Obj : ObjectsToAllocate) {
    SortingObjects[Obj].IsValid = true;
    SortingObjects[Obj].ObjectIndex = Obj;
    SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj);
    // Set the size.
    int ObjectSize = MFI.getObjectSize(Obj);
    if (ObjectSize == 0)
      // Variable size. Just use 4.
      SortingObjects[Obj].ObjectSize = 4;
    else
      SortingObjects[Obj].ObjectSize = ObjectSize;
  }

  // Count the number of uses for each object.
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (const MachineOperand &MO : MI.operands()) {
        // Check to see if it's a local stack symbol.
        if (!MO.isFI())
          continue;
        int Index = MO.getIndex();
        // Check to see if it falls within our range, and is tagged
        // to require ordering.
        if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
            SortingObjects[Index].IsValid)
          SortingObjects[Index].ObjectNumUses++;
      }
    }
  }

  // Sort the objects using X86FrameSortingAlgorithm (see its comment for
  // info).
  llvm::stable_sort(SortingObjects, X86FrameSortingComparator());

  // Now modify the original list to represent the final order that
  // we want. The order will depend on whether we're going to access them
  // from the stack pointer or the frame pointer. For SP, the list should
  // end up with the END containing objects that we want with smaller offsets.
  // For FP, it should be flipped.
  int i = 0;
  for (auto &Obj : SortingObjects) {
    // All invalid items are sorted at the end, so it's safe to stop.
    if (!Obj.IsValid)
      break;
    ObjectsToAllocate[i++] = Obj.ObjectIndex;
  }

  // Flip it if we're accessing off of the FP.
  if (!TRI->hasStackRealignment(MF) && hasFP(MF))
    std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end());
}
unsigned
X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const {
  // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue.
  unsigned Offset = 16;
  // RBP is immediately pushed.
  Offset += SlotSize;
  // All callee-saved registers are then pushed.
  Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize();
  // Every funclet allocates enough stack space for the largest outgoing call.
  Offset += getWinEHFuncletFrameSize(MF);
  return Offset;
}
void X86FrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  // Mark the function as not having WinCFI. We will set it back to true in
  // emitPrologue if it gets called and emits CFI.
  MF.setHasWinCFI(false);

  // If we are using Windows x64 CFI, ensure that the stack is always 8 byte
  // aligned. The format doesn't support misaligned stack adjustments.
  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
    MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize));

  // If this function isn't doing Win64-style C++ EH, we don't need to do
  // anything.
  if (STI.is64Bit() && MF.hasEHFunclets() &&
      classifyEHPersonality(MF.getFunction().getPersonalityFn()) ==
          EHPersonality::MSVC_CXX) {
    adjustFrameForMsvcCxxEh(MF);
  }
}
void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
  // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
  // relative to RSP after the prologue. Find the offset of the last fixed
  // object, so that we can allocate a slot immediately following it. If there
  // were no fixed objects, use offset -SlotSize, which is immediately after the
  // return address. Fixed objects have negative frame indices.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
  int64_t MinFixedObjOffset = -SlotSize;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));

  for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
    for (WinEHHandlerType &H : TBME.HandlerArray) {
      int FrameIndex = H.CatchObj.FrameIndex;
      if (FrameIndex != INT_MAX) {
        // Ensure alignment.
        unsigned Align = MFI.getObjectAlign(FrameIndex).value();
        MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
        MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
        MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
      }
    }
  }

  // Ensure alignment.
  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
  int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
  int UnwindHelpFI =
      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

  // Store -2 into UnwindHelp on function entry. We have to scan forwards past
  // other frame setup instructions.
  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  DebugLoc DL = MBB.findDebugLoc(MBBI);
  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                    UnwindHelpFI)
      .addImm(-2);
}
void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced(
    MachineFunction &MF, RegScavenger *RS) const {
  auto *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  if (STI.is32Bit() && MF.hasEHFunclets())
    restoreWinEHStackPointersInParent(MF);
  // We have emitted prolog and epilog. Don't need stack pointer saving
  // instruction any more.
  if (MachineInstr *MI = X86FI->getStackPtrSaveMI()) {
    MI->eraseFromParent();
    X86FI->setStackPtrSaveMI(nullptr);
  }
}
void X86FrameLowering::restoreWinEHStackPointersInParent(
    MachineFunction &MF) const {
  // 32-bit functions have to restore stack pointers when control is transferred
  // back to the parent function. These blocks are identified as eh pads that
  // are not funclet entries.
  bool IsSEH = isAsynchronousEHPersonality(
      classifyEHPersonality(MF.getFunction().getPersonalityFn()));
  for (MachineBasicBlock &MBB : MF) {
    bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry();
    if (NeedsRestore)
      restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(),
                                  /*RestoreSP=*/IsSEH);
  }
}
// Compute the alignment gap between current SP after spilling FP/BP and the
// next properly aligned stack offset.
static int computeFPBPAlignmentGap(MachineFunction &MF,
                                   const TargetRegisterClass *RC,
                                   unsigned NumSpilledRegs) {
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned AllocSize = TRI->getSpillSize(*RC) * NumSpilledRegs;
  Align StackAlign = MF.getSubtarget().getFrameLowering()->getStackAlign();
  unsigned AlignedSize = alignTo(AllocSize, StackAlign);
  return AlignedSize - AllocSize;
}
void X86FrameLowering::spillFPBPUsingSP(MachineFunction &MF,
                                        MachineBasicBlock::iterator BeforeMI,
                                        Register FP, Register BP,
                                        int SPAdjust) const {
  assert(FP.isValid() || BP.isValid());

  MachineBasicBlock *MBB = BeforeMI->getParent();
  DebugLoc DL = BeforeMI->getDebugLoc();

  // Spill FP.
  if (FP.isValid())
    BuildMI(*MBB, BeforeMI, DL,
            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
        .addReg(FP);
  // Spill BP.
  if (BP.isValid())
    BuildMI(*MBB, BeforeMI, DL,
            TII.get(getPUSHOpcode(MF.getSubtarget<X86Subtarget>())))
        .addReg(BP);

  // Make sure SP is aligned.
  if (SPAdjust)
    emitSPUpdate(*MBB, BeforeMI, DL, -SPAdjust, false);

  // Emit unwinding information.
  if (FP.isValid() && needsDwarfCFI(MF)) {
    // Emit .cfi_remember_state to remember old frame.
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr));
    BuildMI(*MBB, BeforeMI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Setup new CFA value with DW_CFA_def_cfa_expression:
    //     DW_OP_breg7+offset, DW_OP_deref, DW_OP_consts 16, DW_OP_plus
    SmallString<64> CfaExpr;
    uint8_t buffer[16];
    int Offset = SPAdjust;
    if (BP.isValid())
      Offset += TRI->getSpillSize(*TRI->getMinimalPhysRegClass(BP));
    // If BeforeMI is a frame setup instruction, we need to adjust the position
    // and offset of the new cfi instruction.
    if (TII.isFrameSetup(*BeforeMI)) {
      Offset += alignTo(TII.getFrameSize(*BeforeMI), getStackAlign());
      BeforeMI = std::next(BeforeMI);
    }
    Register StackPtr = TRI->getStackRegister();
    if (STI.isTarget64BitILP32())
      StackPtr = Register(getX86SubSuperRegister(StackPtr, 64));
    unsigned DwarfStackPtr = TRI->getDwarfRegNum(StackPtr, true);
    CfaExpr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfStackPtr));
    CfaExpr.append(buffer, buffer + encodeSLEB128(Offset, buffer));
    CfaExpr.push_back(dwarf::DW_OP_deref);
    CfaExpr.push_back(dwarf::DW_OP_consts);
    CfaExpr.append(buffer, buffer + encodeSLEB128(SlotSize * 2, buffer));
    CfaExpr.push_back((uint8_t)dwarf::DW_OP_plus);

    SmallString<64> DefCfaExpr;
    DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
    DefCfaExpr.append(buffer, buffer + encodeSLEB128(CfaExpr.size(), buffer));
    DefCfaExpr.append(CfaExpr.str());
    BuildCFI(*MBB, BeforeMI, DL,
             MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str()),
             MachineInstr::FrameSetup);
  }
}
void X86FrameLowering::restoreFPBPUsingSP(MachineFunction &MF,
                                          MachineBasicBlock::iterator AfterMI,
                                          Register FP, Register BP,
                                          int SPAdjust) const {
  assert(FP.isValid() || BP.isValid());

  // Adjust SP so it points to spilled FP or BP.
  MachineBasicBlock *MBB = AfterMI->getParent();
  MachineBasicBlock::iterator Pos = std::next(AfterMI);
  DebugLoc DL = AfterMI->getDebugLoc();
  if (SPAdjust)
    emitSPUpdate(*MBB, Pos, DL, SPAdjust, false);

  // Restore BP.
  if (BP.isValid())
    BuildMI(*MBB, Pos, DL,
            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), BP);
  // Restore FP.
  if (FP.isValid())
    BuildMI(*MBB, Pos, DL,
            TII.get(getPOPOpcode(MF.getSubtarget<X86Subtarget>())), FP);

  // Emit unwinding information.
  if (needsDwarfCFI(MF)) {
    // Restore original frame with .cfi_restore_state.
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr));
    BuildMI(*MBB, Pos, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
}
void X86FrameLowering::saveAndRestoreFPBPUsingSP(
    MachineFunction &MF, MachineBasicBlock::iterator BeforeMI,
    MachineBasicBlock::iterator AfterMI, bool SpillFP, bool SpillBP) const {
  assert(SpillFP || SpillBP);

  Register FP, BP;
  const TargetRegisterClass *RC;
  unsigned NumRegs = 0;

  if (SpillFP) {
    FP = TRI->getFrameRegister(MF);
    if (STI.isTarget64BitILP32())
      FP = Register(getX86SubSuperRegister(FP, 64));
    RC = TRI->getMinimalPhysRegClass(FP);
    ++NumRegs;
  }
  if (SpillBP) {
    BP = TRI->getBaseRegister();
    if (STI.isTarget64BitILP32())
      BP = Register(getX86SubSuperRegister(BP, 64));
    RC = TRI->getMinimalPhysRegClass(BP);
    ++NumRegs;
  }
  int SPAdjust = computeFPBPAlignmentGap(MF, RC, NumRegs);

  spillFPBPUsingSP(MF, BeforeMI, FP, BP, SPAdjust);
  restoreFPBPUsingSP(MF, AfterMI, FP, BP, SPAdjust);
}
bool X86FrameLowering::skipSpillFPBP(
    MachineFunction &MF, MachineBasicBlock::reverse_iterator &MI) const {
  if (MI->getOpcode() == X86::LCMPXCHG16B_SAVE_RBX) {
    // The pseudo instruction LCMPXCHG16B_SAVE_RBX is generated in the form
    //     SaveRbx = COPY RBX
    //     SaveRbx = LCMPXCHG16B_SAVE_RBX ..., SaveRbx, implicit-def rbx
    // And later LCMPXCHG16B_SAVE_RBX is expanded to restore RBX from SaveRbx.
    // We should skip this instruction sequence.
    int FI;
    unsigned Reg;
    while (!(MI->getOpcode() == TargetOpcode::COPY &&
             MI->getOperand(1).getReg() == X86::RBX) &&
           !((Reg = TII.isStoreToStackSlot(*MI, FI)) && Reg == X86::RBX))
      ++MI;
    return true;
  }
  return false;
}
static bool isFPBPAccess(const MachineInstr &MI, Register FP, Register BP,
                         const TargetRegisterInfo *TRI, bool &AccessFP,
                         bool &AccessBP) {
  AccessFP = AccessBP = false;
  if (FP) {
    if (MI.findRegisterUseOperandIdx(FP, TRI, false) != -1 ||
        MI.findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
      AccessFP = true;
  }
  if (BP) {
    if (MI.findRegisterUseOperandIdx(BP, TRI, false) != -1 ||
        MI.findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
      AccessBP = true;
  }
  return AccessFP || AccessBP;
}
// Invoke instruction has been lowered to normal function call. We try to figure
// out if MI comes from Invoke.
// Do we have any better method?
static bool isInvoke(const MachineInstr &MI, bool InsideEHLabels) {
  if (!MI.isCall())
    return false;
  if (InsideEHLabels)
    return true;

  const MachineBasicBlock *MBB = MI.getParent();
  if (!MBB->hasEHPadSuccessor())
    return false;

  // Check if there is another call instruction from MI to the end of MBB.
  MachineBasicBlock::const_iterator MBBI = MI, ME = MBB->end();
  for (++MBBI; MBBI != ME; ++MBBI)
    if (MBBI->isCall())
      return false;
  return true;
}
/// Given the live range of FP or BP (DefMI, KillMI), check if there is any
/// interfered stack access in the range, usually generated by register spill.
void X86FrameLowering::checkInterferedAccess(
    MachineFunction &MF, MachineBasicBlock::reverse_iterator DefMI,
    MachineBasicBlock::reverse_iterator KillMI, bool SpillFP,
    bool SpillBP) const {
  if (DefMI == KillMI)
    return;
  if (TRI->hasBasePointer(MF)) {
    if (!SpillBP)
      return;
  } else {
    if (!SpillFP)
      return;
  }

  auto MI = KillMI;
  while (MI != DefMI) {
    if (any_of(MI->operands(),
               [](const MachineOperand &MO) { return MO.isFI(); }))
      MF.getContext().reportError(SMLoc(),
                                  "Interference usage of base pointer/frame "
                                  "pointer.");
    MI++;
  }
}
/// If a function uses base pointer and the base pointer is clobbered by inline
/// asm, RA doesn't detect this case, and after the inline asm, the base pointer
/// contains garbage value.
/// For example if a 32b x86 function uses base pointer esi, and esi is
/// clobbered by following inline asm
///     asm("rep movsb" : "+D"(ptr), "+S"(x), "+c"(c)::"memory");
/// We need to save esi before the asm and restore it after the asm.
///
/// The problem can also occur to frame pointer if there is a function call, and
/// the callee uses a different calling convention and clobbers the fp.
///
/// Because normal frame objects (spill slots) are accessed through fp/bp
/// register, so we can't spill fp/bp to normal spill slots.
///
/// FIXME: There are 2 possible enhancements:
/// 1. In many cases there are different physical registers not clobbered by
///    inline asm, we can use one of them as base pointer. Or use a virtual
///    register as base pointer and let RA allocate a physical register to it.
/// 2. If there is no other instructions access stack with fp/bp from the
///    inline asm to the epilog, and no cfi requirement for a correct fp, we can
///    skip the save and restore operations.
void X86FrameLowering::spillFPBP(MachineFunction &MF) const {
  Register FP, BP;
  const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
  if (TFI.hasFP(MF))
    FP = TRI->getFrameRegister(MF);
  if (TRI->hasBasePointer(MF))
    BP = TRI->getBaseRegister();

  // Currently only inline asm and function call can clobbers fp/bp. So we can
  // do some quick test and return early.
  if (!MF.hasInlineAsm()) {
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    if (!X86FI->getFPClobberedByCall())
      FP = 0;
    if (!X86FI->getBPClobberedByCall())
      BP = 0;
  }
  if (!FP && !BP)
    return;

  for (MachineBasicBlock &MBB : MF) {
    bool InsideEHLabels = false;
    auto MI = MBB.rbegin(), ME = MBB.rend();
    auto TermMI = MBB.getFirstTerminator();
    if (TermMI == MBB.begin())
      continue;
    MI = *(std::prev(TermMI));

    while (MI != ME) {
      // Skip frame setup/destroy instructions.
      // Skip Invoke (call inside try block) instructions.
      // Skip instructions handled by target.
      if (MI->getFlag(MachineInstr::MIFlag::FrameSetup) ||
          MI->getFlag(MachineInstr::MIFlag::FrameDestroy) ||
          isInvoke(*MI, InsideEHLabels) || skipSpillFPBP(MF, MI)) {
        ++MI;
        continue;
      }

      if (MI->getOpcode() == TargetOpcode::EH_LABEL) {
        InsideEHLabels = !InsideEHLabels;
        ++MI;
        continue;
      }

      bool AccessFP, AccessBP;
      // Check if fp or bp is used in MI.
      if (!isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)) {
        ++MI;
        continue;
      }

      // Look for the range [DefMI, KillMI] in which fp or bp is defined and
      // used.
      bool FPLive = false, BPLive = false;
      bool SpillFP = false, SpillBP = false;
      auto DefMI = MI, KillMI = MI;
      do {
        SpillFP |= AccessFP;
        SpillBP |= AccessBP;

        // Maintain FPLive and BPLive.
        if (FPLive && MI->findRegisterDefOperandIdx(FP, TRI, false, true) != -1)
          FPLive = false;
        if (FP && MI->findRegisterUseOperandIdx(FP, TRI, false) != -1)
          FPLive = true;
        if (BPLive && MI->findRegisterDefOperandIdx(BP, TRI, false, true) != -1)
          BPLive = false;
        if (BP && MI->findRegisterUseOperandIdx(BP, TRI, false) != -1)
          BPLive = true;

        DefMI = MI;
        ++MI;
      } while ((MI != ME) &&
               (FPLive || BPLive ||
                isFPBPAccess(*MI, FP, BP, TRI, AccessFP, AccessBP)));

      // Don't need to save/restore if FP is accessed through llvm.frameaddress.
      if (FPLive && !SpillBP)
        continue;

      // If the bp is clobbered by a call, we should save and restore outside of
      // the frame setup instructions.
      if (KillMI->isCall() && DefMI != ME) {
        auto FrameSetup = std::next(DefMI);
        // Look for frame setup instruction toward the start of the BB.
        // If we reach another call instruction, it means no frame setup
        // instruction for the current call instruction.
        while (FrameSetup != ME && !TII.isFrameSetup(*FrameSetup) &&
               !FrameSetup->isCall())
          ++FrameSetup;
        // If a frame setup instruction is found, we need to find out the
        // corresponding frame destroy instruction.
        if (FrameSetup != ME && TII.isFrameSetup(*FrameSetup) &&
            (TII.getFrameSize(*FrameSetup) ||
             TII.getFrameAdjustment(*FrameSetup))) {
          while (!TII.isFrameInstr(*KillMI))
            --KillMI;
          DefMI = FrameSetup;
          MI = FrameSetup;
          ++MI;
        }
      }

      checkInterferedAccess(MF, DefMI, KillMI, SpillFP, SpillBP);

      // Call target function to spill and restore FP and BP registers.
      saveAndRestoreFPBPUsingSP(MF, &(*DefMI), &(*KillMI), SpillFP, SpillBP