1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
11 // On AArch64, stack frames are structured as follows:
13 // The stack grows downward.
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
// in the frame.
19 // At function entry, the "frame" looks as follows:
22 // |-----------------------------------|
24 // | arguments passed on the stack |
26 // |-----------------------------------| <- sp
30 // After the prologue has run, the frame has the following general structure.
31 // Note that this doesn't depict the case where a red-zone is used. Also,
32 // technically the last frame area (VLAs) doesn't get created until in the
33 // main function body, after the prologue is run. However, it's depicted here
// for completeness.
37 // |-----------------------------------|
39 // | arguments passed on the stack |
41 // |-----------------------------------|
43 // | (Win64 only) varargs from reg |
45 // |-----------------------------------|
47 // | callee-saved gpr registers | <--.
48 // | | | On Darwin platforms these
49 // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
50 // | | | (frame record first)
51 // | prev_fp, prev_lr | <--'
52 // | (a.k.a. "frame record") |
53 // |-----------------------------------| <- fp(=x29)
55 // | callee-saved fp/simd/SVE regs |
57 // |-----------------------------------|
58 // |.empty.space.to.make.part.below....|
59 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
60 // |.the.standard.16-byte.alignment....| compile time; if present)
61 // |-----------------------------------|
63 // | local variables of fixed size |
64 // | including spill slots |
65 // |-----------------------------------| <- bp(not defined by ABI,
66 // |.variable-sized.local.variables....| LLVM chooses X19)
67 // |.(VLAs)............................| (size of this area is unknown at
68 // |...................................| compile time)
69 // |-----------------------------------| <- sp
73 // To access the data in a frame, at-compile time, a constant offset must be
74 // computable from one of the pointers (fp, bp, sp) to access it. The size
75 // of the areas with a dotted background cannot be computed at compile-time
76 // if they are present, making it required to have all three of fp, bp and
77 // sp to be set up to be able to access all contents in the frame areas,
78 // assuming all of the frame areas are non-empty.
80 // For most functions, some of the frame areas are empty. For those functions,
81 // it may not be necessary to set up fp or bp:
82 // * A base pointer is definitely needed when there are both VLAs and local
83 // variables with more-than-default alignment requirements.
84 // * A frame pointer is definitely needed when there are local variables with
85 // more-than-default alignment requirements.
87 // For Darwin platforms the frame-record (fp, lr) is stored at the top of the
88 // callee-saved area, since the unwind encoding does not allow for encoding
89 // this dynamically and existing tools depend on this layout. For other
90 // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
91 // area to allow SVE stack objects (allocated directly below the callee-saves,
92 // if available) to be accessed directly from the framepointer.
93 // The SVE spill/fill instructions have VL-scaled addressing modes such as:
95 // ldr z8, [fp, #-7 mul vl]
96 // For SVE the size of the vector length (VL) is not known at compile-time, so
97 // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
98 // layout, we don't need to add an unscaled offset to the framepointer before
99 // accessing the SVE object in the frame.
101 // In some cases when a base pointer is not strictly needed, it is generated
102 // anyway when offsets from the frame pointer to access local variables become
103 // so large that the offset can't be encoded in the immediate fields of loads
// or stores.
106 // FIXME: also explain the redzone concept.
107 // FIXME: also explain the concept of reserved call frames.
109 //===----------------------------------------------------------------------===//
111 #include "AArch64FrameLowering.h"
112 #include "AArch64InstrInfo.h"
113 #include "AArch64MachineFunctionInfo.h"
114 #include "AArch64RegisterInfo.h"
115 #include "AArch64StackOffset.h"
116 #include "AArch64Subtarget.h"
117 #include "AArch64TargetMachine.h"
118 #include "MCTargetDesc/AArch64AddressingModes.h"
119 #include "llvm/ADT/ScopeExit.h"
120 #include "llvm/ADT/SmallVector.h"
121 #include "llvm/ADT/Statistic.h"
122 #include "llvm/CodeGen/LivePhysRegs.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineFrameInfo.h"
125 #include "llvm/CodeGen/MachineFunction.h"
126 #include "llvm/CodeGen/MachineInstr.h"
127 #include "llvm/CodeGen/MachineInstrBuilder.h"
128 #include "llvm/CodeGen/MachineMemOperand.h"
129 #include "llvm/CodeGen/MachineModuleInfo.h"
130 #include "llvm/CodeGen/MachineOperand.h"
131 #include "llvm/CodeGen/MachineRegisterInfo.h"
132 #include "llvm/CodeGen/RegisterScavenging.h"
133 #include "llvm/CodeGen/TargetInstrInfo.h"
134 #include "llvm/CodeGen/TargetRegisterInfo.h"
135 #include "llvm/CodeGen/TargetSubtargetInfo.h"
136 #include "llvm/CodeGen/WinEHFuncInfo.h"
137 #include "llvm/IR/Attributes.h"
138 #include "llvm/IR/CallingConv.h"
139 #include "llvm/IR/DataLayout.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCDwarf.h"
144 #include "llvm/Support/CommandLine.h"
145 #include "llvm/Support/Debug.h"
146 #include "llvm/Support/ErrorHandling.h"
147 #include "llvm/Support/MathExtras.h"
148 #include "llvm/Support/raw_ostream.h"
149 #include "llvm/Target/TargetMachine.h"
150 #include "llvm/Target/TargetOptions.h"
156 using namespace llvm
;
158 #define DEBUG_TYPE "frame-info"
// Hidden command-line option "aarch64-redzone"; defaults to false.
160 static cl::opt
<bool> EnableRedZone("aarch64-redzone",
161 cl::desc("enable use of redzone on AArch64"),
162 cl::init(false), cl::Hidden
);
// Hidden command-line option "reverse-csr-restore-seq"; defaults to false.
// It is tested in adaptForLdStOpt() below.
// NOTE(review): the line that opens this declaration (listing line 164) is
// not visible here -- presumably `static cl::opt<bool>`; confirm.
165 ReverseCSRRestoreSeq("reverse-csr-restore-seq",
166 cl::desc("reverse the CSR restore sequence"),
167 cl::init(false), cl::Hidden
);
// Statistic counter; incremented in emitPrologue() on the path where
// canUseRedZone() returns true.
169 STATISTIC(NumRedZoneFunctions
, "Number of functions using red zone");
171 /// This is the biggest offset to the stack pointer we can encode in aarch64
172 /// instructions (without using a separate calculation and a temp register).
173 /// Note that the exception here are vector stores/loads which cannot encode any
174 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
// In this file the constant is returned by estimateRSStackSizeLimit() and is
// the max-call-frame-size threshold compared against in hasFP().
175 static const unsigned DefaultSafeSPDisplacement
= 255;
177 /// Look at each instruction that references stack frames and return the stack
178 /// size limit beyond which some of these instructions will require a scratch
179 /// register during their expansion later.
// \param MF  function whose blocks and instructions are scanned.
// \returns   DefaultSafeSPDisplacement on the only return visible here.
// NOTE(review): several listing lines of this body (189-190, 192-195,
// 198-201, 203-204) are absent from this view, so any other return path and
// the statements guarded by the conditions below cannot be seen; confirm
// against the full source.
180 static unsigned estimateRSStackSizeLimit(MachineFunction
&MF
) {
181 // FIXME: For now, just conservatively guesstimate based on unscaled indexing
182 // range. We'll end up allocating an unnecessary spill slot a lot, but
183 // realistically that's not a big deal at this stage of the game.
184 for (MachineBasicBlock
&MBB
: MF
) {
185 for (MachineInstr
&MI
: MBB
) {
// Debug instructions, pseudos and ADDXri/ADDSXri get special treatment
// (presumably they are skipped; the guarded statement is not visible).
186 if (MI
.isDebugInstr() || MI
.isPseudo() ||
187 MI
.getOpcode() == AArch64::ADDXri
||
188 MI
.getOpcode() == AArch64::ADDSXri
)
// Inspect every operand of the remaining instructions.
191 for (const MachineOperand
&MO
: MI
.operands()) {
// Ask whether the instruction's frame offset can be updated in place; only
// the status is needed, so the three trailing out-parameters are nullptr.
// NOTE(review): the declaration of Offset is not visible in this view.
196 if (isAArch64FrameOffsetLegal(MI
, Offset
, nullptr, nullptr, nullptr) ==
197 AArch64FrameOffsetCannotUpdate
)
202 return DefaultSafeSPDisplacement
;
// Decide whether the function MF may use the red zone.
// \param MF  function being queried.
// \returns   on the final return below: true only when MF makes no calls,
//            hasFP(MF) is false, and the local stack size is at most 128.
// NOTE(review): listing lines 206-207, 211-212 and 216 are absent from this
// view, so any earlier bail-out (including what the NoRedZone check does) is
// not visible; presumably those paths return false -- confirm.
205 bool AArch64FrameLowering::canUseRedZone(const MachineFunction
&MF
) const {
208 // Don't use the red zone if the function explicitly asks us not to.
209 // This is typically used for kernel code.
210 if (MF
.getFunction().hasFnAttribute(Attribute::NoRedZone
))
213 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
214 const AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
// Size of the locals as recorded in the AArch64 per-function info.
215 unsigned NumBytes
= AFI
->getLocalStackSize();
// Any one of: calls present, a frame pointer, or more than 128 bytes of
// locals rules the red zone out.
217 return !(MFI
.hasCalls() || hasFP(MF
) || NumBytes
> 128);
220 /// hasFP - Return true if the specified function should have a dedicated frame
221 /// pointer register.
// The visible conditions, in order, are: EH funclets present; calls present
// together with frame-pointer elimination disabled; variable-sized objects,
// frame address taken, stack map, patch point or stack realignment; and an
// unknown or large (> DefaultSafeSPDisplacement) maximum call frame size.
// NOTE(review): the statement guarded by each `if` and the final return are
// on listing lines absent from this view; presumably each condition returns
// true and the fall-through returns false -- confirm.
222 bool AArch64FrameLowering::hasFP(const MachineFunction
&MF
) const {
223 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
224 const TargetRegisterInfo
*RegInfo
= MF
.getSubtarget().getRegisterInfo();
225 // Win64 EH requires a frame pointer if funclets are present, as the locals
226 // are accessed off the frame pointer in both the parent function and the
// NOTE(review): the end of the sentence above (listing line 227) is missing.
228 if (MF
.hasEHFunclets())
230 // Retain behavior of always omitting the FP for leaf functions when possible.
// The test requires both that the function has calls and that the target
// options disable frame-pointer elimination for it.
231 if (MFI
.hasCalls() && MF
.getTarget().Options
.DisableFramePointerElim(MF
))
233 if (MFI
.hasVarSizedObjects() || MFI
.isFrameAddressTaken() ||
234 MFI
.hasStackMap() || MFI
.hasPatchPoint() ||
235 RegInfo
->needsStackRealignment(MF
))
237 // With large callframes around we may need to use FP to access the scavenging
238 // emergency spillslot.
240 // Unfortunately some calls to hasFP() like machine verifier ->
241 // getReservedReg() -> hasFP in the middle of global isel are too early
242 // to know the max call frame size. Hopefully conservatively returning "true"
243 // in those cases is fine.
244 // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
245 if (!MFI
.isMaxCallFrameSizeComputed() ||
246 MFI
.getMaxCallFrameSize() > DefaultSafeSPDisplacement
)
252 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
253 /// not required, we reserve argument space for call sites in the function
254 /// immediately on entry to the current function. This eliminates the need for
255 /// add/sub sp brackets around call sites. Returns true if the call frame is
256 /// included as part of the stack frame.
// As implemented, the call frame is reserved exactly when the function's
// frame info reports no variable-sized objects.
// NOTE(review): the return-type line (listing line 257) is not visible here.
258 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction
&MF
) const {
259 return !MF
.getFrameInfo().hasVarSizedObjects();
// Process the call-frame pseudo instruction at I, emitting SP adjustments
// where they are required.
// \param MF   function containing the pseudo.
// \param MBB  block containing the pseudo.
// \param I    iterator at the call-frame setup/destroy pseudo.
// NOTE(review): the closing part of this function, including its return
// statement, and listing lines 271, 274, 277-279, 286, 291 and 296-297 are
// absent from this view.
262 MachineBasicBlock::iterator
AArch64FrameLowering::eliminateCallFramePseudoInstr(
263 MachineFunction
&MF
, MachineBasicBlock
&MBB
,
264 MachineBasicBlock::iterator I
) const {
265 const AArch64InstrInfo
*TII
=
266 static_cast<const AArch64InstrInfo
*>(MF
.getSubtarget().getInstrInfo());
267 DebugLoc DL
= I
->getDebugLoc();
268 unsigned Opc
= I
->getOpcode();
// The pseudo is a "destroy" when its opcode equals the target's call-frame
// destroy opcode; only then is operand 1 read as the callee-pop amount.
269 bool IsDestroy
= Opc
== TII
->getCallFrameDestroyOpcode();
270 uint64_t CalleePopAmount
= IsDestroy
? I
->getOperand(1).getImm() : 0;
// Without a reserved call frame the adjustment is derived from operand 0.
272 if (!hasReservedCallFrame(MF
)) {
273 unsigned Align
= getStackAlignment();
// Operand 0 is the raw adjustment; round it up to the stack alignment.
275 int64_t Amount
= I
->getOperand(0).getImm();
276 Amount
= alignTo(Amount
, Align
);
280 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
281 // doesn't have to pop anything), then the first operand will be zero too so
282 // this adjustment is a no-op.
283 if (CalleePopAmount
== 0) {
284 // FIXME: in-function stack adjustment for calls is limited to 24-bits
285 // because there's no guaranteed temporary register available.
287 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
288 // 1) For offset <= 12-bit, we use LSL #0
289 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
290 // LSL #0, and the other uses LSL #12.
292 // Most call frames will be allocated at the start of a function so
293 // this is OK, but it is a limitation that needs dealing with.
294 assert(Amount
> -0xffffff && Amount
< 0xffffff && "call frame too large");
// Adjust SP in place (destination and source are both SP) by Amount.
// NOTE(review): the remaining arguments of this call are not visible.
295 emitFrameOffset(MBB
, I
, DL
, AArch64::SP
, AArch64::SP
, {Amount
, MVT::i8
},
298 } else if (CalleePopAmount
!= 0) {
299 // If the calling convention demands that the callee pops arguments from the
300 // stack, we want to add it back if we have a reserved call frame.
301 assert(CalleePopAmount
< 0xffffff && "call frame too large");
// The offset passed is the negated callee-pop amount.
302 emitFrameOffset(MBB
, I
, DL
, AArch64::SP
, AArch64::SP
,
303 {-(int64_t)CalleePopAmount
, MVT::i8
}, TII
);
// Decide from the "sign-return-address" function attribute whether the
// return address of MF should be signed.
// \param MF  function being queried.
// NOTE(review): every return statement of this function is on a listing line
// absent from this view; the per-branch results noted below are taken from
// the bullet list in the comment that follows and should be confirmed.
308 static bool ShouldSignReturnAddress(MachineFunction
&MF
) {
309 // The function should be signed in the following situations:
310 // - sign-return-address=all
311 // - sign-return-address=non-leaf and the functions spills the LR
313 const Function
&F
= MF
.getFunction();
// No attribute at all (presumably: do not sign).
314 if (!F
.hasFnAttribute("sign-return-address"))
317 StringRef Scope
= F
.getFnAttribute("sign-return-address").getValueAsString();
318 if (Scope
.equals("none"))
321 if (Scope
.equals("all"))
// Any value other than "none"/"all" must be "non-leaf".
324 assert(Scope
.equals("non-leaf") && "Expected all, none or non-leaf");
// For "non-leaf", look for LR among the callee-saved registers recorded in
// the frame info.
326 for (const auto &Info
: MF
.getFrameInfo().getCalleeSavedInfo())
327 if (Info
.getReg() == AArch64::LR
)
// Emit one CFI "offset" directive per callee-saved register recorded in the
// frame info, inserting the CFI_INSTRUCTIONs before MBBI.
// \param MBB   block that receives the CFI instructions.
// \param MBBI  insertion point; also used to pick the debug location.
// NOTE(review): listing lines 344-346 and 349 are absent from this view (the
// declaration of Offset among them), so any early exit is not visible.
333 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
334 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
) const {
335 MachineFunction
&MF
= *MBB
.getParent();
336 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
337 const TargetSubtargetInfo
&STI
= MF
.getSubtarget();
338 const MCRegisterInfo
*MRI
= STI
.getRegisterInfo();
339 const TargetInstrInfo
*TII
= STI
.getInstrInfo();
340 DebugLoc DL
= MBB
.findDebugLoc(MBBI
);
342 // Add callee saved registers to move list.
343 const std::vector
<CalleeSavedInfo
> &CSI
= MFI
.getCalleeSavedInfo();
347 for (const auto &Info
: CSI
) {
348 unsigned Reg
= Info
.getReg();
// The expression below is the object offset of the register's frame index
// minus the offset of the local area (presumably assigned to Offset).
350 MFI
.getObjectOffset(Info
.getFrameIdx()) - getOffsetOfLocalArea();
// Translate to the DWARF register number; `true` is passed as the second
// argument of getDwarfRegNum.
351 unsigned DwarfReg
= MRI
->getDwarfRegNum(Reg
, true);
// Register the CFI directive with the function, then emit a
// CFI_INSTRUCTION referring to it, flagged as frame setup.
352 unsigned CFIIndex
= MF
.addFrameInst(
353 MCCFIInstruction::createOffset(nullptr, DwarfReg
, Offset
));
354 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::CFI_INSTRUCTION
))
355 .addCFIIndex(CFIIndex
)
356 .setMIFlags(MachineInstr::FrameSetup
);
360 // Find a scratch register that we can use at the start of the prologue to
361 // re-align the stack pointer. We avoid using callee-save registers since they
362 // may appear to be free when this is called from canUseAsPrologue (during
363 // shrink wrapping), but then no longer be free when this is called from
// NOTE(review): the end of the sentence above (listing line 364) is missing
// from this view -- presumably it names emitPrologue; confirm.
366 // FIXME: This is a bit conservative, since in the above case we could use one
367 // of the callee-save registers as a scratch temp to re-align the stack pointer,
368 // but we would then have to make sure that we were in fact saving at least one
369 // callee-save register in the prologue, which is additional complexity that
370 // doesn't seem worth the benefit.
// \param MBB  candidate prologue block.
// \returns    AArch64::NoRegister on the final visible return.
// NOTE(review): the returns guarded by the checks below (listing lines 376,
// 391, 395) are not visible; presumably X9 or the first available GPR64 is
// returned -- confirm.
371 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock
*MBB
) {
372 MachineFunction
*MF
= MBB
->getParent();
374 // If MBB is an entry block, use X9 as the scratch register
375 if (&MF
->front() == MBB
)
378 const AArch64Subtarget
&Subtarget
= MF
->getSubtarget
<AArch64Subtarget
>();
379 const AArch64RegisterInfo
&TRI
= *Subtarget
.getRegisterInfo();
// Start the live set from the block's live-in registers.
380 LivePhysRegs
LiveRegs(TRI
);
381 LiveRegs
.addLiveIns(*MBB
);
383 // Mark callee saved registers as used so we will not choose them.
// The loop stops at the first zero entry of the CSRegs array.
384 const MCPhysReg
*CSRegs
= MF
->getRegInfo().getCalleeSavedRegs();
385 for (unsigned i
= 0; CSRegs
[i
]; ++i
)
386 LiveRegs
.addReg(CSRegs
[i
]);
388 // Prefer X9 since it was historically used for the prologue scratch reg.
389 const MachineRegisterInfo
&MRI
= MF
->getRegInfo();
390 if (LiveRegs
.available(MRI
, AArch64::X9
))
// Otherwise test every register of the GPR64 class for availability.
393 for (unsigned Reg
: AArch64::GPR64RegClass
) {
394 if (LiveRegs
.available(MRI
, Reg
))
// Reached when no register was selected above.
397 return AArch64::NoRegister
;
// Report whether MBB can host the prologue.
// \param MBB  candidate block.
// \returns    on the final return below: true iff
//             findScratchNonCalleeSaveRegister() finds a register for MBB.
// NOTE(review): the statement guarded by the realignment check (listing line
// 409) is not visible; presumably it returns true -- confirm.
400 bool AArch64FrameLowering::canUseAsPrologue(
401 const MachineBasicBlock
&MBB
) const {
402 const MachineFunction
*MF
= MBB
.getParent();
// The helper takes a non-const block pointer, hence the const_cast.
403 MachineBasicBlock
*TmpMBB
= const_cast<MachineBasicBlock
*>(&MBB
);
404 const AArch64Subtarget
&Subtarget
= MF
->getSubtarget
<AArch64Subtarget
>();
405 const AArch64RegisterInfo
*RegInfo
= Subtarget
.getRegisterInfo();
407 // Don't need a scratch register if we're not going to re-align the stack.
408 if (!RegInfo
->needsStackRealignment(*MF
))
410 // Otherwise, we can use any block as long as it has a scratch register
412 return findScratchNonCalleeSaveRegister(TmpMBB
) != AArch64::NoRegister
;
// Report whether a stack allocation of StackSizeInBytes needs a stack probe.
// \param MF                function being lowered.
// \param StackSizeInBytes  size of the allocation being considered.
// \returns  on the final return below: true when the size is at least the
//           probe threshold and the function lacks "no-stack-arg-probe".
// NOTE(review): the statement guarded by the isTargetWindows() check (listing
// line 419) is not visible; presumably it returns false -- confirm.
415 static bool windowsRequiresStackProbe(MachineFunction
&MF
,
416 unsigned StackSizeInBytes
) {
417 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
418 if (!Subtarget
.isTargetWindows())
420 const Function
&F
= MF
.getFunction();
421 // TODO: When implementing stack protectors, take that into account
422 // for the probe threshold.
// Default threshold of 4096; the "stack-probe-size" attribute, when present,
// is parsed into StackProbeSize (radix argument 0).
// NOTE(review): listing line 426 of the call chain below is not visible.
423 unsigned StackProbeSize
= 4096;
424 if (F
.hasFnAttribute("stack-probe-size"))
425 F
.getFnAttribute("stack-probe-size")
427 .getAsInteger(0, StackProbeSize
);
428 return (StackSizeInBytes
>= StackProbeSize
) &&
429 !F
.hasFnAttribute("no-stack-arg-probe");
// Decide whether the callee-save area and the local area can be allocated
// with a single SP bump of StackBumpBytes.
// \param MF              function being lowered.
// \param StackBumpBytes  size of the combined bump under consideration.
// The visible checks are: zero local stack size; a bump of 512 bytes or more
// or one that needs a Windows stack probe; variable-sized objects; stack
// realignment; and red-zone eligibility.
// NOTE(review): the statement guarded by each check and the final return are
// on listing lines absent from this view; presumably each check returns
// false and the fall-through returns true -- confirm.
432 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
433 MachineFunction
&MF
, unsigned StackBumpBytes
) const {
434 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
435 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
436 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
437 const AArch64RegisterInfo
*RegInfo
= Subtarget
.getRegisterInfo();
439 if (AFI
->getLocalStackSize() == 0)
442 // 512 is the maximum immediate for stp/ldp that will be used for
443 // callee-save save/restores
444 if (StackBumpBytes
>= 512 || windowsRequiresStackProbe(MF
, StackBumpBytes
))
447 if (MFI
.hasVarSizedObjects())
450 if (RegInfo
->needsStackRealignment(MF
))
453 // This isn't strictly necessary, but it simplifies things a bit since the
454 // current RedZone handling code assumes the SP is adjusted by the
455 // callee-save save/restore code.
456 if (canUseRedZone(MF
))
462 // Given a load or a store instruction, generate an appropriate unwinding SEH
// NOTE(review): the end of the sentence above (listing line 463) is missing
// from this view.
// \param MBBI  iterator at the load/store to describe.
// \param TII   instruction info used to look up the SEH pseudo opcodes.
// \param Flag  MI flag parameter; the code that uses it is not visible here.
// The SEH pseudo is built detached (BuildMI on the function) and then placed
// immediately after MBBI by insertAfter at the end of the body.
// NOTE(review): the `switch` head, several `case` labels, the operands added
// to most builders, the `break`s and the final return are on listing lines
// absent from this view.
464 static MachineBasicBlock::iterator
InsertSEH(MachineBasicBlock::iterator MBBI
,
465 const TargetInstrInfo
&TII
,
466 MachineInstr::MIFlag Flag
) {
467 unsigned Opc
= MBBI
->getOpcode();
468 MachineBasicBlock
*MBB
= MBBI
->getParent();
469 MachineFunction
&MF
= *MBB
->getParent();
470 DebugLoc DL
= MBBI
->getDebugLoc();
// The immediate is read from the instruction's last operand.
471 unsigned ImmIdx
= MBBI
->getNumOperands() - 1;
472 int Imm
= MBBI
->getOperand(ImmIdx
).getImm();
473 MachineInstrBuilder MIB
;
474 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
475 const AArch64RegisterInfo
*RegInfo
= Subtarget
.getRegisterInfo();
// Opcodes without a matching SEH pseudo are a programming error.
479 llvm_unreachable("No SEH Opcode for this instruction");
// Pre/post-indexed forms: the saved registers are read from operands 1 and 2
// (operand 1 only for the single-register forms).
480 case AArch64::LDPDpost
:
483 case AArch64::STPDpre
: {
484 unsigned Reg0
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
485 unsigned Reg1
= RegInfo
->getSEHRegNum(MBBI
->getOperand(2).getReg());
486 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFRegP_X
))
493 case AArch64::LDPXpost
:
496 case AArch64::STPXpre
: {
497 Register Reg0
= MBBI
->getOperand(1).getReg();
498 Register Reg1
= MBBI
->getOperand(2).getReg();
// The FP/LR pair has its own pseudo; any other pair uses SEH_SaveRegP_X
// with the SEH register numbers of both registers.
499 if (Reg0
== AArch64::FP
&& Reg1
== AArch64::LR
)
500 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFPLR_X
))
504 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveRegP_X
))
505 .addImm(RegInfo
->getSEHRegNum(Reg0
))
506 .addImm(RegInfo
->getSEHRegNum(Reg1
))
511 case AArch64::LDRDpost
:
514 case AArch64::STRDpre
: {
515 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
516 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFReg_X
))
522 case AArch64::LDRXpost
:
525 case AArch64::STRXpre
: {
526 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
527 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveReg_X
))
// Plain-offset forms: the saved registers are read from operands 0 and 1
// (operand 0 only for the single-register forms).
534 case AArch64::LDPDi
: {
535 unsigned Reg0
= RegInfo
->getSEHRegNum(MBBI
->getOperand(0).getReg());
536 unsigned Reg1
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
537 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFRegP
))
545 case AArch64::LDPXi
: {
546 Register Reg0
= MBBI
->getOperand(0).getReg();
547 Register Reg1
= MBBI
->getOperand(1).getReg();
// Same FP/LR special case as above, for the non-writeback pseudos.
548 if (Reg0
== AArch64::FP
&& Reg1
== AArch64::LR
)
549 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFPLR
))
553 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveRegP
))
554 .addImm(RegInfo
->getSEHRegNum(Reg0
))
555 .addImm(RegInfo
->getSEHRegNum(Reg1
))
560 case AArch64::STRXui
:
561 case AArch64::LDRXui
: {
562 int Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(0).getReg());
563 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveReg
))
569 case AArch64::STRDui
:
570 case AArch64::LDRDui
: {
571 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(0).getReg());
572 MIB
= BuildMI(MF
, DL
, TII
.get(AArch64::SEH_SaveFReg
))
// Place the detached SEH pseudo directly after the instruction it describes.
579 auto I
= MBB
->insertAfter(MBBI
, MIB
);
583 // Fix up the SEH opcode associated with the save/restore instruction.
// \param MBBI            iterator at the SEH pseudo to patch.
// \param LocalStackSize  amount added to the pseudo's immediate operand.
// For the five non-writeback SEH save opcodes listed below, the immediate is
// the instruction's last operand.
// NOTE(review): the `default:` label, the `break`s and any guard around the
// final setImm are on listing lines absent from this view.
584 static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI
,
585 unsigned LocalStackSize
) {
586 MachineOperand
*ImmOpnd
= nullptr;
587 unsigned ImmIdx
= MBBI
->getNumOperands() - 1;
588 switch (MBBI
->getOpcode()) {
590 llvm_unreachable("Fix the offset in the SEH instruction");
591 case AArch64::SEH_SaveFPLR
:
592 case AArch64::SEH_SaveRegP
:
593 case AArch64::SEH_SaveReg
:
594 case AArch64::SEH_SaveFRegP
:
595 case AArch64::SEH_SaveFReg
:
596 ImmOpnd
= &MBBI
->getOperand(ImmIdx
);
// Add the local stack size to the recorded immediate.
600 ImmOpnd
->setImm(ImmOpnd
->getImm() + LocalStackSize
);
603 // Convert callee-save register save/restore instruction to do stack pointer
604 // decrement/increment to allocate/deallocate the callee-save stack area by
605 // converting store/load to use pre/post increment version.
// \param MBB             block containing the instruction.
// \param MBBI            iterator at the first candidate instruction.
// \param DL              debug location for the replacement instruction.
// \param TII             instruction info used to build the replacement.
// \param CSStackSizeInc  SP change in bytes; asserted to be a multiple of
//                        Scale and divided by it for the immediate.
// \param NeedsWinCFI     parameter; the code that tests it is not visible.
// \param HasWinCFI       out-parameter; the code that sets it is not visible.
// \param InProlog        selects FrameSetup (true) or FrameDestroy (false)
//                        as the flag passed to InsertSEH.
// \returns std::prev of the iterator returned by erasing the original
//          instruction.
// NOTE(review): the declarations of NewOpc and Scale, the `case` labels of
// the pair opcodes, the Scale assignments and the `break`s are on listing
// lines absent from this view.
606 static MachineBasicBlock::iterator
convertCalleeSaveRestoreToSPPrePostIncDec(
607 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MBBI
,
608 const DebugLoc
&DL
, const TargetInstrInfo
*TII
, int CSStackSizeInc
,
609 bool NeedsWinCFI
, bool *HasWinCFI
, bool InProlog
= true) {
610 // Ignore instructions that do not operate on SP, i.e. shadow call stack
611 // instructions and associated CFI instruction.
612 while (MBBI
->getOpcode() == AArch64::STRXpost
||
613 MBBI
->getOpcode() == AArch64::LDRXpre
||
614 MBBI
->getOpcode() == AArch64::CFI_INSTRUCTION
) {
// For the two load/store opcodes, operand 0 must not be SP.
615 if (MBBI
->getOpcode() != AArch64::CFI_INSTRUCTION
)
616 assert(MBBI
->getOperand(0).getReg() != AArch64::SP
);
// Map the opcode to its writeback form: stores become pre-indexed, loads
// become post-indexed.
621 switch (MBBI
->getOpcode()) {
623 llvm_unreachable("Unexpected callee-save save/restore opcode!");
625 NewOpc
= AArch64::STPXpre
;
629 NewOpc
= AArch64::STPDpre
;
633 NewOpc
= AArch64::STPQpre
;
636 case AArch64::STRXui
:
637 NewOpc
= AArch64::STRXpre
;
639 case AArch64::STRDui
:
640 NewOpc
= AArch64::STRDpre
;
642 case AArch64::STRQui
:
643 NewOpc
= AArch64::STRQpre
;
646 NewOpc
= AArch64::LDPXpost
;
650 NewOpc
= AArch64::LDPDpost
;
654 NewOpc
= AArch64::LDPQpost
;
657 case AArch64::LDRXui
:
658 NewOpc
= AArch64::LDRXpost
;
660 case AArch64::LDRDui
:
661 NewOpc
= AArch64::LDRDpost
;
663 case AArch64::LDRQui
:
664 NewOpc
= AArch64::LDRQpost
;
667 // Get rid of the SEH code associated with the old instruction.
// NOTE(review): the guard around this erase (listing line 668) is not
// visible; presumably it is conditional on NeedsWinCFI -- confirm.
669 auto SEH
= std::next(MBBI
);
670 if (AArch64InstrInfo::isSEHInstruction(*SEH
))
671 SEH
->eraseFromParent();
// Build the replacement before MBBI; its first operand is SP as a def.
674 MachineInstrBuilder MIB
= BuildMI(MBB
, MBBI
, DL
, TII
->get(NewOpc
));
675 MIB
.addReg(AArch64::SP
, RegState::Define
);
677 // Copy all operands other than the immediate offset.
678 unsigned OpndIdx
= 0;
679 for (unsigned OpndEnd
= MBBI
->getNumOperands() - 1; OpndIdx
< OpndEnd
;
681 MIB
.add(MBBI
->getOperand(OpndIdx
));
// After the loop OpndIdx indexes the last operand: its immediate must be 0
// and the operand just before it must be SP.
683 assert(MBBI
->getOperand(OpndIdx
).getImm() == 0 &&
684 "Unexpected immediate offset in first/last callee-save save/restore "
686 assert(MBBI
->getOperand(OpndIdx
- 1).getReg() == AArch64::SP
&&
687 "Unexpected base register in callee-save save/restore instruction!");
// The new immediate is the byte increment divided by Scale.
688 assert(CSStackSizeInc
% Scale
== 0);
689 MIB
.addImm(CSStackSizeInc
/ Scale
);
// Carry the original instruction's flags and memory operands over.
691 MIB
.setMIFlags(MBBI
->getFlags());
692 MIB
.setMemRefs(MBBI
->memoperands());
694 // Generate a new SEH code that corresponds to the new instruction.
697 InsertSEH(*MIB
, *TII
,
698 InProlog
? MachineInstr::FrameSetup
: MachineInstr::FrameDestroy
);
// Erase the original instruction and return the iterator just before the
// erase result.
701 return std::prev(MBB
.erase(MBBI
));
704 // Fixup callee-save register save/restore instructions to take into account
705 // combined SP bump by adding the local stack size to the stack offsets.
// \param MI              instruction to patch.
// \param LocalStackSize  byte amount; asserted to be a multiple of Scale and
//                        divided by it before being added to the immediate.
// NOTE(review): the rest of the parameter list (listing lines 708-709), the
// statements guarded by the early checks, the declaration and assignments of
// Scale, part of the `switch` and the guard around the SEH fix-up are on
// listing lines absent from this view.
706 static void fixupCalleeSaveRestoreStackOffset(MachineInstr
&MI
,
707 unsigned LocalStackSize
,
// SEH pseudos are tested for first (presumably skipped; the guarded
// statement is not visible).
710 if (AArch64InstrInfo::isSEHInstruction(MI
))
713 unsigned Opc
= MI
.getOpcode();
715 // Ignore instructions that do not operate on SP, i.e. shadow call stack
716 // instructions and associated CFI instruction.
717 if (Opc
== AArch64::STRXpost
|| Opc
== AArch64::LDRXpre
||
718 Opc
== AArch64::CFI_INSTRUCTION
) {
719 if (Opc
!= AArch64::CFI_INSTRUCTION
)
720 assert(MI
.getOperand(0).getReg() != AArch64::SP
);
727 case AArch64::STRXui
:
729 case AArch64::STRDui
:
731 case AArch64::LDRXui
:
733 case AArch64::LDRDui
:
737 case AArch64::STRQui
:
739 case AArch64::LDRQui
:
743 llvm_unreachable("Unexpected callee-save save/restore opcode!");
// The offset is the last explicit operand; the operand before it must be SP.
746 unsigned OffsetIdx
= MI
.getNumExplicitOperands() - 1;
747 assert(MI
.getOperand(OffsetIdx
- 1).getReg() == AArch64::SP
&&
748 "Unexpected base register in callee-save save/restore instruction!");
749 // Last operand is immediate offset that needs fixing.
750 MachineOperand
&OffsetOpnd
= MI
.getOperand(OffsetIdx
);
751 // All generated opcodes have scaled offsets.
752 assert(LocalStackSize
% Scale
== 0);
753 OffsetOpnd
.setImm(OffsetOpnd
.getImm() + LocalStackSize
/ Scale
);
// The instruction following MI must exist and be an SEH pseudo; its
// immediate is patched with the same LocalStackSize.
757 auto MBBI
= std::next(MachineBasicBlock::iterator(MI
));
758 assert(MBBI
!= MI
.getParent()->end() && "Expecting a valid instruction");
759 assert(AArch64InstrInfo::isSEHInstruction(*MBBI
) &&
760 "Expecting a SEH instruction");
761 fixupSEHOpcode(MBBI
, LocalStackSize
);
// Optionally reorder the callee-save restore sequence so the first pop ends
// up last (see the worked example in the comments below).
// \param MBB          block containing the restore sequence.
// \param FirstSPPopI  first argument of the splice (the insertion position).
// \param LastPopI     third argument of the splice (the instruction moved).
// The splice runs only when the ReverseCSRRestoreSeq option is set.
// NOTE(review): some lines of the example comments (listing lines 770,
// 775-776, 779, 784, 789-790, 792) are absent from this view.
765 static void adaptForLdStOpt(MachineBasicBlock
&MBB
,
766 MachineBasicBlock::iterator FirstSPPopI
,
767 MachineBasicBlock::iterator LastPopI
) {
768 // Sometimes (when we restore in the same order as we save), we can end up
769 // with code like this:
771 // ldp x26, x25, [sp]
772 // ldp x24, x23, [sp, #16]
773 // ldp x22, x21, [sp, #32]
774 // ldp x20, x19, [sp, #48]
777 // In this case, it is always better to put the first ldp at the end, so
778 // that the load-store optimizer can run and merge the ldp and the add into
780 // If we managed to grab the first pop instruction, move it to the end.
781 if (ReverseCSRRestoreSeq
)
782 MBB
.splice(FirstSPPopI
, &MBB
, LastPopI
);
783 // We should end up with something like this now:
785 // ldp x24, x23, [sp, #16]
786 // ldp x22, x21, [sp, #32]
787 // ldp x20, x19, [sp, #48]
788 // ldp x26, x25, [sp]
791 // and the load-store optimizer can merge the last two instructions into:
793 // ldp x26, x25, [sp], #64
// Report whether return-address signing should use the A key, based on the
// "sign-return-address-key" function attribute.
// \param MF  function being queried.
// \returns   on the final return below: true iff the attribute value equals
//            "a_key", compared case-insensitively.
// NOTE(review): the statement guarded by the missing-attribute check (listing
// lines 800-801) is not visible; presumably it returns true -- confirm.
797 static bool ShouldSignWithAKey(MachineFunction
&MF
) {
798 const Function
&F
= MF
.getFunction();
799 if (!F
.hasFnAttribute("sign-return-address-key"))
802 const StringRef Key
=
803 F
.getFnAttribute("sign-return-address-key").getValueAsString();
// Only "a_key" and "b_key" (any letter case) are accepted.
804 assert(Key
.equals_lower("a_key") || Key
.equals_lower("b_key"));
805 return Key
.equals_lower("a_key");
// Report whether Windows CFI must be emitted for MF.
// \param MF  function being queried.
// \returns   true iff the target's MCAsmInfo uses Windows CFI and the
//            function needs an unwind table entry.
808 static bool needsWinCFI(const MachineFunction
&MF
) {
809 const Function
&F
= MF
.getFunction();
810 return MF
.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
811 F
.needsUnwindTableEntry();
// Convenience wrapper: forwards to isTargetDarwin() on MF's AArch64Subtarget.
// \param MF  function being queried.
814 static bool isTargetDarwin(const MachineFunction
&MF
) {
815 return MF
.getSubtarget
<AArch64Subtarget
>().isTargetDarwin();
818 void AArch64FrameLowering::emitPrologue(MachineFunction
&MF
,
819 MachineBasicBlock
&MBB
) const {
820 MachineBasicBlock::iterator MBBI
= MBB
.begin();
821 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
822 const Function
&F
= MF
.getFunction();
823 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
824 const AArch64RegisterInfo
*RegInfo
= Subtarget
.getRegisterInfo();
825 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
826 MachineModuleInfo
&MMI
= MF
.getMMI();
827 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
828 bool needsFrameMoves
= (MMI
.hasDebugInfo() || F
.needsUnwindTableEntry()) &&
829 !MF
.getTarget().getMCAsmInfo()->usesWindowsCFI();
830 bool HasFP
= hasFP(MF
);
831 bool NeedsWinCFI
= needsWinCFI(MF
);
832 bool HasWinCFI
= false;
833 auto Cleanup
= make_scope_exit([&]() { MF
.setHasWinCFI(HasWinCFI
); });
835 bool IsFunclet
= MBB
.isEHFuncletEntry();
837 // At this point, we're going to decide whether or not the function uses a
838 // redzone. In most cases, the function doesn't have a redzone so let's
839 // assume that's false and set it to true in the case that there's a redzone.
840 AFI
->setHasRedZone(false);
842 // Debug location must be unknown since the first debug location is used
843 // to determine the end of the prologue.
846 if (ShouldSignReturnAddress(MF
)) {
847 if (ShouldSignWithAKey(MF
))
848 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::PACIASP
))
849 .setMIFlag(MachineInstr::FrameSetup
);
851 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::EMITBKEY
))
852 .setMIFlag(MachineInstr::FrameSetup
);
853 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::PACIBSP
))
854 .setMIFlag(MachineInstr::FrameSetup
);
858 MF
.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
859 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::CFI_INSTRUCTION
))
860 .addCFIIndex(CFIIndex
)
861 .setMIFlags(MachineInstr::FrameSetup
);
864 // All calls are tail calls in GHC calling conv, and functions have no
865 // prologue/epilogue.
866 if (MF
.getFunction().getCallingConv() == CallingConv::GHC
)
869 // Set tagged base pointer to the bottom of the stack frame.
870 // Ideally it should match SP value after prologue.
871 AFI
->setTaggedBasePointerOffset(MFI
.getStackSize());
873 // getStackSize() includes all the locals in its size calculation. We don't
874 // include these locals when computing the stack size of a funclet, as they
875 // are allocated in the parent's stack frame and accessed via the frame
876 // pointer from the funclet. We only save the callee saved registers in the
877 // funclet, which are really the callee saved registers of the parent
878 // function, including the funclet.
879 int NumBytes
= IsFunclet
? (int)getWinEHFuncletFrameSize(MF
)
880 : (int)MFI
.getStackSize();
881 if (!AFI
->hasStackFrame() && !windowsRequiresStackProbe(MF
, NumBytes
)) {
882 assert(!HasFP
&& "unexpected function without stack frame but with FP");
883 // All of the stack allocation is for locals.
884 AFI
->setLocalStackSize(NumBytes
);
887 // REDZONE: If the stack size is less than 128 bytes, we don't need
888 // to actually allocate.
889 if (canUseRedZone(MF
)) {
890 AFI
->setHasRedZone(true);
891 ++NumRedZoneFunctions
;
893 emitFrameOffset(MBB
, MBBI
, DL
, AArch64::SP
, AArch64::SP
,
894 {-NumBytes
, MVT::i8
}, TII
, MachineInstr::FrameSetup
,
895 false, NeedsWinCFI
, &HasWinCFI
);
897 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
898 MCSymbol
*FrameLabel
= MMI
.getContext().createTempSymbol();
899 // Encode the stack size of the leaf function.
900 unsigned CFIIndex
= MF
.addFrameInst(
901 MCCFIInstruction::createDefCfaOffset(FrameLabel
, -NumBytes
));
902 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::CFI_INSTRUCTION
))
903 .addCFIIndex(CFIIndex
)
904 .setMIFlags(MachineInstr::FrameSetup
);
910 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_PrologEnd
))
911 .setMIFlag(MachineInstr::FrameSetup
);
918 Subtarget
.isCallingConvWin64(MF
.getFunction().getCallingConv());
919 // Var args are accounted for in the containing function, so don't
920 // include them for funclets.
921 unsigned FixedObject
= (IsWin64
&& !IsFunclet
) ?
922 alignTo(AFI
->getVarArgsGPRSize(), 16) : 0;
924 auto PrologueSaveSize
= AFI
->getCalleeSavedStackSize() + FixedObject
;
925 // All of the remaining stack allocations are for locals.
926 AFI
->setLocalStackSize(NumBytes
- PrologueSaveSize
);
927 bool CombineSPBump
= shouldCombineCSRLocalStackBump(MF
, NumBytes
);
929 emitFrameOffset(MBB
, MBBI
, DL
, AArch64::SP
, AArch64::SP
,
930 {-NumBytes
, MVT::i8
}, TII
, MachineInstr::FrameSetup
, false,
931 NeedsWinCFI
, &HasWinCFI
);
933 } else if (PrologueSaveSize
!= 0) {
934 MBBI
= convertCalleeSaveRestoreToSPPrePostIncDec(
935 MBB
, MBBI
, DL
, TII
, -PrologueSaveSize
, NeedsWinCFI
, &HasWinCFI
);
936 NumBytes
-= PrologueSaveSize
;
938 assert(NumBytes
>= 0 && "Negative stack allocation size!?");
940 // Move past the saves of the callee-saved registers, fixing up the offsets
941 // and pre-inc if we decided to combine the callee-save and local stack
942 // pointer bump above.
943 MachineBasicBlock::iterator End
= MBB
.end();
944 while (MBBI
!= End
&& MBBI
->getFlag(MachineInstr::FrameSetup
)) {
946 fixupCalleeSaveRestoreStackOffset(*MBBI
, AFI
->getLocalStackSize(),
947 NeedsWinCFI
, &HasWinCFI
);
951 // The code below is not applicable to funclets. We have emitted all the SEH
952 // opcodes that we needed to emit. The FP and BP belong to the containing
957 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_PrologEnd
))
958 .setMIFlag(MachineInstr::FrameSetup
);
961 // SEH funclets are passed the frame pointer in X1. If the parent
962 // function uses the base register, then the base register is used
963 // directly, and is not retrieved from X1.
964 if (F
.hasPersonalityFn()) {
965 EHPersonality Per
= classifyEHPersonality(F
.getPersonalityFn());
966 if (isAsynchronousEHPersonality(Per
)) {
967 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::COPY
), AArch64::FP
)
968 .addReg(AArch64::X1
).setMIFlag(MachineInstr::FrameSetup
);
969 MBB
.addLiveIn(AArch64::X1
);
977 // Only set up FP if we actually need to.
978 int FPOffset
= isTargetDarwin(MF
) ? (AFI
->getCalleeSavedStackSize() - 16) : 0;
981 FPOffset
+= AFI
->getLocalStackSize();
983 // Issue sub fp, sp, FPOffset or
984 // mov fp,sp when FPOffset is zero.
985 // Note: All stores of callee-saved registers are marked as "FrameSetup".
986 // This code marks the instruction(s) that set the FP also.
987 emitFrameOffset(MBB
, MBBI
, DL
, AArch64::FP
, AArch64::SP
,
988 {FPOffset
, MVT::i8
}, TII
, MachineInstr::FrameSetup
, false,
989 NeedsWinCFI
, &HasWinCFI
);
992 if (windowsRequiresStackProbe(MF
, NumBytes
)) {
993 uint32_t NumWords
= NumBytes
>> 4;
996 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
997 // exceed this amount. We need to move at most 2^24 - 1 into x15.
998 // This is at most two instructions, MOVZ followed by MOVK.
999 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1000 // exceeding 256MB in size.
1001 if (NumBytes
>= (1 << 28))
1002 report_fatal_error("Stack size cannot exceed 256MB for stack "
1003 "unwinding purposes");
1005 uint32_t LowNumWords
= NumWords
& 0xFFFF;
1006 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::MOVZXi
), AArch64::X15
)
1007 .addImm(LowNumWords
)
1008 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 0))
1009 .setMIFlag(MachineInstr::FrameSetup
);
1010 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1011 .setMIFlag(MachineInstr::FrameSetup
);
1012 if ((NumWords
& 0xFFFF0000) != 0) {
1013 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::MOVKXi
), AArch64::X15
)
1014 .addReg(AArch64::X15
)
1015 .addImm((NumWords
& 0xFFFF0000) >> 16) // High half
1016 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL
, 16))
1017 .setMIFlag(MachineInstr::FrameSetup
);
1018 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1019 .setMIFlag(MachineInstr::FrameSetup
);
1022 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::MOVi64imm
), AArch64::X15
)
1024 .setMIFlags(MachineInstr::FrameSetup
);
1027 switch (MF
.getTarget().getCodeModel()) {
1028 case CodeModel::Tiny
:
1029 case CodeModel::Small
:
1030 case CodeModel::Medium
:
1031 case CodeModel::Kernel
:
1032 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::BL
))
1033 .addExternalSymbol("__chkstk")
1034 .addReg(AArch64::X15
, RegState::Implicit
)
1035 .addReg(AArch64::X16
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1036 .addReg(AArch64::X17
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1037 .addReg(AArch64::NZCV
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1038 .setMIFlags(MachineInstr::FrameSetup
);
1041 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1042 .setMIFlag(MachineInstr::FrameSetup
);
1045 case CodeModel::Large
:
1046 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::MOVaddrEXT
))
1047 .addReg(AArch64::X16
, RegState::Define
)
1048 .addExternalSymbol("__chkstk")
1049 .addExternalSymbol("__chkstk")
1050 .setMIFlags(MachineInstr::FrameSetup
);
1053 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1054 .setMIFlag(MachineInstr::FrameSetup
);
1057 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::BLR
))
1058 .addReg(AArch64::X16
, RegState::Kill
)
1059 .addReg(AArch64::X15
, RegState::Implicit
| RegState::Define
)
1060 .addReg(AArch64::X16
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1061 .addReg(AArch64::X17
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1062 .addReg(AArch64::NZCV
, RegState::Implicit
| RegState::Define
| RegState::Dead
)
1063 .setMIFlags(MachineInstr::FrameSetup
);
1066 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1067 .setMIFlag(MachineInstr::FrameSetup
);
1072 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SUBXrx64
), AArch64::SP
)
1073 .addReg(AArch64::SP
, RegState::Kill
)
1074 .addReg(AArch64::X15
, RegState::Kill
)
1075 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX
, 4))
1076 .setMIFlags(MachineInstr::FrameSetup
);
1079 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_StackAlloc
))
1081 .setMIFlag(MachineInstr::FrameSetup
);
1086 // Allocate space for the rest of the frame.
1088 const bool NeedsRealignment
= RegInfo
->needsStackRealignment(MF
);
1089 unsigned scratchSPReg
= AArch64::SP
;
1091 if (NeedsRealignment
) {
1092 scratchSPReg
= findScratchNonCalleeSaveRegister(&MBB
);
1093 assert(scratchSPReg
!= AArch64::NoRegister
);
1096 // If we're a leaf function, try using the red zone.
1097 if (!canUseRedZone(MF
))
1098 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1099 // the correct value here, as NumBytes also includes padding bytes,
1100 // which shouldn't be counted here.
1101 emitFrameOffset(MBB
, MBBI
, DL
, scratchSPReg
, AArch64::SP
,
1102 {-NumBytes
, MVT::i8
}, TII
, MachineInstr::FrameSetup
,
1103 false, NeedsWinCFI
, &HasWinCFI
);
1105 if (NeedsRealignment
) {
1106 const unsigned Alignment
= MFI
.getMaxAlignment();
1107 const unsigned NrBitsToZero
= countTrailingZeros(Alignment
);
1108 assert(NrBitsToZero
> 1);
1109 assert(scratchSPReg
!= AArch64::SP
);
1111 // SUB X9, SP, NumBytes
1112 // -- X9 is temporary register, so shouldn't contain any live data here,
1113 // -- free to use. This is already produced by emitFrameOffset above.
1114 // AND SP, X9, 0b11111...0000
1115 // The logical immediates have a non-trivial encoding. The following
1116 // formula computes the encoded immediate with all ones but
1117 // NrBitsToZero zero bits as least significant bits.
1118 uint32_t andMaskEncoded
= (1 << 12) // = N
1119 | ((64 - NrBitsToZero
) << 6) // immr
1120 | ((64 - NrBitsToZero
- 1) << 0); // imms
1122 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::ANDXri
), AArch64::SP
)
1123 .addReg(scratchSPReg
, RegState::Kill
)
1124 .addImm(andMaskEncoded
);
1125 AFI
->setStackRealigned(true);
1128 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_StackAlloc
))
1129 .addImm(NumBytes
& andMaskEncoded
)
1130 .setMIFlag(MachineInstr::FrameSetup
);
1135 // If we need a base pointer, set it up here. It's whatever the value of the
1136 // stack pointer is at this point. Any variable size objects will be allocated
1137 // after this, so we can still use the base pointer to reference locals.
1139 // FIXME: Clarify FrameSetup flags here.
1140 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
1142 if (RegInfo
->hasBasePointer(MF
)) {
1143 TII
->copyPhysReg(MBB
, MBBI
, DL
, RegInfo
->getBaseRegister(), AArch64::SP
,
1147 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_Nop
))
1148 .setMIFlag(MachineInstr::FrameSetup
);
1152 // The very last FrameSetup instruction indicates the end of prologue. Emit a
1153 // SEH opcode indicating the prologue end.
1154 if (NeedsWinCFI
&& HasWinCFI
) {
1155 BuildMI(MBB
, MBBI
, DL
, TII
->get(AArch64::SEH_PrologEnd
))
1156 .setMIFlag(MachineInstr::FrameSetup
);
1159 if (needsFrameMoves
) {
1160 const DataLayout
&TD
= MF
.getDataLayout();
1161 const int StackGrowth
= isTargetDarwin(MF
)
1162 ? (2 * -TD
.getPointerSize(0))
1163 : -AFI
->getCalleeSavedStackSize();
1164 Register FramePtr
= RegInfo
->getFrameRegister(MF
);
1165 // An example of the prologue:
1172 // .cfi_personality 155, ___gxx_personality_v0
1174 // .cfi_lsda 16, Lexception33
1176 // stp xa,bx, [sp, -#offset]!
1178 // stp x28, x27, [sp, #offset-32]
1179 // stp fp, lr, [sp, #offset-16]
1180 // add fp, sp, #offset - 16
1181 // sub sp, sp, #1360
1184 // +-------------------------------------------+
1185 // 10000 | ........ | ........ | ........ | ........ |
1186 // 10004 | ........ | ........ | ........ | ........ |
1187 // +-------------------------------------------+
1188 // 10008 | ........ | ........ | ........ | ........ |
1189 // 1000c | ........ | ........ | ........ | ........ |
1190 // +===========================================+
1191 // 10010 | X28 Register |
1192 // 10014 | X28 Register |
1193 // +-------------------------------------------+
1194 // 10018 | X27 Register |
1195 // 1001c | X27 Register |
1196 // +===========================================+
1197 // 10020 | Frame Pointer |
1198 // 10024 | Frame Pointer |
1199 // +-------------------------------------------+
1200 // 10028 | Link Register |
1201 // 1002c | Link Register |
1202 // +===========================================+
1203 // 10030 | ........ | ........ | ........ | ........ |
1204 // 10034 | ........ | ........ | ........ | ........ |
1205 // +-------------------------------------------+
1206 // 10038 | ........ | ........ | ........ | ........ |
1207 // 1003c | ........ | ........ | ........ | ........ |
1208 // +-------------------------------------------+
1210 // [sp] = 10030 :: >>initial value<<
1211 // sp = 10020 :: stp fp, lr, [sp, #-16]!
1212 // fp = sp == 10020 :: mov fp, sp
1213 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
1214 // sp == 10010 :: >>final value<<
1216 // The frame pointer (w29) points to address 10020. If we use an offset of
1217 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
1218 // for w27, and -32 for w28:
1221 // .cfi_def_cfa w29, 16
1223 // .cfi_offset w30, -8
1225 // .cfi_offset w29, -16
1227 // .cfi_offset w27, -24
1229 // .cfi_offset w28, -32
1232 // Define the current CFA rule to use the provided FP.
1233 unsigned Reg
= RegInfo
->getDwarfRegNum(FramePtr
, true);
1234 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createDefCfa(
1235 nullptr, Reg
, StackGrowth
- FixedObject
));
1236 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::CFI_INSTRUCTION
))
1237 .addCFIIndex(CFIIndex
)
1238 .setMIFlags(MachineInstr::FrameSetup
);
1240 // Encode the stack size of the leaf function.
1241 unsigned CFIIndex
= MF
.addFrameInst(
1242 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI
.getStackSize()));
1243 BuildMI(MBB
, MBBI
, DL
, TII
->get(TargetOpcode::CFI_INSTRUCTION
))
1244 .addCFIIndex(CFIIndex
)
1245 .setMIFlags(MachineInstr::FrameSetup
);
1248 // Now emit the moves for whatever callee saved regs we have (including FP,
1249 // LR if those are saved).
1250 emitCalleeSavedFrameMoves(MBB
, MBBI
);
// Insert return-address authentication in front of MBB's first terminator.
// The work is guarded by ShouldSignReturnAddress(MF). Two forms are built in
// the visible code: a combined RETAA/RETAB when the subtarget has v8.3a ops
// and the terminator is RET_ReallyLR, and AUTIASP/AUTIBSP flagged as
// FrameDestroy. The A or B key is selected by ShouldSignWithAKey(MF).
1254 static void InsertReturnAddressAuth(MachineFunction
&MF
,
1255 MachineBasicBlock
&MBB
) {
1256 if (!ShouldSignReturnAddress(MF
))
1258 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
1259 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
// New instructions are placed before the block's first terminator and take
// that terminator's debug location when one exists.
1261 MachineBasicBlock::iterator MBBI
= MBB
.getFirstTerminator();
1263 if (MBBI
!= MBB
.end())
1264 DL
= MBBI
->getDebugLoc();
1266 // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1267 // this instruction can safely be used for any v8a architecture.
1268 // From v8.3a onwards there are optimised authenticate LR and return
1269 // instructions, namely RETA{A,B}, that can be used instead.
1270 if (Subtarget
.hasV8_3aOps() && MBBI
!= MBB
.end() &&
1271 MBBI
->getOpcode() == AArch64::RET_ReallyLR
) {
// The combined return inherits the implicit operands of the original return.
1272 BuildMI(MBB
, MBBI
, DL
,
1273 TII
->get(ShouldSignWithAKey(MF
) ? AArch64::RETAA
: AArch64::RETAB
))
1274 .copyImplicitOps(*MBBI
);
1279 TII
->get(ShouldSignWithAKey(MF
) ? AArch64::AUTIASP
: AArch64::AUTIBSP
))
1280 .setMIFlag(MachineInstr::FrameDestroy
);
// Opcode-based predicate over MI. CATCHRET and CLEANUPRET are the opcodes
// singled out by the switch; emitEpilogue uses the result as IsFunclet.
1284 static bool isFuncletReturnInstr(const MachineInstr
&MI
) {
1285 switch (MI
.getOpcode()) {
1288 case AArch64::CATCHRET
:
1289 case AArch64::CLEANUPRET
:
// Emit the epilogue for MBB. From the visible code this function:
//  - classifies the block's last non-debug instruction as a tail-call return
//    (TCRETURNdi / TCRETURNri / TCRETURNriBTI) and/or a funclet return;
//  - sizes the frame from getWinEHFuncletFrameSize() for funclets and from
//    MFI.getStackSize() otherwise;
//  - converts or re-offsets the callee-save restores depending on whether the
//    CSR and local-stack SP bumps are combined
//    (shouldCombineCSRLocalStackBump);
//  - emits SP adjustments flagged FrameDestroy, together with the
//    SEH_EpilogStart / SEH_EpilogEnd markers used for Windows unwind info.
// Two scope-exit guards are used: WinCFI publishes HasWinCFI to MF if MF does
// not already report WinCFI, and Cleanup calls InsertReturnAddressAuth on
// every exit taken after the guard has been constructed.
1294 void AArch64FrameLowering::emitEpilogue(MachineFunction
&MF
,
1295 MachineBasicBlock
&MBB
) const {
1296 MachineBasicBlock::iterator MBBI
= MBB
.getLastNonDebugInstr();
1297 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1298 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
1299 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
1301 bool IsTailCallReturn
= false;
1302 bool NeedsWinCFI
= needsWinCFI(MF
);
1303 bool HasWinCFI
= false;
1304 bool IsFunclet
= false;
1305 auto WinCFI
= make_scope_exit([&]() {
1306 if (!MF
.hasWinCFI())
1307 MF
.setHasWinCFI(HasWinCFI
);
// Inspect the last non-debug instruction, when there is one, to classify the
// kind of return this block performs.
1310 if (MBB
.end() != MBBI
) {
1311 DL
= MBBI
->getDebugLoc();
1312 unsigned RetOpcode
= MBBI
->getOpcode();
1313 IsTailCallReturn
= RetOpcode
== AArch64::TCRETURNdi
||
1314 RetOpcode
== AArch64::TCRETURNri
||
1315 RetOpcode
== AArch64::TCRETURNriBTI
;
1316 IsFunclet
= isFuncletReturnInstr(*MBBI
);
1319 int NumBytes
= IsFunclet
? (int)getWinEHFuncletFrameSize(MF
)
1320 : MFI
.getStackSize();
1321 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
1323 // All calls are tail calls in GHC calling conv, and functions have no
1324 // prologue/epilogue.
1325 if (MF
.getFunction().getCallingConv() == CallingConv::GHC
)
1328 // Initial and residual are named for consistency with the prologue. Note that
1329 // in the epilogue, the residual adjustment is executed first.
1330 uint64_t ArgumentPopSize
= 0;
1331 if (IsTailCallReturn
) {
1332 MachineOperand
&StackAdjust
= MBBI
->getOperand(1);
1334 // For a tail-call in a callee-pops-arguments environment, some or all of
1335 // the stack may actually be in use for the call's arguments, this is
1336 // calculated during LowerCall and consumed here...
1337 ArgumentPopSize
= StackAdjust
.getImm();
1339 // ... otherwise the amount to pop is *all* of the argument space,
1340 // conveniently stored in the MachineFunctionInfo by
1341 // LowerFormalArguments. This will, of course, be zero for the C calling
1343 ArgumentPopSize
= AFI
->getArgumentStackToRestore();
1346 // The stack frame should be like below,
1348 // ---------------------- ---
1350 // | BytesInStackArgArea| CalleeArgStackSize
1351 // | (NumReusableBytes) | (of tail call)
1354 // ---------------------| --- |
1356 // | CalleeSavedReg | | |
1357 // | (CalleeSavedStackSize)| | |
1359 // ---------------------| | NumBytes
1360 // | | StackSize (StackAdjustUp)
1361 // | LocalStackSize | | |
1362 // | (covering callee | | |
1365 // ---------------------- --- ---
1367 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1368 // = StackSize + ArgumentPopSize
1370 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1371 // it as the 2nd argument of AArch64ISD::TC_RETURN.
// Defer return-address authentication until this function exits.
1373 auto Cleanup
= make_scope_exit([&] { InsertReturnAddressAuth(MF
, MBB
); });
1376 Subtarget
.isCallingConvWin64(MF
.getFunction().getCallingConv());
1377 // Var args are accounted for in the containing function, so don't
1378 // include them for funclets.
1379 unsigned FixedObject
=
1380 (IsWin64
&& !IsFunclet
) ? alignTo(AFI
->getVarArgsGPRSize(), 16) : 0;
1382 uint64_t AfterCSRPopSize
= ArgumentPopSize
;
1383 auto PrologueSaveSize
= AFI
->getCalleeSavedStackSize() + FixedObject
;
1384 // We cannot rely on the local stack size set in emitPrologue if the function
1385 // has funclets, as funclets have different local stack size requirements, and
1386 // the current value set in emitPrologue may be that of the containing
1388 if (MF
.hasEHFunclets())
1389 AFI
->setLocalStackSize(NumBytes
- PrologueSaveSize
);
1390 bool CombineSPBump
= shouldCombineCSRLocalStackBump(MF
, NumBytes
);
1391 // Assume we can't combine the last pop with the sp restore.
1393 if (!CombineSPBump
&& PrologueSaveSize
!= 0) {
// Walk back from the first terminator over SEH pseudo-instructions to reach
// the last callee-save restore.
1394 MachineBasicBlock::iterator Pop
= std::prev(MBB
.getFirstTerminator());
1395 while (AArch64InstrInfo::isSEHInstruction(*Pop
))
1396 Pop
= std::prev(Pop
);
1397 // Converting the last ldp to a post-index ldp is valid only if the last
1398 // ldp's offset is 0.
1399 const MachineOperand
&OffsetOp
= Pop
->getOperand(Pop
->getNumOperands() - 1);
1400 // If the offset is 0, convert it to a post-index ldp.
1401 if (OffsetOp
.getImm() == 0)
1402 convertCalleeSaveRestoreToSPPrePostIncDec(
1403 MBB
, Pop
, DL
, TII
, PrologueSaveSize
, NeedsWinCFI
, &HasWinCFI
, false);
1405 // If not, make sure to emit an add after the last ldp.
1406 // We're doing this by transferring the size to be restored from the
1407 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1409 AfterCSRPopSize
+= PrologueSaveSize
;
1413 // Move past the restores of the callee-saved registers.
1414 // If we plan on combining the sp bump of the local stack size and the callee
1415 // save stack size, we might need to adjust the CSR save and restore offsets.
1416 MachineBasicBlock::iterator LastPopI
= MBB
.getFirstTerminator();
1417 MachineBasicBlock::iterator Begin
= MBB
.begin();
1418 while (LastPopI
!= Begin
) {
1420 if (!LastPopI
->getFlag(MachineInstr::FrameDestroy
)) {
1423 } else if (CombineSPBump
)
1424 fixupCalleeSaveRestoreStackOffset(*LastPopI
, AFI
->getLocalStackSize(),
1425 NeedsWinCFI
, &HasWinCFI
);
1430 BuildMI(MBB
, LastPopI
, DL
, TII
->get(AArch64::SEH_EpilogStart
))
1431 .setMIFlag(MachineInstr::FrameDestroy
);
1434 // If there is a single SP update, insert it before the ret and we're done.
1435 if (CombineSPBump
) {
1436 emitFrameOffset(MBB
, MBB
.getFirstTerminator(), DL
, AArch64::SP
, AArch64::SP
,
1437 {NumBytes
+ (int64_t)AfterCSRPopSize
, MVT::i8
}, TII
,
1438 MachineInstr::FrameDestroy
, false, NeedsWinCFI
, &HasWinCFI
);
1439 if (NeedsWinCFI
&& HasWinCFI
)
1440 BuildMI(MBB
, MBB
.getFirstTerminator(), DL
,
1441 TII
->get(AArch64::SEH_EpilogEnd
))
1442 .setMIFlag(MachineInstr::FrameDestroy
);
// From here on NumBytes counts only the bytes beyond the prologue save area.
1446 NumBytes
-= PrologueSaveSize
;
1447 assert(NumBytes
>= 0 && "Negative stack allocation size!?");
1450 bool RedZone
= canUseRedZone(MF
);
1451 // If this was a redzone leaf function, we don't need to restore the
1452 // stack pointer (but we may need to pop stack args for fastcc).
1453 if (RedZone
&& AfterCSRPopSize
== 0)
1456 bool NoCalleeSaveRestore
= PrologueSaveSize
== 0;
1457 int StackRestoreBytes
= RedZone
? 0 : NumBytes
;
1458 if (NoCalleeSaveRestore
)
1459 StackRestoreBytes
+= AfterCSRPopSize
;
1461 // If we were able to combine the local stack pop with the argument pop,
1463 bool Done
= NoCalleeSaveRestore
|| AfterCSRPopSize
== 0;
1465 // If we're done after this, make sure to help the load store optimizer.
1467 adaptForLdStOpt(MBB
, MBB
.getFirstTerminator(), LastPopI
);
1469 emitFrameOffset(MBB
, LastPopI
, DL
, AArch64::SP
, AArch64::SP
,
1470 {StackRestoreBytes
, MVT::i8
}, TII
,
1471 MachineInstr::FrameDestroy
, false, NeedsWinCFI
, &HasWinCFI
);
1475 BuildMI(MBB
, MBB
.getFirstTerminator(), DL
,
1476 TII
->get(AArch64::SEH_EpilogEnd
))
1477 .setMIFlag(MachineInstr::FrameDestroy
);
1485 // Restore the original stack pointer.
1486 // FIXME: Rather than doing the math here, we should instead just use
1487 // non-post-indexed loads for the restores if we aren't actually going to
1488 // be able to save any instructions.
1489 if (!IsFunclet
&& (MFI
.hasVarSizedObjects() || AFI
->isStackRealigned())) {
1490 int64_t OffsetToFrameRecord
=
1491 isTargetDarwin(MF
) ? (-(int64_t)AFI
->getCalleeSavedStackSize() + 16) : 0;
1492 emitFrameOffset(MBB
, LastPopI
, DL
, AArch64::SP
, AArch64::FP
,
1493 {OffsetToFrameRecord
, MVT::i8
},
1494 TII
, MachineInstr::FrameDestroy
, false, NeedsWinCFI
);
1495 } else if (NumBytes
)
1496 emitFrameOffset(MBB
, LastPopI
, DL
, AArch64::SP
, AArch64::SP
,
1497 {NumBytes
, MVT::i8
}, TII
, MachineInstr::FrameDestroy
, false,
1500 // This must be placed after the callee-save restore code because that code
1501 // assumes the SP is at the same location as it was after the callee-save save
1502 // code in the prologue.
1503 if (AfterCSRPopSize
) {
1504 // Find an insertion point for the first ldp so that it goes before the
1505 // shadow call stack epilog instruction. This ensures that the restore of
1506 // lr from x18 is placed after the restore from sp.
1507 auto FirstSPPopI
= MBB
.getFirstTerminator();
1508 while (FirstSPPopI
!= Begin
) {
1509 auto Prev
= std::prev(FirstSPPopI
);
1510 if (Prev
->getOpcode() != AArch64::LDRXpre
||
1511 Prev
->getOperand(0).getReg() == AArch64::SP
)
1516 adaptForLdStOpt(MBB
, FirstSPPopI
, LastPopI
);
1518 emitFrameOffset(MBB
, FirstSPPopI
, DL
, AArch64::SP
, AArch64::SP
,
1519 {(int64_t)AfterCSRPopSize
, MVT::i8
}, TII
,
1520 MachineInstr::FrameDestroy
, false, NeedsWinCFI
, &HasWinCFI
);
1522 if (NeedsWinCFI
&& HasWinCFI
)
1523 BuildMI(MBB
, MBB
.getFirstTerminator(), DL
, TII
->get(AArch64::SEH_EpilogEnd
))
1524 .setMIFlag(MachineInstr::FrameDestroy
);
1526 MF
.setHasWinCFI(HasWinCFI
);
1529 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1530 /// debug info. It's the same as what we use for resolving the code-gen
1531 /// references for now. FIXME: This can go wrong when references are
1532 /// SP-relative and simple call frames aren't used.
/// The result is produced by resolveFrameIndexReference(). FrameReg is taken
/// by non-const reference, so it acts as an output parameter.
1533 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction
&MF
,
1535 unsigned &FrameReg
) const {
1536 return resolveFrameIndexReference(
// NOTE(review): the hwasan attribute query below is presumably the PreferFP
// argument of resolveFrameIndexReference -- confirm against the full call.
1539 MF
.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress
),
1544 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1545 const MachineFunction
&MF
, int FI
) const {
1546 return getSEHFrameIndexOffset(MF
, FI
);
1549 static StackOffset
getFPOffset(const MachineFunction
&MF
, int ObjectOffset
) {
1550 const auto *AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
1551 const auto &Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
1553 Subtarget
.isCallingConvWin64(MF
.getFunction().getCallingConv());
1554 unsigned FixedObject
= IsWin64
? alignTo(AFI
->getVarArgsGPRSize(), 16) : 0;
1555 unsigned FPAdjust
= isTargetDarwin(MF
) ? 16 : AFI
->getCalleeSavedStackSize();
1556 return {ObjectOffset
+ FixedObject
+ FPAdjust
, MVT::i8
};
1559 static StackOffset
getStackOffset(const MachineFunction
&MF
, int ObjectOffset
) {
1560 const auto &MFI
= MF
.getFrameInfo();
1561 return {ObjectOffset
+ (int)MFI
.getStackSize(), MVT::i8
};
1564 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction
&MF
,
1566 const auto *RegInfo
= static_cast<const AArch64RegisterInfo
*>(
1567 MF
.getSubtarget().getRegisterInfo());
1568 int ObjectOffset
= MF
.getFrameInfo().getObjectOffset(FI
);
1569 return RegInfo
->getLocalAddressRegister(MF
) == AArch64::FP
1570 ? getFPOffset(MF
, ObjectOffset
).getBytes()
1571 : getStackOffset(MF
, ObjectOffset
).getBytes();
1574 StackOffset
AArch64FrameLowering::resolveFrameIndexReference(
1575 const MachineFunction
&MF
, int FI
, unsigned &FrameReg
, bool PreferFP
,
1576 bool ForSimm
) const {
1577 const auto &MFI
= MF
.getFrameInfo();
1578 int ObjectOffset
= MFI
.getObjectOffset(FI
);
1579 bool isFixed
= MFI
.isFixedObjectIndex(FI
);
1580 return resolveFrameOffsetReference(MF
, ObjectOffset
, isFixed
, FrameReg
,
// Choose the register (frame pointer, base pointer or SP) used to address a
// frame object located at ObjectOffset, store that register in FrameReg, and
// return the object's offset from it.
//   isFixed  - the object is a fixed object (see the isCSR computation and
//              the assert near the end).
//   PreferFP - biases the choice toward FP; it is also raised locally when
//              the SP-relative offset exceeds -FPOffset.
//   ForSimm  - when set, an FP offset is only considered to fit if it is
//              >= -256.
1584 StackOffset
AArch64FrameLowering::resolveFrameOffsetReference(
1585 const MachineFunction
&MF
, int ObjectOffset
, bool isFixed
,
1586 unsigned &FrameReg
, bool PreferFP
, bool ForSimm
) const {
1587 const auto &MFI
= MF
.getFrameInfo();
1588 const auto *RegInfo
= static_cast<const AArch64RegisterInfo
*>(
1589 MF
.getSubtarget().getRegisterInfo());
1590 const auto *AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
1591 const auto &Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
// Candidate offsets: FPOffset is relative to the frame pointer, Offset is
// relative to the stack size (see getFPOffset / getStackOffset).
1593 int FPOffset
= getFPOffset(MF
, ObjectOffset
).getBytes();
1594 int Offset
= getStackOffset(MF
, ObjectOffset
).getBytes();
1596 !isFixed
&& ObjectOffset
>= -((int)AFI
->getCalleeSavedStackSize());
1598 // Use frame pointer to reference fixed objects. Use it for locals if
1599 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1600 // reliable as a base). Make sure useFPForScavengingIndex() does the
1601 // right thing for the emergency spill slot.
1603 if (AFI
->hasStackFrame()) {
1604 // Note: Keeping the following as multiple 'if' statements rather than
1605 // merging to a single expression for readability.
1607 // Argument access should always use the FP.
1610 } else if (isCSR
&& RegInfo
->needsStackRealignment(MF
)) {
1611 // References to the CSR area must use FP if we're re-aligning the stack
1612 // since the dynamically-sized alignment padding is between the SP/BP and
1614 assert(hasFP(MF
) && "Re-aligned stack must have frame pointer");
1616 } else if (hasFP(MF
) && !RegInfo
->needsStackRealignment(MF
)) {
1617 // If the FPOffset is negative and we're producing a signed immediate, we
1618 // have to keep in mind that the available offset range for negative
1619 // offsets is smaller than for positive ones. If an offset is available
1620 // via the FP and the SP, use whichever is closest.
1621 bool FPOffsetFits
= !ForSimm
|| FPOffset
>= -256;
1622 PreferFP
|= Offset
> -FPOffset
;
1624 if (MFI
.hasVarSizedObjects()) {
1625 // If we have variable sized objects, we can use either FP or BP, as the
1626 // SP offset is unknown. We can use the base pointer if we have one and
1627 // FP is not preferred. If not, we're stuck with using FP.
1628 bool CanUseBP
= RegInfo
->hasBasePointer(MF
);
1629 if (FPOffsetFits
&& CanUseBP
) // Both are ok. Pick the best.
1631 else if (!CanUseBP
) // Can't use BP. Forced to use FP.
1633 // else we can use BP and FP, but the offset from FP won't fit.
1634 // That will make us scavenge registers which we can probably avoid by
1635 // using BP. If it won't fit for BP either, we'll scavenge anyway.
1636 } else if (FPOffset
>= 0) {
1637 // Use SP or FP, whichever gives us the best chance of the offset
1638 // being in range for direct access. If the FPOffset is positive,
1639 // that'll always be best, as the SP will be even further away.
1641 } else if (MF
.hasEHFunclets() && !RegInfo
->hasBasePointer(MF
)) {
1642 // Funclets access the locals contained in the parent's stack frame
1643 // via the frame pointer, so we have to use the FP in the parent
1647 Subtarget
.isCallingConvWin64(MF
.getFunction().getCallingConv()) &&
1648 "Funclets should only be present on Win64");
1651 // We have the choice between FP and (SP or BP).
1652 if (FPOffsetFits
&& PreferFP
) // If FP is the best fit, use it.
1658 assert(((isFixed
|| isCSR
) || !RegInfo
->needsStackRealignment(MF
) || !UseFP
) &&
1659 "In the presence of dynamic stack pointer realignment, "
1660 "non-argument/CSR objects cannot be accessed through the frame pointer");
// FP-relative result: report the frame register together with FPOffset.
1663 FrameReg
= RegInfo
->getFrameRegister(MF
);
1664 return StackOffset(FPOffset
, MVT::i8
);
1667 // Use the base pointer if we have one.
1668 if (RegInfo
->hasBasePointer(MF
))
1669 FrameReg
= RegInfo
->getBaseRegister();
1671 assert(!MFI
.hasVarSizedObjects() &&
1672 "Can't use SP when we have var sized objects.");
1673 FrameReg
= AArch64::SP
;
1674 // If we're using the red zone for this function, the SP won't actually
1675 // be adjusted, so the offsets will be negative. They're also all
1676 // within range of the signed 9-bit immediate instructions.
1677 if (canUseRedZone(MF
))
1678 Offset
-= AFI
->getLocalStackSize();
1681 return StackOffset(Offset
, MVT::i8
);
1684 static unsigned getPrologueDeath(MachineFunction
&MF
, unsigned Reg
) {
1685 // Do not set a kill flag on values that are also marked as live-in. This
1686 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
1687 // callee saved registers.
1688 // Omitting the kill flags is conservatively correct even if the live-in
1689 // is not used after all.
1690 bool IsLiveIn
= MF
.getRegInfo().isLiveIn(Reg
);
1691 return getKillRegState(!IsLiveIn
);
1694 static bool produceCompactUnwindFrame(MachineFunction
&MF
) {
1695 const AArch64Subtarget
&Subtarget
= MF
.getSubtarget
<AArch64Subtarget
>();
1696 AttributeList Attrs
= MF
.getFunction().getAttributes();
1697 return Subtarget
.isTargetMachO() &&
1698 !(Subtarget
.getTargetLowering()->supportSwiftError() &&
1699 Attrs
.hasAttrSomewhere(Attribute::SwiftError
));
// Windows-specific check on whether callee-saved registers Reg1 and Reg2 may
// share one paired save/restore. The visible test is whether Reg2 is the
// register immediately after Reg1 (Reg2 == Reg1 + 1).
1702 static bool invalidateWindowsRegisterPairing(unsigned Reg1
, unsigned Reg2
,
1704 // If we are generating register pairs for a Windows function that requires
1705 // EH support, then pair consecutive registers only. There are no unwind
1706 // opcodes for saves/restores of non-consecutive register pairs.
1707 // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
1708 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
1710 // TODO: LR can be paired with any register. We don't support this yet in
1711 // the MCLayer. We need to add support for the save_lrpair unwind code.
1714 if (Reg2
== Reg1
+ 1)
1719 /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
1720 /// WindowsCFI requires that only consecutive registers can be paired.
1721 /// LR and FP need to be allocated together when the frame needs to save
1722 /// the frame-record. This means any other register pairing with LR is invalid.
1723 static bool invalidateRegisterPairing(unsigned Reg1
, unsigned Reg2
,
1724 bool NeedsWinCFI
, bool NeedsFrameRecord
) {
// Windows rule: defer to invalidateWindowsRegisterPairing with its CFI
// argument forced to true.
1726 return invalidateWindowsRegisterPairing(Reg1
, Reg2
, true);
1728 // If we need to store the frame record, don't pair any register
1729 // with LR other than FP.
1730 if (NeedsFrameRecord
)
1731 return Reg2
== AArch64::LR
;
// Describes one callee-saved register save slot: Reg1 is always set, Reg2 is
// set only when a second register is paired with it, and Type records the
// register class (64-bit GPR, 64-bit FPR or 128-bit FPR).
1738 struct RegPairInfo
{
// Both registers default to NoRegister; Reg2 staying NoRegister means the
// entry is unpaired (see isPaired()).
1739 unsigned Reg1
= AArch64::NoRegister
;
1740 unsigned Reg2
= AArch64::NoRegister
;
1743 enum RegType
{ GPR
, FPR64
, FPR128
} Type
;
1745 RegPairInfo() = default;
// True when a second register has been assigned to this entry.
1747 bool isPaired() const { return Reg2
!= AArch64::NoRegister
; }
1750 } // end anonymous namespace
1752 static void computeCalleeSaveRegisterPairs(
1753 MachineFunction
&MF
, const std::vector
<CalleeSavedInfo
> &CSI
,
1754 const TargetRegisterInfo
*TRI
, SmallVectorImpl
<RegPairInfo
> &RegPairs
,
1755 bool &NeedShadowCallStackProlog
, bool NeedsFrameRecord
) {
1760 bool NeedsWinCFI
= needsWinCFI(MF
);
1761 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
1762 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1763 CallingConv::ID CC
= MF
.getFunction().getCallingConv();
1764 unsigned Count
= CSI
.size();
1766 // MachO's compact unwind format relies on all registers being stored in
1768 assert((!produceCompactUnwindFrame(MF
) ||
1769 CC
== CallingConv::PreserveMost
||
1770 (Count
& 1) == 0) &&
1771 "Odd number of callee-saved regs to spill!");
1772 int Offset
= AFI
->getCalleeSavedStackSize();
1773 // On Linux, we will have either one or zero non-paired register. On Windows
1774 // with CFI, we can have multiple unpaired registers in order to utilize the
1775 // available unwind codes. This flag assures that the alignment fixup is done
1776 // only once, as intended.
1777 bool FixupDone
= false;
1778 for (unsigned i
= 0; i
< Count
; ++i
) {
1780 RPI
.Reg1
= CSI
[i
].getReg();
1782 if (AArch64::GPR64RegClass
.contains(RPI
.Reg1
))
1783 RPI
.Type
= RegPairInfo::GPR
;
1784 else if (AArch64::FPR64RegClass
.contains(RPI
.Reg1
))
1785 RPI
.Type
= RegPairInfo::FPR64
;
1786 else if (AArch64::FPR128RegClass
.contains(RPI
.Reg1
))
1787 RPI
.Type
= RegPairInfo::FPR128
;
1789 llvm_unreachable("Unsupported register class.");
1791 // Add the next reg to the pair if it is in the same register class.
1792 if (i
+ 1 < Count
) {
1793 unsigned NextReg
= CSI
[i
+ 1].getReg();
1795 case RegPairInfo::GPR
:
1796 if (AArch64::GPR64RegClass
.contains(NextReg
) &&
1797 !invalidateRegisterPairing(RPI
.Reg1
, NextReg
, NeedsWinCFI
,
1801 case RegPairInfo::FPR64
:
1802 if (AArch64::FPR64RegClass
.contains(NextReg
) &&
1803 !invalidateWindowsRegisterPairing(RPI
.Reg1
, NextReg
, NeedsWinCFI
))
1806 case RegPairInfo::FPR128
:
1807 if (AArch64::FPR128RegClass
.contains(NextReg
))
1813 // If either of the registers to be saved is the lr register, it means that
1814 // we also need to save lr in the shadow call stack.
1815 if ((RPI
.Reg1
== AArch64::LR
|| RPI
.Reg2
== AArch64::LR
) &&
1816 MF
.getFunction().hasFnAttribute(Attribute::ShadowCallStack
)) {
1817 if (!MF
.getSubtarget
<AArch64Subtarget
>().isXRegisterReserved(18))
1818 report_fatal_error("Must reserve x18 to use shadow call stack");
1819 NeedShadowCallStackProlog
= true;
1822 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1823 // list to come in sorted by frame index so that we can issue the store
1824 // pair instructions directly. Assert if we see anything otherwise.
1826 // The order of the registers in the list is controlled by
1827 // getCalleeSavedRegs(), so they will always be in-order, as well.
1828 assert((!RPI
.isPaired() ||
1829 (CSI
[i
].getFrameIdx() + 1 == CSI
[i
+ 1].getFrameIdx())) &&
1830 "Out of order callee saved regs!");
1832 assert((!RPI
.isPaired() || !NeedsFrameRecord
|| RPI
.Reg2
!= AArch64::FP
||
1833 RPI
.Reg1
== AArch64::LR
) &&
1834 "FrameRecord must be allocated together with LR");
1836 // MachO's compact unwind format relies on all registers being stored in
1837 // adjacent register pairs.
1838 assert((!produceCompactUnwindFrame(MF
) ||
1839 CC
== CallingConv::PreserveMost
||
1841 ((RPI
.Reg1
== AArch64::LR
&& RPI
.Reg2
== AArch64::FP
) ||
1842 RPI
.Reg1
+ 1 == RPI
.Reg2
))) &&
1843 "Callee-save registers not saved as adjacent register pair!");
1845 RPI
.FrameIdx
= CSI
[i
].getFrameIdx();
1847 int Scale
= RPI
.Type
== RegPairInfo::FPR128
? 16 : 8;
1848 Offset
-= RPI
.isPaired() ? 2 * Scale
: Scale
;
1850 // Round up size of non-pair to pair size if we need to pad the
1851 // callee-save area to ensure 16-byte alignment.
1852 if (AFI
->hasCalleeSaveStackFreeSpace() && !FixupDone
&&
1853 RPI
.Type
!= RegPairInfo::FPR128
&& !RPI
.isPaired()) {
1856 assert(Offset
% 16 == 0);
1857 assert(MFI
.getObjectAlignment(RPI
.FrameIdx
) <= 16);
1858 MFI
.setObjectAlignment(RPI
.FrameIdx
, 16);
1861 assert(Offset
% Scale
== 0);
1862 RPI
.Offset
= Offset
/ Scale
;
1863 assert((RPI
.Offset
>= -64 && RPI
.Offset
<= 63) &&
1864 "Offset out of bounds for LDP/STP immediate");
1866 RegPairs
.push_back(RPI
);
1872 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1873 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MI
,
1874 const std::vector
<CalleeSavedInfo
> &CSI
,
1875 const TargetRegisterInfo
*TRI
) const {
1876 MachineFunction
&MF
= *MBB
.getParent();
1877 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1878 bool NeedsWinCFI
= needsWinCFI(MF
);
1880 SmallVector
<RegPairInfo
, 8> RegPairs
;
1882 bool NeedShadowCallStackProlog
= false;
1883 computeCalleeSaveRegisterPairs(MF
, CSI
, TRI
, RegPairs
,
1884 NeedShadowCallStackProlog
, hasFP(MF
));
1885 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
1887 if (NeedShadowCallStackProlog
) {
1888 // Shadow call stack prolog: str x30, [x18], #8
1889 BuildMI(MBB
, MI
, DL
, TII
.get(AArch64::STRXpost
))
1890 .addReg(AArch64::X18
, RegState::Define
)
1891 .addReg(AArch64::LR
)
1892 .addReg(AArch64::X18
)
1894 .setMIFlag(MachineInstr::FrameSetup
);
1897 BuildMI(MBB
, MI
, DL
, TII
.get(AArch64::SEH_Nop
))
1898 .setMIFlag(MachineInstr::FrameSetup
);
1900 if (!MF
.getFunction().hasFnAttribute(Attribute::NoUnwind
)) {
1901 // Emit a CFI instruction that causes 8 to be subtracted from the value of
1902 // x18 when unwinding past this frame.
1903 static const char CFIInst
[] = {
1904 dwarf::DW_CFA_val_expression
,
1907 static_cast<char>(unsigned(dwarf::DW_OP_breg18
)),
1908 static_cast<char>(-8) & 0x7f, // addend (sleb128)
1910 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createEscape(
1911 nullptr, StringRef(CFIInst
, sizeof(CFIInst
))));
1912 BuildMI(MBB
, MI
, DL
, TII
.get(AArch64::CFI_INSTRUCTION
))
1913 .addCFIIndex(CFIIndex
)
1914 .setMIFlag(MachineInstr::FrameSetup
);
1917 // This instruction also makes x18 live-in to the entry block.
1918 MBB
.addLiveIn(AArch64::X18
);
1921 for (auto RPII
= RegPairs
.rbegin(), RPIE
= RegPairs
.rend(); RPII
!= RPIE
;
1923 RegPairInfo RPI
= *RPII
;
1924 unsigned Reg1
= RPI
.Reg1
;
1925 unsigned Reg2
= RPI
.Reg2
;
1928 // Issue sequence of spills for cs regs. The first spill may be converted
1929 // to a pre-decrement store later by emitPrologue if the callee-save stack
1930 // area allocation can't be combined with the local stack area allocation.
1932 // stp x22, x21, [sp, #0] // addImm(+0)
1933 // stp x20, x19, [sp, #16] // addImm(+2)
1934 // stp fp, lr, [sp, #32] // addImm(+4)
1935 // Rationale: This sequence saves uop updates compared to a sequence of
1936 // pre-increment spills like stp xi,xj,[sp,#-16]!
1937 // Note: Similar rationale and sequence for restores in epilog.
1938 unsigned Size
, Align
;
1940 case RegPairInfo::GPR
:
1941 StrOpc
= RPI
.isPaired() ? AArch64::STPXi
: AArch64::STRXui
;
1945 case RegPairInfo::FPR64
:
1946 StrOpc
= RPI
.isPaired() ? AArch64::STPDi
: AArch64::STRDui
;
1950 case RegPairInfo::FPR128
:
1951 StrOpc
= RPI
.isPaired() ? AArch64::STPQi
: AArch64::STRQui
;
1956 LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1
, TRI
);
1957 if (RPI
.isPaired()) dbgs() << ", " << printReg(Reg2
, TRI
);
1958 dbgs() << ") -> fi#(" << RPI
.FrameIdx
;
1959 if (RPI
.isPaired()) dbgs() << ", " << RPI
.FrameIdx
+ 1;
1962 assert((!NeedsWinCFI
|| !(Reg1
== AArch64::LR
&& Reg2
== AArch64::FP
)) &&
1963 "Windows unwdinding requires a consecutive (FP,LR) pair");
1964 // Windows unwind codes require consecutive registers if registers are
1965 // paired. Make the switch here, so that the code below will save (x,x+1)
1967 unsigned FrameIdxReg1
= RPI
.FrameIdx
;
1968 unsigned FrameIdxReg2
= RPI
.FrameIdx
+ 1;
1969 if (NeedsWinCFI
&& RPI
.isPaired()) {
1970 std::swap(Reg1
, Reg2
);
1971 std::swap(FrameIdxReg1
, FrameIdxReg2
);
1973 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, DL
, TII
.get(StrOpc
));
1974 if (!MRI
.isReserved(Reg1
))
1975 MBB
.addLiveIn(Reg1
);
1976 if (RPI
.isPaired()) {
1977 if (!MRI
.isReserved(Reg2
))
1978 MBB
.addLiveIn(Reg2
);
1979 MIB
.addReg(Reg2
, getPrologueDeath(MF
, Reg2
));
1980 MIB
.addMemOperand(MF
.getMachineMemOperand(
1981 MachinePointerInfo::getFixedStack(MF
, FrameIdxReg2
),
1982 MachineMemOperand::MOStore
, Size
, Align
));
1984 MIB
.addReg(Reg1
, getPrologueDeath(MF
, Reg1
))
1985 .addReg(AArch64::SP
)
1986 .addImm(RPI
.Offset
) // [sp, #offset*scale],
1987 // where factor*scale is implicit
1988 .setMIFlag(MachineInstr::FrameSetup
);
1989 MIB
.addMemOperand(MF
.getMachineMemOperand(
1990 MachinePointerInfo::getFixedStack(MF
,FrameIdxReg1
),
1991 MachineMemOperand::MOStore
, Size
, Align
));
1993 InsertSEH(MIB
, TII
, MachineInstr::FrameSetup
);
1999 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
2000 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MI
,
2001 std::vector
<CalleeSavedInfo
> &CSI
,
2002 const TargetRegisterInfo
*TRI
) const {
2003 MachineFunction
&MF
= *MBB
.getParent();
2004 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
2006 SmallVector
<RegPairInfo
, 8> RegPairs
;
2007 bool NeedsWinCFI
= needsWinCFI(MF
);
2009 if (MI
!= MBB
.end())
2010 DL
= MI
->getDebugLoc();
2012 bool NeedShadowCallStackProlog
= false;
2013 computeCalleeSaveRegisterPairs(MF
, CSI
, TRI
, RegPairs
,
2014 NeedShadowCallStackProlog
, hasFP(MF
));
2016 auto EmitMI
= [&](const RegPairInfo
&RPI
) {
2017 unsigned Reg1
= RPI
.Reg1
;
2018 unsigned Reg2
= RPI
.Reg2
;
2020 // Issue sequence of restores for cs regs. The last restore may be converted
2021 // to a post-increment load later by emitEpilogue if the callee-save stack
2022 // area allocation can't be combined with the local stack area allocation.
2024 // ldp fp, lr, [sp, #32] // addImm(+4)
2025 // ldp x20, x19, [sp, #16] // addImm(+2)
2026 // ldp x22, x21, [sp, #0] // addImm(+0)
2027 // Note: see comment in spillCalleeSavedRegisters()
2029 unsigned Size
, Align
;
2031 case RegPairInfo::GPR
:
2032 LdrOpc
= RPI
.isPaired() ? AArch64::LDPXi
: AArch64::LDRXui
;
2036 case RegPairInfo::FPR64
:
2037 LdrOpc
= RPI
.isPaired() ? AArch64::LDPDi
: AArch64::LDRDui
;
2041 case RegPairInfo::FPR128
:
2042 LdrOpc
= RPI
.isPaired() ? AArch64::LDPQi
: AArch64::LDRQui
;
2047 LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1
, TRI
);
2048 if (RPI
.isPaired()) dbgs() << ", " << printReg(Reg2
, TRI
);
2049 dbgs() << ") -> fi#(" << RPI
.FrameIdx
;
2050 if (RPI
.isPaired()) dbgs() << ", " << RPI
.FrameIdx
+ 1;
2053 // Windows unwind codes require consecutive registers if registers are
2054 // paired. Make the switch here, so that the code below will save (x,x+1)
2056 unsigned FrameIdxReg1
= RPI
.FrameIdx
;
2057 unsigned FrameIdxReg2
= RPI
.FrameIdx
+ 1;
2058 if (NeedsWinCFI
&& RPI
.isPaired()) {
2059 std::swap(Reg1
, Reg2
);
2060 std::swap(FrameIdxReg1
, FrameIdxReg2
);
2062 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, DL
, TII
.get(LdrOpc
));
2063 if (RPI
.isPaired()) {
2064 MIB
.addReg(Reg2
, getDefRegState(true));
2065 MIB
.addMemOperand(MF
.getMachineMemOperand(
2066 MachinePointerInfo::getFixedStack(MF
, FrameIdxReg2
),
2067 MachineMemOperand::MOLoad
, Size
, Align
));
2069 MIB
.addReg(Reg1
, getDefRegState(true))
2070 .addReg(AArch64::SP
)
2071 .addImm(RPI
.Offset
) // [sp, #offset*scale]
2072 // where factor*scale is implicit
2073 .setMIFlag(MachineInstr::FrameDestroy
);
2074 MIB
.addMemOperand(MF
.getMachineMemOperand(
2075 MachinePointerInfo::getFixedStack(MF
, FrameIdxReg1
),
2076 MachineMemOperand::MOLoad
, Size
, Align
));
2078 InsertSEH(MIB
, TII
, MachineInstr::FrameDestroy
);
2080 if (ReverseCSRRestoreSeq
)
2081 for (const RegPairInfo
&RPI
: reverse(RegPairs
))
2084 for (const RegPairInfo
&RPI
: RegPairs
)
2087 if (NeedShadowCallStackProlog
) {
2088 // Shadow call stack epilog: ldr x30, [x18, #-8]!
2089 BuildMI(MBB
, MI
, DL
, TII
.get(AArch64::LDRXpre
))
2090 .addReg(AArch64::X18
, RegState::Define
)
2091 .addReg(AArch64::LR
, RegState::Define
)
2092 .addReg(AArch64::X18
)
2094 .setMIFlag(MachineInstr::FrameDestroy
);
2100 void AArch64FrameLowering::determineCalleeSaves(MachineFunction
&MF
,
2101 BitVector
&SavedRegs
,
2102 RegScavenger
*RS
) const {
2103 // All calls are tail calls in GHC calling conv, and functions have no
2104 // prologue/epilogue.
2105 if (MF
.getFunction().getCallingConv() == CallingConv::GHC
)
2108 TargetFrameLowering::determineCalleeSaves(MF
, SavedRegs
, RS
);
2109 const AArch64RegisterInfo
*RegInfo
= static_cast<const AArch64RegisterInfo
*>(
2110 MF
.getSubtarget().getRegisterInfo());
2111 AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
2112 unsigned UnspilledCSGPR
= AArch64::NoRegister
;
2113 unsigned UnspilledCSGPRPaired
= AArch64::NoRegister
;
2115 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2116 const MCPhysReg
*CSRegs
= MF
.getRegInfo().getCalleeSavedRegs();
2118 unsigned BasePointerReg
= RegInfo
->hasBasePointer(MF
)
2119 ? RegInfo
->getBaseRegister()
2120 : (unsigned)AArch64::NoRegister
;
2122 unsigned ExtraCSSpill
= 0;
2123 // Figure out which callee-saved registers to save/restore.
2124 for (unsigned i
= 0; CSRegs
[i
]; ++i
) {
2125 const unsigned Reg
= CSRegs
[i
];
2127 // Add the base pointer register to SavedRegs if it is callee-save.
2128 if (Reg
== BasePointerReg
)
2131 bool RegUsed
= SavedRegs
.test(Reg
);
2132 unsigned PairedReg
= CSRegs
[i
^ 1];
2134 if (AArch64::GPR64RegClass
.contains(Reg
) &&
2135 !RegInfo
->isReservedReg(MF
, Reg
)) {
2136 UnspilledCSGPR
= Reg
;
2137 UnspilledCSGPRPaired
= PairedReg
;
2142 // MachO's compact unwind format relies on all registers being stored in
2144 // FIXME: the usual format is actually better if unwinding isn't needed.
2145 if (produceCompactUnwindFrame(MF
) && PairedReg
!= AArch64::NoRegister
&&
2146 !SavedRegs
.test(PairedReg
)) {
2147 SavedRegs
.set(PairedReg
);
2148 if (AArch64::GPR64RegClass
.contains(PairedReg
) &&
2149 !RegInfo
->isReservedReg(MF
, PairedReg
))
2150 ExtraCSSpill
= PairedReg
;
2154 // Calculates the callee saved stack size.
2155 unsigned CSStackSize
= 0;
2156 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
2157 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
2158 for (unsigned Reg
: SavedRegs
.set_bits())
2159 CSStackSize
+= TRI
->getRegSizeInBits(Reg
, MRI
) / 8;
2161 // Save number of saved regs, so we can easily update CSStackSize later.
2162 unsigned NumSavedRegs
= SavedRegs
.count();
2164 // The frame record needs to be created by saving the appropriate registers
2165 unsigned EstimatedStackSize
= MFI
.estimateStackSize(MF
);
2167 windowsRequiresStackProbe(MF
, EstimatedStackSize
+ CSStackSize
+ 16)) {
2168 SavedRegs
.set(AArch64::FP
);
2169 SavedRegs
.set(AArch64::LR
);
2172 LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2174 : SavedRegs
.set_bits()) dbgs()
2175 << ' ' << printReg(Reg
, RegInfo
);
2178 // If any callee-saved registers are used, the frame cannot be eliminated.
2179 bool CanEliminateFrame
= SavedRegs
.count() == 0;
2181 // The CSR spill slots have not been allocated yet, so estimateStackSize
2182 // won't include them.
2183 unsigned EstimatedStackSizeLimit
= estimateRSStackSizeLimit(MF
);
2184 bool BigStack
= (EstimatedStackSize
+ CSStackSize
) > EstimatedStackSizeLimit
;
2185 if (BigStack
|| !CanEliminateFrame
|| RegInfo
->cannotEliminateFrame(MF
))
2186 AFI
->setHasStackFrame(true);
2188 // Estimate if we might need to scavenge a register at some point in order
2189 // to materialize a stack offset. If so, either spill one additional
2190 // callee-saved register or reserve a special spill slot to facilitate
2191 // register scavenging. If we already spilled an extra callee-saved register
2192 // above to keep the number of spills even, we don't need to do anything else
2195 if (!ExtraCSSpill
&& UnspilledCSGPR
!= AArch64::NoRegister
) {
2196 LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR
, RegInfo
)
2197 << " to get a scratch register.\n");
2198 SavedRegs
.set(UnspilledCSGPR
);
2199 // MachO's compact unwind format relies on all registers being stored in
2200 // pairs, so if we need to spill one extra for BigStack, then we need to
2202 if (produceCompactUnwindFrame(MF
))
2203 SavedRegs
.set(UnspilledCSGPRPaired
);
2204 ExtraCSSpill
= UnspilledCSGPR
;
2207 // If we didn't find an extra callee-saved register to spill, create
2208 // an emergency spill slot.
2209 if (!ExtraCSSpill
|| MF
.getRegInfo().isPhysRegUsed(ExtraCSSpill
)) {
2210 const TargetRegisterInfo
*TRI
= MF
.getSubtarget().getRegisterInfo();
2211 const TargetRegisterClass
&RC
= AArch64::GPR64RegClass
;
2212 unsigned Size
= TRI
->getSpillSize(RC
);
2213 unsigned Align
= TRI
->getSpillAlignment(RC
);
2214 int FI
= MFI
.CreateStackObject(Size
, Align
, false);
2215 RS
->addScavengingFrameIndex(FI
);
2216 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2217 << " as the emergency spill slot.\n");
2221 // Adding the size of additional 64bit GPR saves.
2222 CSStackSize
+= 8 * (SavedRegs
.count() - NumSavedRegs
);
2223 unsigned AlignedCSStackSize
= alignTo(CSStackSize
, 16);
2224 LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2225 << EstimatedStackSize
+ AlignedCSStackSize
2228 // Round up to register pair alignment to avoid additional SP adjustment
2230 AFI
->setCalleeSavedStackSize(AlignedCSStackSize
);
2231 AFI
->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize
!= CSStackSize
);
2234 bool AArch64FrameLowering::enableStackSlotScavenging(
2235 const MachineFunction
&MF
) const {
2236 const AArch64FunctionInfo
*AFI
= MF
.getInfo
<AArch64FunctionInfo
>();
2237 return AFI
->hasCalleeSaveStackFreeSpace();
2240 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
2241 MachineFunction
&MF
, RegScavenger
*RS
) const {
2242 // If this function isn't doing Win64-style C++ EH, we don't need to do
2244 if (!MF
.hasEHFunclets())
2246 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
2247 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2248 WinEHFuncInfo
&EHInfo
= *MF
.getWinEHFuncInfo();
2250 MachineBasicBlock
&MBB
= MF
.front();
2251 auto MBBI
= MBB
.begin();
2252 while (MBBI
!= MBB
.end() && MBBI
->getFlag(MachineInstr::FrameSetup
))
2255 // Create an UnwindHelp object.
2257 MFI
.CreateStackObject(/*size*/8, /*alignment*/16, false);
2258 EHInfo
.UnwindHelpFrameIdx
= UnwindHelpFI
;
2259 // We need to store -2 into the UnwindHelp object at the start of the
2262 RS
->enterBasicBlockEnd(MBB
);
2263 RS
->backward(std::prev(MBBI
));
2264 unsigned DstReg
= RS
->FindUnusedReg(&AArch64::GPR64commonRegClass
);
2265 assert(DstReg
&& "There must be a free register after frame setup");
2266 BuildMI(MBB
, MBBI
, DL
, TII
.get(AArch64::MOVi64imm
), DstReg
).addImm(-2);
2267 BuildMI(MBB
, MBBI
, DL
, TII
.get(AArch64::STURXi
))
2268 .addReg(DstReg
, getKillRegState(true))
2269 .addFrameIndex(UnwindHelpFI
)
2273 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2274 /// the update. This is easily retrieved as it is exactly the offset that is set
2275 /// in processFunctionBeforeFrameFinalized.
2276 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2277 const MachineFunction
&MF
, int FI
, unsigned &FrameReg
,
2278 bool IgnoreSPUpdates
) const {
2279 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2280 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI
<< " is "
2281 << MFI
.getObjectOffset(FI
) << "\n");
2282 FrameReg
= AArch64::SP
;
2283 return MFI
.getObjectOffset(FI
);
2286 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2287 /// the parent's frame pointer.
2288 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2289 const MachineFunction
&MF
) const {
2293 /// Funclets only need to account for space for the callee saved registers,
2294 /// as the locals are accounted for in the parent's stack frame.
2295 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2296 const MachineFunction
&MF
) const {
2297 // This is the size of the pushed CSRs.
2299 MF
.getInfo
<AArch64FunctionInfo
>()->getCalleeSavedStackSize();
2300 // This is the amount of stack a funclet needs to allocate.
2301 return alignTo(CSSize
+ MF
.getFrameInfo().getMaxCallFrameSize(),
2302 getStackAlignment());