//===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved gpr registers        | <--.
// |                                   |    | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -|    | callee saves are swapped,
// |                                   |    | (frame record first)
// | prev_fp, prev_lr                  | <--'
// | (a.k.a. "frame record")           |
// |-----------------------------------| <- fp(=x29)
// |                                   |
// | callee-saved fp/simd/SVE regs     |
// |                                   |
// |-----------------------------------|
// |                                   |
// |        SVE stack objects          |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....|       LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
// Note: the "(Win64 only) varargs from reg" area saved between the stack
// arguments and the callee-saved gprs is present only for Win64 vararg
// functions.
//
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size of the areas
// with a dotted background cannot be computed at compile time if they are
// present, so all three of fp, bp and sp must be set up to be able to access
// all contents in the frame areas, assuming all of the frame areas are
// non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// For Darwin platforms the frame-record (fp, lr) is stored at the top of the
// callee-saved area, since the unwind encoding does not allow for encoding
// this dynamically and existing tools depend on this layout. For other
// platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
// area to allow SVE stack objects (allocated directly below the callee-saves,
// if available) to be accessed directly from the framepointer.
// The SVE spill/fill instructions have VL-scaled addressing modes such
// as:
//    ldr z8, [fp, #-7 mul vl]
// For SVE the size of the vector length (VL) is not known at compile-time, so
// '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
// layout, we don't need to add an unscaled offset to the framepointer before
// accessing the SVE object in the frame.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//
115 #include "AArch64FrameLowering.h"
116 #include "AArch64InstrInfo.h"
117 #include "AArch64MachineFunctionInfo.h"
118 #include "AArch64RegisterInfo.h"
119 #include "AArch64StackOffset.h"
120 #include "AArch64Subtarget.h"
121 #include "AArch64TargetMachine.h"
122 #include "MCTargetDesc/AArch64AddressingModes.h"
123 #include "llvm/ADT/ScopeExit.h"
124 #include "llvm/ADT/SmallVector.h"
125 #include "llvm/ADT/Statistic.h"
126 #include "llvm/CodeGen/LivePhysRegs.h"
127 #include "llvm/CodeGen/MachineBasicBlock.h"
128 #include "llvm/CodeGen/MachineFrameInfo.h"
129 #include "llvm/CodeGen/MachineFunction.h"
130 #include "llvm/CodeGen/MachineInstr.h"
131 #include "llvm/CodeGen/MachineInstrBuilder.h"
132 #include "llvm/CodeGen/MachineMemOperand.h"
133 #include "llvm/CodeGen/MachineModuleInfo.h"
134 #include "llvm/CodeGen/MachineOperand.h"
135 #include "llvm/CodeGen/MachineRegisterInfo.h"
136 #include "llvm/CodeGen/RegisterScavenging.h"
137 #include "llvm/CodeGen/TargetInstrInfo.h"
138 #include "llvm/CodeGen/TargetRegisterInfo.h"
139 #include "llvm/CodeGen/TargetSubtargetInfo.h"
140 #include "llvm/CodeGen/WinEHFuncInfo.h"
141 #include "llvm/IR/Attributes.h"
142 #include "llvm/IR/CallingConv.h"
143 #include "llvm/IR/DataLayout.h"
144 #include "llvm/IR/DebugLoc.h"
145 #include "llvm/IR/Function.h"
146 #include "llvm/MC/MCAsmInfo.h"
147 #include "llvm/MC/MCDwarf.h"
148 #include "llvm/Support/CommandLine.h"
149 #include "llvm/Support/Debug.h"
150 #include "llvm/Support/ErrorHandling.h"
151 #include "llvm/Support/MathExtras.h"
152 #include "llvm/Support/raw_ostream.h"
153 #include "llvm/Target/TargetMachine.h"
154 #include "llvm/Target/TargetOptions.h"
155 #include <cassert>
156 #include <cstdint>
157 #include <iterator>
158 #include <vector>
160 using namespace llvm;
162 #define DEBUG_TYPE "frame-info"
164 static cl::opt<bool> EnableRedZone("aarch64-redzone",
165 cl::desc("enable use of redzone on AArch64"),
166 cl::init(false), cl::Hidden);
168 static cl::opt<bool>
169 ReverseCSRRestoreSeq("reverse-csr-restore-seq",
170 cl::desc("reverse the CSR restore sequence"),
171 cl::init(false), cl::Hidden);
173 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
/// This is the biggest offset to the stack pointer we can encode in aarch64
/// instructions (without using a separate calculation and a temp register).
/// Note that the exceptions here are vector stores/loads, which cannot encode
/// any displacements (see estimateRSStackSizeLimit(),
/// isAArch64FrameOffsetLegal()).
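/// (For context: the unscaled LDUR/STUR forms take a signed 9-bit byte
/// offset, i.e. [-256, 255], which is what this limit is based on.)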
static const unsigned DefaultSafeSPDisplacement = 255;

/// Look at each instruction that references stack frames and return the stack
/// size limit beyond which some of these instructions will require a scratch
/// register during their expansion later.
static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
  // FIXME: For now, just conservatively guesstimate based on unscaled indexing
  // range. We'll end up allocating an unnecessary spill slot a lot, but
  // realistically that's not a big deal at this stage of the game.
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr() || MI.isPseudo() ||
          MI.getOpcode() == AArch64::ADDXri ||
          MI.getOpcode() == AArch64::ADDSXri)
        continue;

      for (const MachineOperand &MO : MI.operands()) {
        if (!MO.isFI())
          continue;

        StackOffset Offset;
        if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
            AArch64FrameOffsetCannotUpdate)
          return 0;
      }
    }
  }
  return DefaultSafeSPDisplacement;
}

/// Returns the size of the entire SVE stackframe (calleesaves + spills).
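/// The result is in units of scalable bytes (MVT::nxv1i8), i.e. it is scaled
/// by the runtime vector length rather than being a plain byte count.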
static StackOffset getSVEStackSize(const MachineFunction &MF) {
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  return {(int64_t)AFI->getStackSizeSVE(), MVT::nxv1i8};
}

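// A note on the red zone (see also the FIXME in the file header): when
// enabled with -aarch64-redzone, a leaf function whose locals fit in the
// 128 bytes below SP may leave SP untouched and use that area directly.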
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
  if (!EnableRedZone)
    return false;
  // Don't use the red zone if the function explicitly asks us not to.
  // This is typically used for kernel code.
  if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
    return false;

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  unsigned NumBytes = AFI->getLocalStackSize();

  return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128 ||
           getSVEStackSize(MF));
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
  // Win64 EH requires a frame pointer if funclets are present, as the locals
  // are accessed off the frame pointer in both the parent function and the
  // funclets.
  if (MF.hasEHFunclets())
    return true;
  // Retain behavior of always omitting the FP for leaf functions when
  // possible.
  if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
    return true;
  if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
      MFI.hasStackMap() || MFI.hasPatchPoint() ||
      RegInfo->needsStackRealignment(MF))
    return true;
  // With large callframes around we may need to use FP to access the
  // scavenging emergency spillslot.
  //
  // Unfortunately some calls to hasFP() like machine verifier ->
  // getReservedReg() -> hasFP in the middle of global isel are too early
  // to know the max call frame size. Hopefully conservatively returning "true"
  // in those cases is fine.
  // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
  if (!MFI.isMaxCallFrameSizeComputed() ||
      MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
    return true;

  return false;
}

/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
/// not required, we reserve argument space for call sites in the function
/// immediately on entry to the current function. This eliminates the need for
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo().hasVarSizedObjects();
}

MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const AArch64InstrInfo *TII =
      static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  DebugLoc DL = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
  uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  if (!hasReservedCallFrame(MF)) {
    unsigned Align = getStackAlignment();

    int64_t Amount = I->getOperand(0).getImm();
    Amount = alignTo(Amount, Align);
    if (!IsDestroy)
      Amount = -Amount;

    // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
    // doesn't have to pop anything), then the first operand will be zero too so
    // this adjustment is a no-op.
    if (CalleePopAmount == 0) {
      // FIXME: in-function stack adjustment for calls is limited to 24-bits
      // because there's no guaranteed temporary register available.
      //
      // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
      // 1) For offset <= 12-bit, we use LSL #0
      // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
      //    LSL #0, and the other uses LSL #12.
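      //    For example (sketch), adjusting by 0x123456 bytes takes:
      //      sub sp, sp, #0x123, lsl #12   // subtracts 0x123000
      //      sub sp, sp, #0x456            // subtracts the remaining 0x456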
      //
      // Most call frames will be allocated at the start of a function so
      // this is OK, but it is a limitation that needs dealing with.
      assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
      emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
                      TII);
    }
  } else if (CalleePopAmount != 0) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    assert(CalleePopAmount < 0xffffff && "call frame too large");
    emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
                    {-(int64_t)CalleePopAmount, MVT::i8}, TII);
  }
  return MBB.erase(I);
}

static bool ShouldSignReturnAddress(MachineFunction &MF) {
  // The function should be signed in the following situations:
  //  - sign-return-address=all
  //  - sign-return-address=non-leaf and the function spills the LR
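  //
  // These correspond to the IR function attribute, e.g. (illustrative):
  //   define void @f() "sign-return-address"="non-leaf" { ... }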

  const Function &F = MF.getFunction();
  if (!F.hasFnAttribute("sign-return-address"))
    return false;

  StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
  if (Scope.equals("none"))
    return false;

  if (Scope.equals("all"))
    return true;

  assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");

  for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
    if (Info.getReg() == AArch64::LR)
      return true;

  return false;
}

void AArch64FrameLowering::emitCalleeSavedFrameMoves(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const TargetSubtargetInfo &STI = MF.getSubtarget();
  const MCRegisterInfo *MRI = STI.getRegisterInfo();
  const TargetInstrInfo *TII = STI.getInstrInfo();
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  if (CSI.empty())
    return;

  for (const auto &Info : CSI) {
    unsigned Reg = Info.getReg();
    int64_t Offset =
        MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
    unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}

// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
// shrink wrapping), but then no longer be free when this is called from
// emitPrologue.
//
// FIXME: This is a bit conservative, since in the above case we could use one
// of the callee-save registers as a scratch temp to re-align the stack
// pointer, but we would then have to make sure that we were in fact saving at
// least one callee-save register in the prologue, which is additional
// complexity that doesn't seem worth the benefit.
static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
  MachineFunction *MF = MBB->getParent();

  // If MBB is an entry block, use X9 as the scratch register
  if (&MF->front() == MBB)
    return AArch64::X9;

  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
  LivePhysRegs LiveRegs(TRI);
  LiveRegs.addLiveIns(*MBB);

  // Mark callee saved registers as used so we will not choose them.
  const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (unsigned i = 0; CSRegs[i]; ++i)
    LiveRegs.addReg(CSRegs[i]);

  // Prefer X9 since it was historically used for the prologue scratch reg.
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  if (LiveRegs.available(MRI, AArch64::X9))
    return AArch64::X9;

  for (unsigned Reg : AArch64::GPR64RegClass) {
    if (LiveRegs.available(MRI, Reg))
      return Reg;
  }
  return AArch64::NoRegister;
}

bool AArch64FrameLowering::canUseAsPrologue(
    const MachineBasicBlock &MBB) const {
  const MachineFunction *MF = MBB.getParent();
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
  const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Don't need a scratch register if we're not going to re-align the stack.
  if (!RegInfo->needsStackRealignment(*MF))
    return true;
  // Otherwise, we can use any block as long as it has a scratch register
  // available.
  return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

static bool windowsRequiresStackProbe(MachineFunction &MF,
                                      unsigned StackSizeInBytes) {
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  if (!Subtarget.isTargetWindows())
    return false;
  const Function &F = MF.getFunction();
  // TODO: When implementing stack protectors, take that into account
  // for the probe threshold.
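  // The 4096-byte default below can be overridden per function via the
  // "stack-probe-size" attribute, e.g. (illustrative):
  //   define void @f() "stack-probe-size"="8192" { ... }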
  unsigned StackProbeSize = 4096;
  if (F.hasFnAttribute("stack-probe-size"))
    F.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

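// Decide whether the SP bump for the local area can be folded into the
// callee-save handling. A sketch of the two shapes, for a 16-byte CSR area
// plus 16 bytes of locals:
//   combined:                        separate:
//     sub sp, sp, #32                  stp x29, x30, [sp, #-16]!
//     stp x29, x30, [sp, #16]          sub sp, sp, #16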
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
    MachineFunction &MF, unsigned StackBumpBytes) const {
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  if (AFI->getLocalStackSize() == 0)
    return false;

  // 512 is the maximum immediate for stp/ldp that will be used for
  // callee-save save/restores
  if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
    return false;

  if (MFI.hasVarSizedObjects())
    return false;

  if (RegInfo->needsStackRealignment(MF))
    return false;

  // This isn't strictly necessary, but it simplifies things a bit since the
  // current RedZone handling code assumes the SP is adjusted by the
  // callee-save save/restore code.
  if (canUseRedZone(MF))
    return false;

  // When there is an SVE area on the stack, always allocate the
  // callee-saves and spills/locals separately.
  if (getSVEStackSize(MF))
    return false;

  return true;
}

// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             MachineInstr::MIFlag Flag) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  int Imm = MBBI->getOperand(ImmIdx).getImm();
  MachineInstrBuilder MIB;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  switch (Opc) {
  default:
    llvm_unreachable("No SEH Opcode for this instruction");
  case AArch64::LDPDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPDpre: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDPXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STPXpre: {
    Register Reg0 = MBBI->getOperand(1).getReg();
    Register Reg1 = MBBI->getOperand(2).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRDpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRDpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::LDRXpost:
    Imm = -Imm;
    LLVM_FALLTHROUGH;
  case AArch64::STRXpre: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
              .addImm(Reg)
              .addImm(Imm)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPDi:
  case AArch64::LDPDi: {
    unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
              .addImm(Reg0)
              .addImm(Reg1)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STPXi:
  case AArch64::LDPXi: {
    Register Reg0 = MBBI->getOperand(0).getReg();
    Register Reg1 = MBBI->getOperand(1).getReg();
    if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    else
      MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
                .addImm(RegInfo->getSEHRegNum(Reg0))
                .addImm(RegInfo->getSEHRegNum(Reg1))
                .addImm(Imm * 8)
                .setMIFlag(Flag);
    break;
  }
  case AArch64::STRXui:
  case AArch64::LDRXui: {
    int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  case AArch64::STRDui:
  case AArch64::LDRDui: {
    unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
    MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
              .addImm(Reg)
              .addImm(Imm * 8)
              .setMIFlag(Flag);
    break;
  }
  }
  auto I = MBB->insertAfter(MBBI, MIB);
  return I;
}

// Fix up the SEH opcode associated with the save/restore instruction.
static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
                           unsigned LocalStackSize) {
  MachineOperand *ImmOpnd = nullptr;
  unsigned ImmIdx = MBBI->getNumOperands() - 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Fix the offset in the SEH instruction");
  case AArch64::SEH_SaveFPLR:
  case AArch64::SEH_SaveRegP:
  case AArch64::SEH_SaveReg:
  case AArch64::SEH_SaveFRegP:
  case AArch64::SEH_SaveFReg:
    ImmOpnd = &MBBI->getOperand(ImmIdx);
    break;
  }
  if (ImmOpnd)
    ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
}

// Convert callee-save register save/restore instruction to do stack pointer
// decrement/increment to allocate/deallocate the callee-save stack area by
// converting store/load to use pre/post increment version.
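// For example (sketch), with CSStackSizeInc == -32 in the prologue:
//   stp x29, x30, [sp, #0]   =>   stp x29, x30, [sp, #-32]!
// and with CSStackSizeInc == 32 in the epilogue:
//   ldp x29, x30, [sp, #0]   =>   ldp x29, x30, [sp], #32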
static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
    bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions and associated CFI instruction.
  while (MBBI->getOpcode() == AArch64::STRXpost ||
         MBBI->getOpcode() == AArch64::LDRXpre ||
         MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
    if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
      assert(MBBI->getOperand(0).getReg() != AArch64::SP);
    ++MBBI;
  }
  unsigned NewOpc;
  int Scale = 1;
  switch (MBBI->getOpcode()) {
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  case AArch64::STPXi:
    NewOpc = AArch64::STPXpre;
    Scale = 8;
    break;
  case AArch64::STPDi:
    NewOpc = AArch64::STPDpre;
    Scale = 8;
    break;
  case AArch64::STPQi:
    NewOpc = AArch64::STPQpre;
    Scale = 16;
    break;
  case AArch64::STRXui:
    NewOpc = AArch64::STRXpre;
    break;
  case AArch64::STRDui:
    NewOpc = AArch64::STRDpre;
    break;
  case AArch64::STRQui:
    NewOpc = AArch64::STRQpre;
    break;
  case AArch64::LDPXi:
    NewOpc = AArch64::LDPXpost;
    Scale = 8;
    break;
  case AArch64::LDPDi:
    NewOpc = AArch64::LDPDpost;
    Scale = 8;
    break;
  case AArch64::LDPQi:
    NewOpc = AArch64::LDPQpost;
    Scale = 16;
    break;
  case AArch64::LDRXui:
    NewOpc = AArch64::LDRXpost;
    break;
  case AArch64::LDRDui:
    NewOpc = AArch64::LDRDpost;
    break;
  case AArch64::LDRQui:
    NewOpc = AArch64::LDRQpost;
    break;
  }
  // Get rid of the SEH code associated with the old instruction.
  if (NeedsWinCFI) {
    auto SEH = std::next(MBBI);
    if (AArch64InstrInfo::isSEHInstruction(*SEH))
      SEH->eraseFromParent();
  }

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  MIB.addReg(AArch64::SP, RegState::Define);

  // Copy all operands other than the immediate offset.
  unsigned OpndIdx = 0;
  for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
       ++OpndIdx)
    MIB.add(MBBI->getOperand(OpndIdx));

  assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
         "Unexpected immediate offset in first/last callee-save save/restore "
         "instruction!");
  assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  assert(CSStackSizeInc % Scale == 0);
  MIB.addImm(CSStackSizeInc / Scale);

  MIB.setMIFlags(MBBI->getFlags());
  MIB.setMemRefs(MBBI->memoperands());

  // Generate a new SEH code that corresponds to the new instruction.
  if (NeedsWinCFI) {
    *HasWinCFI = true;
    InsertSEH(*MIB, *TII,
              InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
  }

  return std::prev(MBB.erase(MBBI));
}

// Fixup callee-save register save/restore instructions to take into account
// combined SP bump by adding the local stack size to the stack offsets.
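// For example (sketch), with LocalStackSize == 64:
//   stp x19, x20, [sp, #16]   =>   stp x19, x20, [sp, #80]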
static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
                                              unsigned LocalStackSize,
                                              bool NeedsWinCFI,
                                              bool *HasWinCFI) {
  if (AArch64InstrInfo::isSEHInstruction(MI))
    return;

  unsigned Opc = MI.getOpcode();

  // Ignore instructions that do not operate on SP, i.e. shadow call stack
  // instructions and associated CFI instruction.
  if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
      Opc == AArch64::CFI_INSTRUCTION) {
    if (Opc != AArch64::CFI_INSTRUCTION)
      assert(MI.getOperand(0).getReg() != AArch64::SP);
    return;
  }

  unsigned Scale;
  switch (Opc) {
  case AArch64::STPXi:
  case AArch64::STRXui:
  case AArch64::STPDi:
  case AArch64::STRDui:
  case AArch64::LDPXi:
  case AArch64::LDRXui:
  case AArch64::LDPDi:
  case AArch64::LDRDui:
    Scale = 8;
    break;
  case AArch64::STPQi:
  case AArch64::STRQui:
  case AArch64::LDPQi:
  case AArch64::LDRQui:
    Scale = 16;
    break;
  default:
    llvm_unreachable("Unexpected callee-save save/restore opcode!");
  }

  unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
  assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
         "Unexpected base register in callee-save save/restore instruction!");
  // Last operand is immediate offset that needs fixing.
  MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
  // All generated opcodes have scaled offsets.
  assert(LocalStackSize % Scale == 0);
  OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);

  if (NeedsWinCFI) {
    *HasWinCFI = true;
    auto MBBI = std::next(MachineBasicBlock::iterator(MI));
    assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
    assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
           "Expecting a SEH instruction");
    fixupSEHOpcode(MBBI, LocalStackSize);
  }
}

static void adaptForLdStOpt(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator FirstSPPopI,
                            MachineBasicBlock::iterator LastPopI) {
  // Sometimes (when we restore in the same order as we save), we can end up
  // with code like this:
  //
  //    ldp      x26, x25, [sp]
  //    ldp      x24, x23, [sp, #16]
  //    ldp      x22, x21, [sp, #32]
  //    ldp      x20, x19, [sp, #48]
  //    add      sp, sp, #64
  //
  // In this case, it is always better to put the first ldp at the end, so
  // that the load-store optimizer can run and merge the ldp and the add into
  // a post-index ldp.
  // If we managed to grab the first pop instruction, move it to the end.
  if (ReverseCSRRestoreSeq)
    MBB.splice(FirstSPPopI, &MBB, LastPopI);
  // We should end up with something like this now:
  //
  //    ldp      x24, x23, [sp, #16]
  //    ldp      x22, x21, [sp, #32]
  //    ldp      x20, x19, [sp, #48]
  //    ldp      x26, x25, [sp]
  //    add      sp, sp, #64
  //
  // and the load-store optimizer can merge the last two instructions into:
  //
  //    ldp      x26, x25, [sp], #64
  //
}

static bool ShouldSignWithAKey(MachineFunction &MF) {
  const Function &F = MF.getFunction();
  if (!F.hasFnAttribute("sign-return-address-key"))
    return true;

  const StringRef Key =
      F.getFnAttribute("sign-return-address-key").getValueAsString();
  assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
  return Key.equals_lower("a_key");
}

static bool needsWinCFI(const MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
}

static bool isTargetDarwin(const MachineFunction &MF) {
  return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}

void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
                                        MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
  bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
                         !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool HasFP = hasFP(MF);
  bool NeedsWinCFI = needsWinCFI(MF);
  bool HasWinCFI = false;
  auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });

  bool IsFunclet = MBB.isEHFuncletEntry();

  // At this point, we're going to decide whether or not the function uses a
  // redzone. In most cases, the function doesn't have a redzone so let's
  // assume that's false and set it to true in the case that there's a redzone.
  AFI->setHasRedZone(false);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc DL;

  if (ShouldSignReturnAddress(MF)) {
    if (ShouldSignWithAKey(MF))
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
          .setMIFlag(MachineInstr::FrameSetup);
    else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
          .setMIFlag(MachineInstr::FrameSetup);
    }

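    // The CFI instruction emitted below (DW_CFA_AARCH64_negate_ra_state)
    // toggles the "return address signed" state, telling the unwinder that
    // LR must be authenticated before it can be used to unwind.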
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
    BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  // Set tagged base pointer to the bottom of the stack frame.
  // Ideally it should match SP value after prologue.
  AFI->setTaggedBasePointerOffset(MFI.getStackSize());

  const StackOffset &SVEStackSize = getSVEStackSize(MF);

  // getStackSize() includes all the locals in its size calculation. We don't
  // include these locals when computing the stack size of a funclet, as they
  // are allocated in the parent's stack frame and accessed via the frame
  // pointer from the funclet. We only save the callee saved registers in the
  // funclet, which are really the callee saved registers of the parent
  // function, including the funclet.
  int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
                           : (int)MFI.getStackSize();
  if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
    assert(!HasFP && "unexpected function without stack frame but with FP");
    assert(!SVEStackSize &&
           "unexpected function without stack frame but with SVE objects");
    // All of the stack allocation is for locals.
    AFI->setLocalStackSize(NumBytes);
    if (!NumBytes)
      return;
    // REDZONE: If the stack size is less than 128 bytes, we don't need
    // to actually allocate.
    if (canUseRedZone(MF)) {
      AFI->setHasRedZone(true);
      ++NumRedZoneFunctions;
    } else {
      emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                      {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
                      false, NeedsWinCFI, &HasWinCFI);
      if (!NeedsWinCFI) {
        // Label used to tie together the PROLOG_LABEL and the MachineMoves.
        MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
        // Encode the stack size of the leaf function.
        unsigned CFIIndex = MF.addFrameInst(
            MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex)
            .setMIFlags(MachineInstr::FrameSetup);
      }
    }

    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    return;
  }

  bool IsWin64 =
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
  // Var args are accounted for in the containing function, so don't
  // include them for funclets.
  unsigned FixedObject = (IsWin64 && !IsFunclet) ?
                         alignTo(AFI->getVarArgsGPRSize(), 16) : 0;

  auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
  // All of the remaining stack allocations are for locals.
  AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
  bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
  if (CombineSPBump) {
    assert(!SVEStackSize && "Cannot combine SP bump with SVE");
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
                    NeedsWinCFI, &HasWinCFI);
    NumBytes = 0;
  } else if (PrologueSaveSize != 0) {
    MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
        MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
    NumBytes -= PrologueSaveSize;
  }
  assert(NumBytes >= 0 && "Negative stack allocation size!?");

  // Move past the saves of the callee-saved registers, fixing up the offsets
  // and pre-inc if we decided to combine the callee-save and local stack
  // pointer bump above.
  MachineBasicBlock::iterator End = MBB.end();
  while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
    if (CombineSPBump)
      fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
                                        NeedsWinCFI, &HasWinCFI);
    ++MBBI;
  }

  // The code below is not applicable to funclets. We have emitted all the SEH
  // opcodes that we needed to emit. The FP and BP belong to the containing
  // function.
  if (IsFunclet) {
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
          .setMIFlag(MachineInstr::FrameSetup);
    }

    // SEH funclets are passed the frame pointer in X1. If the parent
    // function uses the base register, then the base register is used
    // directly, and is not retrieved from X1.
    if (F.hasPersonalityFn()) {
      EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
      if (isAsynchronousEHPersonality(Per)) {
        BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
            .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
        MBB.addLiveIn(AArch64::X1);
      }
    }

    return;
  }

  if (HasFP) {
    // Only set up FP if we actually need to.
    int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;

    if (CombineSPBump)
      FPOffset += AFI->getLocalStackSize();

    // Issue    sub fp, sp, FPOffset or
    //          mov fp, sp          when FPOffset is zero.
    // Note: All stores of callee-saved registers are marked as "FrameSetup".
    // This code marks the instruction(s) that set the FP also.
    emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
                    {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
                    NeedsWinCFI, &HasWinCFI);
  }

  if (windowsRequiresStackProbe(MF, NumBytes)) {
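    // Windows' __chkstk takes the number of 16-byte units to probe in x15,
    // hence the shift by 4 below.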
    uint32_t NumWords = NumBytes >> 4;
    if (NeedsWinCFI) {
      HasWinCFI = true;
      // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
      // exceed this amount. We need to move at most 2^24 - 1 into x15.
      // This is at most two instructions, MOVZ followed by MOVK.
      // TODO: Fix to use multiple stack alloc unwind codes for stacks
      // exceeding 256MB in size.
      if (NumBytes >= (1 << 28))
        report_fatal_error("Stack size cannot exceed 256MB for stack "
                           "unwinding purposes");

      uint32_t LowNumWords = NumWords & 0xFFFF;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
          .addImm(LowNumWords)
          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
          .setMIFlag(MachineInstr::FrameSetup);
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
      if ((NumWords & 0xFFFF0000) != 0) {
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
            .addReg(AArch64::X15)
            .addImm((NumWords & 0xFFFF0000) >> 16) // High half
            .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
    } else {
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
          .addImm(NumWords)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    switch (MF.getTarget().getCodeModel()) {
    case CodeModel::Tiny:
    case CodeModel::Small:
    case CodeModel::Medium:
    case CodeModel::Kernel:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
          .addExternalSymbol("__chkstk")
          .addReg(AArch64::X15, RegState::Implicit)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    case CodeModel::Large:
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
          .addReg(AArch64::X16, RegState::Define)
          .addExternalSymbol("__chkstk")
          .addExternalSymbol("__chkstk")
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }

      BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
          .addReg(AArch64::X16, RegState::Kill)
          .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
          .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
          .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
          .setMIFlags(MachineInstr::FrameSetup);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
            .setMIFlag(MachineInstr::FrameSetup);
      }
      break;
    }

    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
        .addReg(AArch64::SP, RegState::Kill)
        .addReg(AArch64::X15, RegState::Kill)
        .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
        .setMIFlags(MachineInstr::FrameSetup);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
          .addImm(NumBytes)
          .setMIFlag(MachineInstr::FrameSetup);
    }
    NumBytes = 0;
  }

  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -SVEStackSize, TII,
                  MachineInstr::FrameSetup);

  // Allocate space for the rest of the frame.
  if (NumBytes) {
    const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
    unsigned scratchSPReg = AArch64::SP;

    if (NeedsRealignment) {
      scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
      assert(scratchSPReg != AArch64::NoRegister);
    }

    // If we're a leaf function, try using the red zone.
    if (!canUseRedZone(MF))
      // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
      // the correct value here, as NumBytes also includes padding bytes,
      // which shouldn't be counted here.
      emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
                      {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
                      false, NeedsWinCFI, &HasWinCFI);

    if (NeedsRealignment) {
      const unsigned Alignment = MFI.getMaxAlignment();
      const unsigned NrBitsToZero = countTrailingZeros(Alignment);
      assert(NrBitsToZero > 1);
      assert(scratchSPReg != AArch64::SP);

      // SUB X9, SP, NumBytes
      //   -- X9 is temporary register, so shouldn't contain any live data
      //      here, free to use. This is already produced by emitFrameOffset
      //      above.
      // AND SP, X9, 0b11111...0000
      // The logical immediates have a non-trivial encoding. The following
      // formula computes the encoded immediate with all ones but
      // NrBitsToZero zero bits as least significant bits.
      uint32_t andMaskEncoded = (1 << 12)                         // = N
                                | ((64 - NrBitsToZero) << 6)      // immr
                                | ((64 - NrBitsToZero - 1) << 0); // imms

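      // Worked example: Alignment == 32 gives NrBitsToZero == 5, so N = 1,
      // immr = 59, imms = 58, which decodes to the 64-bit mask
      // 0xFFFFFFFFFFFFFFE0, i.e. all ones with the low five bits clear.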
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
          .addReg(scratchSPReg, RegState::Kill)
          .addImm(andMaskEncoded);
      AFI->setStackRealigned(true);
      if (NeedsWinCFI) {
        HasWinCFI = true;
        BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
            .addImm(NumBytes & andMaskEncoded)
            .setMIFlag(MachineInstr::FrameSetup);
      }
    }
  }

  // If we need a base pointer, set it up here. It's whatever the value of the
  // stack pointer is at this point. Any variable size objects will be allocated
  // after this, so we can still use the base pointer to reference locals.
  //
  // FIXME: Clarify FrameSetup flags here.
  // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
  // needed.
  if (RegInfo->hasBasePointer(MF)) {
    TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
                     false);
    if (NeedsWinCFI) {
      HasWinCFI = true;
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
          .setMIFlag(MachineInstr::FrameSetup);
    }
  }

  // The very last FrameSetup instruction indicates the end of prologue. Emit a
  // SEH opcode indicating the prologue end.
  if (NeedsWinCFI && HasWinCFI) {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
        .setMIFlag(MachineInstr::FrameSetup);
  }

  if (needsFrameMoves) {
    const DataLayout &TD = MF.getDataLayout();
    const int StackGrowth = isTargetDarwin(MF)
                                ? (2 * -TD.getPointerSize(0))
                                : -AFI->getCalleeSavedStackSize();
    Register FramePtr = RegInfo->getFrameRegister(MF);
    // An example of the prologue:
    //
    //     .globl __foo
    //     .align 2
    //  __foo:
    //  Ltmp0:
    //     .cfi_startproc
    //     .cfi_personality 155, ___gxx_personality_v0
    //  Leh_func_begin:
    //     .cfi_lsda 16, Lexception33
    //
    //     stp  xa,bx, [sp, -#offset]!
    //     ...
    //     stp  x28, x27, [sp, #offset-32]
    //     stp  fp, lr, [sp, #offset-16]
    //     add  fp, sp, #offset - 16
    //     sub  sp, sp, #1360
    //
    // The Stack:
    //       +-------------------------------------------+
    // 10000 | ........ | ........ | ........ | ........ |
    // 10004 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10008 | ........ | ........ | ........ | ........ |
    // 1000c | ........ | ........ | ........ | ........ |
    //       +===========================================+
    // 10010 |                X28 Register               |
    // 10014 |                X28 Register               |
    //       +-------------------------------------------+
    // 10018 |                X27 Register               |
    // 1001c |                X27 Register               |
    //       +===========================================+
    // 10020 |               Frame Pointer               |
    // 10024 |               Frame Pointer               |
    //       +-------------------------------------------+
    // 10028 |               Link Register               |
    // 1002c |               Link Register               |
    //       +===========================================+
    // 10030 | ........ | ........ | ........ | ........ |
    // 10034 | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    // 10038 | ........ | ........ | ........ | ........ |
    // 1003c | ........ | ........ | ........ | ........ |
    //       +-------------------------------------------+
    //
    //     [sp] = 10030        ::    >>initial value<<
    //     sp = 10020          ::  stp  fp, lr, [sp, #-16]!
    //     fp = sp == 10020    ::  mov  fp, sp
    //     [sp] == 10020       ::  stp  x28, x27, [sp, #-16]!
    //     sp == 10010         ::  >>final value<<
    //
    // The frame pointer (w29) points to address 10020. If we use an offset of
    // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
    // for w27, and -32 for w28:
    //
    //  Ltmp1:
    //     .cfi_def_cfa w29, 16
    //  Ltmp2:
    //     .cfi_offset w30, -8
    //  Ltmp3:
    //     .cfi_offset w29, -16
    //  Ltmp4:
    //     .cfi_offset w27, -24
    //  Ltmp5:
    //     .cfi_offset w28, -32

    if (HasFP) {
      // Define the current CFA rule to use the provided FP.
      unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
      unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
          nullptr, Reg, StackGrowth - FixedObject));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    } else {
      // Encode the stack size of the leaf function.
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
      BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIIndex)
          .setMIFlags(MachineInstr::FrameSetup);
    }

    // Now emit the moves for whatever callee saved regs we have (including FP,
    // LR if those are saved).
    emitCalleeSavedFrameMoves(MBB, MBBI);
  }
}

static void InsertReturnAddressAuth(MachineFunction &MF,
                                    MachineBasicBlock &MBB) {
  if (!ShouldSignReturnAddress(MF))
    return;
  const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // The AUTIASP instruction assembles to a hint instruction before v8.3a so
  // this instruction can safely be used for any v8a architecture.
  // From v8.3a onwards there are optimised authenticate LR and return
  // instructions, namely RETA{A,B}, that can be used instead.
  if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
      MBBI->getOpcode() == AArch64::RET_ReallyLR) {
    BuildMI(MBB, MBBI, DL,
            TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
        .copyImplicitOps(*MBBI);
    MBB.erase(MBBI);
  } else {
    BuildMI(
        MBB, MBBI, DL,
        TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
        .setMIFlag(MachineInstr::FrameDestroy);
  }
}

static bool isFuncletReturnInstr(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default:
    return false;
  case AArch64::CATCHRET:
  case AArch64::CLEANUPRET:
    return true;
  }
}

1318 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
1319 MachineBasicBlock &MBB) const {
1320 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1321 MachineFrameInfo &MFI = MF.getFrameInfo();
1322 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1323 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1324 DebugLoc DL;
1325 bool IsTailCallReturn = false;
1326 bool NeedsWinCFI = needsWinCFI(MF);
1327 bool HasWinCFI = false;
1328 bool IsFunclet = false;
1329 auto WinCFI = make_scope_exit([&]() {
1330 if (!MF.hasWinCFI())
1331 MF.setHasWinCFI(HasWinCFI);
1334 if (MBB.end() != MBBI) {
1335 DL = MBBI->getDebugLoc();
1336 unsigned RetOpcode = MBBI->getOpcode();
1337 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
1338 RetOpcode == AArch64::TCRETURNri ||
1339 RetOpcode == AArch64::TCRETURNriBTI;
1340 IsFunclet = isFuncletReturnInstr(*MBBI);
1343 int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
1344 : MFI.getStackSize();
1345 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1347 // All calls are tail calls in GHC calling conv, and functions have no
1348 // prologue/epilogue.
1349 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1350 return;
1352 // Initial and residual are named for consistency with the prologue. Note that
1353 // in the epilogue, the residual adjustment is executed first.
1354 uint64_t ArgumentPopSize = 0;
1355 if (IsTailCallReturn) {
1356 MachineOperand &StackAdjust = MBBI->getOperand(1);
1358 // For a tail-call in a callee-pops-arguments environment, some or all of
1359 // the stack may actually be in use for the call's arguments, this is
1360 // calculated during LowerCall and consumed here...
1361 ArgumentPopSize = StackAdjust.getImm();
1362 } else {
1363 // ... otherwise the amount to pop is *all* of the argument space,
1364 // conveniently stored in the MachineFunctionInfo by
1365 // LowerFormalArguments. This will, of course, be zero for the C calling
1366 // convention.
1367 ArgumentPopSize = AFI->getArgumentStackToRestore();
1370 // The stack frame should be like below,
1372 // ---------------------- ---
1373 // | | |
1374 // | BytesInStackArgArea| CalleeArgStackSize
1375 // | (NumReusableBytes) | (of tail call)
1376 // | | ---
1377 // | | |
1378 // ---------------------| --- |
1379 // | | | |
1380 // | CalleeSavedReg | | |
1381 // | (CalleeSavedStackSize)| | |
1382 // | | | |
1383 // ---------------------| | NumBytes
1384 // | | StackSize (StackAdjustUp)
1385 // | LocalStackSize | | |
1386 // | (covering callee | | |
1387 // | args) | | |
1388 // | | | |
1389 // ---------------------- --- ---
1391 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1392 // = StackSize + ArgumentPopSize
1394 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1395 // it as the 2nd argument of AArch64ISD::TC_RETURN.
1397 auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1399 bool IsWin64 =
1400 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1401 // Var args are accounted for in the containing function, so don't
1402 // include them for funclets.
1403 unsigned FixedObject =
1404 (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1406 uint64_t AfterCSRPopSize = ArgumentPopSize;
1407 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1408 // We cannot rely on the local stack size set in emitPrologue if the function
1409 // has funclets, as funclets have different local stack size requirements, and
1410 // the current value set in emitPrologue may be that of the containing
1411 // function.
1412 if (MF.hasEHFunclets())
1413 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1414 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1415 // Assume we can't combine the last pop with the sp restore.
1417 if (!CombineSPBump && PrologueSaveSize != 0) {
1418 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1419 while (AArch64InstrInfo::isSEHInstruction(*Pop))
1420 Pop = std::prev(Pop);
1421 // Converting the last ldp to a post-index ldp is valid only if the last
1422 // ldp's offset is 0.
1423 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1424 // If the offset is 0, convert it to a post-index ldp.
1425 if (OffsetOp.getImm() == 0)
1426 convertCalleeSaveRestoreToSPPrePostIncDec(
1427 MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1428 else {
1429 // If not, make sure to emit an add after the last ldp.
1430 // We're doing this by transfering the size to be restored from the
1431 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1432 // pops.
1433 AfterCSRPopSize += PrologueSaveSize;
1437 // Move past the restores of the callee-saved registers.
1438 // If we plan on combining the sp bump of the local stack size and the callee
1439 // save stack size, we might need to adjust the CSR save and restore offsets.
1440 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
1441 MachineBasicBlock::iterator Begin = MBB.begin();
1442 while (LastPopI != Begin) {
1443 --LastPopI;
1444 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
1445 ++LastPopI;
1446 break;
1447 } else if (CombineSPBump)
1448 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
1449 NeedsWinCFI, &HasWinCFI);
1452 if (NeedsWinCFI) {
1453 HasWinCFI = true;
1454 BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1455 .setMIFlag(MachineInstr::FrameDestroy);
1458 const StackOffset &SVEStackSize = getSVEStackSize(MF);
1460 // If there is a single SP update, insert it before the ret and we're done.
1461 if (CombineSPBump) {
1462 assert(!SVEStackSize && "Cannot combine SP bump with SVE");
1463 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1464 {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
1465 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1466 if (NeedsWinCFI && HasWinCFI)
1467 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1468 TII->get(AArch64::SEH_EpilogEnd))
1469 .setMIFlag(MachineInstr::FrameDestroy);
1470 return;
1473 NumBytes -= PrologueSaveSize;
1474 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1476 // Deallocate the SVE area.
1477 if (SVEStackSize)
1478 if (!AFI->isStackRealigned())
1479 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, SVEStackSize,
1480 TII, MachineInstr::FrameDestroy);
1482 if (!hasFP(MF)) {
1483 bool RedZone = canUseRedZone(MF);
1484 // If this was a redzone leaf function, we don't need to restore the
1485 // stack pointer (but we may need to pop stack args for fastcc).
1486 if (RedZone && AfterCSRPopSize == 0)
1487 return;
1489 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1490 int StackRestoreBytes = RedZone ? 0 : NumBytes;
1491 if (NoCalleeSaveRestore)
1492 StackRestoreBytes += AfterCSRPopSize;
1494 // If we were able to combine the local stack pop with the argument pop,
1495 // then we're done.
1496 bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
1498 // If we're done after this, make sure to help the load store optimizer.
1499 if (Done)
1500 adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1502 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1503 {StackRestoreBytes, MVT::i8}, TII,
1504 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1505 if (Done) {
1506 if (NeedsWinCFI) {
1507 HasWinCFI = true;
1508 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1509 TII->get(AArch64::SEH_EpilogEnd))
1510 .setMIFlag(MachineInstr::FrameDestroy);
1511 }
1512 return;
1513 }
1515 NumBytes = 0;
1516 }
1518 // Restore the original stack pointer.
1519 // FIXME: Rather than doing the math here, we should instead just use
1520 // non-post-indexed loads for the restores if we aren't actually going to
1521 // be able to save any instructions.
1522 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1523 int64_t OffsetToFrameRecord =
1524 isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
1525 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1526 {OffsetToFrameRecord, MVT::i8},
1527 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
1528 } else if (NumBytes)
1529 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1530 {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
1531 NeedsWinCFI);
1533 // This must be placed after the callee-save restore code, because that code
1534 // assumes the SP is at the same location as it was after the callee-save spill
1535 // code in the prologue.
1536 if (AfterCSRPopSize) {
1537 // Find an insertion point for the first ldp so that it goes before the
1538 // shadow call stack epilog instruction. This ensures that the restore of
1539 // lr from x18 is placed after the restore from sp.
1540 auto FirstSPPopI = MBB.getFirstTerminator();
1541 while (FirstSPPopI != Begin) {
1542 auto Prev = std::prev(FirstSPPopI);
1543 if (Prev->getOpcode() != AArch64::LDRXpre ||
1544 Prev->getOperand(0).getReg() == AArch64::SP)
1545 break;
1546 FirstSPPopI = Prev;
1547 }
1549 adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1551 emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1552 {(int64_t)AfterCSRPopSize, MVT::i8}, TII,
1553 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1554 }
1555 if (NeedsWinCFI && HasWinCFI)
1556 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1557 .setMIFlag(MachineInstr::FrameDestroy);
1559 MF.setHasWinCFI(HasWinCFI);
1560 }
1562 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1563 /// debug info. It's the same as what we use for resolving the code-gen
1564 /// references for now. FIXME: This can go wrong when references are
1565 /// SP-relative and simple call frames aren't used.
1566 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1567 int FI,
1568 unsigned &FrameReg) const {
1569 return resolveFrameIndexReference(
1570 MF, FI, FrameReg,
1571 /*PreferFP=*/
1572 MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
1573 /*ForSimm=*/false)
1574 .getBytes();
1575 }
1577 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1578 const MachineFunction &MF, int FI) const {
1579 return getSEHFrameIndexOffset(MF, FI);
1580 }
1582 static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
1583 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1584 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1585 bool IsWin64 =
1586 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1587 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1588 unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
1589 return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
1590 }
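// [Illustrative worked example added by the editor, not from the original
// source.] On Darwin the frame record sits 16 bytes below the entry SP, so
// FPAdjust is 16: a local at ObjectOffset = -24, with no Win64 varargs area
// (FixedObject = 0), yields -24 + 0 + 16 = -8, i.e. the slot is addressed
// as [x29, #-8].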
1592 static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
1593 const auto &MFI = MF.getFrameInfo();
1594 return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8};
1595 }
1597 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1598 int FI) const {
1599 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1600 MF.getSubtarget().getRegisterInfo());
1601 int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1602 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1603 ? getFPOffset(MF, ObjectOffset).getBytes()
1604 : getStackOffset(MF, ObjectOffset).getBytes();
1605 }
1607 StackOffset AArch64FrameLowering::resolveFrameIndexReference(
1608 const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
1609 bool ForSimm) const {
1610 const auto &MFI = MF.getFrameInfo();
1611 int ObjectOffset = MFI.getObjectOffset(FI);
1612 bool isFixed = MFI.isFixedObjectIndex(FI);
1613 return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
1614 PreferFP, ForSimm);
1615 }
1617 StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
1618 const MachineFunction &MF, int ObjectOffset, bool isFixed,
1619 unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
1620 const auto &MFI = MF.getFrameInfo();
1621 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1622 MF.getSubtarget().getRegisterInfo());
1623 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1624 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1626 int FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
1627 int Offset = getStackOffset(MF, ObjectOffset).getBytes();
1628 bool isCSR =
1629 !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
1631 const StackOffset &SVEStackSize = getSVEStackSize(MF);
1632 if (SVEStackSize)
1633 llvm_unreachable("Accessing frame indices in presence of SVE "
1634 "not yet supported");
1636 // Use frame pointer to reference fixed objects. Use it for locals if
1637 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1638 // reliable as a base). Make sure useFPForScavengingIndex() does the
1639 // right thing for the emergency spill slot.
1640 bool UseFP = false;
1641 if (AFI->hasStackFrame()) {
1642 // Note: Keeping the following as multiple 'if' statements rather than
1643 // merging to a single expression for readability.
1645 // Argument access should always use the FP.
1646 if (isFixed) {
1647 UseFP = hasFP(MF);
1648 } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
1649 // References to the CSR area must use FP if we're re-aligning the stack
1650 // since the dynamically-sized alignment padding is between the SP/BP and
1651 // the CSR area.
1652 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1653 UseFP = true;
1654 } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
1655 // If the FPOffset is negative and we're producing a signed immediate, we
1656 // have to keep in mind that the available offset range for negative
1657 // offsets is smaller than for positive ones. If an offset is available
1658 // via the FP and the SP, use whichever is closest.
1659 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1660 PreferFP |= Offset > -FPOffset;
1662 if (MFI.hasVarSizedObjects()) {
1663 // If we have variable sized objects, we can use either FP or BP, as the
1664 // SP offset is unknown. We can use the base pointer if we have one and
1665 // FP is not preferred. If not, we're stuck with using FP.
1666 bool CanUseBP = RegInfo->hasBasePointer(MF);
1667 if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
1668 UseFP = PreferFP;
1669 else if (!CanUseBP) // Can't use BP. Forced to use FP.
1670 UseFP = true;
1671 // else we can use BP and FP, but the offset from FP won't fit.
1672 // That will make us scavenge registers which we can probably avoid by
1673 // using BP. If it won't fit for BP either, we'll scavenge anyway.
1674 } else if (FPOffset >= 0) {
1675 // Use SP or FP, whichever gives us the best chance of the offset
1676 // being in range for direct access. If the FPOffset is positive,
1677 // that'll always be best, as the SP will be even further away.
1678 UseFP = true;
1679 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1680 // Funclets access the locals contained in the parent's stack frame
1681 // via the frame pointer, so we have to use the FP in the parent
1682 // function.
1683 (void) Subtarget;
1684 assert(
1685 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
1686 "Funclets should only be present on Win64");
1687 UseFP = true;
1688 } else {
1689 // We have the choice between FP and (SP or BP).
1690 if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
1691 UseFP = true;
1692 }
1693 }
1694 }
1696 assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
1697 "In the presence of dynamic stack pointer realignment, "
1698 "non-argument/CSR objects cannot be accessed through the frame pointer");
1700 if (UseFP) {
1701 FrameReg = RegInfo->getFrameRegister(MF);
1702 return StackOffset(FPOffset, MVT::i8);
1703 }
1705 // Use the base pointer if we have one.
1706 if (RegInfo->hasBasePointer(MF))
1707 FrameReg = RegInfo->getBaseRegister();
1708 else {
1709 assert(!MFI.hasVarSizedObjects() &&
1710 "Can't use SP when we have var sized objects.");
1711 FrameReg = AArch64::SP;
1712 // If we're using the red zone for this function, the SP won't actually
1713 // be adjusted, so the offsets will be negative. They're also all
1714 // within range of the signed 9-bit immediate instructions.
1715 if (canUseRedZone(MF))
1716 Offset -= AFI->getLocalStackSize();
1717 }
1719 return StackOffset(Offset, MVT::i8);
1720 }
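// [Hypothetical usage sketch added by the editor, not from the original
// source.] A caller consumes the FrameReg out-parameter together with the
// returned offset, roughly:
//   unsigned FrameReg;
//   StackOffset Off = TFL.resolveFrameIndexReference(
//       MF, FI, FrameReg, /*PreferFP=*/false, /*ForSimm=*/true);
//   // ...emit the access as [FrameReg, #Off.getBytes()] when the immediate
//   // fits, falling back to a scavenged scratch register otherwise.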
1722 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
1723 // Do not set a kill flag on values that are also marked as live-in. This
1724 // happens with the @llvm.returnaddress intrinsic and with arguments passed in
1725 // callee saved registers.
1726 // Omitting the kill flags is conservatively correct even if the live-in
1727 // is not used after all.
1728 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
1729 return getKillRegState(!IsLiveIn);
1730 }
1732 static bool produceCompactUnwindFrame(MachineFunction &MF) {
1733 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1734 AttributeList Attrs = MF.getFunction().getAttributes();
1735 return Subtarget.isTargetMachO() &&
1736 !(Subtarget.getTargetLowering()->supportSwiftError() &&
1737 Attrs.hasAttrSomewhere(Attribute::SwiftError));
1738 }
1740 static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
1741 bool NeedsWinCFI) {
1742 // If we are generating register pairs for a Windows function that requires
1743 // EH support, then pair consecutive registers only. There are no unwind
1744 // opcodes for saves/restores of non-consecutive register pairs.
1745 // The unwind opcodes are save_regp, save_regp_x, save_fregp, save_fregp_x.
1746 // https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
1748 // TODO: LR can be paired with any register. We don't support this yet in
1749 // the MCLayer. We need to add support for the save_lrpair unwind code.
1750 if (!NeedsWinCFI)
1751 return false;
1752 if (Reg2 == Reg1 + 1)
1753 return false;
1754 return true;
1755 }
1757 /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
1758 /// WindowsCFI requires that only consecutive registers can be paired.
1759 /// LR and FP need to be allocated together when the frame needs to save
1760 /// the frame-record. This means any other register pairing with LR is invalid.
1761 static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1762 bool NeedsWinCFI, bool NeedsFrameRecord) {
1763 if (NeedsWinCFI)
1764 return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
1766 // If we need to store the frame record, don't pair any register
1767 // with LR other than FP.
1768 if (NeedsFrameRecord)
1769 return Reg2 == AArch64::LR;
1771 return false;
1772 }
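// [Illustrative behavior table added by the editor, not from the original
// source; register-enum adjacency is assumed as described above.]
//   Reg1 = x19, Reg2 = x20, WinCFI         -> kept  (consecutive pair)
//   Reg1 = x19, Reg2 = x21, WinCFI         -> split (no unwind opcode)
//   Reg1 = lr,  Reg2 = fp,  frame record   -> kept  (Reg2 is not LR)
//   Reg1 = x28, Reg2 = lr,  frame record   -> split (only FP may accompany LR)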
1774 namespace {
1776 struct RegPairInfo {
1777 unsigned Reg1 = AArch64::NoRegister;
1778 unsigned Reg2 = AArch64::NoRegister;
1779 int FrameIdx;
1780 int Offset;
1781 enum RegType { GPR, FPR64, FPR128 } Type;
1783 RegPairInfo() = default;
1785 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
1786 };
1788 } // end anonymous namespace
1790 static void computeCalleeSaveRegisterPairs(
1791 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
1792 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
1793 bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
1795 if (CSI.empty())
1796 return;
1798 bool NeedsWinCFI = needsWinCFI(MF);
1799 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1800 MachineFrameInfo &MFI = MF.getFrameInfo();
1801 CallingConv::ID CC = MF.getFunction().getCallingConv();
1802 unsigned Count = CSI.size();
1803 (void)CC;
1804 // MachO's compact unwind format relies on all registers being stored in
1805 // pairs.
1806 assert((!produceCompactUnwindFrame(MF) ||
1807 CC == CallingConv::PreserveMost ||
1808 (Count & 1) == 0) &&
1809 "Odd number of callee-saved regs to spill!");
1810 int Offset = AFI->getCalleeSavedStackSize();
1811 // On Linux, we will have at most one non-paired register. On Windows
1812 // with CFI, we can have multiple unpaired registers in order to utilize the
1813 // available unwind codes. This flag ensures that the alignment fixup is done
1814 // only once, as intended.
1815 bool FixupDone = false;
1816 for (unsigned i = 0; i < Count; ++i) {
1817 RegPairInfo RPI;
1818 RPI.Reg1 = CSI[i].getReg();
1820 if (AArch64::GPR64RegClass.contains(RPI.Reg1))
1821 RPI.Type = RegPairInfo::GPR;
1822 else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
1823 RPI.Type = RegPairInfo::FPR64;
1824 else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
1825 RPI.Type = RegPairInfo::FPR128;
1826 else
1827 llvm_unreachable("Unsupported register class.");
1829 // Add the next reg to the pair if it is in the same register class.
1830 if (i + 1 < Count) {
1831 unsigned NextReg = CSI[i + 1].getReg();
1832 switch (RPI.Type) {
1833 case RegPairInfo::GPR:
1834 if (AArch64::GPR64RegClass.contains(NextReg) &&
1835 !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1836 NeedsFrameRecord))
1837 RPI.Reg2 = NextReg;
1838 break;
1839 case RegPairInfo::FPR64:
1840 if (AArch64::FPR64RegClass.contains(NextReg) &&
1841 !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1842 RPI.Reg2 = NextReg;
1843 break;
1844 case RegPairInfo::FPR128:
1845 if (AArch64::FPR128RegClass.contains(NextReg))
1846 RPI.Reg2 = NextReg;
1847 break;
1848 }
1849 }
1851 // If either of the registers to be saved is the lr register, it means that
1852 // we also need to save lr in the shadow call stack.
1853 if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
1854 MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
1855 if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
1856 report_fatal_error("Must reserve x18 to use shadow call stack");
1857 NeedShadowCallStackProlog = true;
1858 }
1860 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1861 // list to come in sorted by frame index so that we can issue the store
1862 // pair instructions directly. Assert if we see anything otherwise.
1864 // The order of the registers in the list is controlled by
1865 // getCalleeSavedRegs(), so they will always be in-order, as well.
1866 assert((!RPI.isPaired() ||
1867 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
1868 "Out of order callee saved regs!");
1870 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
1871 RPI.Reg1 == AArch64::LR) &&
1872 "FrameRecord must be allocated together with LR");
1874 // MachO's compact unwind format relies on all registers being stored in
1875 // adjacent register pairs.
1876 assert((!produceCompactUnwindFrame(MF) ||
1877 CC == CallingConv::PreserveMost ||
1878 (RPI.isPaired() &&
1879 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1880 RPI.Reg1 + 1 == RPI.Reg2))) &&
1881 "Callee-save registers not saved as adjacent register pair!");
1883 RPI.FrameIdx = CSI[i].getFrameIdx();
1885 int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
1886 Offset -= RPI.isPaired() ? 2 * Scale : Scale;
1888 // Round up size of non-pair to pair size if we need to pad the
1889 // callee-save area to ensure 16-byte alignment.
1890 if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
1891 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
1892 FixupDone = true;
1893 Offset -= 8;
1894 assert(Offset % 16 == 0);
1895 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
1896 MFI.setObjectAlignment(RPI.FrameIdx, 16);
1897 }
1899 assert(Offset % Scale == 0);
1900 RPI.Offset = Offset / Scale;
1901 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1902 "Offset out of bounds for LDP/STP immediate");
1904 RegPairs.push_back(RPI);
1905 if (RPI.isPaired())
1906 ++i;
1907 }
1908 }
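// [Standalone illustrative sketch added by the editor, not part of the
// original file; the helper name is hypothetical.] The Offset/Scale
// arithmetic above targets the signed, scaled 7-bit immediate of LDP/STP:
// X-register pairs use Scale = 8 (byte offsets -512..504) and Q-register
// pairs use Scale = 16 (byte offsets -1024..1008).
static bool fitsInLdpStpImm7(int ByteOffset, int Scale) {
  if (ByteOffset % Scale != 0) // the encoding has no sub-scale granularity
    return false;
  int Imm = ByteOffset / Scale;
  return Imm >= -64 && Imm <= 63; // signed 7-bit immediate field
}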
1910 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1911 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1912 const std::vector<CalleeSavedInfo> &CSI,
1913 const TargetRegisterInfo *TRI) const {
1914 MachineFunction &MF = *MBB.getParent();
1915 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1916 bool NeedsWinCFI = needsWinCFI(MF);
1917 DebugLoc DL;
1918 SmallVector<RegPairInfo, 8> RegPairs;
1920 bool NeedShadowCallStackProlog = false;
1921 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1922 NeedShadowCallStackProlog, hasFP(MF));
1923 const MachineRegisterInfo &MRI = MF.getRegInfo();
1925 if (NeedShadowCallStackProlog) {
1926 // Shadow call stack prolog: str x30, [x18], #8
1927 BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1928 .addReg(AArch64::X18, RegState::Define)
1929 .addReg(AArch64::LR)
1930 .addReg(AArch64::X18)
1931 .addImm(8)
1932 .setMIFlag(MachineInstr::FrameSetup);
1934 if (NeedsWinCFI)
1935 BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1936 .setMIFlag(MachineInstr::FrameSetup);
1938 if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1939 // Emit a CFI instruction that causes 8 to be subtracted from the value of
1940 // x18 when unwinding past this frame.
1941 static const char CFIInst[] = {
1942 dwarf::DW_CFA_val_expression,
1943 18, // register
1944 2, // length
1945 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1946 static_cast<char>(-8) & 0x7f, // addend (sleb128)
1947 };
1948 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1949 nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1950 BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1951 .addCFIIndex(CFIIndex)
1952 .setMIFlag(MachineInstr::FrameSetup);
1953 }
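// [Illustrative decoding added by the editor, not from the original
// source.] The escape above emits the bytes { 0x16, 0x12, 0x02, 0x82, 0x78 }:
//   0x16  DW_CFA_val_expression
//   0x12  register 18 (x18), uleb128
//   0x02  expression length in bytes, uleb128
//   0x82  DW_OP_breg18 (0x70 + 18)
//   0x78  sleb128(-8)
// i.e. "when unwinding, the caller's x18 is the current x18 minus 8".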
1955 // This instruction also makes x18 live-in to the entry block.
1956 MBB.addLiveIn(AArch64::X18);
1957 }
1959 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1960 ++RPII) {
1961 RegPairInfo RPI = *RPII;
1962 unsigned Reg1 = RPI.Reg1;
1963 unsigned Reg2 = RPI.Reg2;
1964 unsigned StrOpc;
1966 // Issue sequence of spills for cs regs. The first spill may be converted
1967 // to a pre-decrement store later by emitPrologue if the callee-save stack
1968 // area allocation can't be combined with the local stack area allocation.
1969 // For example:
1970 // stp x22, x21, [sp, #0] // addImm(+0)
1971 // stp x20, x19, [sp, #16] // addImm(+2)
1972 // stp fp, lr, [sp, #32] // addImm(+4)
1973 // Rationale: This sequence saves uop updates compared to a sequence of
1974 // pre-increment spills like stp xi,xj,[sp,#-16]!
1975 // Note: Similar rationale and sequence for restores in epilog.
1976 unsigned Size, Align;
1977 switch (RPI.Type) {
1978 case RegPairInfo::GPR:
1979 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1980 Size = 8;
1981 Align = 8;
1982 break;
1983 case RegPairInfo::FPR64:
1984 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1985 Size = 8;
1986 Align = 8;
1987 break;
1988 case RegPairInfo::FPR128:
1989 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
1990 Size = 16;
1991 Align = 16;
1992 break;
1993 }
1994 LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1995 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1996 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1997 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1998 dbgs() << ")\n");
2000 assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
2001 "Windows unwdinding requires a consecutive (FP,LR) pair");
2002 // Windows unwind codes require consecutive registers if registers are
2003 // paired. Make the switch here, so that the code below will save (x,x+1)
2004 // and not (x+1,x).
2005 unsigned FrameIdxReg1 = RPI.FrameIdx;
2006 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2007 if (NeedsWinCFI && RPI.isPaired()) {
2008 std::swap(Reg1, Reg2);
2009 std::swap(FrameIdxReg1, FrameIdxReg2);
2010 }
2011 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
2012 if (!MRI.isReserved(Reg1))
2013 MBB.addLiveIn(Reg1);
2014 if (RPI.isPaired()) {
2015 if (!MRI.isReserved(Reg2))
2016 MBB.addLiveIn(Reg2);
2017 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
2018 MIB.addMemOperand(MF.getMachineMemOperand(
2019 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2020 MachineMemOperand::MOStore, Size, Align));
2021 }
2022 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
2023 .addReg(AArch64::SP)
2024 .addImm(RPI.Offset) // [sp, #offset * scale],
2025 // where the scale factor is implicit in the opcode
2026 .setMIFlag(MachineInstr::FrameSetup);
2027 MIB.addMemOperand(MF.getMachineMemOperand(
2028 MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2029 MachineMemOperand::MOStore, Size, Align));
2030 if (NeedsWinCFI)
2031 InsertSEH(MIB, TII, MachineInstr::FrameSetup);
2032 }
2034 return true;
2035 }
2037 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
2038 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2039 std::vector<CalleeSavedInfo> &CSI,
2040 const TargetRegisterInfo *TRI) const {
2041 MachineFunction &MF = *MBB.getParent();
2042 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2043 DebugLoc DL;
2044 SmallVector<RegPairInfo, 8> RegPairs;
2045 bool NeedsWinCFI = needsWinCFI(MF);
2047 if (MI != MBB.end())
2048 DL = MI->getDebugLoc();
2050 bool NeedShadowCallStackProlog = false;
2051 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
2052 NeedShadowCallStackProlog, hasFP(MF));
2054 auto EmitMI = [&](const RegPairInfo &RPI) {
2055 unsigned Reg1 = RPI.Reg1;
2056 unsigned Reg2 = RPI.Reg2;
2058 // Issue sequence of restores for cs regs. The last restore may be converted
2059 // to a post-increment load later by emitEpilogue if the callee-save stack
2060 // area allocation can't be combined with the local stack area allocation.
2061 // For example:
2062 // ldp fp, lr, [sp, #32] // addImm(+4)
2063 // ldp x20, x19, [sp, #16] // addImm(+2)
2064 // ldp x22, x21, [sp, #0] // addImm(+0)
2065 // Note: see comment in spillCalleeSavedRegisters()
2066 unsigned LdrOpc;
2067 unsigned Size, Align;
2068 switch (RPI.Type) {
2069 case RegPairInfo::GPR:
2070 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2071 Size = 8;
2072 Align = 8;
2073 break;
2074 case RegPairInfo::FPR64:
2075 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2076 Size = 8;
2077 Align = 8;
2078 break;
2079 case RegPairInfo::FPR128:
2080 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2081 Size = 16;
2082 Align = 16;
2083 break;
2084 }
2085 LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
2086 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
2087 dbgs() << ") -> fi#(" << RPI.FrameIdx;
2088 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
2089 dbgs() << ")\n");
2091 // Windows unwind codes require consecutive registers if registers are
2092 // paired. Make the switch here, so that the code below will restore (x,x+1)
2093 // and not (x+1,x).
2094 unsigned FrameIdxReg1 = RPI.FrameIdx;
2095 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2096 if (NeedsWinCFI && RPI.isPaired()) {
2097 std::swap(Reg1, Reg2);
2098 std::swap(FrameIdxReg1, FrameIdxReg2);
2099 }
2100 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
2101 if (RPI.isPaired()) {
2102 MIB.addReg(Reg2, getDefRegState(true));
2103 MIB.addMemOperand(MF.getMachineMemOperand(
2104 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2105 MachineMemOperand::MOLoad, Size, Align));
2106 }
2107 MIB.addReg(Reg1, getDefRegState(true))
2108 .addReg(AArch64::SP)
2109 .addImm(RPI.Offset) // [sp, #offset * scale]
2110 // where the scale factor is implicit in the opcode
2111 .setMIFlag(MachineInstr::FrameDestroy);
2112 MIB.addMemOperand(MF.getMachineMemOperand(
2113 MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2114 MachineMemOperand::MOLoad, Size, Align));
2115 if (NeedsWinCFI)
2116 InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
2117 };
2118 if (ReverseCSRRestoreSeq)
2119 for (const RegPairInfo &RPI : reverse(RegPairs))
2120 EmitMI(RPI);
2121 else
2122 for (const RegPairInfo &RPI : RegPairs)
2123 EmitMI(RPI);
2125 if (NeedShadowCallStackProlog) {
2126 // Shadow call stack epilog: ldr x30, [x18, #-8]!
2127 BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
2128 .addReg(AArch64::X18, RegState::Define)
2129 .addReg(AArch64::LR, RegState::Define)
2130 .addReg(AArch64::X18)
2131 .addImm(-8)
2132 .setMIFlag(MachineInstr::FrameDestroy);
2133 }
2135 return true;
2136 }
2138 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
2139 BitVector &SavedRegs,
2140 RegScavenger *RS) const {
2141 // All calls are tail calls in GHC calling conv, and functions have no
2142 // prologue/epilogue.
2143 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
2144 return;
2146 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2147 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2148 MF.getSubtarget().getRegisterInfo());
2149 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2150 unsigned UnspilledCSGPR = AArch64::NoRegister;
2151 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2153 MachineFrameInfo &MFI = MF.getFrameInfo();
2154 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2156 unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2157 ? RegInfo->getBaseRegister()
2158 : (unsigned)AArch64::NoRegister;
2160 unsigned ExtraCSSpill = 0;
2161 // Figure out which callee-saved registers to save/restore.
2162 for (unsigned i = 0; CSRegs[i]; ++i) {
2163 const unsigned Reg = CSRegs[i];
2165 // Add the base pointer register to SavedRegs if it is callee-save.
2166 if (Reg == BasePointerReg)
2167 SavedRegs.set(Reg);
2169 bool RegUsed = SavedRegs.test(Reg);
2170 unsigned PairedReg = CSRegs[i ^ 1];
2171 if (!RegUsed) {
2172 if (AArch64::GPR64RegClass.contains(Reg) &&
2173 !RegInfo->isReservedReg(MF, Reg)) {
2174 UnspilledCSGPR = Reg;
2175 UnspilledCSGPRPaired = PairedReg;
2176 }
2177 continue;
2178 }
2180 // MachO's compact unwind format relies on all registers being stored in
2181 // pairs.
2182 // FIXME: the usual format is actually better if unwinding isn't needed.
2183 if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
2184 !SavedRegs.test(PairedReg)) {
2185 SavedRegs.set(PairedReg);
2186 if (AArch64::GPR64RegClass.contains(PairedReg) &&
2187 !RegInfo->isReservedReg(MF, PairedReg))
2188 ExtraCSSpill = PairedReg;
2189 }
2190 }
2192 // Calculate the callee-saved stack size.
2193 unsigned CSStackSize = 0;
2194 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2195 const MachineRegisterInfo &MRI = MF.getRegInfo();
2196 for (unsigned Reg : SavedRegs.set_bits())
2197 CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
2199 // Save number of saved regs, so we can easily update CSStackSize later.
2200 unsigned NumSavedRegs = SavedRegs.count();
2202 // The frame record needs to be created by saving the appropriate registers.
2203 unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
2204 if (hasFP(MF) ||
2205 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2206 SavedRegs.set(AArch64::FP);
2207 SavedRegs.set(AArch64::LR);
2208 }
2210 LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2211 for (unsigned Reg
2212 : SavedRegs.set_bits()) dbgs()
2213 << ' ' << printReg(Reg, RegInfo);
2214 dbgs() << "\n";);
2216 bool HasSVEStackObjects = [&MFI]() {
2217 for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
2218 if (MFI.getStackID(I) == TargetStackID::SVEVector &&
2219 MFI.getObjectOffset(I) < 0)
2220 return true;
2221 // Note: We don't take allocatable stack objects into
2222 // account yet, because allocation for those is not yet
2223 // implemented.
2224 return false;
2225 }();
2227 // If any callee-saved registers are used, the frame cannot be eliminated.
2228 bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
2230 // The CSR spill slots have not been allocated yet, so estimateStackSize
2231 // won't include them.
2232 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2233 bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2234 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2235 AFI->setHasStackFrame(true);
2237 // Estimate if we might need to scavenge a register at some point in order
2238 // to materialize a stack offset. If so, either spill one additional
2239 // callee-saved register or reserve a special spill slot to facilitate
2240 // register scavenging. If we already spilled an extra callee-saved register
2241 // above to keep the number of spills even, we don't need to do anything else
2242 // here.
2243 if (BigStack) {
2244 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2245 LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2246 << " to get a scratch register.\n");
2247 SavedRegs.set(UnspilledCSGPR);
2248 // MachO's compact unwind format relies on all registers being stored in
2249 // pairs, so if we need to spill one extra for BigStack, then we need to
2250 // store the pair.
2251 if (produceCompactUnwindFrame(MF))
2252 SavedRegs.set(UnspilledCSGPRPaired);
2253 ExtraCSSpill = UnspilledCSGPR;
2254 }
2256 // If we didn't find an extra callee-saved register to spill, create
2257 // an emergency spill slot.
2258 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
2259 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2260 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2261 unsigned Size = TRI->getSpillSize(RC);
2262 unsigned Align = TRI->getSpillAlignment(RC);
2263 int FI = MFI.CreateStackObject(Size, Align, false);
2264 RS->addScavengingFrameIndex(FI);
2265 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2266 << " as the emergency spill slot.\n");
2270 // Add the size of the additional 64-bit GPR saves.
2271 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2272 unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
2273 LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2274 << EstimatedStackSize + AlignedCSStackSize
2275 << " bytes.\n");
2277 // Round up to register pair alignment to avoid additional SP adjustment
2278 // instructions.
2279 AFI->setCalleeSavedStackSize(AlignedCSStackSize);
2280 AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
2281 }
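// [Illustrative worked example added by the editor, not from the original
// source.] Saving x19, x20 and x21 gives CSStackSize = 24, which rounds up
// to AlignedCSStackSize = 32; the 8 spare bytes are flagged via
// setCalleeSaveStackHasFreeSpace() and may later be reused as a spill slot,
// see enableStackSlotScavenging() below.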
2283 bool AArch64FrameLowering::enableStackSlotScavenging(
2284 const MachineFunction &MF) const {
2285 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2286 return AFI->hasCalleeSaveStackFreeSpace();
2287 }
2289 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
2290 MachineFunction &MF, RegScavenger *RS) const {
2291 MachineFrameInfo &MFI = MF.getFrameInfo();
2293 assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
2294 "Upwards growing stack unsupported");
2296 // Process all fixed stack SVE objects.
2297 int64_t Offset = 0;
2298 for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
2299 unsigned StackID = MFI.getStackID(I);
2300 if (StackID == TargetStackID::SVEVector) {
2301 int64_t FixedOffset = -MFI.getObjectOffset(I);
2302 if (FixedOffset > Offset)
2303 Offset = FixedOffset;
2304 }
2305 }
2307 unsigned MaxAlign = getStackAlignment();
2308 uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
2310 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2311 AFI->setStackSizeSVE(SVEStackSize);
2312 assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
2314 // If this function isn't doing Win64-style C++ EH, we don't need to do
2315 // anything.
2316 if (!MF.hasEHFunclets())
2317 return;
2318 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2319 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
2321 MachineBasicBlock &MBB = MF.front();
2322 auto MBBI = MBB.begin();
2323 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
2324 ++MBBI;
2326 // Create an UnwindHelp object.
2327 int UnwindHelpFI =
2328 MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
2329 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
2330 // We need to store -2 into the UnwindHelp object at the start of the
2331 // function.
2332 DebugLoc DL;
2333 RS->enterBasicBlockEnd(MBB);
2334 RS->backward(std::prev(MBBI));
2335 unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
2336 assert(DstReg && "There must be a free register after frame setup");
2337 BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
2338 BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
2339 .addReg(DstReg, getKillRegState(true))
2340 .addFrameIndex(UnwindHelpFI)
2341 .addImm(0);
2342 }
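// [Illustrative expansion added by the editor, not from the original
// source; x8 stands for whatever register the scavenger actually returns.]
// The two instructions built above amount to:
//   mov  x8, #-2
//   stur x8, [<UnwindHelp slot>]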
2344 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2345 /// the update. This is easily retrieved as it is exactly the offset that is set
2346 /// in processFunctionBeforeFrameFinalized.
2347 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2348 const MachineFunction &MF, int FI, unsigned &FrameReg,
2349 bool IgnoreSPUpdates) const {
2350 const MachineFrameInfo &MFI = MF.getFrameInfo();
2351 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2352 << MFI.getObjectOffset(FI) << "\n");
2353 FrameReg = AArch64::SP;
2354 return MFI.getObjectOffset(FI);
2355 }
2357 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2358 /// the parent's frame pointer.
2359 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2360 const MachineFunction &MF) const {
2361 return 0;
2362 }
2364 /// Funclets only need to account for space for the callee saved registers,
2365 /// as the locals are accounted for in the parent's stack frame.
2366 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2367 const MachineFunction &MF) const {
2368 // This is the size of the pushed CSRs.
2369 unsigned CSSize =
2370 MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2371 // This is the amount of stack a funclet needs to allocate.
2372 return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2373 getStackAlignment());
2374 }