[Alignment][NFC] Use Align with TargetLowering::setPrefLoopAlignment
[llvm-complete.git] / lib / Target / AArch64 / AArch64FrameLowering.cpp
blob8357b763179d2bd9aada464ec66382a1b4382e1e
1 //===- AArch64FrameLowering.cpp - AArch64 Frame Lowering -------*- C++ -*-====//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the AArch64 implementation of TargetFrameLowering class.
11 // On AArch64, stack frames are structured as follows:
13 // The stack grows downward.
15 // All of the individual frame areas on the frame below are optional, i.e. it's
16 // possible to create a function so that the particular area isn't present
17 // in the frame.
19 // At function entry, the "frame" looks as follows:
21 // | | Higher address
22 // |-----------------------------------|
23 // | |
24 // | arguments passed on the stack |
25 // | |
26 // |-----------------------------------| <- sp
27 // | | Lower address
30 // After the prologue has run, the frame has the following general structure.
31 // Note that this doesn't depict the case where a red-zone is used. Also,
32 // technically the last frame area (VLAs) isn't created until the main
33 // function body executes, after the prologue has run. However, it's depicted here
34 // for completeness.
36 // | | Higher address
37 // |-----------------------------------|
38 // | |
39 // | arguments passed on the stack |
40 // | |
41 // |-----------------------------------|
42 // | |
43 // | (Win64 only) varargs from reg |
44 // | |
45 // |-----------------------------------|
46 // | |
47 // | callee-saved gpr registers | <--.
48 // | | | On Darwin platforms these
49 // |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
50 // | | | (frame record first)
51 // | prev_fp, prev_lr | <--'
52 // | (a.k.a. "frame record") |
53 // |-----------------------------------| <- fp(=x29)
54 // | |
55 // | callee-saved fp/simd/SVE regs |
56 // | |
57 // |-----------------------------------|
58 // |.empty.space.to.make.part.below....|
59 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
60 // |.the.standard.16-byte.alignment....| compile time; if present)
61 // |-----------------------------------|
62 // | |
63 // | local variables of fixed size |
64 // | including spill slots |
65 // |-----------------------------------| <- bp(not defined by ABI,
66 // |.variable-sized.local.variables....| LLVM chooses X19)
67 // |.(VLAs)............................| (size of this area is unknown at
68 // |...................................| compile time)
69 // |-----------------------------------| <- sp
70 // | | Lower address
73 // To access data in a frame, a constant offset from one of the pointers
74 // (fp, bp, sp) must be computable at compile time. The size
75 // of the areas with a dotted background cannot be computed at compile-time
76 // if they are present, making it required to have all three of fp, bp and
77 // sp to be set up to be able to access all contents in the frame areas,
78 // assuming all of the frame areas are non-empty.
80 // For most functions, some of the frame areas are empty. For those functions,
81 // it may not be necessary to set up fp or bp:
82 // * A base pointer is definitely needed when there are both VLAs and local
83 // variables with more-than-default alignment requirements.
84 // * A frame pointer is definitely needed when there are local variables with
85 // more-than-default alignment requirements.
87 // For Darwin platforms the frame-record (fp, lr) is stored at the top of the
88 // callee-saved area, since the unwind encoding does not allow for encoding
89 // this dynamically and existing tools depend on this layout. For other
90 // platforms, the frame-record is stored at the bottom of the (gpr) callee-saved
91 // area to allow SVE stack objects (allocated directly below the callee-saves,
92 // if available) to be accessed directly from the framepointer.
93 // The SVE spill/fill instructions have VL-scaled addressing modes such
94 // as:
95 // ldr z8, [fp, #-7 mul vl]
96 // For SVE the size of the vector length (VL) is not known at compile-time, so
97 // '#-7 mul vl' is an offset that can only be evaluated at runtime. With this
98 // layout, we don't need to add an unscaled offset to the framepointer before
99 // accessing the SVE object in the frame.
101 // In some cases when a base pointer is not strictly needed, it is generated
102 // anyway when offsets from the frame pointer to access local variables become
103 // so large that the offset can't be encoded in the immediate fields of loads
104 // or stores.
106 // FIXME: also explain the redzone concept.
107 // FIXME: also explain the concept of reserved call frames.
109 //===----------------------------------------------------------------------===//
111 #include "AArch64FrameLowering.h"
112 #include "AArch64InstrInfo.h"
113 #include "AArch64MachineFunctionInfo.h"
114 #include "AArch64RegisterInfo.h"
115 #include "AArch64StackOffset.h"
116 #include "AArch64Subtarget.h"
117 #include "AArch64TargetMachine.h"
118 #include "MCTargetDesc/AArch64AddressingModes.h"
119 #include "llvm/ADT/ScopeExit.h"
120 #include "llvm/ADT/SmallVector.h"
121 #include "llvm/ADT/Statistic.h"
122 #include "llvm/CodeGen/LivePhysRegs.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineFrameInfo.h"
125 #include "llvm/CodeGen/MachineFunction.h"
126 #include "llvm/CodeGen/MachineInstr.h"
127 #include "llvm/CodeGen/MachineInstrBuilder.h"
128 #include "llvm/CodeGen/MachineMemOperand.h"
129 #include "llvm/CodeGen/MachineModuleInfo.h"
130 #include "llvm/CodeGen/MachineOperand.h"
131 #include "llvm/CodeGen/MachineRegisterInfo.h"
132 #include "llvm/CodeGen/RegisterScavenging.h"
133 #include "llvm/CodeGen/TargetInstrInfo.h"
134 #include "llvm/CodeGen/TargetRegisterInfo.h"
135 #include "llvm/CodeGen/TargetSubtargetInfo.h"
136 #include "llvm/CodeGen/WinEHFuncInfo.h"
137 #include "llvm/IR/Attributes.h"
138 #include "llvm/IR/CallingConv.h"
139 #include "llvm/IR/DataLayout.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCDwarf.h"
144 #include "llvm/Support/CommandLine.h"
145 #include "llvm/Support/Debug.h"
146 #include "llvm/Support/ErrorHandling.h"
147 #include "llvm/Support/MathExtras.h"
148 #include "llvm/Support/raw_ostream.h"
149 #include "llvm/Target/TargetMachine.h"
150 #include "llvm/Target/TargetOptions.h"
151 #include <cassert>
152 #include <cstdint>
153 #include <iterator>
154 #include <vector>
156 using namespace llvm;
158 #define DEBUG_TYPE "frame-info"
160 static cl::opt<bool> EnableRedZone("aarch64-redzone",
161 cl::desc("enable use of redzone on AArch64"),
162 cl::init(false), cl::Hidden);
164 static cl::opt<bool>
165 ReverseCSRRestoreSeq("reverse-csr-restore-seq",
166 cl::desc("reverse the CSR restore sequence"),
167 cl::init(false), cl::Hidden);
169 STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
171 /// This is the biggest offset to the stack pointer we can encode in aarch64
172 /// instructions (without using a separate calculation and a temp register).
173 /// Note that the exception here are vector stores/loads which cannot encode any
174 /// displacements (see estimateRSStackSizeLimit(), isAArch64FrameOffsetLegal()).
175 static const unsigned DefaultSafeSPDisplacement = 255;
177 /// Look at each instruction that references stack frames and return the stack
178 /// size limit beyond which some of these instructions will require a scratch
179 /// register during their expansion later.
180 static unsigned estimateRSStackSizeLimit(MachineFunction &MF) {
181 // FIXME: For now, just conservatively guestimate based on unscaled indexing
182 // range. We'll end up allocating an unnecessary spill slot a lot, but
183 // realistically that's not a big deal at this stage of the game.
184 for (MachineBasicBlock &MBB : MF) {
185 for (MachineInstr &MI : MBB) {
186 if (MI.isDebugInstr() || MI.isPseudo() ||
187 MI.getOpcode() == AArch64::ADDXri ||
188 MI.getOpcode() == AArch64::ADDSXri)
189 continue;
191 for (const MachineOperand &MO : MI.operands()) {
192 if (!MO.isFI())
193 continue;
195 StackOffset Offset;
196 if (isAArch64FrameOffsetLegal(MI, Offset, nullptr, nullptr, nullptr) ==
197 AArch64FrameOffsetCannotUpdate)
198 return 0;
202 return DefaultSafeSPDisplacement;
205 bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
206 if (!EnableRedZone)
207 return false;
208 // Don't use the red zone if the function explicitly asks us not to.
209 // This is typically used for kernel code.
210 if (MF.getFunction().hasFnAttribute(Attribute::NoRedZone))
211 return false;
213 const MachineFrameInfo &MFI = MF.getFrameInfo();
214 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
215 unsigned NumBytes = AFI->getLocalStackSize();
217 return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
220 /// hasFP - Return true if the specified function should have a dedicated frame
221 /// pointer register.
222 bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
223 const MachineFrameInfo &MFI = MF.getFrameInfo();
224 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
225 // Win64 EH requires a frame pointer if funclets are present, as the locals
226 // are accessed off the frame pointer in both the parent function and the
227 // funclets.
228 if (MF.hasEHFunclets())
229 return true;
230 // Retain behavior of always omitting the FP for leaf functions when possible.
231 if (MFI.hasCalls() && MF.getTarget().Options.DisableFramePointerElim(MF))
232 return true;
233 if (MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
234 MFI.hasStackMap() || MFI.hasPatchPoint() ||
235 RegInfo->needsStackRealignment(MF))
236 return true;
237 // With large callframes around we may need to use FP to access the scavenging
238 // emergency spillslot.
240 // Unfortunately some calls to hasFP() like machine verifier ->
241 // getReservedReg() -> hasFP in the middle of global isel are too early
242 // to know the max call frame size. Hopefully conservatively returning "true"
243 // in those cases is fine.
244 // DefaultSafeSPDisplacement is fine as we only emergency spill GP regs.
245 if (!MFI.isMaxCallFrameSizeComputed() ||
246 MFI.getMaxCallFrameSize() > DefaultSafeSPDisplacement)
247 return true;
249 return false;
252 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
253 /// not required, we reserve argument space for call sites in the function
254 /// immediately on entry to the current function. This eliminates the need for
255 /// add/sub sp brackets around call sites. Returns true if the call frame is
256 /// included as part of the stack frame.
257 bool
258 AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
259 return !MF.getFrameInfo().hasVarSizedObjects();
262 MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
263 MachineFunction &MF, MachineBasicBlock &MBB,
264 MachineBasicBlock::iterator I) const {
265 const AArch64InstrInfo *TII =
266 static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
267 DebugLoc DL = I->getDebugLoc();
268 unsigned Opc = I->getOpcode();
269 bool IsDestroy = Opc == TII->getCallFrameDestroyOpcode();
270 uint64_t CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
272 if (!hasReservedCallFrame(MF)) {
273 unsigned Align = getStackAlignment();
275 int64_t Amount = I->getOperand(0).getImm();
276 Amount = alignTo(Amount, Align);
277 if (!IsDestroy)
278 Amount = -Amount;
280 // N.b. if CalleePopAmount is valid but zero (i.e. callee would pop, but it
281 // doesn't have to pop anything), then the first operand will be zero too so
282 // this adjustment is a no-op.
283 if (CalleePopAmount == 0) {
284 // FIXME: in-function stack adjustment for calls is limited to 24-bits
285 // because there's no guaranteed temporary register available.
287 // ADD/SUB (immediate) has only LSL #0 and LSL #12 available.
288 // 1) For offset <= 12-bit, we use LSL #0
289 // 2) For 12-bit <= offset <= 24-bit, we use two instructions. One uses
290 // LSL #0, and the other uses LSL #12.
292 // Most call frames will be allocated at the start of a function so
293 // this is OK, but it is a limitation that needs dealing with.
294 assert(Amount > -0xffffff && Amount < 0xffffff && "call frame too large");
295 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP, {Amount, MVT::i8},
296 TII);
298 } else if (CalleePopAmount != 0) {
299 // If the calling convention demands that the callee pops arguments from the
300 // stack, we want to add it back if we have a reserved call frame.
301 assert(CalleePopAmount < 0xffffff && "call frame too large");
302 emitFrameOffset(MBB, I, DL, AArch64::SP, AArch64::SP,
303 {-(int64_t)CalleePopAmount, MVT::i8}, TII);
305 return MBB.erase(I);
308 static bool ShouldSignReturnAddress(MachineFunction &MF) {
309 // The function should be signed in the following situations:
310 // - sign-return-address=all
311 // - sign-return-address=non-leaf and the functions spills the LR
313 const Function &F = MF.getFunction();
314 if (!F.hasFnAttribute("sign-return-address"))
315 return false;
317 StringRef Scope = F.getFnAttribute("sign-return-address").getValueAsString();
318 if (Scope.equals("none"))
319 return false;
321 if (Scope.equals("all"))
322 return true;
324 assert(Scope.equals("non-leaf") && "Expected all, none or non-leaf");
326 for (const auto &Info : MF.getFrameInfo().getCalleeSavedInfo())
327 if (Info.getReg() == AArch64::LR)
328 return true;
330 return false;
333 void AArch64FrameLowering::emitCalleeSavedFrameMoves(
334 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
335 MachineFunction &MF = *MBB.getParent();
336 MachineFrameInfo &MFI = MF.getFrameInfo();
337 const TargetSubtargetInfo &STI = MF.getSubtarget();
338 const MCRegisterInfo *MRI = STI.getRegisterInfo();
339 const TargetInstrInfo *TII = STI.getInstrInfo();
340 DebugLoc DL = MBB.findDebugLoc(MBBI);
342 // Add callee saved registers to move list.
343 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
344 if (CSI.empty())
345 return;
347 for (const auto &Info : CSI) {
348 unsigned Reg = Info.getReg();
349 int64_t Offset =
350 MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
351 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
352 unsigned CFIIndex = MF.addFrameInst(
353 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
354 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
355 .addCFIIndex(CFIIndex)
356 .setMIFlags(MachineInstr::FrameSetup);
360 // Find a scratch register that we can use at the start of the prologue to
361 // re-align the stack pointer. We avoid using callee-save registers since they
362 // may appear to be free when this is called from canUseAsPrologue (during
363 // shrink wrapping), but then no longer be free when this is called from
364 // emitPrologue.
366 // FIXME: This is a bit conservative, since in the above case we could use one
367 // of the callee-save registers as a scratch temp to re-align the stack pointer,
368 // but we would then have to make sure that we were in fact saving at least one
369 // callee-save register in the prologue, which is additional complexity that
370 // doesn't seem worth the benefit.
371 static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
372 MachineFunction *MF = MBB->getParent();
374 // If MBB is an entry block, use X9 as the scratch register
375 if (&MF->front() == MBB)
376 return AArch64::X9;
378 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
379 const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
380 LivePhysRegs LiveRegs(TRI);
381 LiveRegs.addLiveIns(*MBB);
383 // Mark callee saved registers as used so we will not choose them.
384 const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
385 for (unsigned i = 0; CSRegs[i]; ++i)
386 LiveRegs.addReg(CSRegs[i]);
388 // Prefer X9 since it was historically used for the prologue scratch reg.
389 const MachineRegisterInfo &MRI = MF->getRegInfo();
390 if (LiveRegs.available(MRI, AArch64::X9))
391 return AArch64::X9;
393 for (unsigned Reg : AArch64::GPR64RegClass) {
394 if (LiveRegs.available(MRI, Reg))
395 return Reg;
397 return AArch64::NoRegister;
400 bool AArch64FrameLowering::canUseAsPrologue(
401 const MachineBasicBlock &MBB) const {
402 const MachineFunction *MF = MBB.getParent();
403 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
404 const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
405 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
407 // Don't need a scratch register if we're not going to re-align the stack.
408 if (!RegInfo->needsStackRealignment(*MF))
409 return true;
410 // Otherwise, we can use any block as long as it has a scratch register
411 // available.
412 return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
415 static bool windowsRequiresStackProbe(MachineFunction &MF,
416 unsigned StackSizeInBytes) {
417 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
418 if (!Subtarget.isTargetWindows())
419 return false;
420 const Function &F = MF.getFunction();
421 // TODO: When implementing stack protectors, take that into account
422 // for the probe threshold.
423 unsigned StackProbeSize = 4096;
424 if (F.hasFnAttribute("stack-probe-size"))
425 F.getFnAttribute("stack-probe-size")
426 .getValueAsString()
427 .getAsInteger(0, StackProbeSize);
428 return (StackSizeInBytes >= StackProbeSize) &&
429 !F.hasFnAttribute("no-stack-arg-probe");
432 bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
433 MachineFunction &MF, unsigned StackBumpBytes) const {
434 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
435 const MachineFrameInfo &MFI = MF.getFrameInfo();
436 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
437 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
439 if (AFI->getLocalStackSize() == 0)
440 return false;
442 // 512 is the maximum immediate for stp/ldp that will be used for
443 // callee-save save/restores
444 if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
445 return false;
447 if (MFI.hasVarSizedObjects())
448 return false;
450 if (RegInfo->needsStackRealignment(MF))
451 return false;
453 // This isn't strictly necessary, but it simplifies things a bit since the
454 // current RedZone handling code assumes the SP is adjusted by the
455 // callee-save save/restore code.
456 if (canUseRedZone(MF))
457 return false;
459 return true;
462 // Given a load or a store instruction, generate an appropriate unwinding SEH
463 // code on Windows.
464 static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,
465 const TargetInstrInfo &TII,
466 MachineInstr::MIFlag Flag) {
467 unsigned Opc = MBBI->getOpcode();
468 MachineBasicBlock *MBB = MBBI->getParent();
469 MachineFunction &MF = *MBB->getParent();
470 DebugLoc DL = MBBI->getDebugLoc();
471 unsigned ImmIdx = MBBI->getNumOperands() - 1;
472 int Imm = MBBI->getOperand(ImmIdx).getImm();
473 MachineInstrBuilder MIB;
474 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
475 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
477 switch (Opc) {
478 default:
479 llvm_unreachable("No SEH Opcode for this instruction");
480 case AArch64::LDPDpost:
481 Imm = -Imm;
482 LLVM_FALLTHROUGH;
483 case AArch64::STPDpre: {
484 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
485 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(2).getReg());
486 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP_X))
487 .addImm(Reg0)
488 .addImm(Reg1)
489 .addImm(Imm * 8)
490 .setMIFlag(Flag);
491 break;
493 case AArch64::LDPXpost:
494 Imm = -Imm;
495 LLVM_FALLTHROUGH;
496 case AArch64::STPXpre: {
497 Register Reg0 = MBBI->getOperand(1).getReg();
498 Register Reg1 = MBBI->getOperand(2).getReg();
499 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
500 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR_X))
501 .addImm(Imm * 8)
502 .setMIFlag(Flag);
503 else
504 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP_X))
505 .addImm(RegInfo->getSEHRegNum(Reg0))
506 .addImm(RegInfo->getSEHRegNum(Reg1))
507 .addImm(Imm * 8)
508 .setMIFlag(Flag);
509 break;
511 case AArch64::LDRDpost:
512 Imm = -Imm;
513 LLVM_FALLTHROUGH;
514 case AArch64::STRDpre: {
515 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
516 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg_X))
517 .addImm(Reg)
518 .addImm(Imm)
519 .setMIFlag(Flag);
520 break;
522 case AArch64::LDRXpost:
523 Imm = -Imm;
524 LLVM_FALLTHROUGH;
525 case AArch64::STRXpre: {
526 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
527 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg_X))
528 .addImm(Reg)
529 .addImm(Imm)
530 .setMIFlag(Flag);
531 break;
533 case AArch64::STPDi:
534 case AArch64::LDPDi: {
535 unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
536 unsigned Reg1 = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
537 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFRegP))
538 .addImm(Reg0)
539 .addImm(Reg1)
540 .addImm(Imm * 8)
541 .setMIFlag(Flag);
542 break;
544 case AArch64::STPXi:
545 case AArch64::LDPXi: {
546 Register Reg0 = MBBI->getOperand(0).getReg();
547 Register Reg1 = MBBI->getOperand(1).getReg();
548 if (Reg0 == AArch64::FP && Reg1 == AArch64::LR)
549 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFPLR))
550 .addImm(Imm * 8)
551 .setMIFlag(Flag);
552 else
553 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveRegP))
554 .addImm(RegInfo->getSEHRegNum(Reg0))
555 .addImm(RegInfo->getSEHRegNum(Reg1))
556 .addImm(Imm * 8)
557 .setMIFlag(Flag);
558 break;
560 case AArch64::STRXui:
561 case AArch64::LDRXui: {
562 int Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
563 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveReg))
564 .addImm(Reg)
565 .addImm(Imm * 8)
566 .setMIFlag(Flag);
567 break;
569 case AArch64::STRDui:
570 case AArch64::LDRDui: {
571 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
572 MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveFReg))
573 .addImm(Reg)
574 .addImm(Imm * 8)
575 .setMIFlag(Flag);
576 break;
579 auto I = MBB->insertAfter(MBBI, MIB);
580 return I;
583 // Fix up the SEH opcode associated with the save/restore instruction.
584 static void fixupSEHOpcode(MachineBasicBlock::iterator MBBI,
585 unsigned LocalStackSize) {
586 MachineOperand *ImmOpnd = nullptr;
587 unsigned ImmIdx = MBBI->getNumOperands() - 1;
588 switch (MBBI->getOpcode()) {
589 default:
590 llvm_unreachable("Fix the offset in the SEH instruction");
591 case AArch64::SEH_SaveFPLR:
592 case AArch64::SEH_SaveRegP:
593 case AArch64::SEH_SaveReg:
594 case AArch64::SEH_SaveFRegP:
595 case AArch64::SEH_SaveFReg:
596 ImmOpnd = &MBBI->getOperand(ImmIdx);
597 break;
599 if (ImmOpnd)
600 ImmOpnd->setImm(ImmOpnd->getImm() + LocalStackSize);
603 // Convert callee-save register save/restore instruction to do stack pointer
604 // decrement/increment to allocate/deallocate the callee-save stack area by
605 // converting store/load to use pre/post increment version.
606 static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
607 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
608 const DebugLoc &DL, const TargetInstrInfo *TII, int CSStackSizeInc,
609 bool NeedsWinCFI, bool *HasWinCFI, bool InProlog = true) {
610 // Ignore instructions that do not operate on SP, i.e. shadow call stack
611 // instructions and associated CFI instruction.
612 while (MBBI->getOpcode() == AArch64::STRXpost ||
613 MBBI->getOpcode() == AArch64::LDRXpre ||
614 MBBI->getOpcode() == AArch64::CFI_INSTRUCTION) {
615 if (MBBI->getOpcode() != AArch64::CFI_INSTRUCTION)
616 assert(MBBI->getOperand(0).getReg() != AArch64::SP);
617 ++MBBI;
619 unsigned NewOpc;
620 int Scale = 1;
621 switch (MBBI->getOpcode()) {
622 default:
623 llvm_unreachable("Unexpected callee-save save/restore opcode!");
624 case AArch64::STPXi:
625 NewOpc = AArch64::STPXpre;
626 Scale = 8;
627 break;
628 case AArch64::STPDi:
629 NewOpc = AArch64::STPDpre;
630 Scale = 8;
631 break;
632 case AArch64::STPQi:
633 NewOpc = AArch64::STPQpre;
634 Scale = 16;
635 break;
636 case AArch64::STRXui:
637 NewOpc = AArch64::STRXpre;
638 break;
639 case AArch64::STRDui:
640 NewOpc = AArch64::STRDpre;
641 break;
642 case AArch64::STRQui:
643 NewOpc = AArch64::STRQpre;
644 break;
645 case AArch64::LDPXi:
646 NewOpc = AArch64::LDPXpost;
647 Scale = 8;
648 break;
649 case AArch64::LDPDi:
650 NewOpc = AArch64::LDPDpost;
651 Scale = 8;
652 break;
653 case AArch64::LDPQi:
654 NewOpc = AArch64::LDPQpost;
655 Scale = 16;
656 break;
657 case AArch64::LDRXui:
658 NewOpc = AArch64::LDRXpost;
659 break;
660 case AArch64::LDRDui:
661 NewOpc = AArch64::LDRDpost;
662 break;
663 case AArch64::LDRQui:
664 NewOpc = AArch64::LDRQpost;
665 break;
667 // Get rid of the SEH code associated with the old instruction.
668 if (NeedsWinCFI) {
669 auto SEH = std::next(MBBI);
670 if (AArch64InstrInfo::isSEHInstruction(*SEH))
671 SEH->eraseFromParent();
674 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
675 MIB.addReg(AArch64::SP, RegState::Define);
677 // Copy all operands other than the immediate offset.
678 unsigned OpndIdx = 0;
679 for (unsigned OpndEnd = MBBI->getNumOperands() - 1; OpndIdx < OpndEnd;
680 ++OpndIdx)
681 MIB.add(MBBI->getOperand(OpndIdx));
683 assert(MBBI->getOperand(OpndIdx).getImm() == 0 &&
684 "Unexpected immediate offset in first/last callee-save save/restore "
685 "instruction!");
686 assert(MBBI->getOperand(OpndIdx - 1).getReg() == AArch64::SP &&
687 "Unexpected base register in callee-save save/restore instruction!");
688 assert(CSStackSizeInc % Scale == 0);
689 MIB.addImm(CSStackSizeInc / Scale);
691 MIB.setMIFlags(MBBI->getFlags());
692 MIB.setMemRefs(MBBI->memoperands());
694 // Generate a new SEH code that corresponds to the new instruction.
695 if (NeedsWinCFI) {
696 *HasWinCFI = true;
697 InsertSEH(*MIB, *TII,
698 InProlog ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy);
701 return std::prev(MBB.erase(MBBI));
704 // Fixup callee-save register save/restore instructions to take into account
705 // combined SP bump by adding the local stack size to the stack offsets.
706 static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
707 unsigned LocalStackSize,
708 bool NeedsWinCFI,
709 bool *HasWinCFI) {
710 if (AArch64InstrInfo::isSEHInstruction(MI))
711 return;
713 unsigned Opc = MI.getOpcode();
715 // Ignore instructions that do not operate on SP, i.e. shadow call stack
716 // instructions and associated CFI instruction.
717 if (Opc == AArch64::STRXpost || Opc == AArch64::LDRXpre ||
718 Opc == AArch64::CFI_INSTRUCTION) {
719 if (Opc != AArch64::CFI_INSTRUCTION)
720 assert(MI.getOperand(0).getReg() != AArch64::SP);
721 return;
724 unsigned Scale;
725 switch (Opc) {
726 case AArch64::STPXi:
727 case AArch64::STRXui:
728 case AArch64::STPDi:
729 case AArch64::STRDui:
730 case AArch64::LDPXi:
731 case AArch64::LDRXui:
732 case AArch64::LDPDi:
733 case AArch64::LDRDui:
734 Scale = 8;
735 break;
736 case AArch64::STPQi:
737 case AArch64::STRQui:
738 case AArch64::LDPQi:
739 case AArch64::LDRQui:
740 Scale = 16;
741 break;
742 default:
743 llvm_unreachable("Unexpected callee-save save/restore opcode!");
746 unsigned OffsetIdx = MI.getNumExplicitOperands() - 1;
747 assert(MI.getOperand(OffsetIdx - 1).getReg() == AArch64::SP &&
748 "Unexpected base register in callee-save save/restore instruction!");
749 // Last operand is immediate offset that needs fixing.
750 MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
751 // All generated opcodes have scaled offsets.
752 assert(LocalStackSize % Scale == 0);
753 OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
755 if (NeedsWinCFI) {
756 *HasWinCFI = true;
757 auto MBBI = std::next(MachineBasicBlock::iterator(MI));
758 assert(MBBI != MI.getParent()->end() && "Expecting a valid instruction");
759 assert(AArch64InstrInfo::isSEHInstruction(*MBBI) &&
760 "Expecting a SEH instruction");
761 fixupSEHOpcode(MBBI, LocalStackSize);
765 static void adaptForLdStOpt(MachineBasicBlock &MBB,
766 MachineBasicBlock::iterator FirstSPPopI,
767 MachineBasicBlock::iterator LastPopI) {
768 // Sometimes (when we restore in the same order as we save), we can end up
769 // with code like this:
771 // ldp x26, x25, [sp]
772 // ldp x24, x23, [sp, #16]
773 // ldp x22, x21, [sp, #32]
774 // ldp x20, x19, [sp, #48]
775 // add sp, sp, #64
777 // In this case, it is always better to put the first ldp at the end, so
778 // that the load-store optimizer can run and merge the ldp and the add into
779 // a post-index ldp.
780 // If we managed to grab the first pop instruction, move it to the end.
781 if (ReverseCSRRestoreSeq)
782 MBB.splice(FirstSPPopI, &MBB, LastPopI);
783 // We should end up with something like this now:
785 // ldp x24, x23, [sp, #16]
786 // ldp x22, x21, [sp, #32]
787 // ldp x20, x19, [sp, #48]
788 // ldp x26, x25, [sp]
789 // add sp, sp, #64
791 // and the load-store optimizer can merge the last two instructions into:
793 // ldp x26, x25, [sp], #64
797 static bool ShouldSignWithAKey(MachineFunction &MF) {
798 const Function &F = MF.getFunction();
799 if (!F.hasFnAttribute("sign-return-address-key"))
800 return true;
802 const StringRef Key =
803 F.getFnAttribute("sign-return-address-key").getValueAsString();
804 assert(Key.equals_lower("a_key") || Key.equals_lower("b_key"));
805 return Key.equals_lower("a_key");
808 static bool needsWinCFI(const MachineFunction &MF) {
809 const Function &F = MF.getFunction();
810 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
811 F.needsUnwindTableEntry();
814 static bool isTargetDarwin(const MachineFunction &MF) {
815 return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
/// Emit the prologue of a function (or of an EH funclet entry block) at the
/// start of \p MBB.
///
/// In order, this code: optionally emits return-address signing plus the
/// matching CFI; returns early for the GHC calling convention; handles
/// functions without a stack frame (red zone, or one SP decrement); allocates
/// the callee-save area and the locals, either as one combined SP bump or as
/// a pre-decrementing callee-save store followed by a separate allocation;
/// sets up FP; emits the Windows "__chkstk" stack probe when required;
/// realigns SP; sets up the base pointer; and finally emits either the SEH
/// prologue-end marker or the DWARF CFI instructions.
///
/// Side effects visible here: updates AArch64FunctionInfo (red zone flag,
/// tagged base pointer offset, local stack size, stack-realigned flag) and
/// records on \p MF whether any Windows CFI was emitted.
818 void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
819 MachineBasicBlock &MBB) const {
820 MachineBasicBlock::iterator MBBI = MBB.begin();
821 const MachineFrameInfo &MFI = MF.getFrameInfo();
822 const Function &F = MF.getFunction();
823 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
824 const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
825 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
826 MachineModuleInfo &MMI = MF.getMMI();
827 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// DWARF frame moves are wanted for debug info or unwind tables, but never
// when the target uses Windows CFI.
828 bool needsFrameMoves = (MMI.hasDebugInfo() || F.needsUnwindTableEntry()) &&
829 !MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
830 bool HasFP = hasFP(MF);
831 bool NeedsWinCFI = needsWinCFI(MF);
832 bool HasWinCFI = false;
// Publish the final HasWinCFI value to MF on every exit path, including the
// early returns below.
833 auto Cleanup = make_scope_exit([&]() { MF.setHasWinCFI(HasWinCFI); });
835 bool IsFunclet = MBB.isEHFuncletEntry();
837 // At this point, we're going to decide whether or not the function uses a
838 // redzone. In most cases, the function doesn't have a redzone so let's
839 // assume that's false and set it to true in the case that there's a redzone.
840 AFI->setHasRedZone(false);
842 // Debug location must be unknown since the first debug location is used
843 // to determine the end of the prologue.
844 DebugLoc DL;
// Return-address signing: PACIASP for the A key, otherwise EMITBKEY followed
// by PACIBSP, plus a CFI instruction that negates the RA state.
846 if (ShouldSignReturnAddress(MF)) {
847 if (ShouldSignWithAKey(MF))
848 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIASP))
849 .setMIFlag(MachineInstr::FrameSetup);
850 else {
851 BuildMI(MBB, MBBI, DL, TII->get(AArch64::EMITBKEY))
852 .setMIFlag(MachineInstr::FrameSetup);
853 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACIBSP))
854 .setMIFlag(MachineInstr::FrameSetup);
857 unsigned CFIIndex =
858 MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
859 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
860 .addCFIIndex(CFIIndex)
861 .setMIFlags(MachineInstr::FrameSetup);
864 // All calls are tail calls in GHC calling conv, and functions have no
865 // prologue/epilogue.
866 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
867 return;
869 // Set tagged base pointer to the bottom of the stack frame.
870 // Ideally it should match SP value after prologue.
871 AFI->setTaggedBasePointerOffset(MFI.getStackSize());
873 // getStackSize() includes all the locals in its size calculation. We don't
874 // include these locals when computing the stack size of a funclet, as they
875 // are allocated in the parent's stack frame and accessed via the frame
876 // pointer from the funclet. We only save the callee saved registers in the
877 // funclet, which are really the callee saved registers of the parent
878 // function, including the funclet.
879 int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
880 : (int)MFI.getStackSize();
// Fast path: no stack frame and no Windows stack probe. Either nothing is
// allocated, the red zone is used, or SP is decremented once.
881 if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
882 assert(!HasFP && "unexpected function without stack frame but with FP");
883 // All of the stack allocation is for locals.
884 AFI->setLocalStackSize(NumBytes);
885 if (!NumBytes)
886 return;
887 // REDZONE: If the stack size is less than 128 bytes, we don't need
888 // to actually allocate.
889 if (canUseRedZone(MF)) {
890 AFI->setHasRedZone(true);
891 ++NumRedZoneFunctions;
892 } else {
893 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
894 {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
895 false, NeedsWinCFI, &HasWinCFI);
896 if (!NeedsWinCFI) {
897 // Label used to tie together the PROLOG_LABEL and the MachineMoves.
898 MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
899 // Encode the stack size of the leaf function.
900 unsigned CFIIndex = MF.addFrameInst(
901 MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
902 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
903 .addCFIIndex(CFIIndex)
904 .setMIFlags(MachineInstr::FrameSetup);
908 if (NeedsWinCFI) {
909 HasWinCFI = true;
910 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
911 .setMIFlag(MachineInstr::FrameSetup);
914 return;
917 bool IsWin64 =
918 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
919 // Var args are accounted for in the containing function, so don't
920 // include them for funclets.
921 unsigned FixedObject = (IsWin64 && !IsFunclet) ?
922 alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
924 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
925 // All of the remaining stack allocations are for locals.
926 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
// Either allocate callee-saves and locals with a single SP bump (NumBytes
// then becomes 0), or fold the callee-save allocation into the first
// callee-save store and keep the remainder in NumBytes.
927 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
928 if (CombineSPBump) {
929 emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
930 {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup, false,
931 NeedsWinCFI, &HasWinCFI);
932 NumBytes = 0;
933 } else if (PrologueSaveSize != 0) {
934 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
935 MBB, MBBI, DL, TII, -PrologueSaveSize, NeedsWinCFI, &HasWinCFI);
936 NumBytes -= PrologueSaveSize;
938 assert(NumBytes >= 0 && "Negative stack allocation size!?");
940 // Move past the saves of the callee-saved registers, fixing up the offsets
941 // and pre-inc if we decided to combine the callee-save and local stack
942 // pointer bump above.
943 MachineBasicBlock::iterator End = MBB.end();
944 while (MBBI != End && MBBI->getFlag(MachineInstr::FrameSetup)) {
945 if (CombineSPBump)
946 fixupCalleeSaveRestoreStackOffset(*MBBI, AFI->getLocalStackSize(),
947 NeedsWinCFI, &HasWinCFI);
948 ++MBBI;
951 // The code below is not applicable to funclets. We have emitted all the SEH
952 // opcodes that we needed to emit. The FP and BP belong to the containing
953 // function.
954 if (IsFunclet) {
955 if (NeedsWinCFI) {
956 HasWinCFI = true;
957 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
958 .setMIFlag(MachineInstr::FrameSetup);
961 // SEH funclets are passed the frame pointer in X1. If the parent
962 // function uses the base register, then the base register is used
963 // directly, and is not retrieved from X1.
964 if (F.hasPersonalityFn()) {
965 EHPersonality Per = classifyEHPersonality(F.getPersonalityFn());
966 if (isAsynchronousEHPersonality(Per)) {
967 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP)
968 .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup);
969 MBB.addLiveIn(AArch64::X1);
973 return;
976 if (HasFP) {
977 // Only set up FP if we actually need to.
978 int FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
980 if (CombineSPBump)
981 FPOffset += AFI->getLocalStackSize();
983 // Issue sub fp, sp, FPOffset or
984 // mov fp,sp when FPOffset is zero.
985 // Note: All stores of callee-saved registers are marked as "FrameSetup".
986 // This code marks the instruction(s) that set the FP also.
987 emitFrameOffset(MBB, MBBI, DL, AArch64::FP, AArch64::SP,
988 {FPOffset, MVT::i8}, TII, MachineInstr::FrameSetup, false,
989 NeedsWinCFI, &HasWinCFI);
// Windows stack probe: the size is placed in X15 in units of 16 bytes
// (NumBytes >> 4), "__chkstk" is called, and SP is then decremented by
// X15 scaled back with UXTX #4.
992 if (windowsRequiresStackProbe(MF, NumBytes)) {
993 uint32_t NumWords = NumBytes >> 4;
994 if (NeedsWinCFI) {
995 HasWinCFI = true;
996 // alloc_l can hold at most 256MB, so assume that NumBytes doesn't
997 // exceed this amount. We need to move at most 2^24 - 1 into x15.
998 // This is at most two instructions, MOVZ followed by MOVK.
999 // TODO: Fix to use multiple stack alloc unwind codes for stacks
1000 // exceeding 256MB in size.
1001 if (NumBytes >= (1 << 28))
1002 report_fatal_error("Stack size cannot exceed 256MB for stack "
1003 "unwinding purposes");
1005 uint32_t LowNumWords = NumWords & 0xFFFF;
1006 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVZXi), AArch64::X15)
1007 .addImm(LowNumWords)
1008 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
1009 .setMIFlag(MachineInstr::FrameSetup);
1010 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1011 .setMIFlag(MachineInstr::FrameSetup);
1012 if ((NumWords & 0xFFFF0000) != 0) {
1013 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X15)
1014 .addReg(AArch64::X15)
1015 .addImm((NumWords & 0xFFFF0000) >> 16) // High half
1016 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 16))
1017 .setMIFlag(MachineInstr::FrameSetup);
1018 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1019 .setMIFlag(MachineInstr::FrameSetup);
1021 } else {
1022 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
1023 .addImm(NumWords)
1024 .setMIFlags(MachineInstr::FrameSetup);
// Non-large code models call "__chkstk" directly with BL; the large code
// model materializes its address in X16 and calls through BLR.
1027 switch (MF.getTarget().getCodeModel()) {
1028 case CodeModel::Tiny:
1029 case CodeModel::Small:
1030 case CodeModel::Medium:
1031 case CodeModel::Kernel:
1032 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
1033 .addExternalSymbol("__chkstk")
1034 .addReg(AArch64::X15, RegState::Implicit)
1035 .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1036 .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1037 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1038 .setMIFlags(MachineInstr::FrameSetup);
1039 if (NeedsWinCFI) {
1040 HasWinCFI = true;
1041 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1042 .setMIFlag(MachineInstr::FrameSetup);
1044 break;
1045 case CodeModel::Large:
1046 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
1047 .addReg(AArch64::X16, RegState::Define)
1048 .addExternalSymbol("__chkstk")
1049 .addExternalSymbol("__chkstk")
1050 .setMIFlags(MachineInstr::FrameSetup);
1051 if (NeedsWinCFI) {
1052 HasWinCFI = true;
1053 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1054 .setMIFlag(MachineInstr::FrameSetup);
// NOTE(review): unlike the BL form above, X15 is marked Implicit | Define
// here rather than as a plain implicit use -- confirm this is intended.
1057 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
1058 .addReg(AArch64::X16, RegState::Kill)
1059 .addReg(AArch64::X15, RegState::Implicit | RegState::Define)
1060 .addReg(AArch64::X16, RegState::Implicit | RegState::Define | RegState::Dead)
1061 .addReg(AArch64::X17, RegState::Implicit | RegState::Define | RegState::Dead)
1062 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define | RegState::Dead)
1063 .setMIFlags(MachineInstr::FrameSetup);
1064 if (NeedsWinCFI) {
1065 HasWinCFI = true;
1066 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1067 .setMIFlag(MachineInstr::FrameSetup);
1069 break;
1072 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
1073 .addReg(AArch64::SP, RegState::Kill)
1074 .addReg(AArch64::X15, RegState::Kill)
1075 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
1076 .setMIFlags(MachineInstr::FrameSetup);
1077 if (NeedsWinCFI) {
1078 HasWinCFI = true;
1079 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1080 .addImm(NumBytes)
1081 .setMIFlag(MachineInstr::FrameSetup);
1083 NumBytes = 0;
1086 // Allocate space for the rest of the frame.
1087 if (NumBytes) {
1088 const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
1089 unsigned scratchSPReg = AArch64::SP;
// When realigning, the decremented SP is first computed into a scratch
// register and then masked into SP by the AND below.
1091 if (NeedsRealignment) {
1092 scratchSPReg = findScratchNonCalleeSaveRegister(&MBB);
1093 assert(scratchSPReg != AArch64::NoRegister);
1096 // If we're a leaf function, try using the red zone.
1097 if (!canUseRedZone(MF))
1098 // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
1099 // the correct value here, as NumBytes also includes padding bytes,
1100 // which shouldn't be counted here.
1101 emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP,
1102 {-NumBytes, MVT::i8}, TII, MachineInstr::FrameSetup,
1103 false, NeedsWinCFI, &HasWinCFI);
1105 if (NeedsRealignment) {
1106 const unsigned Alignment = MFI.getMaxAlignment();
1107 const unsigned NrBitsToZero = countTrailingZeros(Alignment);
1108 assert(NrBitsToZero > 1);
1109 assert(scratchSPReg != AArch64::SP);
1111 // SUB X9, SP, NumBytes
1112 // -- X9 is temporary register, so shouldn't contain any live data here,
1113 // -- free to use. This is already produced by emitFrameOffset above.
1114 // AND SP, X9, 0b11111...0000
1115 // The logical immediates have a non-trivial encoding. The following
1116 // formula computes the encoded immediate with all ones but
1117 // NrBitsToZero zero bits as least significant bits.
1118 uint32_t andMaskEncoded = (1 << 12) // = N
1119 | ((64 - NrBitsToZero) << 6) // immr
1120 | ((64 - NrBitsToZero - 1) << 0); // imms
1122 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
1123 .addReg(scratchSPReg, RegState::Kill)
1124 .addImm(andMaskEncoded);
1125 AFI->setStackRealigned(true);
// NOTE(review): the SEH_StackAlloc immediate below is NumBytes masked with
// the *encoded* logical immediate (andMaskEncoded), not with a plain
// alignment mask -- confirm this is the intended value.
1126 if (NeedsWinCFI) {
1127 HasWinCFI = true;
1128 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
1129 .addImm(NumBytes & andMaskEncoded)
1130 .setMIFlag(MachineInstr::FrameSetup);
1135 // If we need a base pointer, set it up here. It's whatever the value of the
1136 // stack pointer is at this point. Any variable size objects will be allocated
1137 // after this, so we can still use the base pointer to reference locals.
1139 // FIXME: Clarify FrameSetup flags here.
1140 // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
1141 // needed.
1142 if (RegInfo->hasBasePointer(MF)) {
1143 TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
1144 false);
1145 if (NeedsWinCFI) {
1146 HasWinCFI = true;
1147 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_Nop))
1148 .setMIFlag(MachineInstr::FrameSetup);
1152 // The very last FrameSetup instruction indicates the end of prologue. Emit a
1153 // SEH opcode indicating the prologue end.
1154 if (NeedsWinCFI && HasWinCFI) {
1155 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd))
1156 .setMIFlag(MachineInstr::FrameSetup);
1159 if (needsFrameMoves) {
1160 const DataLayout &TD = MF.getDataLayout();
1161 const int StackGrowth = isTargetDarwin(MF)
1162 ? (2 * -TD.getPointerSize(0))
1163 : -AFI->getCalleeSavedStackSize();
1164 Register FramePtr = RegInfo->getFrameRegister(MF);
1165 // An example of the prologue:
1167 // .globl __foo
1168 // .align 2
1169 // __foo:
1170 // Ltmp0:
1171 // .cfi_startproc
1172 // .cfi_personality 155, ___gxx_personality_v0
1173 // Leh_func_begin:
1174 // .cfi_lsda 16, Lexception33
1176 // stp xa,bx, [sp, -#offset]!
1177 // ...
1178 // stp x28, x27, [sp, #offset-32]
1179 // stp fp, lr, [sp, #offset-16]
1180 // add fp, sp, #offset - 16
1181 // sub sp, sp, #1360
1183 // The Stack:
1184 // +-------------------------------------------+
1185 // 10000 | ........ | ........ | ........ | ........ |
1186 // 10004 | ........ | ........ | ........ | ........ |
1187 // +-------------------------------------------+
1188 // 10008 | ........ | ........ | ........ | ........ |
1189 // 1000c | ........ | ........ | ........ | ........ |
1190 // +===========================================+
1191 // 10010 | X28 Register |
1192 // 10014 | X28 Register |
1193 // +-------------------------------------------+
1194 // 10018 | X27 Register |
1195 // 1001c | X27 Register |
1196 // +===========================================+
1197 // 10020 | Frame Pointer |
1198 // 10024 | Frame Pointer |
1199 // +-------------------------------------------+
1200 // 10028 | Link Register |
1201 // 1002c | Link Register |
1202 // +===========================================+
1203 // 10030 | ........ | ........ | ........ | ........ |
1204 // 10034 | ........ | ........ | ........ | ........ |
1205 // +-------------------------------------------+
1206 // 10038 | ........ | ........ | ........ | ........ |
1207 // 1003c | ........ | ........ | ........ | ........ |
1208 // +-------------------------------------------+
1210 // [sp] = 10030 :: >>initial value<<
1211 // sp = 10020 :: stp fp, lr, [sp, #-16]!
1212 // fp = sp == 10020 :: mov fp, sp
1213 // [sp] == 10020 :: stp x28, x27, [sp, #-16]!
1214 // sp == 10010 :: >>final value<<
1216 // The frame pointer (w29) points to address 10020. If we use an offset of
1217 // '16' from 'w29', we get the CFI offsets of -8 for w30, -16 for w29, -24
1218 // for w27, and -32 for w28:
1220 // Ltmp1:
1221 // .cfi_def_cfa w29, 16
1222 // Ltmp2:
1223 // .cfi_offset w30, -8
1224 // Ltmp3:
1225 // .cfi_offset w29, -16
1226 // Ltmp4:
1227 // .cfi_offset w27, -24
1228 // Ltmp5:
1229 // .cfi_offset w28, -32
1231 if (HasFP) {
1232 // Define the current CFA rule to use the provided FP.
1233 unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
1234 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
1235 nullptr, Reg, StackGrowth - FixedObject));
1236 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1237 .addCFIIndex(CFIIndex)
1238 .setMIFlags(MachineInstr::FrameSetup);
1239 } else {
1240 // Encode the stack size of the leaf function.
1241 unsigned CFIIndex = MF.addFrameInst(
1242 MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
1243 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
1244 .addCFIIndex(CFIIndex)
1245 .setMIFlags(MachineInstr::FrameSetup);
1248 // Now emit the moves for whatever callee saved regs we have (including FP,
1249 // LR if those are saved).
1250 emitCalleeSavedFrameMoves(MBB, MBBI);
1254 static void InsertReturnAddressAuth(MachineFunction &MF,
1255 MachineBasicBlock &MBB) {
1256 if (!ShouldSignReturnAddress(MF))
1257 return;
1258 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1259 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1261 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1262 DebugLoc DL;
1263 if (MBBI != MBB.end())
1264 DL = MBBI->getDebugLoc();
1266 // The AUTIASP instruction assembles to a hint instruction before v8.3a so
1267 // this instruction can safely used for any v8a architecture.
1268 // From v8.3a onwards there are optimised authenticate LR and return
1269 // instructions, namely RETA{A,B}, that can be used instead.
1270 if (Subtarget.hasV8_3aOps() && MBBI != MBB.end() &&
1271 MBBI->getOpcode() == AArch64::RET_ReallyLR) {
1272 BuildMI(MBB, MBBI, DL,
1273 TII->get(ShouldSignWithAKey(MF) ? AArch64::RETAA : AArch64::RETAB))
1274 .copyImplicitOps(*MBBI);
1275 MBB.erase(MBBI);
1276 } else {
1277 BuildMI(
1278 MBB, MBBI, DL,
1279 TII->get(ShouldSignWithAKey(MF) ? AArch64::AUTIASP : AArch64::AUTIBSP))
1280 .setMIFlag(MachineInstr::FrameDestroy);
1284 static bool isFuncletReturnInstr(const MachineInstr &MI) {
1285 switch (MI.getOpcode()) {
1286 default:
1287 return false;
1288 case AArch64::CATCHRET:
1289 case AArch64::CLEANUPRET:
1290 return true;
/// Emit the epilogue of a function (or EH funclet) at the end of \p MBB.
///
/// The code locates the callee-save restores (instructions flagged
/// FrameDestroy before the first terminator), then restores SP either with a
/// single combined bump or in stages: locals first, then the callee-save area
/// (folded into the last restore as a post-index when its offset is 0), then
/// any argument area that has to be popped. SEH epilogue markers are emitted
/// when Windows CFI is needed, and return-address authentication is inserted
/// on every exit path after the GHC early return.
1294 void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
1295 MachineBasicBlock &MBB) const {
1296 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1297 MachineFrameInfo &MFI = MF.getFrameInfo();
1298 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1299 const TargetInstrInfo *TII = Subtarget.getInstrInfo();
1300 DebugLoc DL;
1301 bool IsTailCallReturn = false;
1302 bool NeedsWinCFI = needsWinCFI(MF);
1303 bool HasWinCFI = false;
1304 bool IsFunclet = false;
// On scope exit, record HasWinCFI on MF unless MF already reports WinCFI.
1305 auto WinCFI = make_scope_exit([&]() {
1306 if (!MF.hasWinCFI())
1307 MF.setHasWinCFI(HasWinCFI);
// Classify the returning instruction: tail-call return and/or funclet return.
1310 if (MBB.end() != MBBI) {
1311 DL = MBBI->getDebugLoc();
1312 unsigned RetOpcode = MBBI->getOpcode();
1313 IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
1314 RetOpcode == AArch64::TCRETURNri ||
1315 RetOpcode == AArch64::TCRETURNriBTI;
1316 IsFunclet = isFuncletReturnInstr(*MBBI);
1319 int NumBytes = IsFunclet ? (int)getWinEHFuncletFrameSize(MF)
1320 : MFI.getStackSize();
1321 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1323 // All calls are tail calls in GHC calling conv, and functions have no
1324 // prologue/epilogue.
1325 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
1326 return;
1328 // Initial and residual are named for consistency with the prologue. Note that
1329 // in the epilogue, the residual adjustment is executed first.
1330 uint64_t ArgumentPopSize = 0;
1331 if (IsTailCallReturn) {
1332 MachineOperand &StackAdjust = MBBI->getOperand(1);
1334 // For a tail-call in a callee-pops-arguments environment, some or all of
1335 // the stack may actually be in use for the call's arguments, this is
1336 // calculated during LowerCall and consumed here...
1337 ArgumentPopSize = StackAdjust.getImm();
1338 } else {
1339 // ... otherwise the amount to pop is *all* of the argument space,
1340 // conveniently stored in the MachineFunctionInfo by
1341 // LowerFormalArguments. This will, of course, be zero for the C calling
1342 // convention.
1343 ArgumentPopSize = AFI->getArgumentStackToRestore();
1346 // The stack frame should be like below,
1348 // ---------------------- ---
1349 // | | |
1350 // | BytesInStackArgArea| CalleeArgStackSize
1351 // | (NumReusableBytes) | (of tail call)
1352 // | | ---
1353 // | | |
1354 // ---------------------| --- |
1355 // | | | |
1356 // | CalleeSavedReg | | |
1357 // | (CalleeSavedStackSize)| | |
1358 // | | | |
1359 // ---------------------| | NumBytes
1360 // | | StackSize (StackAdjustUp)
1361 // | LocalStackSize | | |
1362 // | (covering callee | | |
1363 // | args) | | |
1364 // | | | |
1365 // ---------------------- --- ---
1367 // So NumBytes = StackSize + BytesInStackArgArea - CalleeArgStackSize
1368 // = StackSize + ArgumentPopSize
1370 // AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
1371 // it as the 2nd argument of AArch64ISD::TC_RETURN.
// From here on, every exit path inserts return-address authentication.
1373 auto Cleanup = make_scope_exit([&] { InsertReturnAddressAuth(MF, MBB); });
1375 bool IsWin64 =
1376 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1377 // Var args are accounted for in the containing function, so don't
1378 // include them for funclets.
1379 unsigned FixedObject =
1380 (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1382 uint64_t AfterCSRPopSize = ArgumentPopSize;
1383 auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
1384 // We cannot rely on the local stack size set in emitPrologue if the function
1385 // has funclets, as funclets have different local stack size requirements, and
1386 // the current value set in emitPrologue may be that of the containing
1387 // function.
1388 if (MF.hasEHFunclets())
1389 AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
1390 bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
1391 // Assume we can't combine the last pop with the sp restore.
1393 if (!CombineSPBump && PrologueSaveSize != 0) {
// Find the last callee-save restore, skipping SEH pseudo instructions.
1394 MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
1395 while (AArch64InstrInfo::isSEHInstruction(*Pop))
1396 Pop = std::prev(Pop);
1397 // Converting the last ldp to a post-index ldp is valid only if the last
1398 // ldp's offset is 0.
1399 const MachineOperand &OffsetOp = Pop->getOperand(Pop->getNumOperands() - 1);
1400 // If the offset is 0, convert it to a post-index ldp.
1401 if (OffsetOp.getImm() == 0)
1402 convertCalleeSaveRestoreToSPPrePostIncDec(
1403 MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, false);
1404 else {
1405 // If not, make sure to emit an add after the last ldp.
1406 // We're doing this by transferring the size to be restored from the
1407 // adjustment *before* the CSR pops to the adjustment *after* the CSR
1408 // pops.
1409 AfterCSRPopSize += PrologueSaveSize;
1413 // Move past the restores of the callee-saved registers.
1414 // If we plan on combining the sp bump of the local stack size and the callee
1415 // save stack size, we might need to adjust the CSR save and restore offsets.
1416 MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
1417 MachineBasicBlock::iterator Begin = MBB.begin();
1418 while (LastPopI != Begin) {
1419 --LastPopI;
1420 if (!LastPopI->getFlag(MachineInstr::FrameDestroy)) {
1421 ++LastPopI;
1422 break;
1423 } else if (CombineSPBump)
1424 fixupCalleeSaveRestoreStackOffset(*LastPopI, AFI->getLocalStackSize(),
1425 NeedsWinCFI, &HasWinCFI);
1428 if (NeedsWinCFI) {
1429 HasWinCFI = true;
1430 BuildMI(MBB, LastPopI, DL, TII->get(AArch64::SEH_EpilogStart))
1431 .setMIFlag(MachineInstr::FrameDestroy);
1434 // If there is a single SP update, insert it before the ret and we're done.
1435 if (CombineSPBump) {
1436 emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
1437 {NumBytes + (int64_t)AfterCSRPopSize, MVT::i8}, TII,
1438 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1439 if (NeedsWinCFI && HasWinCFI)
1440 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1441 TII->get(AArch64::SEH_EpilogEnd))
1442 .setMIFlag(MachineInstr::FrameDestroy);
1443 return;
// From here NumBytes covers only the part of the frame below the
// callee-save area.
1446 NumBytes -= PrologueSaveSize;
1447 assert(NumBytes >= 0 && "Negative stack allocation size!?");
1449 if (!hasFP(MF)) {
1450 bool RedZone = canUseRedZone(MF);
1451 // If this was a redzone leaf function, we don't need to restore the
1452 // stack pointer (but we may need to pop stack args for fastcc).
1453 if (RedZone && AfterCSRPopSize == 0)
1454 return;
1456 bool NoCalleeSaveRestore = PrologueSaveSize == 0;
1457 int StackRestoreBytes = RedZone ? 0 : NumBytes;
1458 if (NoCalleeSaveRestore)
1459 StackRestoreBytes += AfterCSRPopSize;
1461 // If we were able to combine the local stack pop with the argument pop,
1462 // then we're done.
1463 bool Done = NoCalleeSaveRestore || AfterCSRPopSize == 0;
1465 // If we're done after this, make sure to help the load store optimizer.
1466 if (Done)
1467 adaptForLdStOpt(MBB, MBB.getFirstTerminator(), LastPopI);
1469 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1470 {StackRestoreBytes, MVT::i8}, TII,
1471 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1472 if (Done) {
1473 if (NeedsWinCFI) {
1474 HasWinCFI = true;
1475 BuildMI(MBB, MBB.getFirstTerminator(), DL,
1476 TII->get(AArch64::SEH_EpilogEnd))
1477 .setMIFlag(MachineInstr::FrameDestroy);
1479 return;
1482 NumBytes = 0;
1485 // Restore the original stack pointer.
1486 // FIXME: Rather than doing the math here, we should instead just use
1487 // non-post-indexed loads for the restores if we aren't actually going to
1488 // be able to save any instructions.
// NOTE(review): the two emitFrameOffset calls below do not pass &HasWinCFI,
// unlike the other calls in this function -- confirm this is intended.
1489 if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
1490 int64_t OffsetToFrameRecord =
1491 isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
1492 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
1493 {OffsetToFrameRecord, MVT::i8},
1494 TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
1495 } else if (NumBytes)
1496 emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
1497 {NumBytes, MVT::i8}, TII, MachineInstr::FrameDestroy, false,
1498 NeedsWinCFI);
1500 // This must be placed after the callee-save restore code because that code
1501 // assumes the SP is at the same location as it was after the callee-save save
1502 // code in the prologue.
1503 if (AfterCSRPopSize) {
1504 // Find an insertion point for the first ldp so that it goes before the
1505 // shadow call stack epilog instruction. This ensures that the restore of
1506 // lr from x18 is placed after the restore from sp.
1507 auto FirstSPPopI = MBB.getFirstTerminator();
1508 while (FirstSPPopI != Begin) {
1509 auto Prev = std::prev(FirstSPPopI);
1510 if (Prev->getOpcode() != AArch64::LDRXpre ||
1511 Prev->getOperand(0).getReg() == AArch64::SP)
1512 break;
1513 FirstSPPopI = Prev;
1516 adaptForLdStOpt(MBB, FirstSPPopI, LastPopI);
1518 emitFrameOffset(MBB, FirstSPPopI, DL, AArch64::SP, AArch64::SP,
1519 {(int64_t)AfterCSRPopSize, MVT::i8}, TII,
1520 MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1522 if (NeedsWinCFI && HasWinCFI)
1523 BuildMI(MBB, MBB.getFirstTerminator(), DL, TII->get(AArch64::SEH_EpilogEnd))
1524 .setMIFlag(MachineInstr::FrameDestroy);
// NOTE(review): this store is unconditional, whereas the scope-exit handler
// at the top only writes when MF does not already report WinCFI; on this
// path it can overwrite an existing value with false -- confirm intent.
1526 MF.setHasWinCFI(HasWinCFI);
1529 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1530 /// debug info. It's the same as what we use for resolving the code-gen
1531 /// references for now. FIXME: This can go wrong when references are
1532 /// SP-relative and simple call frames aren't used.
1533 int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
1534 int FI,
1535 unsigned &FrameReg) const {
1536 return resolveFrameIndexReference(
1537 MF, FI, FrameReg,
1538 /*PreferFP=*/
1539 MF.getFunction().hasFnAttribute(Attribute::SanitizeHWAddress),
1540 /*ForSimm=*/false)
1541 .getBytes();
1544 int AArch64FrameLowering::getNonLocalFrameIndexReference(
1545 const MachineFunction &MF, int FI) const {
1546 return getSEHFrameIndexOffset(MF, FI);
1549 static StackOffset getFPOffset(const MachineFunction &MF, int ObjectOffset) {
1550 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1551 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1552 bool IsWin64 =
1553 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
1554 unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0;
1555 unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize();
1556 return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
1559 static StackOffset getStackOffset(const MachineFunction &MF, int ObjectOffset) {
1560 const auto &MFI = MF.getFrameInfo();
1561 return {ObjectOffset + (int)MFI.getStackSize(), MVT::i8};
1564 int AArch64FrameLowering::getSEHFrameIndexOffset(const MachineFunction &MF,
1565 int FI) const {
1566 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1567 MF.getSubtarget().getRegisterInfo());
1568 int ObjectOffset = MF.getFrameInfo().getObjectOffset(FI);
1569 return RegInfo->getLocalAddressRegister(MF) == AArch64::FP
1570 ? getFPOffset(MF, ObjectOffset).getBytes()
1571 : getStackOffset(MF, ObjectOffset).getBytes();
1574 StackOffset AArch64FrameLowering::resolveFrameIndexReference(
1575 const MachineFunction &MF, int FI, unsigned &FrameReg, bool PreferFP,
1576 bool ForSimm) const {
1577 const auto &MFI = MF.getFrameInfo();
1578 int ObjectOffset = MFI.getObjectOffset(FI);
1579 bool isFixed = MFI.isFixedObjectIndex(FI);
1580 return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
1581 PreferFP, ForSimm);
/// Choose the base register used to address the frame object at
/// \p ObjectOffset and return the object's byte offset from that register.
///
/// \param ObjectOffset  offset of the object as recorded in the frame info.
/// \param isFixed       true for fixed objects (the code treats these as
///                      argument accesses).
/// \param FrameReg      [out] set to the frame register, the base register,
///                      or SP.
/// \param PreferFP      caller's hint to favour FP; may also be turned on
///                      here when the FP-relative offset is the closer one.
/// \param ForSimm       when true, an FP offset below -256 is treated as not
///                      fitting.
///
/// For SP-relative accesses in a function that can use the red zone, the
/// returned offset is reduced by the local stack size.
1584 StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
1585 const MachineFunction &MF, int ObjectOffset, bool isFixed,
1586 unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
1587 const auto &MFI = MF.getFrameInfo();
1588 const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
1589 MF.getSubtarget().getRegisterInfo());
1590 const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
1591 const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
// Compute both candidate offsets up front; the logic below picks one.
1593 int FPOffset = getFPOffset(MF, ObjectOffset).getBytes();
1594 int Offset = getStackOffset(MF, ObjectOffset).getBytes();
// A non-fixed object whose offset lies within the callee-saved area size is
// treated as a callee-save slot.
1595 bool isCSR =
1596 !isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
1598 // Use frame pointer to reference fixed objects. Use it for locals if
1599 // there are VLAs or a dynamically realigned SP (and thus the SP isn't
1600 // reliable as a base). Make sure useFPForScavengingIndex() does the
1601 // right thing for the emergency spill slot.
1602 bool UseFP = false;
1603 if (AFI->hasStackFrame()) {
1604 // Note: Keeping the following as multiple 'if' statements rather than
1605 // merging to a single expression for readability.
1607 // Argument access should always use the FP.
1608 if (isFixed) {
1609 UseFP = hasFP(MF);
1610 } else if (isCSR && RegInfo->needsStackRealignment(MF)) {
1611 // References to the CSR area must use FP if we're re-aligning the stack
1612 // since the dynamically-sized alignment padding is between the SP/BP and
1613 // the CSR area.
1614 assert(hasFP(MF) && "Re-aligned stack must have frame pointer");
1615 UseFP = true;
1616 } else if (hasFP(MF) && !RegInfo->needsStackRealignment(MF)) {
1617 // If the FPOffset is negative and we're producing a signed immediate, we
1618 // have to keep in mind that the available offset range for negative
1619 // offsets is smaller than for positive ones. If an offset is available
1620 // via the FP and the SP, use whichever is closest.
1621 bool FPOffsetFits = !ForSimm || FPOffset >= -256;
1622 PreferFP |= Offset > -FPOffset;
1624 if (MFI.hasVarSizedObjects()) {
1625 // If we have variable sized objects, we can use either FP or BP, as the
1626 // SP offset is unknown. We can use the base pointer if we have one and
1627 // FP is not preferred. If not, we're stuck with using FP.
1628 bool CanUseBP = RegInfo->hasBasePointer(MF);
1629 if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
1630 UseFP = PreferFP;
1631 else if (!CanUseBP) // Can't use BP. Forced to use FP.
1632 UseFP = true;
1633 // else we can use BP and FP, but the offset from FP won't fit.
1634 // That will make us scavenge registers which we can probably avoid by
1635 // using BP. If it won't fit for BP either, we'll scavenge anyway.
1636 } else if (FPOffset >= 0) {
1637 // Use SP or FP, whichever gives us the best chance of the offset
1638 // being in range for direct access. If the FPOffset is positive,
1639 // that'll always be best, as the SP will be even further away.
1640 UseFP = true;
1641 } else if (MF.hasEHFunclets() && !RegInfo->hasBasePointer(MF)) {
1642 // Funclets access the locals contained in the parent's stack frame
1643 // via the frame pointer, so we have to use the FP in the parent
1644 // function.
1645 (void) Subtarget;
1646 assert(
1647 Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
1648 "Funclets should only be present on Win64");
1649 UseFP = true;
1650 } else {
1651 // We have the choice between FP and (SP or BP).
1652 if (FPOffsetFits && PreferFP) // If FP is the best fit, use it.
1653 UseFP = true;
1658 assert(((isFixed || isCSR) || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
1659 "In the presence of dynamic stack pointer realignment, "
1660 "non-argument/CSR objects cannot be accessed through the frame pointer");
1662 if (UseFP) {
1663 FrameReg = RegInfo->getFrameRegister(MF);
1664 return StackOffset(FPOffset, MVT::i8);
1667 // Use the base pointer if we have one.
1668 if (RegInfo->hasBasePointer(MF))
1669 FrameReg = RegInfo->getBaseRegister();
1670 else {
1671 assert(!MFI.hasVarSizedObjects() &&
1672 "Can't use SP when we have var sized objects.");
1673 FrameReg = AArch64::SP;
1674 // If we're using the red zone for this function, the SP won't actually
1675 // be adjusted, so the offsets will be negative. They're also all
1676 // within range of the signed 9-bit immediate instructions.
1677 if (canUseRedZone(MF))
1678 Offset -= AFI->getLocalStackSize();
1681 return StackOffset(Offset, MVT::i8);
1684 static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
1685 // Do not set a kill flag on values that are also marked as live-in. This
1686 // happens with the @llvm-returnaddress intrinsic and with arguments passed in
1687 // callee saved registers.
1688 // Omitting the kill flags is conservatively correct even if the live-in
1689 // is not used after all.
1690 bool IsLiveIn = MF.getRegInfo().isLiveIn(Reg);
1691 return getKillRegState(!IsLiveIn);
1694 static bool produceCompactUnwindFrame(MachineFunction &MF) {
1695 const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
1696 AttributeList Attrs = MF.getFunction().getAttributes();
1697 return Subtarget.isTargetMachO() &&
1698 !(Subtarget.getTargetLowering()->supportSwiftError() &&
1699 Attrs.hasAttrSomewhere(Attribute::SwiftError));
/// Returns true if \p Reg1 and \p Reg2 must not be paired because of the
/// Windows unwind-code restrictions; always false when \p NeedsWinCFI is
/// false.
///
/// When generating register pairs for a Windows function that requires EH
/// support, only consecutive registers may be paired: there are no unwind
/// opcodes for saves/restores of non-consecutive register pairs. The unwind
/// opcodes are save_regp, save_regp_x, save_fregp and save_fregp_x.
/// https://docs.microsoft.com/en-us/cpp/build/arm64-exception-handling
///
/// TODO: LR can be paired with any register. We don't support this yet in
/// the MCLayer. We need to add support for the save_lrpair unwind code.
static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2,
                                             bool NeedsWinCFI) {
  // The only acceptable partner for Reg1 is the register numbered directly
  // after it.
  const bool IsConsecutive = Reg2 == Reg1 + 1;
  return NeedsWinCFI && !IsConsecutive;
}
1719 /// Returns true if Reg1 and Reg2 cannot be paired using a ldp/stp instruction.
1720 /// WindowsCFI requires that only consecutive registers can be paired.
1721 /// LR and FP need to be allocated together when the frame needs to save
1722 /// the frame-record. This means any other register pairing with LR is invalid.
1723 static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2,
1724 bool NeedsWinCFI, bool NeedsFrameRecord) {
1725 if (NeedsWinCFI)
1726 return invalidateWindowsRegisterPairing(Reg1, Reg2, true);
1728 // If we need to store the frame record, don't pair any register
1729 // with LR other than FP.
1730 if (NeedsFrameRecord)
1731 return Reg2 == AArch64::LR;
1733 return false;
1736 namespace {
1738 struct RegPairInfo {
1739 unsigned Reg1 = AArch64::NoRegister;
1740 unsigned Reg2 = AArch64::NoRegister;
1741 int FrameIdx;
1742 int Offset;
1743 enum RegType { GPR, FPR64, FPR128 } Type;
1745 RegPairInfo() = default;
1747 bool isPaired() const { return Reg2 != AArch64::NoRegister; }
1750 } // end anonymous namespace
/// Groups the callee-saved registers listed in \p CSI into spill/restore
/// units and appends one RegPairInfo per unit to \p RegPairs. Returns
/// immediately when \p CSI is empty.
///
/// A register is paired with the next entry of \p CSI when both belong to the
/// same register class (GPR64, FPR64 or FPR128) and the pairing is not
/// rejected by invalidateRegisterPairing (GPRs) or
/// invalidateWindowsRegisterPairing (FPR64). Each unit records the frame
/// index of its first register and an offset expressed in multiples of the
/// register size (8 bytes, or 16 for FPR128), counted down from
/// AFI->getCalleeSavedStackSize().
///
/// \p NeedShadowCallStackProlog is set to true when LR is one of the
/// registers and the function has the ShadowCallStack attribute. It is never
/// cleared here, so the caller has to initialise it. \p TRI is not used in
/// the body.
1752 static void computeCalleeSaveRegisterPairs(
1753 MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
1754 const TargetRegisterInfo *TRI, SmallVectorImpl<RegPairInfo> &RegPairs,
1755 bool &NeedShadowCallStackProlog, bool NeedsFrameRecord) {
1757 if (CSI.empty())
1758 return;
1760 bool NeedsWinCFI = needsWinCFI(MF);
1761 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1762 MachineFrameInfo &MFI = MF.getFrameInfo();
1763 CallingConv::ID CC = MF.getFunction().getCallingConv();
1764 unsigned Count = CSI.size();
// CC is only read inside asserts; the cast silences the unused-variable
// warning in builds where asserts are compiled out.
1765 (void)CC;
1766 // MachO's compact unwind format relies on all registers being stored in
1767 // pairs.
1768 assert((!produceCompactUnwindFrame(MF) ||
1769 CC == CallingConv::PreserveMost ||
1770 (Count & 1) == 0) &&
1771 "Odd number of callee-saved regs to spill!");
// Running byte offset; each unit subtracts its own size before use.
1772 int Offset = AFI->getCalleeSavedStackSize();
1773 // On Linux, we will have either one or zero non-paired register. On Windows
1774 // with CFI, we can have multiple unpaired registers in order to utilize the
1775 // available unwind codes. This flag assures that the alignment fixup is done
1776 // only once, as intended.
1777 bool FixupDone = false;
1778 for (unsigned i = 0; i < Count; ++i) {
1779 RegPairInfo RPI;
1780 RPI.Reg1 = CSI[i].getReg();
1782 if (AArch64::GPR64RegClass.contains(RPI.Reg1))
1783 RPI.Type = RegPairInfo::GPR;
1784 else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
1785 RPI.Type = RegPairInfo::FPR64;
1786 else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
1787 RPI.Type = RegPairInfo::FPR128;
1788 else
1789 llvm_unreachable("Unsupported register class.");
1791 // Add the next reg to the pair if it is in the same register class.
1792 if (i + 1 < Count) {
1793 unsigned NextReg = CSI[i + 1].getReg();
1794 switch (RPI.Type) {
1795 case RegPairInfo::GPR:
1796 if (AArch64::GPR64RegClass.contains(NextReg) &&
1797 !invalidateRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI,
1798 NeedsFrameRecord))
1799 RPI.Reg2 = NextReg;
1800 break;
1801 case RegPairInfo::FPR64:
1802 if (AArch64::FPR64RegClass.contains(NextReg) &&
1803 !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI))
1804 RPI.Reg2 = NextReg;
1805 break;
1806 case RegPairInfo::FPR128:
1807 if (AArch64::FPR128RegClass.contains(NextReg))
1808 RPI.Reg2 = NextReg;
1809 break;
1813 // If either of the registers to be saved is the lr register, it means that
1814 // we also need to save lr in the shadow call stack.
1815 if ((RPI.Reg1 == AArch64::LR || RPI.Reg2 == AArch64::LR) &&
1816 MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack)) {
1817 if (!MF.getSubtarget<AArch64Subtarget>().isXRegisterReserved(18))
1818 report_fatal_error("Must reserve x18 to use shadow call stack");
1819 NeedShadowCallStackProlog = true;
1822 // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI
1823 // list to come in sorted by frame index so that we can issue the store
1824 // pair instructions directly. Assert if we see anything otherwise.
1826 // The order of the registers in the list is controlled by
1827 // getCalleeSavedRegs(), so they will always be in-order, as well.
1828 assert((!RPI.isPaired() ||
1829 (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) &&
1830 "Out of order callee saved regs!");
1832 assert((!RPI.isPaired() || !NeedsFrameRecord || RPI.Reg2 != AArch64::FP ||
1833 RPI.Reg1 == AArch64::LR) &&
1834 "FrameRecord must be allocated together with LR");
1836 // MachO's compact unwind format relies on all registers being stored in
1837 // adjacent register pairs.
1838 assert((!produceCompactUnwindFrame(MF) ||
1839 CC == CallingConv::PreserveMost ||
1840 (RPI.isPaired() &&
1841 ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
1842 RPI.Reg1 + 1 == RPI.Reg2))) &&
1843 "Callee-save registers not saved as adjacent register pair!");
1845 RPI.FrameIdx = CSI[i].getFrameIdx();
// Scale is the size in bytes of one register of this unit's type.
1847 int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
1848 Offset -= RPI.isPaired() ? 2 * Scale : Scale;
1850 // Round up size of non-pair to pair size if we need to pad the
1851 // callee-save area to ensure 16-byte alignment.
1852 if (AFI->hasCalleeSaveStackFreeSpace() && !FixupDone &&
1853 RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
1854 FixupDone = true;
// Skip 8 more bytes so this lone 8-byte register starts on a 16-byte
// boundary, and raise the alignment of its frame object to match.
1855 Offset -= 8;
1856 assert(Offset % 16 == 0);
1857 assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
1858 MFI.setObjectAlignment(RPI.FrameIdx, 16);
1861 assert(Offset % Scale == 0);
// The offset is stored as a multiple of Scale; the spill and restore code
// passes it unchanged to addImm().
1862 RPI.Offset = Offset / Scale;
1863 assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
1864 "Offset out of bounds for LDP/STP immediate");
1866 RegPairs.push_back(RPI);
// A paired unit consumed CSI[i + 1] as well, so step over it.
1867 if (RPI.isPaired())
1868 ++i;
/// Inserts, before \p MI in \p MBB, the stores that spill the callee-saved
/// registers listed in \p CSI. Always returns true.
///
/// The registers are first grouped by computeCalleeSaveRegisterPairs. The
/// resulting units are then walked in reverse order and each one is emitted as
/// an SP-relative STP (paired) or STR (single) flagged FrameSetup, with one
/// fixed-stack memory operand per stored register. Under Windows CFI the two
/// registers of a pair (and their frame indices) are swapped before emission
/// and an SEH opcode is inserted via InsertSEH.
///
/// If the grouping reports that a shadow call stack prolog is needed, a
/// post-indexed store of LR through X18 is emitted first, followed by an
/// SEH_Nop under Windows CFI and, unless the function is nounwind, a CFI
/// escape describing the X18 adjustment; X18 is also added as a live-in of
/// \p MBB.
1872 bool AArch64FrameLowering::spillCalleeSavedRegisters(
1873 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
1874 const std::vector<CalleeSavedInfo> &CSI,
1875 const TargetRegisterInfo *TRI) const {
1876 MachineFunction &MF = *MBB.getParent();
1877 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
1878 bool NeedsWinCFI = needsWinCFI(MF);
// Left default-constructed: the spill instructions carry no debug location.
1879 DebugLoc DL;
1880 SmallVector<RegPairInfo, 8> RegPairs;
1882 bool NeedShadowCallStackProlog = false;
1883 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
1884 NeedShadowCallStackProlog, hasFP(MF));
1885 const MachineRegisterInfo &MRI = MF.getRegInfo();
1887 if (NeedShadowCallStackProlog) {
1888 // Shadow call stack prolog: str x30, [x18], #8
1889 BuildMI(MBB, MI, DL, TII.get(AArch64::STRXpost))
1890 .addReg(AArch64::X18, RegState::Define)
1891 .addReg(AArch64::LR)
1892 .addReg(AArch64::X18)
1893 .addImm(8)
1894 .setMIFlag(MachineInstr::FrameSetup);
1896 if (NeedsWinCFI)
1897 BuildMI(MBB, MI, DL, TII.get(AArch64::SEH_Nop))
1898 .setMIFlag(MachineInstr::FrameSetup);
1900 if (!MF.getFunction().hasFnAttribute(Attribute::NoUnwind)) {
1901 // Emit a CFI instruction that causes 8 to be subtracted from the value of
1902 // x18 when unwinding past this frame.
1903 static const char CFIInst[] = {
1904 dwarf::DW_CFA_val_expression,
1905 18, // register
1906 2, // length
1907 static_cast<char>(unsigned(dwarf::DW_OP_breg18)),
1908 static_cast<char>(-8) & 0x7f, // addend (sleb128)
1910 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createEscape(
1911 nullptr, StringRef(CFIInst, sizeof(CFIInst))));
1912 BuildMI(MBB, MI, DL, TII.get(AArch64::CFI_INSTRUCTION))
1913 .addCFIIndex(CFIIndex)
1914 .setMIFlag(MachineInstr::FrameSetup);
1917 // This instruction also makes x18 live-in to the entry block.
1918 MBB.addLiveIn(AArch64::X18);
// Emit the units last-to-first relative to the order in which
// computeCalleeSaveRegisterPairs produced them.
1921 for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE;
1922 ++RPII) {
1923 RegPairInfo RPI = *RPII;
1924 unsigned Reg1 = RPI.Reg1;
1925 unsigned Reg2 = RPI.Reg2;
1926 unsigned StrOpc;
1928 // Issue sequence of spills for cs regs. The first spill may be converted
1929 // to a pre-decrement store later by emitPrologue if the callee-save stack
1930 // area allocation can't be combined with the local stack area allocation.
1931 // For example:
1932 // stp x22, x21, [sp, #0] // addImm(+0)
1933 // stp x20, x19, [sp, #16] // addImm(+2)
1934 // stp fp, lr, [sp, #32] // addImm(+4)
1935 // Rationale: This sequence saves uop updates compared to a sequence of
1936 // pre-increment spills like stp xi,xj,[sp,#-16]!
1937 // Note: Similar rationale and sequence for restores in epilog.
// Size and Align describe a single register's slot and are used for the
// memory operands attached below.
1938 unsigned Size, Align;
1939 switch (RPI.Type) {
1940 case RegPairInfo::GPR:
1941 StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui;
1942 Size = 8;
1943 Align = 8;
1944 break;
1945 case RegPairInfo::FPR64:
1946 StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui;
1947 Size = 8;
1948 Align = 8;
1949 break;
1950 case RegPairInfo::FPR128:
1951 StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
1952 Size = 16;
1953 Align = 16;
1954 break;
1956 LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
1957 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
1958 dbgs() << ") -> fi#(" << RPI.FrameIdx;
1959 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
1960 dbgs() << ")\n");
1962 assert((!NeedsWinCFI || !(Reg1 == AArch64::LR && Reg2 == AArch64::FP)) &&
1963 "Windows unwdinding requires a consecutive (FP,LR) pair");
1964 // Windows unwind codes require consecutive registers if registers are
1965 // paired. Make the switch here, so that the code below will save (x,x+1)
1966 // and not (x+1,x).
1967 unsigned FrameIdxReg1 = RPI.FrameIdx;
1968 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
1969 if (NeedsWinCFI && RPI.isPaired()) {
1970 std::swap(Reg1, Reg2);
1971 std::swap(FrameIdxReg1, FrameIdxReg2);
1973 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc));
// Each stored register becomes a block live-in unless it is reserved.
1974 if (!MRI.isReserved(Reg1))
1975 MBB.addLiveIn(Reg1);
// Operand order for a pair: Reg2 first, then Reg1, then the SP base and
// the scaled immediate.
1976 if (RPI.isPaired()) {
1977 if (!MRI.isReserved(Reg2))
1978 MBB.addLiveIn(Reg2);
1979 MIB.addReg(Reg2, getPrologueDeath(MF, Reg2));
1980 MIB.addMemOperand(MF.getMachineMemOperand(
1981 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
1982 MachineMemOperand::MOStore, Size, Align));
1984 MIB.addReg(Reg1, getPrologueDeath(MF, Reg1))
1985 .addReg(AArch64::SP)
1986 .addImm(RPI.Offset) // [sp, #offset*scale],
1987 // where factor*scale is implicit
1988 .setMIFlag(MachineInstr::FrameSetup);
1989 MIB.addMemOperand(MF.getMachineMemOperand(
1990 MachinePointerInfo::getFixedStack(MF,FrameIdxReg1),
1991 MachineMemOperand::MOStore, Size, Align));
1992 if (NeedsWinCFI)
1993 InsertSEH(MIB, TII, MachineInstr::FrameSetup);
1996 return true;
/// Inserts, before \p MI in \p MBB, the loads that restore the callee-saved
/// registers listed in \p CSI. Always returns true.
///
/// The registers are grouped by computeCalleeSaveRegisterPairs and each unit
/// is emitted as an SP-relative LDP (paired) or LDR (single) flagged
/// FrameDestroy, with one fixed-stack memory operand per loaded register.
/// Units are emitted in the order they were computed, or in reverse when
/// ReverseCSRRestoreSeq (declared outside this function) is set. Under Windows
/// CFI the two registers of a pair (and their frame indices) are swapped
/// before emission and an SEH opcode is inserted via InsertSEH.
///
/// If the grouping reports that a shadow call stack prolog is needed, a
/// pre-indexed load of LR through X18 is emitted after all the restores.
1999 bool AArch64FrameLowering::restoreCalleeSavedRegisters(
2000 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2001 std::vector<CalleeSavedInfo> &CSI,
2002 const TargetRegisterInfo *TRI) const {
2003 MachineFunction &MF = *MBB.getParent();
2004 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2005 DebugLoc DL;
2006 SmallVector<RegPairInfo, 8> RegPairs;
2007 bool NeedsWinCFI = needsWinCFI(MF);
// Reuse the debug location of the instruction at the insertion point, if
// there is one; otherwise DL stays default-constructed.
2009 if (MI != MBB.end())
2010 DL = MI->getDebugLoc();
2012 bool NeedShadowCallStackProlog = false;
2013 computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs,
2014 NeedShadowCallStackProlog, hasFP(MF));
// Emits the load instruction for one unit; invoked once per RegPairs entry
// by the loops further down.
2016 auto EmitMI = [&](const RegPairInfo &RPI) {
2017 unsigned Reg1 = RPI.Reg1;
2018 unsigned Reg2 = RPI.Reg2;
2020 // Issue sequence of restores for cs regs. The last restore may be converted
2021 // to a post-increment load later by emitEpilogue if the callee-save stack
2022 // area allocation can't be combined with the local stack area allocation.
2023 // For example:
2024 // ldp fp, lr, [sp, #32] // addImm(+4)
2025 // ldp x20, x19, [sp, #16] // addImm(+2)
2026 // ldp x22, x21, [sp, #0] // addImm(+0)
2027 // Note: see comment in spillCalleeSavedRegisters()
2028 unsigned LdrOpc;
// Size and Align describe a single register's slot and are used for the
// memory operands attached below.
2029 unsigned Size, Align;
2030 switch (RPI.Type) {
2031 case RegPairInfo::GPR:
2032 LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui;
2033 Size = 8;
2034 Align = 8;
2035 break;
2036 case RegPairInfo::FPR64:
2037 LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui;
2038 Size = 8;
2039 Align = 8;
2040 break;
2041 case RegPairInfo::FPR128:
2042 LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
2043 Size = 16;
2044 Align = 16;
2045 break;
2047 LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
2048 if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
2049 dbgs() << ") -> fi#(" << RPI.FrameIdx;
2050 if (RPI.isPaired()) dbgs() << ", " << RPI.FrameIdx + 1;
2051 dbgs() << ")\n");
2053 // Windows unwind codes require consecutive registers if registers are
2054 // paired. Make the switch here, so that the code below will restore (x,x+1)
2055 // and not (x+1,x).
2056 unsigned FrameIdxReg1 = RPI.FrameIdx;
2057 unsigned FrameIdxReg2 = RPI.FrameIdx + 1;
2058 if (NeedsWinCFI && RPI.isPaired()) {
2059 std::swap(Reg1, Reg2);
2060 std::swap(FrameIdxReg1, FrameIdxReg2);
2062 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc));
// Operand order for a pair: Reg2 first, then Reg1 (both as defs), then the
// SP base and the scaled immediate.
2063 if (RPI.isPaired()) {
2064 MIB.addReg(Reg2, getDefRegState(true));
2065 MIB.addMemOperand(MF.getMachineMemOperand(
2066 MachinePointerInfo::getFixedStack(MF, FrameIdxReg2),
2067 MachineMemOperand::MOLoad, Size, Align));
2069 MIB.addReg(Reg1, getDefRegState(true))
2070 .addReg(AArch64::SP)
2071 .addImm(RPI.Offset) // [sp, #offset*scale]
2072 // where factor*scale is implicit
2073 .setMIFlag(MachineInstr::FrameDestroy);
2074 MIB.addMemOperand(MF.getMachineMemOperand(
2075 MachinePointerInfo::getFixedStack(MF, FrameIdxReg1),
2076 MachineMemOperand::MOLoad, Size, Align));
2077 if (NeedsWinCFI)
2078 InsertSEH(MIB, TII, MachineInstr::FrameDestroy);
// Every instruction is inserted before the same MI, so the iteration order
// chosen here is the order in which the loads end up in the block.
2080 if (ReverseCSRRestoreSeq)
2081 for (const RegPairInfo &RPI : reverse(RegPairs))
2082 EmitMI(RPI);
2083 else
2084 for (const RegPairInfo &RPI : RegPairs)
2085 EmitMI(RPI);
2087 if (NeedShadowCallStackProlog) {
2088 // Shadow call stack epilog: ldr x30, [x18, #-8]!
2089 BuildMI(MBB, MI, DL, TII.get(AArch64::LDRXpre))
2090 .addReg(AArch64::X18, RegState::Define)
2091 .addReg(AArch64::LR, RegState::Define)
2092 .addReg(AArch64::X18)
2093 .addImm(-8)
2094 .setMIFlag(MachineInstr::FrameDestroy);
2097 return true;
/// Decides which callee-saved registers have to be saved for \p MF, updating
/// \p SavedRegs, and records the resulting callee-save area size in the
/// function's AArch64FunctionInfo.
///
/// Starting from the generic TargetFrameLowering result, this:
///  - adds the base pointer register if it is in the callee-saved list;
///  - when a compact unwind frame is produced, also saves the list neighbour
///    (CSRegs[i ^ 1]) of every saved register;
///  - adds FP and LR when the function has a frame pointer or
///    windowsRequiresStackProbe() says a probe is required;
///  - for a stack estimated to exceed estimateRSStackSizeLimit(), either
///    saves one more otherwise-unused GPR64 to serve as a scratch register or
///    creates an emergency spill slot and registers it with \p RS.
/// The callee-save size stored in AFI is rounded up to a multiple of 16, and
/// AFI remembers whether that rounding left free space.
///
/// Nothing is done for the GHC calling convention.
2100 void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
2101 BitVector &SavedRegs,
2102 RegScavenger *RS) const {
2103 // All calls are tail calls in GHC calling conv, and functions have no
2104 // prologue/epilogue.
2105 if (MF.getFunction().getCallingConv() == CallingConv::GHC)
2106 return;
2108 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
2109 const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
2110 MF.getSubtarget().getRegisterInfo());
2111 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// The last non-reserved GPR64 callee-save seen that is NOT being saved, and
// its neighbour in CSRegs; candidates for the extra scratch spill below.
2112 unsigned UnspilledCSGPR = AArch64::NoRegister;
2113 unsigned UnspilledCSGPRPaired = AArch64::NoRegister;
2115 MachineFrameInfo &MFI = MF.getFrameInfo();
2116 const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
2118 unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
2119 ? RegInfo->getBaseRegister()
2120 : (unsigned)AArch64::NoRegister;
2122 unsigned ExtraCSSpill = 0;
2123 // Figure out which callee-saved registers to save/restore.
// The list is walked until its zero terminator.
2124 for (unsigned i = 0; CSRegs[i]; ++i) {
2125 const unsigned Reg = CSRegs[i];
2127 // Add the base pointer register to SavedRegs if it is callee-save.
2128 if (Reg == BasePointerReg)
2129 SavedRegs.set(Reg);
2131 bool RegUsed = SavedRegs.test(Reg);
// i ^ 1 selects the other member of the (0,1), (2,3), ... index pair.
2132 unsigned PairedReg = CSRegs[i ^ 1];
2133 if (!RegUsed) {
2134 if (AArch64::GPR64RegClass.contains(Reg) &&
2135 !RegInfo->isReservedReg(MF, Reg)) {
2136 UnspilledCSGPR = Reg;
2137 UnspilledCSGPRPaired = PairedReg;
2139 continue;
2142 // MachO's compact unwind format relies on all registers being stored in
2143 // pairs.
2144 // FIXME: the usual format is actually better if unwinding isn't needed.
2145 if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
2146 !SavedRegs.test(PairedReg)) {
2147 SavedRegs.set(PairedReg);
2148 if (AArch64::GPR64RegClass.contains(PairedReg) &&
2149 !RegInfo->isReservedReg(MF, PairedReg))
2150 ExtraCSSpill = PairedReg;
2154 // Calculates the callee saved stack size.
2155 unsigned CSStackSize = 0;
2156 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2157 const MachineRegisterInfo &MRI = MF.getRegInfo();
2158 for (unsigned Reg : SavedRegs.set_bits())
2159 CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
2161 // Save number of saved regs, so we can easily update CSStackSize later.
2162 unsigned NumSavedRegs = SavedRegs.count();
2164 // The frame record needs to be created by saving the appropriate registers
2165 unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
// NOTE(review): the extra 16 presumably accounts for the FP/LR pair that is
// about to be added — confirm.
2166 if (hasFP(MF) ||
2167 windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
2168 SavedRegs.set(AArch64::FP);
2169 SavedRegs.set(AArch64::LR);
2172 LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nSaved CSRs:";
2173 for (unsigned Reg
2174 : SavedRegs.set_bits()) dbgs()
2175 << ' ' << printReg(Reg, RegInfo);
2176 dbgs() << "\n";);
2178 // If any callee-saved registers are used, the frame cannot be eliminated.
2179 bool CanEliminateFrame = SavedRegs.count() == 0;
2181 // The CSR spill slots have not been allocated yet, so estimateStackSize
2182 // won't include them.
2183 unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
2184 bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
2185 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
2186 AFI->setHasStackFrame(true);
2188 // Estimate if we might need to scavenge a register at some point in order
2189 // to materialize a stack offset. If so, either spill one additional
2190 // callee-saved register or reserve a special spill slot to facilitate
2191 // register scavenging. If we already spilled an extra callee-saved register
2192 // above to keep the number of spills even, we don't need to do anything else
2193 // here.
2194 if (BigStack) {
2195 if (!ExtraCSSpill && UnspilledCSGPR != AArch64::NoRegister) {
2196 LLVM_DEBUG(dbgs() << "Spilling " << printReg(UnspilledCSGPR, RegInfo)
2197 << " to get a scratch register.\n");
2198 SavedRegs.set(UnspilledCSGPR);
2199 // MachO's compact unwind format relies on all registers being stored in
2200 // pairs, so if we need to spill one extra for BigStack, then we need to
2201 // store the pair.
// NOTE(review): UnspilledCSGPRPaired is not checked against NoRegister
// before being set here, unlike PairedReg in the loop above — confirm it
// can never be the list terminator.
2202 if (produceCompactUnwindFrame(MF))
2203 SavedRegs.set(UnspilledCSGPRPaired);
2204 ExtraCSSpill = UnspilledCSGPR;
2207 // If we didn't find an extra callee-saved register to spill, create
2208 // an emergency spill slot.
2209 if (!ExtraCSSpill || MF.getRegInfo().isPhysRegUsed(ExtraCSSpill)) {
2210 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
2211 const TargetRegisterClass &RC = AArch64::GPR64RegClass;
2212 unsigned Size = TRI->getSpillSize(RC);
2213 unsigned Align = TRI->getSpillAlignment(RC);
2214 int FI = MFI.CreateStackObject(Size, Align, false);
2215 RS->addScavengingFrameIndex(FI);
2216 LLVM_DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
2217 << " as the emergency spill slot.\n");
2221 // Adding the size of additional 64bit GPR saves.
// Every register added after NumSavedRegs was taken is counted as 8 bytes.
2222 CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
2223 unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
2224 LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
2225 << EstimatedStackSize + AlignedCSStackSize
2226 << " bytes.\n");
2228 // Round up to register pair alignment to avoid additional SP adjustment
2229 // instructions.
2230 AFI->setCalleeSavedStackSize(AlignedCSStackSize);
// Free space exists exactly when rounding up to 16 added padding.
2231 AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
2234 bool AArch64FrameLowering::enableStackSlotScavenging(
2235 const MachineFunction &MF) const {
2236 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
2237 return AFI->hasCalleeSaveStackFreeSpace();
2240 void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
2241 MachineFunction &MF, RegScavenger *RS) const {
2242 // If this function isn't doing Win64-style C++ EH, we don't need to do
2243 // anything.
2244 if (!MF.hasEHFunclets())
2245 return;
2246 const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
2247 MachineFrameInfo &MFI = MF.getFrameInfo();
2248 WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
2250 MachineBasicBlock &MBB = MF.front();
2251 auto MBBI = MBB.begin();
2252 while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
2253 ++MBBI;
2255 // Create an UnwindHelp object.
2256 int UnwindHelpFI =
2257 MFI.CreateStackObject(/*size*/8, /*alignment*/16, false);
2258 EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
2259 // We need to store -2 into the UnwindHelp object at the start of the
2260 // function.
2261 DebugLoc DL;
2262 RS->enterBasicBlockEnd(MBB);
2263 RS->backward(std::prev(MBBI));
2264 unsigned DstReg = RS->FindUnusedReg(&AArch64::GPR64commonRegClass);
2265 assert(DstReg && "There must be a free register after frame setup");
2266 BuildMI(MBB, MBBI, DL, TII.get(AArch64::MOVi64imm), DstReg).addImm(-2);
2267 BuildMI(MBB, MBBI, DL, TII.get(AArch64::STURXi))
2268 .addReg(DstReg, getKillRegState(true))
2269 .addFrameIndex(UnwindHelpFI)
2270 .addImm(0);
2273 /// For Win64 AArch64 EH, the offset to the Unwind object is from the SP before
2274 /// the update. This is easily retrieved as it is exactly the offset that is set
2275 /// in processFunctionBeforeFrameFinalized.
2276 int AArch64FrameLowering::getFrameIndexReferencePreferSP(
2277 const MachineFunction &MF, int FI, unsigned &FrameReg,
2278 bool IgnoreSPUpdates) const {
2279 const MachineFrameInfo &MFI = MF.getFrameInfo();
2280 LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is "
2281 << MFI.getObjectOffset(FI) << "\n");
2282 FrameReg = AArch64::SP;
2283 return MFI.getObjectOffset(FI);
2286 /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve
2287 /// the parent's frame pointer
2288 unsigned AArch64FrameLowering::getWinEHParentFrameOffset(
2289 const MachineFunction &MF) const {
2290 return 0;
2293 /// Funclets only need to account for space for the callee saved registers,
2294 /// as the locals are accounted for in the parent's stack frame.
2295 unsigned AArch64FrameLowering::getWinEHFuncletFrameSize(
2296 const MachineFunction &MF) const {
2297 // This is the size of the pushed CSRs.
2298 unsigned CSSize =
2299 MF.getInfo<AArch64FunctionInfo>()->getCalleeSavedStackSize();
2300 // This is the amount of stack a funclet needs to allocate.
2301 return alignTo(CSSize + MF.getFrameInfo().getMaxCallFrameSize(),
2302 getStackAlignment());