1 //===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the ARM implementation of TargetFrameLowering class.
11 //===----------------------------------------------------------------------===//
13 // This file contains the ARM implementation of TargetFrameLowering class.
15 // On ARM, stack frames are structured as follows:
17 // The stack grows downward.
19 // All of the individual frame areas on the frame below are optional, i.e. it's
20 // possible to create a function so that the particular area isn't present
23 // At function entry, the "frame" looks as follows:
26 // |-----------------------------------|
28 // | arguments passed on the stack |
30 // |-----------------------------------| <- sp
34 // After the prologue has run, the frame has the following general structure.
35 // Technically the last frame area (VLAs) doesn't get created until in the
36 // main function body, after the prologue is run. However, it's depicted here
40 // |-----------------------------------|
42 // | arguments passed on the stack |
44 // |-----------------------------------| <- (sp at function entry)
46 // | varargs from registers |
48 // |-----------------------------------|
52 // | (a.k.a. "frame record") |
54 // |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
56 // | callee-saved gpr registers |
58 // |-----------------------------------|
60 // | callee-saved fp/simd regs |
62 // |-----------------------------------|
63 // |.empty.space.to.make.part.below....|
64 // |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65 // |.the.standard.8-byte.alignment.....| compile time; if present)
66 // |-----------------------------------|
68 // | local variables of fixed size |
69 // | including spill slots |
70 // |-----------------------------------| <- base pointer (not defined by ABI,
71 // |.variable-sized.local.variables....| LLVM chooses r6)
72 // |.(VLAs)............................| (size of this area is unknown at
73 // |...................................| compile time)
74 // |-----------------------------------| <- sp
78 // To access the data in a frame, at-compile time, a constant offset must be
79 // computable from one of the pointers (fp, bp, sp) to access it. The size
80 // of the areas with a dotted background cannot be computed at compile-time
81 // if they are present, making it required to have all three of fp, bp and
82 // sp to be set up to be able to access all contents in the frame areas,
83 // assuming all of the frame areas are non-empty.
85 // For most functions, some of the frame areas are empty. For those functions,
86 // it may not be necessary to set up fp or bp:
87 // * A base pointer is definitely needed when there are both VLAs and local
88 // variables with more-than-default alignment requirements.
89 // * A frame pointer is definitely needed when there are local variables with
90 // more-than-default alignment requirements.
92 // In some cases when a base pointer is not strictly needed, it is generated
93 // anyway when offsets from the frame pointer to access local variables become
94 // so large that the offset can't be encoded in the immediate fields of loads
97 // The frame pointer might be chosen to be r7 or r11, depending on the target
98 // architecture and operating system. See ARMSubtarget::getFramePointerReg for
101 // Outgoing function arguments must be at the bottom of the stack frame when
102 // calling another function. If we do not have variable-sized stack objects, we
103 // can allocate a "reserved call frame" area at the bottom of the local
104 // variable area, large enough for all outgoing calls. If we do have VLAs, then
105 // the stack pointer must be decremented and incremented around each call to
106 // make space for the arguments below the VLAs.
108 //===----------------------------------------------------------------------===//
110 #include "ARMFrameLowering.h"
111 #include "ARMBaseInstrInfo.h"
112 #include "ARMBaseRegisterInfo.h"
113 #include "ARMConstantPoolValue.h"
114 #include "ARMMachineFunctionInfo.h"
115 #include "ARMSubtarget.h"
116 #include "MCTargetDesc/ARMAddressingModes.h"
117 #include "MCTargetDesc/ARMBaseInfo.h"
118 #include "Utils/ARMBaseInfo.h"
119 #include "llvm/ADT/BitVector.h"
120 #include "llvm/ADT/STLExtras.h"
121 #include "llvm/ADT/SmallPtrSet.h"
122 #include "llvm/ADT/SmallVector.h"
123 #include "llvm/CodeGen/MachineBasicBlock.h"
124 #include "llvm/CodeGen/MachineConstantPool.h"
125 #include "llvm/CodeGen/MachineFrameInfo.h"
126 #include "llvm/CodeGen/MachineFunction.h"
127 #include "llvm/CodeGen/MachineInstr.h"
128 #include "llvm/CodeGen/MachineInstrBuilder.h"
129 #include "llvm/CodeGen/MachineJumpTableInfo.h"
130 #include "llvm/CodeGen/MachineModuleInfo.h"
131 #include "llvm/CodeGen/MachineOperand.h"
132 #include "llvm/CodeGen/MachineRegisterInfo.h"
133 #include "llvm/CodeGen/RegisterScavenging.h"
134 #include "llvm/CodeGen/TargetInstrInfo.h"
135 #include "llvm/CodeGen/TargetOpcodes.h"
136 #include "llvm/CodeGen/TargetRegisterInfo.h"
137 #include "llvm/CodeGen/TargetSubtargetInfo.h"
138 #include "llvm/IR/Attributes.h"
139 #include "llvm/IR/CallingConv.h"
140 #include "llvm/IR/DebugLoc.h"
141 #include "llvm/IR/Function.h"
142 #include "llvm/MC/MCAsmInfo.h"
143 #include "llvm/MC/MCContext.h"
144 #include "llvm/MC/MCDwarf.h"
145 #include "llvm/MC/MCInstrDesc.h"
146 #include "llvm/MC/MCRegisterInfo.h"
147 #include "llvm/Support/CodeGen.h"
148 #include "llvm/Support/CommandLine.h"
149 #include "llvm/Support/Compiler.h"
150 #include "llvm/Support/Debug.h"
151 #include "llvm/Support/ErrorHandling.h"
152 #include "llvm/Support/raw_ostream.h"
153 #include "llvm/Target/TargetMachine.h"
154 #include "llvm/Target/TargetOptions.h"
163 #define DEBUG_TYPE "arm-frame-lowering"
165 using namespace llvm
;
168 SpillAlignedNEONRegs("align-neon-spills", cl::Hidden
, cl::init(true),
169 cl::desc("Align ARM NEON spills in prolog and epilog"));
171 static MachineBasicBlock::iterator
172 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI
,
173 unsigned NumAlignedDPRCS2Regs
);
175 enum class SpillArea
{
184 /// Get the spill area that Reg should be saved into in the prologue.
185 SpillArea
getSpillArea(Register Reg
,
186 ARMSubtarget::PushPopSplitVariation Variation
,
187 unsigned NumAlignedDPRCS2Regs
,
188 const ARMBaseRegisterInfo
*RegInfo
) {
190 // push {r0-r12, lr} GPRCS1
191 // vpush {r8-d15} DPRCS1
194 // push {r0-r7, lr} GPRCS1
195 // push {r8-r12} GPRCS2
196 // vpush {r8-d15} DPRCS1
198 // SplitR11WindowsSEH:
199 // push {r0-r10, r12} GPRCS1
200 // vpush {r8-d15} DPRCS1
201 // push {r11, lr} GPRCS3
203 // SplitR11AAPCSSignRA:
204 // push {r0-r10, r12} GPRSC1
205 // push {r11, lr} GPRCS2
206 // vpush {r8-d15} DPRCS1
208 // If FPCXTNS is spilled (for CMSE secure entryfunctions), it is always at
209 // the top of the stack frame.
210 // The DPRCS2 region is used for ABIs which only guarantee 4-byte alignment
211 // of SP. If used, it will be below the other save areas, after the stack has
216 dbgs() << "Don't know where to spill " << printReg(Reg
, RegInfo
) << "\n";
217 llvm_unreachable("Don't know where to spill this register");
221 return SpillArea::FPCXT
;
231 return SpillArea::GPRCS1
;
236 if (Variation
== ARMSubtarget::SplitR7
)
237 return SpillArea::GPRCS2
;
239 return SpillArea::GPRCS1
;
242 if (Variation
== ARMSubtarget::SplitR7
||
243 Variation
== ARMSubtarget::SplitR11AAPCSSignRA
)
244 return SpillArea::GPRCS2
;
245 if (Variation
== ARMSubtarget::SplitR11WindowsSEH
)
246 return SpillArea::GPRCS3
;
248 return SpillArea::GPRCS1
;
251 if (Variation
== ARMSubtarget::SplitR7
)
252 return SpillArea::GPRCS2
;
254 return SpillArea::GPRCS1
;
257 if (Variation
== ARMSubtarget::SplitR11AAPCSSignRA
)
258 return SpillArea::GPRCS2
;
259 if (Variation
== ARMSubtarget::SplitR11WindowsSEH
)
260 return SpillArea::GPRCS3
;
262 return SpillArea::GPRCS1
;
272 return SpillArea::DPRCS1
;
282 if (Reg
>= ARM::D8
&& Reg
< ARM::D8
+ NumAlignedDPRCS2Regs
)
283 return SpillArea::DPRCS2
;
285 return SpillArea::DPRCS1
;
303 return SpillArea::DPRCS1
;
307 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget
&sti
)
308 : TargetFrameLowering(StackGrowsDown
, sti
.getStackAlignment(), 0, Align(4)),
311 bool ARMFrameLowering::keepFramePointer(const MachineFunction
&MF
) const {
312 // iOS always has a FP for backtracking, force other targets to keep their FP
313 // when doing FastISel. The emitted code is currently superior, and in cases
314 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
315 return MF
.getSubtarget
<ARMSubtarget
>().useFastISel();
318 /// Returns true if the target can safely skip saving callee-saved registers
319 /// for noreturn nounwind functions.
320 bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction
&MF
) const {
321 assert(MF
.getFunction().hasFnAttribute(Attribute::NoReturn
) &&
322 MF
.getFunction().hasFnAttribute(Attribute::NoUnwind
) &&
323 !MF
.getFunction().hasFnAttribute(Attribute::UWTable
));
325 // Frame pointer and link register are not treated as normal CSR, thus we
326 // can always skip CSR saves for nonreturning functions.
330 /// hasFPImpl - Return true if the specified function should have a dedicated
331 /// frame pointer register. This is true if the function has variable sized
332 /// allocas or if frame pointer elimination is disabled.
333 bool ARMFrameLowering::hasFPImpl(const MachineFunction
&MF
) const {
334 const TargetRegisterInfo
*RegInfo
= MF
.getSubtarget().getRegisterInfo();
335 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
337 // Check to see if the target want to forcibly keep frame pointer.
338 if (keepFramePointer(MF
))
341 // ABI-required frame pointer.
342 if (MF
.getTarget().Options
.DisableFramePointerElim(MF
))
345 // Frame pointer required for use within this function.
346 return (RegInfo
->hasStackRealignment(MF
) || MFI
.hasVarSizedObjects() ||
347 MFI
.isFrameAddressTaken());
350 /// isFPReserved - Return true if the frame pointer register should be
351 /// considered a reserved register on the scope of the specified function.
352 bool ARMFrameLowering::isFPReserved(const MachineFunction
&MF
) const {
353 return hasFP(MF
) || MF
.getTarget().Options
.FramePointerIsReserved(MF
);
356 /// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
357 /// not required, we reserve argument space for call sites in the function
358 /// immediately on entry to the current function. This eliminates the need for
359 /// add/sub sp brackets around call sites. Returns true if the call frame is
360 /// included as part of the stack frame.
361 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction
&MF
) const {
362 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
363 unsigned CFSize
= MFI
.getMaxCallFrameSize();
364 // It's not always a good idea to include the call frame as part of the
365 // stack frame. ARM (especially Thumb) has small immediate offset to
366 // address the stack frame. So a large call frame can cause poor codegen
367 // and may even makes it impossible to scavenge a register.
368 if (CFSize
>= ((1 << 12) - 1) / 2) // Half of imm12
371 return !MFI
.hasVarSizedObjects();
374 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
375 /// call frame pseudos can be simplified. Unlike most targets, having a FP
376 /// is not sufficient here since we still may reference some objects via SP
377 /// even when FP is available in Thumb2 mode.
379 ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction
&MF
) const {
380 return hasReservedCallFrame(MF
) || MF
.getFrameInfo().hasVarSizedObjects();
383 // Returns how much of the incoming argument stack area we should clean up in an
384 // epilogue. For the C calling convention this will be 0, for guaranteed tail
385 // call conventions it can be positive (a normal return or a tail call to a
386 // function that uses less stack space for arguments) or negative (for a tail
387 // call to a function that needs more stack space than us for arguments).
388 static int getArgumentStackToRestore(MachineFunction
&MF
,
389 MachineBasicBlock
&MBB
) {
390 MachineBasicBlock::iterator MBBI
= MBB
.getLastNonDebugInstr();
391 bool IsTailCallReturn
= false;
392 if (MBB
.end() != MBBI
) {
393 unsigned RetOpcode
= MBBI
->getOpcode();
394 IsTailCallReturn
= RetOpcode
== ARM::TCRETURNdi
||
395 RetOpcode
== ARM::TCRETURNri
||
396 RetOpcode
== ARM::TCRETURNrinotr12
;
398 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
400 int ArgumentPopSize
= 0;
401 if (IsTailCallReturn
) {
402 MachineOperand
&StackAdjust
= MBBI
->getOperand(1);
404 // For a tail-call in a callee-pops-arguments environment, some or all of
405 // the stack may actually be in use for the call's arguments, this is
406 // calculated during LowerCall and consumed here...
407 ArgumentPopSize
= StackAdjust
.getImm();
409 // ... otherwise the amount to pop is *all* of the argument space,
410 // conveniently stored in the MachineFunctionInfo by
411 // LowerFormalArguments. This will, of course, be zero for the C calling
413 ArgumentPopSize
= AFI
->getArgumentStackToRestore();
416 return ArgumentPopSize
;
419 static bool needsWinCFI(const MachineFunction
&MF
) {
420 const Function
&F
= MF
.getFunction();
421 return MF
.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
422 F
.needsUnwindTableEntry();
425 // Given a load or a store instruction, generate an appropriate unwinding SEH
427 static MachineBasicBlock::iterator
insertSEH(MachineBasicBlock::iterator MBBI
,
428 const TargetInstrInfo
&TII
,
430 unsigned Opc
= MBBI
->getOpcode();
431 MachineBasicBlock
*MBB
= MBBI
->getParent();
432 MachineFunction
&MF
= *MBB
->getParent();
433 DebugLoc DL
= MBBI
->getDebugLoc();
434 MachineInstrBuilder MIB
;
435 const ARMSubtarget
&Subtarget
= MF
.getSubtarget
<ARMSubtarget
>();
436 const ARMBaseRegisterInfo
*RegInfo
= Subtarget
.getRegisterInfo();
438 Flags
|= MachineInstr::NoMerge
;
442 report_fatal_error("No SEH Opcode for instruction " + TII
.getName(Opc
));
444 case ARM::t2ADDri
: // add.w r11, sp, #xx
445 case ARM::t2ADDri12
: // add.w r11, sp, #xx
446 case ARM::t2MOVTi16
: // movt r4, #xx
447 case ARM::tBL
: // bl __chkstk
448 // These are harmless if used for just setting up a frame pointer,
449 // but that frame pointer can't be relied upon for unwinding, unless
450 // set up with SEH_SaveSP.
451 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop
))
456 case ARM::t2MOVi16
: { // mov(w) r4, #xx
457 bool Wide
= MBBI
->getOperand(1).getImm() >= 256;
459 MachineInstrBuilder NewInstr
=
460 BuildMI(MF
, DL
, TII
.get(ARM::tMOVi8
)).setMIFlags(MBBI
->getFlags());
461 NewInstr
.add(MBBI
->getOperand(0));
462 NewInstr
.add(t1CondCodeOp(/*isDead=*/true));
463 for (MachineOperand
&MO
: llvm::drop_begin(MBBI
->operands()))
465 MachineBasicBlock::iterator NewMBBI
= MBB
->insertAfter(MBBI
, NewInstr
);
469 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop
)).addImm(Wide
).setMIFlags(Flags
);
473 case ARM::tBLXr
: // blx r12 (__chkstk)
474 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop
))
479 case ARM::t2MOVi32imm
: // movw+movt
480 // This pseudo instruction expands into two mov instructions. If the
481 // second operand is a symbol reference, this will stay as two wide
482 // instructions, movw+movt. If they're immediates, the first one can
483 // end up as a narrow mov though.
484 // As two SEH instructions are appended here, they won't get interleaved
485 // between the two final movw/movt instructions, but it doesn't make any
486 // practical difference.
487 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop
))
490 MBB
->insertAfter(MBBI
, MIB
);
491 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop
))
497 if (MBBI
->getOperand(0).getReg() == ARM::SP
&&
498 MBBI
->getOperand(2).getReg() == ARM::SP
&&
499 MBBI
->getOperand(3).getImm() == -4) {
500 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
501 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_SaveRegs
))
506 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
510 case ARM::t2LDR_POST
:
511 if (MBBI
->getOperand(1).getReg() == ARM::SP
&&
512 MBBI
->getOperand(2).getReg() == ARM::SP
&&
513 MBBI
->getOperand(3).getImm() == 4) {
514 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(0).getReg());
515 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_SaveRegs
))
520 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
524 case ARM::t2LDMIA_RET
:
525 case ARM::t2LDMIA_UPD
:
526 case ARM::t2STMDB_UPD
: {
529 for (unsigned i
= 4, NumOps
= MBBI
->getNumOperands(); i
!= NumOps
; ++i
) {
530 const MachineOperand
&MO
= MBBI
->getOperand(i
);
531 if (!MO
.isReg() || MO
.isImplicit())
533 unsigned Reg
= RegInfo
->getSEHRegNum(MO
.getReg());
536 if (Reg
>= 8 && Reg
<= 13)
538 else if (Opc
== ARM::t2LDMIA_UPD
&& Reg
== 14)
545 case ARM::t2LDMIA_RET
:
546 NewOpc
= ARM::tPOP_RET
;
548 case ARM::t2LDMIA_UPD
:
551 case ARM::t2STMDB_UPD
:
555 llvm_unreachable("");
557 MachineInstrBuilder NewInstr
=
558 BuildMI(MF
, DL
, TII
.get(NewOpc
)).setMIFlags(MBBI
->getFlags());
559 for (unsigned i
= 2, NumOps
= MBBI
->getNumOperands(); i
!= NumOps
; ++i
)
560 NewInstr
.add(MBBI
->getOperand(i
));
561 MachineBasicBlock::iterator NewMBBI
= MBB
->insertAfter(MBBI
, NewInstr
);
566 (Opc
== ARM::t2LDMIA_RET
) ? ARM::SEH_SaveRegs_Ret
: ARM::SEH_SaveRegs
;
567 MIB
= BuildMI(MF
, DL
, TII
.get(SEHOpc
))
569 .addImm(Wide
? 1 : 0)
573 case ARM::VSTMDDB_UPD
:
574 case ARM::VLDMDIA_UPD
: {
575 int First
= -1, Last
= 0;
576 for (const MachineOperand
&MO
: llvm::drop_begin(MBBI
->operands(), 4)) {
577 unsigned Reg
= RegInfo
->getSEHRegNum(MO
.getReg());
582 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_SaveFRegs
))
590 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_StackAlloc
))
591 .addImm(MBBI
->getOperand(2).getImm() * 4)
595 case ARM::t2SUBspImm
:
596 case ARM::t2SUBspImm12
:
597 case ARM::t2ADDspImm
:
598 case ARM::t2ADDspImm12
:
599 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_StackAlloc
))
600 .addImm(MBBI
->getOperand(2).getImm())
606 if (MBBI
->getOperand(1).getReg() == ARM::SP
&&
607 (Flags
& MachineInstr::FrameSetup
)) {
608 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(0).getReg());
609 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_SaveSP
))
612 } else if (MBBI
->getOperand(0).getReg() == ARM::SP
&&
613 (Flags
& MachineInstr::FrameDestroy
)) {
614 unsigned Reg
= RegInfo
->getSEHRegNum(MBBI
->getOperand(1).getReg());
615 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_SaveSP
))
619 report_fatal_error("No SEH Opcode for MOV");
624 case ARM::TCRETURNri
:
625 case ARM::TCRETURNrinotr12
:
626 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop_Ret
))
631 case ARM::TCRETURNdi
:
632 MIB
= BuildMI(MF
, DL
, TII
.get(ARM::SEH_Nop_Ret
))
637 return MBB
->insertAfter(MBBI
, MIB
);
640 static MachineBasicBlock::iterator
641 initMBBRange(MachineBasicBlock
&MBB
, const MachineBasicBlock::iterator
&MBBI
) {
642 if (MBBI
== MBB
.begin())
643 return MachineBasicBlock::iterator();
644 return std::prev(MBBI
);
647 static void insertSEHRange(MachineBasicBlock
&MBB
,
648 MachineBasicBlock::iterator Start
,
649 const MachineBasicBlock::iterator
&End
,
650 const ARMBaseInstrInfo
&TII
, unsigned MIFlags
) {
652 Start
= std::next(Start
);
656 for (auto MI
= Start
; MI
!= End
;) {
657 auto Next
= std::next(MI
);
658 // Check if this instruction already has got a SEH opcode added. In that
659 // case, don't do this generic mapping.
660 if (Next
!= End
&& isSEHInstruction(*Next
)) {
661 MI
= std::next(Next
);
662 while (MI
!= End
&& isSEHInstruction(*MI
))
666 insertSEH(MI
, TII
, MIFlags
);
671 static void emitRegPlusImmediate(
672 bool isARM
, MachineBasicBlock
&MBB
, MachineBasicBlock::iterator
&MBBI
,
673 const DebugLoc
&dl
, const ARMBaseInstrInfo
&TII
, unsigned DestReg
,
674 unsigned SrcReg
, int NumBytes
, unsigned MIFlags
= MachineInstr::NoFlags
,
675 ARMCC::CondCodes Pred
= ARMCC::AL
, unsigned PredReg
= 0) {
677 emitARMRegPlusImmediate(MBB
, MBBI
, dl
, DestReg
, SrcReg
, NumBytes
,
678 Pred
, PredReg
, TII
, MIFlags
);
680 emitT2RegPlusImmediate(MBB
, MBBI
, dl
, DestReg
, SrcReg
, NumBytes
,
681 Pred
, PredReg
, TII
, MIFlags
);
684 static void emitSPUpdate(bool isARM
, MachineBasicBlock
&MBB
,
685 MachineBasicBlock::iterator
&MBBI
, const DebugLoc
&dl
,
686 const ARMBaseInstrInfo
&TII
, int NumBytes
,
687 unsigned MIFlags
= MachineInstr::NoFlags
,
688 ARMCC::CondCodes Pred
= ARMCC::AL
,
689 unsigned PredReg
= 0) {
690 emitRegPlusImmediate(isARM
, MBB
, MBBI
, dl
, TII
, ARM::SP
, ARM::SP
, NumBytes
,
691 MIFlags
, Pred
, PredReg
);
694 static int sizeOfSPAdjustment(const MachineInstr
&MI
) {
696 switch (MI
.getOpcode()) {
697 case ARM::VSTMDDB_UPD
:
701 case ARM::t2STMDB_UPD
:
705 case ARM::STR_PRE_IMM
:
708 llvm_unreachable("Unknown push or pop like instruction");
712 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
713 // pred) so the list starts at 4.
714 for (int i
= MI
.getNumOperands() - 1; i
>= 4; --i
)
719 static bool WindowsRequiresStackProbe(const MachineFunction
&MF
,
720 size_t StackSizeInBytes
) {
721 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
722 const Function
&F
= MF
.getFunction();
723 unsigned StackProbeSize
= (MFI
.getStackProtectorIndex() > 0) ? 4080 : 4096;
726 F
.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize
);
727 return (StackSizeInBytes
>= StackProbeSize
) &&
728 !F
.hasFnAttribute("no-stack-arg-probe");
733 struct StackAdjustingInsts
{
735 MachineBasicBlock::iterator I
;
739 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
741 dbgs() << " " << (BeforeFPSet
? "before-fp " : " ")
742 << "sp-adjust=" << SPAdjust
;
748 SmallVector
<InstInfo
, 4> Insts
;
750 void addInst(MachineBasicBlock::iterator I
, unsigned SPAdjust
,
751 bool BeforeFPSet
= false) {
752 InstInfo Info
= {I
, SPAdjust
, BeforeFPSet
};
753 Insts
.push_back(Info
);
756 void addExtraBytes(const MachineBasicBlock::iterator I
, unsigned ExtraBytes
) {
758 llvm::find_if(Insts
, [&](InstInfo
&Info
) { return Info
.I
== I
; });
759 assert(Info
!= Insts
.end() && "invalid sp adjusting instruction");
760 Info
->SPAdjust
+= ExtraBytes
;
763 void emitDefCFAOffsets(MachineBasicBlock
&MBB
, const DebugLoc
&dl
,
764 const ARMBaseInstrInfo
&TII
, bool HasFP
) {
765 MachineFunction
&MF
= *MBB
.getParent();
766 unsigned CFAOffset
= 0;
767 for (auto &Info
: Insts
) {
768 if (HasFP
&& !Info
.BeforeFPSet
)
771 CFAOffset
+= Info
.SPAdjust
;
772 unsigned CFIIndex
= MF
.addFrameInst(
773 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset
));
774 BuildMI(MBB
, std::next(Info
.I
), dl
,
775 TII
.get(TargetOpcode::CFI_INSTRUCTION
))
776 .addCFIIndex(CFIIndex
)
777 .setMIFlags(MachineInstr::FrameSetup
);
781 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
783 dbgs() << "StackAdjustingInsts:\n";
784 for (auto &Info
: Insts
)
790 } // end anonymous namespace
792 /// Emit an instruction sequence that will align the address in
793 /// register Reg by zero-ing out the lower bits. For versions of the
794 /// architecture that support Neon, this must be done in a single
795 /// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
796 /// single instruction. That function only gets called when optimizing
797 /// spilling of D registers on a core with the Neon instruction set
799 static void emitAligningInstructions(MachineFunction
&MF
, ARMFunctionInfo
*AFI
,
800 const TargetInstrInfo
&TII
,
801 MachineBasicBlock
&MBB
,
802 MachineBasicBlock::iterator MBBI
,
803 const DebugLoc
&DL
, const unsigned Reg
,
804 const Align Alignment
,
805 const bool MustBeSingleInstruction
) {
806 const ARMSubtarget
&AST
= MF
.getSubtarget
<ARMSubtarget
>();
807 const bool CanUseBFC
= AST
.hasV6T2Ops() || AST
.hasV7Ops();
808 const unsigned AlignMask
= Alignment
.value() - 1U;
809 const unsigned NrBitsToZero
= Log2(Alignment
);
810 assert(!AFI
->isThumb1OnlyFunction() && "Thumb1 not supported");
811 if (!AFI
->isThumbFunction()) {
812 // if the BFC instruction is available, use that to zero the lower
814 // bfc Reg, #0, log2(Alignment)
815 // otherwise use BIC, if the mask to zero the required number of bits
816 // can be encoded in the bic immediate field
817 // bic Reg, Reg, Alignment-1
819 // lsr Reg, Reg, log2(Alignment)
820 // lsl Reg, Reg, log2(Alignment)
822 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::BFC
), Reg
)
823 .addReg(Reg
, RegState::Kill
)
825 .add(predOps(ARMCC::AL
));
826 } else if (AlignMask
<= 255) {
827 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::BICri
), Reg
)
828 .addReg(Reg
, RegState::Kill
)
830 .add(predOps(ARMCC::AL
))
833 assert(!MustBeSingleInstruction
&&
834 "Shouldn't call emitAligningInstructions demanding a single "
835 "instruction to be emitted for large stack alignment for a target "
837 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::MOVsi
), Reg
)
838 .addReg(Reg
, RegState::Kill
)
839 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr
, NrBitsToZero
))
840 .add(predOps(ARMCC::AL
))
842 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::MOVsi
), Reg
)
843 .addReg(Reg
, RegState::Kill
)
844 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl
, NrBitsToZero
))
845 .add(predOps(ARMCC::AL
))
849 // Since this is only reached for Thumb-2 targets, the BFC instruction
850 // should always be available.
852 BuildMI(MBB
, MBBI
, DL
, TII
.get(ARM::t2BFC
), Reg
)
853 .addReg(Reg
, RegState::Kill
)
855 .add(predOps(ARMCC::AL
));
859 /// We need the offset of the frame pointer relative to other MachineFrameInfo
860 /// offsets which are encoded relative to SP at function begin.
861 /// See also emitPrologue() for how the FP is set up.
862 /// Unfortunately we cannot determine this value in determineCalleeSaves() yet
863 /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
864 /// this to produce a conservative estimate that we check in an assert() later.
865 static int getMaxFPOffset(const ARMSubtarget
&STI
, const ARMFunctionInfo
&AFI
,
866 const MachineFunction
&MF
) {
867 ARMSubtarget::PushPopSplitVariation PushPopSplit
=
868 STI
.getPushPopSplitVariation(MF
);
869 // For Thumb1, push.w isn't available, so the first push will always push
870 // r7 and lr onto the stack first.
871 if (AFI
.isThumb1OnlyFunction())
872 return -AFI
.getArgRegsSaveSize() - (2 * 4);
873 // This is a conservative estimation: Assume the frame pointer being r7 and
874 // pc("r15") up to r8 getting spilled before (= 8 registers).
875 int MaxRegBytes
= 8 * 4;
876 if (PushPopSplit
== ARMSubtarget::SplitR11AAPCSSignRA
)
877 // Here, r11 can be stored below all of r4-r15.
878 MaxRegBytes
= 11 * 4;
879 if (PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
) {
880 // Here, r11 can be stored below all of r4-r15 plus d8-d15.
881 MaxRegBytes
= 11 * 4 + 8 * 8;
884 (STI
.hasV8_1MMainlineOps() && AFI
.isCmseNSEntryFunction()) ? 4 : 0;
885 return -FPCXTSaveSize
- AFI
.getArgRegsSaveSize() - MaxRegBytes
;
888 void ARMFrameLowering::emitPrologue(MachineFunction
&MF
,
889 MachineBasicBlock
&MBB
) const {
890 MachineBasicBlock::iterator MBBI
= MBB
.begin();
891 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
892 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
893 MCContext
&Context
= MF
.getContext();
894 const TargetMachine
&TM
= MF
.getTarget();
895 const MCRegisterInfo
*MRI
= Context
.getRegisterInfo();
896 const ARMBaseRegisterInfo
*RegInfo
= STI
.getRegisterInfo();
897 const ARMBaseInstrInfo
&TII
= *STI
.getInstrInfo();
898 assert(!AFI
->isThumb1OnlyFunction() &&
899 "This emitPrologue does not support Thumb1!");
900 bool isARM
= !AFI
->isThumbFunction();
901 Align Alignment
= STI
.getFrameLowering()->getStackAlign();
902 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
903 unsigned NumBytes
= MFI
.getStackSize();
904 const std::vector
<CalleeSavedInfo
> &CSI
= MFI
.getCalleeSavedInfo();
905 int FPCXTSaveSize
= 0;
906 bool NeedsWinCFI
= needsWinCFI(MF
);
907 ARMSubtarget::PushPopSplitVariation PushPopSplit
=
908 STI
.getPushPopSplitVariation(MF
);
910 LLVM_DEBUG(dbgs() << "Emitting prologue for " << MF
.getName() << "\n");
912 // Debug location must be unknown since the first debug location is used
913 // to determine the end of the prologue.
916 Register FramePtr
= RegInfo
->getFrameRegister(MF
);
918 // Determine the sizes of each callee-save spill areas and record which frame
919 // belongs to which callee-save spill areas.
920 unsigned GPRCS1Size
= 0, GPRCS2Size
= 0, DPRCS1Size
= 0, GPRCS3Size
= 0,
922 int FramePtrSpillFI
= 0;
925 // All calls are tail calls in GHC calling conv, and functions have no
926 // prologue/epilogue.
927 if (MF
.getFunction().getCallingConv() == CallingConv::GHC
)
930 StackAdjustingInsts DefCFAOffsetCandidates
;
931 bool HasFP
= hasFP(MF
);
933 if (!AFI
->hasStackFrame() &&
934 (!STI
.isTargetWindows() || !WindowsRequiresStackProbe(MF
, NumBytes
))) {
936 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, -NumBytes
,
937 MachineInstr::FrameSetup
);
938 DefCFAOffsetCandidates
.addInst(std::prev(MBBI
), NumBytes
, true);
941 DefCFAOffsetCandidates
.emitDefCFAOffsets(MBB
, dl
, TII
, HasFP
);
942 if (NeedsWinCFI
&& MBBI
!= MBB
.begin()) {
943 insertSEHRange(MBB
, {}, MBBI
, TII
, MachineInstr::FrameSetup
);
944 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::SEH_PrologEnd
))
945 .setMIFlag(MachineInstr::FrameSetup
);
946 MF
.setHasWinCFI(true);
951 // Determine spill area sizes, and some important frame indices.
952 SpillArea FramePtrSpillArea
= SpillArea::GPRCS1
;
953 bool BeforeFPPush
= true;
954 for (const CalleeSavedInfo
&I
: CSI
) {
955 Register Reg
= I
.getReg();
956 int FI
= I
.getFrameIdx();
958 SpillArea Area
= getSpillArea(Reg
, PushPopSplit
,
959 AFI
->getNumAlignedDPRCS2Regs(), RegInfo
);
961 if (Reg
== FramePtr
) {
962 FramePtrSpillFI
= FI
;
963 FramePtrSpillArea
= Area
;
969 case SpillArea::FPCXT
:
972 case SpillArea::GPRCS1
:
975 case SpillArea::GPRCS2
:
978 case SpillArea::DPRCS1
:
981 case SpillArea::GPRCS3
:
984 case SpillArea::DPRCS2
:
990 MachineBasicBlock::iterator LastPush
= MBB
.end(), GPRCS1Push
, GPRCS2Push
,
991 DPRCS1Push
, GPRCS3Push
;
993 // Move past the PAC computation.
994 if (AFI
->shouldSignReturnAddress())
997 // Move past FPCXT area.
998 if (FPCXTSaveSize
> 0) {
1000 DefCFAOffsetCandidates
.addInst(LastPush
, FPCXTSaveSize
, BeforeFPPush
);
1003 // Allocate the vararg register save area.
1004 if (ArgRegsSaveSize
) {
1005 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, -ArgRegsSaveSize
,
1006 MachineInstr::FrameSetup
);
1007 LastPush
= std::prev(MBBI
);
1008 DefCFAOffsetCandidates
.addInst(LastPush
, ArgRegsSaveSize
, BeforeFPPush
);
1011 // Move past area 1.
1012 if (GPRCS1Size
> 0) {
1013 GPRCS1Push
= LastPush
= MBBI
++;
1014 DefCFAOffsetCandidates
.addInst(LastPush
, GPRCS1Size
, BeforeFPPush
);
1015 if (FramePtrSpillArea
== SpillArea::GPRCS1
)
1016 BeforeFPPush
= false;
1019 // Determine starting offsets of spill areas. These offsets are all positive
1020 // offsets from the bottom of the lowest-addressed callee-save area
1021 // (excluding DPRCS2, which is th the re-aligned stack region) to the bottom
1022 // of the spill area in question.
1023 unsigned FPCXTOffset
= NumBytes
- ArgRegsSaveSize
- FPCXTSaveSize
;
1024 unsigned GPRCS1Offset
= FPCXTOffset
- GPRCS1Size
;
1025 unsigned GPRCS2Offset
= GPRCS1Offset
- GPRCS2Size
;
1027 Align DPRAlign
= DPRCS1Size
? std::min(Align(8), Alignment
) : Align(4);
1028 unsigned DPRGapSize
=
1029 (ArgRegsSaveSize
+ FPCXTSaveSize
+ GPRCS1Size
+ GPRCS2Size
) %
1032 unsigned DPRCS1Offset
= GPRCS2Offset
- DPRGapSize
- DPRCS1Size
;
1035 // Offset from the CFA to the saved frame pointer, will be negative.
1036 [[maybe_unused
]] int FPOffset
= MFI
.getObjectOffset(FramePtrSpillFI
);
1037 LLVM_DEBUG(dbgs() << "FramePtrSpillFI: " << FramePtrSpillFI
1038 << ", FPOffset: " << FPOffset
<< "\n");
1039 assert(getMaxFPOffset(STI
, *AFI
, MF
) <= FPOffset
&&
1040 "Max FP estimation is wrong");
1041 AFI
->setFramePtrSpillOffset(MFI
.getObjectOffset(FramePtrSpillFI
) +
1044 AFI
->setGPRCalleeSavedArea1Offset(GPRCS1Offset
);
1045 AFI
->setGPRCalleeSavedArea2Offset(GPRCS2Offset
);
1046 AFI
->setDPRCalleeSavedArea1Offset(DPRCS1Offset
);
1048 // Move past area 2.
1049 if (GPRCS2Size
> 0) {
1050 assert(PushPopSplit
!= ARMSubtarget::SplitR11WindowsSEH
);
1051 GPRCS2Push
= LastPush
= MBBI
++;
1052 DefCFAOffsetCandidates
.addInst(LastPush
, GPRCS2Size
, BeforeFPPush
);
1053 if (FramePtrSpillArea
== SpillArea::GPRCS2
)
1054 BeforeFPPush
= false;
1057 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
1058 // .cfi_offset operations will reflect that.
1060 assert(DPRGapSize
== 4 && "unexpected alignment requirements for DPRs");
1061 if (LastPush
!= MBB
.end() &&
1062 tryFoldSPUpdateIntoPushPop(STI
, MF
, &*LastPush
, DPRGapSize
))
1063 DefCFAOffsetCandidates
.addExtraBytes(LastPush
, DPRGapSize
);
1065 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, -DPRGapSize
,
1066 MachineInstr::FrameSetup
);
1067 DefCFAOffsetCandidates
.addInst(std::prev(MBBI
), DPRGapSize
, BeforeFPPush
);
1071 // Move past DPRCS1Size.
1072 if (DPRCS1Size
> 0) {
1073 // Since vpush register list cannot have gaps, there may be multiple vpush
1074 // instructions in the prologue.
1075 while (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::VSTMDDB_UPD
) {
1076 DefCFAOffsetCandidates
.addInst(MBBI
, sizeOfSPAdjustment(*MBBI
),
1078 DPRCS1Push
= LastPush
= MBBI
++;
1082 // Move past the aligned DPRCS2 area.
1083 if (DPRCS2Size
> 0) {
1084 MBBI
= skipAlignedDPRCS2Spills(MBBI
, AFI
->getNumAlignedDPRCS2Regs());
1085 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
1086 // leaves the stack pointer pointing to the DPRCS2 area.
1088 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
1089 NumBytes
+= MFI
.getObjectOffset(D8SpillFI
);
1091 NumBytes
= DPRCS1Offset
;
1093 // Move GPRCS3, if using using SplitR11WindowsSEH.
1094 if (GPRCS3Size
> 0) {
1095 assert(PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
);
1096 GPRCS3Push
= LastPush
= MBBI
++;
1097 DefCFAOffsetCandidates
.addInst(LastPush
, GPRCS3Size
, BeforeFPPush
);
1098 if (FramePtrSpillArea
== SpillArea::GPRCS3
)
1099 BeforeFPPush
= false;
1102 bool NeedsWinCFIStackAlloc
= NeedsWinCFI
;
1103 if (PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
&& HasFP
)
1104 NeedsWinCFIStackAlloc
= false;
1106 if (STI
.isTargetWindows() && WindowsRequiresStackProbe(MF
, NumBytes
)) {
1107 uint32_t NumWords
= NumBytes
>> 2;
1109 if (NumWords
< 65536) {
1110 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVi16
), ARM::R4
)
1112 .setMIFlags(MachineInstr::FrameSetup
)
1113 .add(predOps(ARMCC::AL
));
1115 // Split into two instructions here, instead of using t2MOVi32imm,
1116 // to allow inserting accurate SEH instructions (including accurate
1117 // instruction size for each of them).
1118 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVi16
), ARM::R4
)
1119 .addImm(NumWords
& 0xffff)
1120 .setMIFlags(MachineInstr::FrameSetup
)
1121 .add(predOps(ARMCC::AL
));
1122 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVTi16
), ARM::R4
)
1124 .addImm(NumWords
>> 16)
1125 .setMIFlags(MachineInstr::FrameSetup
)
1126 .add(predOps(ARMCC::AL
));
1129 switch (TM
.getCodeModel()) {
1130 case CodeModel::Tiny
:
1131 llvm_unreachable("Tiny code model not available on ARM.");
1132 case CodeModel::Small
:
1133 case CodeModel::Medium
:
1134 case CodeModel::Kernel
:
1135 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tBL
))
1136 .add(predOps(ARMCC::AL
))
1137 .addExternalSymbol("__chkstk")
1138 .addReg(ARM::R4
, RegState::Implicit
)
1139 .setMIFlags(MachineInstr::FrameSetup
);
1141 case CodeModel::Large
:
1142 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVi32imm
), ARM::R12
)
1143 .addExternalSymbol("__chkstk")
1144 .setMIFlags(MachineInstr::FrameSetup
);
1146 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tBLXr
))
1147 .add(predOps(ARMCC::AL
))
1148 .addReg(ARM::R12
, RegState::Kill
)
1149 .addReg(ARM::R4
, RegState::Implicit
)
1150 .setMIFlags(MachineInstr::FrameSetup
);
1154 MachineInstrBuilder Instr
, SEH
;
1155 Instr
= BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2SUBrr
), ARM::SP
)
1156 .addReg(ARM::SP
, RegState::Kill
)
1157 .addReg(ARM::R4
, RegState::Kill
)
1158 .setMIFlags(MachineInstr::FrameSetup
)
1159 .add(predOps(ARMCC::AL
))
1161 if (NeedsWinCFIStackAlloc
) {
1162 SEH
= BuildMI(MF
, dl
, TII
.get(ARM::SEH_StackAlloc
))
1165 .setMIFlags(MachineInstr::FrameSetup
);
1166 MBB
.insertAfter(Instr
, SEH
);
1172 // Adjust SP after all the callee-save spills.
1173 if (AFI
->getNumAlignedDPRCS2Regs() == 0 &&
1174 tryFoldSPUpdateIntoPushPop(STI
, MF
, &*LastPush
, NumBytes
))
1175 DefCFAOffsetCandidates
.addExtraBytes(LastPush
, NumBytes
);
1177 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, -NumBytes
,
1178 MachineInstr::FrameSetup
);
1179 DefCFAOffsetCandidates
.addInst(std::prev(MBBI
), NumBytes
);
1183 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1184 // Note it's not safe to do this in Thumb2 mode because it would have
1185 // taken two instructions:
1188 // If an interrupt is taken between the two instructions, then sp is in
1189 // an inconsistent state (pointing to the middle of callee-saved area).
1190 // The interrupt handler can end up clobbering the registers.
1191 AFI
->setShouldRestoreSPFromFP(true);
1194 // Set FP to point to the stack slot that contains the previous FP.
1195 // For iOS, FP is R7, which has now been stored in spill area 1.
1196 // Otherwise, if this is not iOS, all the callee-saved registers go
1197 // into spill area 1, including the FP in R11. In either case, it
1198 // is in area one and the adjustment needs to take place just after
1200 MachineBasicBlock::iterator AfterPush
;
1202 MachineBasicBlock::iterator FPPushInst
;
1203 // Offset from SP immediately after the push which saved the FP to the FP
1205 int64_t FPOffsetAfterPush
;
1206 switch (FramePtrSpillArea
) {
1207 case SpillArea::GPRCS1
:
1208 FPPushInst
= GPRCS1Push
;
1209 FPOffsetAfterPush
= MFI
.getObjectOffset(FramePtrSpillFI
) +
1210 ArgRegsSaveSize
+ FPCXTSaveSize
+
1211 sizeOfSPAdjustment(*FPPushInst
);
1212 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS1, offset "
1213 << FPOffsetAfterPush
<< " after that push\n");
1215 case SpillArea::GPRCS2
:
1216 FPPushInst
= GPRCS2Push
;
1217 FPOffsetAfterPush
= MFI
.getObjectOffset(FramePtrSpillFI
) +
1218 ArgRegsSaveSize
+ FPCXTSaveSize
+ GPRCS1Size
+
1219 sizeOfSPAdjustment(*FPPushInst
);
1220 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS2, offset "
1221 << FPOffsetAfterPush
<< " after that push\n");
1223 case SpillArea::GPRCS3
:
1224 FPPushInst
= GPRCS3Push
;
1225 FPOffsetAfterPush
= MFI
.getObjectOffset(FramePtrSpillFI
) +
1226 ArgRegsSaveSize
+ FPCXTSaveSize
+ GPRCS1Size
+
1227 GPRCS2Size
+ DPRCS1Size
+ DPRGapSize
+
1228 sizeOfSPAdjustment(*FPPushInst
);
1229 LLVM_DEBUG(dbgs() << "Frame pointer in GPRCS3, offset "
1230 << FPOffsetAfterPush
<< " after that push\n");
1233 llvm_unreachable("frame pointer in unknown spill area");
1236 AfterPush
= std::next(FPPushInst
);
1237 if (PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
)
1238 assert(FPOffsetAfterPush
== 0);
1240 // Emit the MOV or ADD to set up the frame pointer register.
1241 emitRegPlusImmediate(!AFI
->isThumbFunction(), MBB
, AfterPush
, dl
, TII
,
1242 FramePtr
, ARM::SP
, FPOffsetAfterPush
,
1243 MachineInstr::FrameSetup
);
1246 // Emit DWARF info to find the CFA using the frame pointer from this
1248 if (FPOffsetAfterPush
!= 0) {
1249 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::cfiDefCfa(
1250 nullptr, MRI
->getDwarfRegNum(FramePtr
, true),
1251 -MFI
.getObjectOffset(FramePtrSpillFI
)));
1252 BuildMI(MBB
, AfterPush
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
1253 .addCFIIndex(CFIIndex
)
1254 .setMIFlags(MachineInstr::FrameSetup
);
1257 MF
.addFrameInst(MCCFIInstruction::createDefCfaRegister(
1258 nullptr, MRI
->getDwarfRegNum(FramePtr
, true)));
1259 BuildMI(MBB
, AfterPush
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
1260 .addCFIIndex(CFIIndex
)
1261 .setMIFlags(MachineInstr::FrameSetup
);
1266 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1267 // instructions below don't need to be replayed to unwind the stack.
1268 if (NeedsWinCFI
&& MBBI
!= MBB
.begin()) {
1269 MachineBasicBlock::iterator End
= MBBI
;
1270 if (HasFP
&& PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
)
1272 insertSEHRange(MBB
, {}, End
, TII
, MachineInstr::FrameSetup
);
1273 BuildMI(MBB
, End
, dl
, TII
.get(ARM::SEH_PrologEnd
))
1274 .setMIFlag(MachineInstr::FrameSetup
);
1275 MF
.setHasWinCFI(true);
1278 // Now that the prologue's actual instructions are finalised, we can insert
1279 // the necessary DWARF cf instructions to describe the situation. Start by
1280 // recording where each register ended up:
1282 for (const auto &Entry
: reverse(CSI
)) {
1283 Register Reg
= Entry
.getReg();
1284 int FI
= Entry
.getFrameIdx();
1285 MachineBasicBlock::iterator CFIPos
;
1286 switch (getSpillArea(Reg
, PushPopSplit
, AFI
->getNumAlignedDPRCS2Regs(),
1288 case SpillArea::GPRCS1
:
1289 CFIPos
= std::next(GPRCS1Push
);
1291 case SpillArea::GPRCS2
:
1292 CFIPos
= std::next(GPRCS2Push
);
1294 case SpillArea::DPRCS1
:
1295 CFIPos
= std::next(DPRCS1Push
);
1297 case SpillArea::GPRCS3
:
1298 CFIPos
= std::next(GPRCS3Push
);
1300 case SpillArea::FPCXT
:
1301 case SpillArea::DPRCS2
:
1302 // FPCXT and DPRCS2 are not represented in the DWARF info.
1306 if (CFIPos
.isValid()) {
1307 int CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createOffset(
1309 MRI
->getDwarfRegNum(Reg
== ARM::R12
? ARM::RA_AUTH_CODE
: Reg
,
1311 MFI
.getObjectOffset(FI
)));
1312 BuildMI(MBB
, CFIPos
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
1313 .addCFIIndex(CFIIndex
)
1314 .setMIFlags(MachineInstr::FrameSetup
);
1319 // Now we can emit descriptions of where the canonical frame address was
1320 // throughout the process. If we have a frame pointer, it takes over the job
1321 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1322 // actually get emitted.
1324 LLVM_DEBUG(DefCFAOffsetCandidates
.dump());
1325 DefCFAOffsetCandidates
.emitDefCFAOffsets(MBB
, dl
, TII
, HasFP
);
1328 if (STI
.isTargetELF() && hasFP(MF
))
1329 MFI
.setOffsetAdjustment(MFI
.getOffsetAdjustment() -
1330 AFI
->getFramePtrSpillOffset());
1332 AFI
->setFPCXTSaveAreaSize(FPCXTSaveSize
);
1333 AFI
->setGPRCalleeSavedArea1Size(GPRCS1Size
);
1334 AFI
->setGPRCalleeSavedArea2Size(GPRCS2Size
);
1335 AFI
->setDPRCalleeSavedGapSize(DPRGapSize
);
1336 AFI
->setDPRCalleeSavedArea1Size(DPRCS1Size
);
1337 AFI
->setGPRCalleeSavedArea3Size(GPRCS3Size
);
1339 // If we need dynamic stack realignment, do it here. Be paranoid and make
1340 // sure if we also have VLAs, we have a base pointer for frame access.
1341 // If aligned NEON registers were spilled, the stack has already been
1343 if (!AFI
->getNumAlignedDPRCS2Regs() && RegInfo
->hasStackRealignment(MF
)) {
1344 Align MaxAlign
= MFI
.getMaxAlign();
1345 assert(!AFI
->isThumb1OnlyFunction());
1346 if (!AFI
->isThumbFunction()) {
1347 emitAligningInstructions(MF
, AFI
, TII
, MBB
, MBBI
, dl
, ARM::SP
, MaxAlign
,
1350 // We cannot use sp as source/dest register here, thus we're using r4 to
1351 // perform the calculations. We're emitting the following sequence:
1353 // -- use emitAligningInstructions to produce best sequence to zero
1354 // -- out lower bits in r4
1356 // FIXME: It will be better just to find spare register here.
1357 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::R4
)
1358 .addReg(ARM::SP
, RegState::Kill
)
1359 .add(predOps(ARMCC::AL
));
1360 emitAligningInstructions(MF
, AFI
, TII
, MBB
, MBBI
, dl
, ARM::R4
, MaxAlign
,
1362 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
1363 .addReg(ARM::R4
, RegState::Kill
)
1364 .add(predOps(ARMCC::AL
));
1367 AFI
->setShouldRestoreSPFromFP(true);
1370 // If we need a base pointer, set it up here. It's whatever the value
1371 // of the stack pointer is at this point. Any variable size objects
1372 // will be allocated after this, so we can still use the base pointer
1373 // to reference locals.
1374 // FIXME: Clarify FrameSetup flags here.
1375 if (RegInfo
->hasBasePointer(MF
)) {
1377 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::MOVr
), RegInfo
->getBaseRegister())
1379 .add(predOps(ARMCC::AL
))
1382 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), RegInfo
->getBaseRegister())
1384 .add(predOps(ARMCC::AL
));
1387 // If the frame has variable sized objects then the epilogue must restore
1388 // the sp from fp. We can assume there's an FP here since hasFP already
1389 // checks for hasVarSizedObjects.
1390 if (MFI
.hasVarSizedObjects())
1391 AFI
->setShouldRestoreSPFromFP(true);
1394 void ARMFrameLowering::emitEpilogue(MachineFunction
&MF
,
1395 MachineBasicBlock
&MBB
) const {
1396 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1397 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1398 const TargetRegisterInfo
*RegInfo
= MF
.getSubtarget().getRegisterInfo();
1399 const ARMBaseInstrInfo
&TII
=
1400 *static_cast<const ARMBaseInstrInfo
*>(MF
.getSubtarget().getInstrInfo());
1401 assert(!AFI
->isThumb1OnlyFunction() &&
1402 "This emitEpilogue does not support Thumb1!");
1403 bool isARM
= !AFI
->isThumbFunction();
1404 ARMSubtarget::PushPopSplitVariation PushPopSplit
=
1405 STI
.getPushPopSplitVariation(MF
);
1407 LLVM_DEBUG(dbgs() << "Emitting epilogue for " << MF
.getName() << "\n");
1409 // Amount of stack space we reserved next to incoming args for either
1410 // varargs registers or stack arguments in tail calls made by this function.
1411 unsigned ReservedArgStack
= AFI
->getArgRegsSaveSize();
1413 // How much of the stack used by incoming arguments this function is expected
1414 // to restore in this particular epilogue.
1415 int IncomingArgStackToRestore
= getArgumentStackToRestore(MF
, MBB
);
1416 int NumBytes
= (int)MFI
.getStackSize();
1417 Register FramePtr
= RegInfo
->getFrameRegister(MF
);
1419 // All calls are tail calls in GHC calling conv, and functions have no
1420 // prologue/epilogue.
1421 if (MF
.getFunction().getCallingConv() == CallingConv::GHC
)
1424 // First put ourselves on the first (from top) terminator instructions.
1425 MachineBasicBlock::iterator MBBI
= MBB
.getFirstTerminator();
1426 DebugLoc dl
= MBBI
!= MBB
.end() ? MBBI
->getDebugLoc() : DebugLoc();
1428 MachineBasicBlock::iterator RangeStart
;
1429 if (!AFI
->hasStackFrame()) {
1430 if (MF
.hasWinCFI()) {
1431 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::SEH_EpilogStart
))
1432 .setMIFlag(MachineInstr::FrameDestroy
);
1433 RangeStart
= initMBBRange(MBB
, MBBI
);
1436 if (NumBytes
+ IncomingArgStackToRestore
!= 0)
1437 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
,
1438 NumBytes
+ IncomingArgStackToRestore
,
1439 MachineInstr::FrameDestroy
);
1441 // Unwind MBBI to point to first LDR / VLDRD.
1442 if (MBBI
!= MBB
.begin()) {
1445 } while (MBBI
!= MBB
.begin() &&
1446 MBBI
->getFlag(MachineInstr::FrameDestroy
));
1447 if (!MBBI
->getFlag(MachineInstr::FrameDestroy
))
1451 if (MF
.hasWinCFI()) {
1452 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::SEH_EpilogStart
))
1453 .setMIFlag(MachineInstr::FrameDestroy
);
1454 RangeStart
= initMBBRange(MBB
, MBBI
);
1457 // Move SP to start of FP callee save spill area.
1459 (ReservedArgStack
+ AFI
->getFPCXTSaveAreaSize() +
1460 AFI
->getGPRCalleeSavedArea1Size() + AFI
->getGPRCalleeSavedArea2Size() +
1461 AFI
->getDPRCalleeSavedGapSize() + AFI
->getDPRCalleeSavedArea1Size() +
1462 AFI
->getGPRCalleeSavedArea3Size());
1464 // Reset SP based on frame pointer only if the stack frame extends beyond
1465 // frame pointer stack slot or target is ELF and the function has FP.
1466 if (AFI
->shouldRestoreSPFromFP()) {
1467 NumBytes
= AFI
->getFramePtrSpillOffset() - NumBytes
;
1470 emitARMRegPlusImmediate(MBB
, MBBI
, dl
, ARM::SP
, FramePtr
, -NumBytes
,
1472 MachineInstr::FrameDestroy
);
1474 // It's not possible to restore SP from FP in a single instruction.
1475 // For iOS, this looks like:
1478 // This is bad, if an interrupt is taken after the mov, sp is in an
1479 // inconsistent state.
1480 // Use the first callee-saved register as a scratch register.
1481 assert(!MFI
.getPristineRegs(MF
).test(ARM::R4
) &&
1482 "No scratch register to restore SP from FP!");
1483 emitT2RegPlusImmediate(MBB
, MBBI
, dl
, ARM::R4
, FramePtr
, -NumBytes
,
1484 ARMCC::AL
, 0, TII
, MachineInstr::FrameDestroy
);
1485 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
1487 .add(predOps(ARMCC::AL
))
1488 .setMIFlag(MachineInstr::FrameDestroy
);
1493 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::MOVr
), ARM::SP
)
1495 .add(predOps(ARMCC::AL
))
1497 .setMIFlag(MachineInstr::FrameDestroy
);
1499 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
1501 .add(predOps(ARMCC::AL
))
1502 .setMIFlag(MachineInstr::FrameDestroy
);
1504 } else if (NumBytes
&&
1505 !tryFoldSPUpdateIntoPushPop(STI
, MF
, &*MBBI
, NumBytes
))
1506 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, NumBytes
,
1507 MachineInstr::FrameDestroy
);
1509 // Increment past our save areas.
1510 if (AFI
->getGPRCalleeSavedArea3Size()) {
1511 assert(PushPopSplit
== ARMSubtarget::SplitR11WindowsSEH
);
1516 if (MBBI
!= MBB
.end() && AFI
->getDPRCalleeSavedArea1Size()) {
1518 // Since vpop register list cannot have gaps, there may be multiple vpop
1519 // instructions in the epilogue.
1520 while (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::VLDMDIA_UPD
)
1523 if (AFI
->getDPRCalleeSavedGapSize()) {
1524 assert(AFI
->getDPRCalleeSavedGapSize() == 4 &&
1525 "unexpected DPR alignment gap");
1526 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
, AFI
->getDPRCalleeSavedGapSize(),
1527 MachineInstr::FrameDestroy
);
1530 if (AFI
->getGPRCalleeSavedArea2Size()) {
1531 assert(PushPopSplit
!= ARMSubtarget::SplitR11WindowsSEH
);
1535 if (AFI
->getGPRCalleeSavedArea1Size()) MBBI
++;
1537 if (ReservedArgStack
|| IncomingArgStackToRestore
) {
1538 assert((int)ReservedArgStack
+ IncomingArgStackToRestore
>= 0 &&
1539 "attempting to restore negative stack amount");
1540 emitSPUpdate(isARM
, MBB
, MBBI
, dl
, TII
,
1541 ReservedArgStack
+ IncomingArgStackToRestore
,
1542 MachineInstr::FrameDestroy
);
1545 // Validate PAC, It should have been already popped into R12. For CMSE entry
1546 // function, the validation instruction is emitted during expansion of the
1547 // tBXNS_RET, since the validation must use the value of SP at function
1548 // entry, before saving, resp. after restoring, FPCXTNS.
1549 if (AFI
->shouldSignReturnAddress() && !AFI
->isCmseNSEntryFunction())
1550 BuildMI(MBB
, MBBI
, DebugLoc(), STI
.getInstrInfo()->get(ARM::t2AUT
));
1553 if (MF
.hasWinCFI()) {
1554 insertSEHRange(MBB
, RangeStart
, MBB
.end(), TII
, MachineInstr::FrameDestroy
);
1555 BuildMI(MBB
, MBB
.end(), dl
, TII
.get(ARM::SEH_EpilogEnd
))
1556 .setMIFlag(MachineInstr::FrameDestroy
);
1560 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1561 /// debug info. It's the same as what we use for resolving the code-gen
1562 /// references for now. FIXME: This can go wrong when references are
1563 /// SP-relative and simple call frames aren't used.
1564 StackOffset
ARMFrameLowering::getFrameIndexReference(const MachineFunction
&MF
,
1566 Register
&FrameReg
) const {
1567 return StackOffset::getFixed(ResolveFrameIndexReference(MF
, FI
, FrameReg
, 0));
1570 int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction
&MF
,
1571 int FI
, Register
&FrameReg
,
1573 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
1574 const ARMBaseRegisterInfo
*RegInfo
= static_cast<const ARMBaseRegisterInfo
*>(
1575 MF
.getSubtarget().getRegisterInfo());
1576 const ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
1577 int Offset
= MFI
.getObjectOffset(FI
) + MFI
.getStackSize();
1578 int FPOffset
= Offset
- AFI
->getFramePtrSpillOffset();
1579 bool isFixed
= MFI
.isFixedObjectIndex(FI
);
1584 // SP can move around if there are allocas. We may also lose track of SP
1585 // when emergency spilling inside a non-reserved call frame setup.
1586 bool hasMovingSP
= !hasReservedCallFrame(MF
);
1588 // When dynamically realigning the stack, use the frame pointer for
1589 // parameters, and the stack/base pointer for locals.
1590 if (RegInfo
->hasStackRealignment(MF
)) {
1591 assert(hasFP(MF
) && "dynamic stack realignment without a FP!");
1593 FrameReg
= RegInfo
->getFrameRegister(MF
);
1595 } else if (hasMovingSP
) {
1596 assert(RegInfo
->hasBasePointer(MF
) &&
1597 "VLAs and dynamic stack alignment, but missing base pointer!");
1598 FrameReg
= RegInfo
->getBaseRegister();
1604 // If there is a frame pointer, use it when we can.
1605 if (hasFP(MF
) && AFI
->hasStackFrame()) {
1606 // Use frame pointer to reference fixed objects. Use it for locals if
1607 // there are VLAs (and thus the SP isn't reliable as a base).
1608 if (isFixed
|| (hasMovingSP
&& !RegInfo
->hasBasePointer(MF
))) {
1609 FrameReg
= RegInfo
->getFrameRegister(MF
);
1611 } else if (hasMovingSP
) {
1612 assert(RegInfo
->hasBasePointer(MF
) && "missing base pointer!");
1613 if (AFI
->isThumb2Function()) {
1614 // Try to use the frame pointer if we can, else use the base pointer
1615 // since it's available. This is handy for the emergency spill slot, in
1617 if (FPOffset
>= -255 && FPOffset
< 0) {
1618 FrameReg
= RegInfo
->getFrameRegister(MF
);
1622 } else if (AFI
->isThumbFunction()) {
1623 // Prefer SP to base pointer, if the offset is suitably aligned and in
1624 // range as the effective range of the immediate offset is bigger when
1626 // Use add <rd>, sp, #<imm8>
1627 // ldr <rd>, [sp, #<imm8>]
1628 if (Offset
>= 0 && (Offset
& 3) == 0 && Offset
<= 1020)
1630 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1631 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1632 if (AFI
->isThumb2Function() && FPOffset
>= -255 && FPOffset
< 0) {
1633 FrameReg
= RegInfo
->getFrameRegister(MF
);
1636 } else if (Offset
> (FPOffset
< 0 ? -FPOffset
: FPOffset
)) {
1637 // Otherwise, use SP or FP, whichever is closer to the stack slot.
1638 FrameReg
= RegInfo
->getFrameRegister(MF
);
1642 // Use the base pointer if we have one.
1643 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1644 // That can happen if we forced a base pointer for a large call frame.
1645 if (RegInfo
->hasBasePointer(MF
)) {
1646 FrameReg
= RegInfo
->getBaseRegister();
1652 void ARMFrameLowering::emitPushInst(MachineBasicBlock
&MBB
,
1653 MachineBasicBlock::iterator MI
,
1654 ArrayRef
<CalleeSavedInfo
> CSI
,
1655 unsigned StmOpc
, unsigned StrOpc
,
1657 function_ref
<bool(unsigned)> Func
) const {
1658 MachineFunction
&MF
= *MBB
.getParent();
1659 const TargetInstrInfo
&TII
= *MF
.getSubtarget().getInstrInfo();
1660 const TargetRegisterInfo
&TRI
= *STI
.getRegisterInfo();
1664 using RegAndKill
= std::pair
<unsigned, bool>;
1666 SmallVector
<RegAndKill
, 4> Regs
;
1667 unsigned i
= CSI
.size();
1669 unsigned LastReg
= 0;
1670 for (; i
!= 0; --i
) {
1671 Register Reg
= CSI
[i
-1].getReg();
1675 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
1676 bool isLiveIn
= MRI
.isLiveIn(Reg
);
1677 if (!isLiveIn
&& !MRI
.isReserved(Reg
))
1679 // If NoGap is true, push consecutive registers and then leave the rest
1680 // for other instructions. e.g.
1681 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1682 if (NoGap
&& LastReg
&& LastReg
!= Reg
-1)
1685 // Do not set a kill flag on values that are also marked as live-in. This
1686 // happens with the @llvm-returnaddress intrinsic and with arguments
1687 // passed in callee saved registers.
1688 // Omitting the kill flags is conservatively correct even if the live-in
1689 // is not used after all.
1690 Regs
.push_back(std::make_pair(Reg
, /*isKill=*/!isLiveIn
));
1696 llvm::sort(Regs
, [&](const RegAndKill
&LHS
, const RegAndKill
&RHS
) {
1697 return TRI
.getEncodingValue(LHS
.first
) < TRI
.getEncodingValue(RHS
.first
);
1700 if (Regs
.size() > 1 || StrOpc
== 0) {
1701 MachineInstrBuilder MIB
= BuildMI(MBB
, MI
, DL
, TII
.get(StmOpc
), ARM::SP
)
1703 .setMIFlags(MachineInstr::FrameSetup
)
1704 .add(predOps(ARMCC::AL
));
1705 for (unsigned i
= 0, e
= Regs
.size(); i
< e
; ++i
)
1706 MIB
.addReg(Regs
[i
].first
, getKillRegState(Regs
[i
].second
));
1707 } else if (Regs
.size() == 1) {
1708 BuildMI(MBB
, MI
, DL
, TII
.get(StrOpc
), ARM::SP
)
1709 .addReg(Regs
[0].first
, getKillRegState(Regs
[0].second
))
1711 .setMIFlags(MachineInstr::FrameSetup
)
1713 .add(predOps(ARMCC::AL
));
1717 // Put any subsequent vpush instructions before this one: they will refer to
1718 // higher register numbers so need to be pushed first in order to preserve
1720 if (MI
!= MBB
.begin())
void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MI,
                                   MutableArrayRef<CalleeSavedInfo> CSI,
                                   unsigned LdmOpc, unsigned LdrOpc,
                                   bool isVarArg, bool NoGap,
                                   function_ref<bool(unsigned)> Func) const {
  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool hasPAC = AFI->shouldSignReturnAddress();
  DebugLoc DL;
  bool isTailCall = false;
  bool isInterrupt = false;
  bool isTrap = false;
  bool isCmseEntry = false;
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  if (MBB.end() != MI) {
    DL = MI->getDebugLoc();
    unsigned RetOpcode = MI->getOpcode();
    isTailCall =
        (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
         RetOpcode == ARM::TCRETURNrinotr12);
    isInterrupt =
        RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
    isTrap = RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
             RetOpcode == ARM::tTRAP;
    isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
  }

  SmallVector<unsigned, 4> Regs;
  unsigned i = CSI.size();
  while (i != 0) {
    unsigned LastReg = 0;
    bool DeleteRet = false;
    for (; i != 0; --i) {
      CalleeSavedInfo &Info = CSI[i - 1];
      Register Reg = Info.getReg();
      if (!Func(Reg))
        continue;

      if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
          !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
          STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
          (PushPopSplit != ARMSubtarget::SplitR11WindowsSEH &&
           PushPopSplit != ARMSubtarget::SplitR11AAPCSSignRA)) {
        Reg = ARM::PC;
        // Fold the return instruction into the LDM.
        DeleteRet = true;
        LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
      }

      // If NoGap is true, pop consecutive registers and then leave the rest
      // for other instructions. e.g.
      // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
      if (NoGap && LastReg && LastReg != Reg - 1)
        break;

      LastReg = Reg;
      Regs.push_back(Reg);
    }

    if (Regs.empty())
      continue;

    llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
      return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
    });

    if (Regs.size() > 1 || LdrOpc == 0) {
      MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL))
                                    .setMIFlags(MachineInstr::FrameDestroy);
      for (unsigned Reg : Regs)
        MIB.addReg(Reg, getDefRegState(true));
      if (DeleteRet) {
        if (MI != MBB.end()) {
          MIB.copyImplicitOps(*MI);
          MI->eraseFromParent();
        }
        MI = MIB;
      }
    } else if (Regs.size() == 1) {
      // If we adjusted the reg to PC from LR above, switch it back here. We
      // only do that for LDM.
      if (Regs[0] == ARM::PC)
        Regs[0] = ARM::LR;
      MachineInstrBuilder MIB =
          BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
              .addReg(ARM::SP, RegState::Define)
              .addReg(ARM::SP)
              .setMIFlags(MachineInstr::FrameDestroy);
      // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
      // that refactoring is complete (eventually).
      if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
        MIB.addReg(0);
        MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));
      } else
        MIB.addImm(4);
      MIB.add(predOps(ARMCC::AL));
    }
    Regs.clear();

    // Put any subsequent vpop instructions after this one: they will refer to
    // higher register numbers so need to be popped afterwards.
    if (MI != MBB.end())
      ++MI;
  }
}
/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. Also insert stack realignment code and leave the stack
/// pointer pointing to the d8 spill slot.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned NumAlignedDPRCS2Regs,
                                    ArrayRef<CalleeSavedInfo> CSI,
                                    const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // Mark the D-register spill slots as properly aligned. Since MFI computes
  // stack slot layout backwards, this can actually mean that the d-reg stack
  // slot offsets can be wrong. The offset for d8 will always be correct.
  for (const CalleeSavedInfo &I : CSI) {
    unsigned DNum = I.getReg() - ARM::D8;
    if (DNum > NumAlignedDPRCS2Regs - 1)
      continue;
    int FI = I.getFrameIdx();
    // The even-numbered registers will be 16-byte aligned, the odd-numbered
    // registers will be 8-byte aligned.
    MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));

    // The stack slot for D8 needs to be maximally aligned because this is
    // actually the point where we align the stack pointer. MachineFrameInfo
    // computes all offsets relative to the incoming stack pointer which is a
    // bit weird when realigning the stack. Any extra padding for this
    // over-alignment is not realized because the code inserted below adjusts
    // the stack pointer by numregs * 8 before aligning the stack pointer.
    if (DNum == 0)
      MFI.setObjectAlignment(FI, MFI.getMaxAlign());
  }

  // Move the stack pointer to the d8 spill slot, and align it at the same
  // time. Leave the stack slot address in the scratch register r4.
  //
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  //
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
  AFI->setShouldRestoreSPFromFP(true);

  // sub r4, sp, #numregs * 8
  // The immediate is <= 64, so it doesn't need any special encoding.
  unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  Align MaxAlign = MF.getFrameInfo().getMaxAlign();
  // We must set parameter MustBeSingleInstruction to true, since
  // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
  // stack alignment. Luckily, this can always be done since all ARM
  // architecture versions that support Neon also support the BFC
  // instruction.
  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);

  // mov sp, r4
  // The stack pointer must be adjusted before spilling anything, otherwise
  // the stack slots could be clobbered by an interrupt handler.
  // Leave r4 live, it is used below.
  Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
                                .addReg(ARM::R4)
                                .add(predOps(ARMCC::AL));
  if (!isThumb)
    MIB.add(condCodeOp());

  // Now spill NumAlignedDPRCS2Regs registers starting from d8.
  // r4 holds the stack slot address.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
  // The writeback is only needed when emitting two vst1.64 instructions.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vst1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
    MBB.addLiveIn(SupReg);
    BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vstr.64 for the odd last register.
  if (NumAlignedDPRCS2Regs) {
    MBB.addLiveIn(NextReg);
    // vstr.64 uses addrmode5 which has an offset scale of 4.
    BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
        .addReg(NextReg)
        .addReg(ARM::R4)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
  }

  // The last spill instruction inserted should kill the scratch register r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
/// iterator to the following instruction.
static MachineBasicBlock::iterator
skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
                        unsigned NumAlignedDPRCS2Regs) {
  //   sub r4, sp, #numregs * 8
  //   bic r4, r4, #align - 1
  //   mov sp, r4
  ++MI; ++MI; ++MI;
  assert(MI->mayStore() && "Expecting spill instruction");

  // These switches all fall through.
  switch (NumAlignedDPRCS2Regs) {
  case 7:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  default:
    ++MI;
    assert(MI->mayStore() && "Expecting spill instruction");
    [[fallthrough]];
  case 4:
  case 2:
    break;
  }

  assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
  ++MI;
  return MI;
}
/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
/// starting from d8. These instructions are assumed to execute while the
/// stack is still aligned, unlike the code inserted by emitPopInst.
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,
                                      MachineBasicBlock::iterator MI,
                                      unsigned NumAlignedDPRCS2Regs,
                                      ArrayRef<CalleeSavedInfo> CSI,
                                      const TargetRegisterInfo *TRI) {
  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();

  // Find the frame index assigned to d8.
  int D8SpillFI = 0;
  for (const CalleeSavedInfo &I : CSI)
    if (I.getReg() == ARM::D8) {
      D8SpillFI = I.getFrameIdx();
      break;
    }

  // Materialize the address of the d8 spill slot into the scratch register r4.
  // This can be fairly complicated if the stack frame is large, so just use
  // the normal frame index elimination mechanism to do it. This code runs as
  // the initial part of the epilog where the stack and base pointers haven't
  // been changed yet.
  bool isThumb = AFI->isThumbFunction();
  assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");

  unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());

  // Now restore NumAlignedDPRCS2Regs registers starting from d8.
  unsigned NextReg = ARM::D8;

  // 16-byte aligned vld1.64 with 4 d-regs and writeback.
  if (NumAlignedDPRCS2Regs >= 6) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
        .addReg(ARM::R4, RegState::Define)
        .addReg(ARM::R4, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // We won't modify r4 beyond this point. It currently points to the next
  // register to be spilled.
  unsigned R4BaseReg = NextReg;

  // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
  if (NumAlignedDPRCS2Regs >= 4) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QQPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
        .addReg(ARM::R4)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
    NextReg += 4;
    NumAlignedDPRCS2Regs -= 4;
  }

  // 16-byte aligned vld1.64 with 2 d-regs.
  if (NumAlignedDPRCS2Regs >= 2) {
    MCRegister SupReg =
        TRI->getMatchingSuperReg(NextReg, ARM::dsub_0, &ARM::QPRRegClass);
    BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
        .addReg(ARM::R4)
        .addImm(16)
        .add(predOps(ARMCC::AL));
    NextReg += 2;
    NumAlignedDPRCS2Regs -= 2;
  }

  // Finally, use a vanilla vldr.64 for the remaining odd register.
  if (NumAlignedDPRCS2Regs)
    BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
        .addReg(ARM::R4)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));

  // Last store kills r4.
  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
}
bool ARMFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
  unsigned PushOneOpc = AFI->isThumbFunction() ?
    ARM::t2STR_PRE : ARM::STR_PRE_IMM;
  unsigned FltOpc = ARM::VSTMDDB_UPD;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();

  // Compute PAC in R12.
  if (AFI->shouldSignReturnAddress()) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
        .setMIFlags(MachineInstr::FrameSetup);
  }

  // Save the non-secure floating point context.
  if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
        return C.getReg() == ARM::FPCXTNS;
      })) {
    BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(-8)
        .add(predOps(ARMCC::AL));
  }

  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS1);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS2);
  emitPushInst(MBB, MI, CSI, FltOpc, 0, true, IsDPRCS1);
  emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, IsGPRCS3);

  // The code above does not insert spill code for the aligned DPRCS2 registers.
  // The stack realignment code will be inserted between the push instructions
  // and these spills.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  return true;
}
bool ARMFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  MachineFunction &MF = *MBB.getParent();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();

  bool isVarArg = AFI->getArgRegsSaveSize() > 0;
  unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
  // registers. Do that here instead.
  if (NumAlignedDPRCS2Regs)
    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);

  unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
  unsigned LdrOpc =
      AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
  unsigned FltOpc = ARM::VLDMDIA_UPD;

  auto CheckRegArea = [PushPopSplit, NumAlignedDPRCS2Regs,
                       RegInfo](unsigned Reg, SpillArea TestArea) {
    return getSpillArea(Reg, PushPopSplit, NumAlignedDPRCS2Regs, RegInfo) ==
           TestArea;
  };
  auto IsGPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS1);
  };
  auto IsGPRCS2 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS2);
  };
  auto IsDPRCS1 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::DPRCS1);
  };
  auto IsGPRCS3 = [&CheckRegArea](unsigned Reg) {
    return CheckRegArea(Reg, SpillArea::GPRCS3);
  };

  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS3);
  emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, IsDPRCS1);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS2);
  emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, IsGPRCS1);

  return true;
}
// FIXME: Make generic?
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,
                                            const ARMBaseInstrInfo &TII) {
  unsigned FnSize = 0;
  for (auto &MBB : MF) {
    for (auto &MI : MBB)
      FnSize += TII.getInstSizeInBytes(MI);
  }
  if (MF.getJumpTableInfo())
    for (auto &Table : MF.getJumpTableInfo()->getJumpTables())
      FnSize += Table.MBBs.size() * 4;
  FnSize += MF.getConstantPool()->getConstants().size() * 4;
  return FnSize;
}
/// estimateRSStackSizeLimit - Look at each instruction that references stack
/// frames and return the stack size limit beyond which some of these
/// instructions will require a scratch register during their expansion later.
// FIXME: Move to TII?
static unsigned estimateRSStackSizeLimit(MachineFunction &MF,
                                         const TargetFrameLowering *TFI,
                                         bool &HasNonSPFrameIndex) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  unsigned Limit = (1 << 12) - 1;
  for (auto &MBB : MF) {
    for (auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
        if (!MI.getOperand(i).isFI())
          continue;

        // When using ADDri to get the address of a stack object, 255 is the
        // largest offset guaranteed to fit in the immediate offset.
        if (MI.getOpcode() == ARM::ADDri) {
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        }
        // t2ADDri will not require an extra register, it can reuse the
        // destination.
        if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
          break;

        const MCInstrDesc &MCID = MI.getDesc();
        const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
        if (RegClass && !RegClass->contains(ARM::SP))
          HasNonSPFrameIndex = true;

        // Otherwise check the addressing mode.
        switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
        case ARMII::AddrMode_i12:
        case ARMII::AddrMode2:
          // Default 12 bit limit.
          break;
        case ARMII::AddrMode3:
        case ARMII::AddrModeT2_i8neg:
          Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode5FP16:
          Limit = std::min(Limit, ((1U << 8) - 1) * 2);
          break;
        case ARMII::AddrMode5:
        case ARMII::AddrModeT2_i8s4:
        case ARMII::AddrModeT2_ldrex:
          Limit = std::min(Limit, ((1U << 8) - 1) * 4);
          break;
        case ARMII::AddrModeT2_i12:
          // i12 supports only positive offset so these will be converted to
          // i8 opcodes. See llvm::rewriteT2FrameIndex.
          if (TFI->hasFP(MF) && AFI->hasStackFrame())
            Limit = std::min(Limit, (1U << 8) - 1);
          break;
        case ARMII::AddrMode4:
        case ARMII::AddrMode6:
          // Addressing modes 4 & 6 (load/store) instructions can't encode an
          // immediate offset for stack references.
          return 0;
        case ARMII::AddrModeT2_i7:
          Limit = std::min(Limit, ((1U << 7) - 1) * 1);
          break;
        case ARMII::AddrModeT2_i7s2:
          Limit = std::min(Limit, ((1U << 7) - 1) * 2);
          break;
        case ARMII::AddrModeT2_i7s4:
          Limit = std::min(Limit, ((1U << 7) - 1) * 4);
          break;
        default:
          llvm_unreachable(
              "Unhandled addressing mode in stack size limit calculation");
        }
        break; // At most one FI per instruction
      }
    }
  }

  return Limit;
}
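
// For example, an instruction using AddrModeT2_i8neg (negative 8-bit
// immediate) caps the limit at 255 bytes, while one using AddrMode5
// (VLDR/VSTR, 8-bit immediate scaled by 4) caps it at 1020 bytes.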
// In functions that realign the stack, it can be an advantage to spill the
// callee-saved vector registers after realigning the stack. The vst1 and vld1
// instructions take alignment hints that can improve performance.
static void
checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
  if (!SpillAlignedNEONRegs)
    return;

  // Naked functions don't spill callee-saved registers.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return;

  // We are planning to use NEON instructions vst1 / vld1.
  if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
    return;

  // Don't bother if the default stack alignment is sufficiently high.
  if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))
    return;

  // Aligned spills require stack realignment.
  if (!static_cast<const ARMBaseRegisterInfo *>(
           MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))
    return;

  // We always spill contiguous d-registers starting from d8. Count how many
  // needs spilling. The register allocator will almost always use the
  // callee-saved registers in order, but it can happen that there are holes in
  // the range. Registers above the hole will be spilled to the standard DPRCS
  // area.
  unsigned NumSpills = 0;
  for (; NumSpills < 8; ++NumSpills)
    if (!SavedRegs.test(ARM::D8 + NumSpills))
      break;

  // Don't do this for just one d-register. It's not worth it.
  if (NumSpills < 2)
    return;

  // Spill the first NumSpills D-registers after realigning the stack.
  MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);

  // A scratch register is required for the vst1 / vld1 instructions.
  SavedRegs.set(ARM::R4);
}
bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  // For CMSE entry functions, we want to save the FPCXT_NS immediately
  // upon function entry (resp. restore it immediately before return).
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())
    return false;

  // We are disabling shrinkwrapping for now when PAC is enabled, as
  // shrinkwrapping can cause clobbering of r12 when the PAC code is
  // generated. A follow-up patch will fix this in a more performant manner.
  if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(
          true /* SpillsLR */))
    return false;

  return true;
}
bool ARMFrameLowering::requiresAAPCSFrameRecord(
    const MachineFunction &MF) const {
  const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
  return Subtarget.createAAPCSFrameChain() && hasFP(MF);
}
// Thumb1 may require a spill when storing to a frame index through FP (or any
// access with execute-only), for cases where FP is a high register (R11). This
// scans the function for cases where this may happen.
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,
                                       const TargetFrameLowering &TFI) {
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (!AFI->isThumb1OnlyFunction())
    return false;

  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
  for (const auto &MBB : MF)
    for (const auto &MI : MBB)
      if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
          STI.genExecuteOnly())
        for (const auto &Op : MI.operands())
          if (Op.isFI()) {
            Register Reg;
            TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
            if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
              return true;
          }
  return false;
}
void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  // This tells PEI to spill the FP as if it is any other callee-save register
  // to take advantage of the eliminateFrameIndex machinery. This also ensures
  // it is spilled in the order specified by getCalleeSavedRegs() to make it
  // easier to combine multiple loads / stores.
  bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF)) &&
                           !MF.getTarget().Options.DisableFramePointerElim(MF);
  bool CS1Spilled = false;
  bool LRSpilled = false;
  unsigned NumGPRSpills = 0;
  unsigned NumFPRSpills = 0;
  SmallVector<unsigned, 4> UnspilledCS1GPRs;
  SmallVector<unsigned, 4> UnspilledCS2GPRs;
  const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  (void)TRI; // Silence unused warning in non-assert builds.
  Register FramePtr = RegInfo->getFrameRegister(MF);
  ARMSubtarget::PushPopSplitVariation PushPopSplit =
      STI.getPushPopSplitVariation(MF);

  // Spill R4 if Thumb2 function requires stack realignment - it will be used
  // as scratch register. Also spill R4 if Thumb2 function has varsized
  // objects, since it's not always possible to restore sp from fp in a single
  // instruction.
  // FIXME: It will be better just to find spare register here.
  if (AFI->isThumb2Function() &&
      (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
    SavedRegs.set(ARM::R4);

  // If a stack probe will be emitted, spill R4 and LR, since they are
  // clobbered by the stack probe call.
  // This estimate should be a safe, conservative estimate. The actual
  // stack probe is enabled based on the size of the local objects;
  // this estimate also includes the varargs store size.
  if (STI.isTargetWindows() &&
      WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
    SavedRegs.set(ARM::R4);
    SavedRegs.set(ARM::LR);
  }

  if (AFI->isThumb1OnlyFunction()) {
    // Spill LR if Thumb1 function uses variable length argument lists.
    if (AFI->getArgRegsSaveSize() > 0)
      SavedRegs.set(ARM::LR);

    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
    // requires stack alignment. We don't know for sure what the stack size
    // will be, but for this, an estimate is good enough. If there anything
    // changes it, it'll be a spill, which implies we've used all the registers
    // and so R4 is already used, so not marking it here will be OK.
    // FIXME: It will be better just to find spare register here.
    if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
        MFI.estimateStackSize(MF) > 508)
      SavedRegs.set(ARM::R4);
  }

  // See if we can spill vector registers to aligned stack.
  checkNumAlignedDPRCS2Regs(MF, SavedRegs);

  // Spill the BasePtr if it's used.
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.set(RegInfo->getBaseRegister());
  // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
  if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
    CanEliminateFrame = false;

  // When return address signing is enabled R12 is treated as callee-saved.
  if (AFI->shouldSignReturnAddress())
    CanEliminateFrame = false;

  // Don't spill FP if the frame can be eliminated. This is determined
  // by scanning the callee-save registers to see if any is modified.
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  for (unsigned i = 0; CSRegs[i]; ++i) {
    unsigned Reg = CSRegs[i];
    bool Spilled = false;
    if (SavedRegs.test(Reg)) {
      Spilled = true;
      CanEliminateFrame = false;
    }

    if (!ARM::GPRRegClass.contains(Reg)) {
      if (Spilled) {
        if (ARM::SPRRegClass.contains(Reg))
          NumFPRSpills++;
        else if (ARM::DPRRegClass.contains(Reg))
          NumFPRSpills += 2;
        else if (ARM::QPRRegClass.contains(Reg))
          NumFPRSpills += 4;
      }
      continue;
    }

    if (Spilled) {
      NumGPRSpills++;

      if (PushPopSplit != ARMSubtarget::SplitR7) {
        if (Reg == ARM::LR)
          LRSpilled = true;
        CS1Spilled = true;
        continue;
      }

      // Keep track if LR and any of R4, R5, R6, and R7 is spilled.
      switch (Reg) {
      case ARM::LR:
        LRSpilled = true;
        [[fallthrough]];
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
        CS1Spilled = true;
        break;
      default:
        break;
      }
    } else {
      if (PushPopSplit != ARMSubtarget::SplitR7) {
        UnspilledCS1GPRs.push_back(Reg);
        continue;
      }

      switch (Reg) {
      case ARM::R0: case ARM::R1:
      case ARM::R2: case ARM::R3:
      case ARM::R4: case ARM::R5:
      case ARM::R6: case ARM::R7:
      case ARM::LR:
        UnspilledCS1GPRs.push_back(Reg);
        break;
      default:
        UnspilledCS2GPRs.push_back(Reg);
        break;
      }
    }
  }
  bool ForceLRSpill = false;
  if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
    unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
    // Force LR to be spilled if the Thumb function size is > 2048. This
    // enables use of BL to implement far jump.
    if (FnSize >= (1 << 11)) {
      CanEliminateFrame = false;
      ForceLRSpill = true;
    }
  }

  // If any of the stack slot references may be out of range of an immediate
  // offset, make sure a register (or a spill slot) is available for the
  // register scavenger. Note that if we're indexing off the frame pointer, the
  // effective stack size is 4 bytes larger since the FP points to the stack
  // slot of the previous FP. Also, if we have variable sized objects in the
  // function, stack slot references will often be negative, and some of
  // our instructions are positive-offset only, so conservatively consider
  // that case to want a spill slot (or register) as well. Similarly, if
  // the function adjusts the stack pointer during execution and the
  // adjustments aren't already part of our stack size estimate, our offset
  // calculations may be off, so be conservative.
  // FIXME: We could add logic to be more precise about negative offsets
  //        and which instructions will need a scratch register for them. Is it
  //        worth the effort and added fragility?
  unsigned EstimatedStackSize =
      MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);

  // Determine biggest (positive) SP offset in MachineFrameInfo.
  int MaxFixedOffset = 0;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
    int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
    MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
  }

  bool HasFP = hasFP(MF);
  if (HasFP) {
    if (AFI->hasStackFrame())
      EstimatedStackSize += 4;
  } else {
    // If FP is not used, SP will be used to access arguments, so count the
    // size of arguments into the estimation.
    EstimatedStackSize += MaxFixedOffset;
  }
  EstimatedStackSize += 16; // For possible paddings.

  unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
  bool HasNonSPFrameIndex = false;
  if (AFI->isThumb1OnlyFunction()) {
    // For Thumb1, don't bother to iterate over the function. The only
    // instruction that requires an emergency spill slot is a store to a
    // frame index.
    //
    // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
    // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
    // a 5-bit unsigned immediate.
    //
    // We could try to check if the function actually contains a tSTRspi
    // that might need the spill slot, but it's not really important.
    // Functions with VLAs or extremely large call frames are rare, and
    // if a function is allocating more than 1KB of stack, an extra 4-byte
    // slot probably isn't relevant.
    //
    // A special case is the scenario where r11 is used as FP, where accesses
    // to a frame index will require its value to be moved into a low reg.
    // This is handled later on, once we are able to determine if we have any
    // fp-relative accesses.
    if (RegInfo->hasBasePointer(MF))
      EstimatedRSStackSizeLimit = (1U << 5) * 4;
    else
      EstimatedRSStackSizeLimit = (1U << 8) * 4;
    EstimatedRSFixedSizeLimit = (1U << 5) * 4;
  } else {
    EstimatedRSStackSizeLimit =
        estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
    EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
  }
  // Final estimate of whether sp or bp-relative accesses might require
  // scavenging.
  bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;

  // If the stack pointer moves and we don't have a base pointer, the
  // estimate logic doesn't work. The actual offsets might be larger when
  // we're constructing a call frame, or we might need to use negative
  // offsets from fp.
  bool HasMovingSP = MFI.hasVarSizedObjects() ||
                     (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
  bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;

  // If we have a frame pointer, we assume arguments will be accessed
  // relative to the frame pointer. Check whether fp-relative accesses to
  // arguments require scavenging.
  //
  // We could do slightly better on Thumb1; in some cases, an sp-relative
  // offset would be legal even though an fp-relative offset is not.
  int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
  bool HasLargeArgumentList =
      HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;

  bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
                         HasLargeArgumentList || HasNonSPFrameIndex;
  LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
                    << "; EstimatedStack: " << EstimatedStackSize
                    << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
                    << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
  if (BigFrameOffsets || !CanEliminateFrame ||
      RegInfo->cannotEliminateFrame(MF)) {
    AFI->setHasStackFrame(true);

    if (HasFP) {
      SavedRegs.set(FramePtr);
      // If the frame pointer is required by the ABI, also spill LR so that we
      // emit a complete frame record.
      if ((requiresAAPCSFrameRecord(MF) ||
           MF.getTarget().Options.DisableFramePointerElim(MF)) &&
          !LRSpilled) {
        SavedRegs.set(ARM::LR);
        LRSpilled = true;
        NumGPRSpills++;
        auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
        if (LRPos != UnspilledCS1GPRs.end())
          UnspilledCS1GPRs.erase(LRPos);
      }
      auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
      if (FPPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(FPPos);
      NumGPRSpills++;
      if (FramePtr == ARM::R7)
        CS1Spilled = true;
    }

    // This is the number of extra spills inserted for callee-save GPRs which
    // would not otherwise be used by the function. When greater than zero it
    // guarantees that it is possible to scavenge a register to hold the
    // address of a stack slot. On Thumb1, the register must be a valid operand
    // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
    // or lr.
    //
    // If we don't insert a spill, we instead allocate an emergency spill
    // slot, which can be used by scavenging to spill an arbitrary register.
    //
    // We currently don't try to figure out whether any specific instruction
    // requires scavenging an additional register.
    unsigned NumExtraCSSpill = 0;
    if (AFI->isThumb1OnlyFunction()) {
      // For Thumb1-only targets, we need some low registers when we save and
      // restore the high registers (which aren't allocatable, but could be
      // used by inline assembly) because the push/pop instructions can not
      // access high registers. If necessary, we might need to push more low
      // registers to ensure that there is at least one free that can be used
      // for the saving & restoring, and preferably we should ensure that as
      // many as are needed are available so that fewer push/pop instructions
      // are being inserted.

      // Low registers which are not currently pushed, but could be (r4-r7).
      SmallVector<unsigned, 4> AvailableRegs;

      // Unused argument registers (r0-r3) can be clobbered in the prologue
      // for free.
      int EntryRegDeficit = 0;
      for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
        if (!MF.getRegInfo().isLiveIn(Reg)) {
          --EntryRegDeficit;
          LLVM_DEBUG(dbgs()
                     << printReg(Reg, TRI)
                     << " is unused argument register, EntryRegDeficit = "
                     << EntryRegDeficit << "\n");
        }
      }

      // Unused return registers can be clobbered in the epilogue for free.
      int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
      LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()
                        << " return regs used, ExitRegDeficit = "
                        << ExitRegDeficit << "\n");

      int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
      LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");

      // r4-r6 can be used in the prologue if they are pushed by the first push
      // instruction.
      for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
        if (SavedRegs.test(Reg)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(Reg);
          LLVM_DEBUG(
              dbgs() << printReg(Reg, TRI)
                     << " is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // r7 can be used if it is not being used as the frame pointer.
      if (!HasFP || FramePtr != ARM::R7) {
        if (SavedRegs.test(ARM::R7)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::R7);
          LLVM_DEBUG(
              dbgs() << "%r7 is non-saved low register, adding to AvailableRegs\n");
        }
      }

      // Each of r8-r11 needs to be copied to a low register, then pushed.
      for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
        if (SavedRegs.test(Reg)) {
          ++RegDeficit;
          LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
                            << " is saved high register, RegDeficit = "
                            << RegDeficit << "\n");
        }
      }

      // LR can only be used by PUSH, not POP, and can't be used at all if the
      // llvm.returnaddress intrinsic is used. This is only worth doing if we
      // are more limited at function entry than exit.
      if ((EntryRegDeficit > ExitRegDeficit) &&
          !(MF.getRegInfo().isLiveIn(ARM::LR) &&
            MF.getFrameInfo().isReturnAddressTaken())) {
        if (SavedRegs.test(ARM::LR)) {
          --RegDeficit;
          LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
                            << RegDeficit << "\n");
        } else {
          AvailableRegs.push_back(ARM::LR);
          LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
        }
      }

      // If there are more high registers that need pushing than low registers
      // available, push some more low registers so that we can use fewer push
      // instructions. This might not reduce RegDeficit all the way to zero,
      // because we can only guarantee that r4-r6 are available, but r8-r11 may
      // not be pushed.
      LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
      for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
        unsigned Reg = AvailableRegs.pop_back_val();
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up reg deficit\n");
        SavedRegs.set(Reg);
        NumGPRSpills++;
        CS1Spilled = true;
        assert(!MRI.isReserved(Reg) && "Should not be reserved");
        if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
        UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
        if (Reg == ARM::LR)
          LRSpilled = true;
      }
      LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
                        << "\n");
    }
    // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
    // restore LR in that case.
    bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();

    // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
    // spill LR as well so we can fold BX_RET to the registers restore (LDM).
    if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
      SavedRegs.set(ARM::LR);
      NumGPRSpills++;
      SmallVectorImpl<unsigned>::iterator LRPos;
      LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
      if (LRPos != UnspilledCS1GPRs.end())
        UnspilledCS1GPRs.erase(LRPos);

      ForceLRSpill = false;
      if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
          !AFI->isThumb1OnlyFunction())
        NumExtraCSSpill++;
    }

    // If stack and double are 8-byte aligned and we are spilling an odd number
    // of GPRs, spill one extra callee save GPR so we won't have to pad between
    // the integer and double callee save areas.
    LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
    const Align TargetAlign = getStackAlign();
    if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
      if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
        for (unsigned Reg : UnspilledCS1GPRs) {
          // Don't spill high register if the function is thumb. In the case of
          // Windows on ARM, accept R11 (frame pointer).
          if (!AFI->isThumbFunction() ||
              (STI.isTargetWindows() && Reg == ARM::R11) ||
              isARMLowRegister(Reg) ||
              (Reg == ARM::LR && !ExpensiveLRRestore)) {
            SavedRegs.set(Reg);
            LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                              << " to make up alignment\n");
            if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
                !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
              NumExtraCSSpill++;
            break;
          }
        }
      } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
        unsigned Reg = UnspilledCS2GPRs.front();
        SavedRegs.set(Reg);
        LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
                          << " to make up alignment\n");
        if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
          NumExtraCSSpill++;
      }
    }
    // Estimate if we might need to scavenge registers at some point in order
    // to materialize a stack offset. If so, either spill one additional
    // callee-saved register or reserve a special spill slot to facilitate
    // register scavenging. Thumb1 needs a spill slot for stack pointer
    // adjustments and for frame index accesses when FP is high register,
    // even when the frame itself is small.
    unsigned RegsNeeded = 0;
    if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
      RegsNeeded++;
      // With thumb1 execute-only we may need an additional register for saving
      // and restoring the CPSR.
      if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
        RegsNeeded++;
    }

    if (RegsNeeded > NumExtraCSSpill) {
      // If any non-reserved CS register isn't spilled, just spill one or two
      // extra. That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers.
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            NumExtraCSSpill++;
        }
      }
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateSpillStackObject(Size, Alignment));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}
void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isCalleeSavedInfoValid())
    return;

  // Check if all terminators do not implicitly use LR. Then we can 'restore'
  // LR into PC so it is not live out of the return block: Clear the Restored
  // bit in that case.
  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    if (Info.getReg() != ARM::LR)
      continue;
    if (all_of(MF, [](const MachineBasicBlock &MBB) {
          return all_of(MBB.terminators(), [](const MachineInstr &Term) {
            return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
                   Term.getOpcode() == ARM::t2LDMIA_RET ||
                   Term.getOpcode() == ARM::tPOP_RET;
          });
        })) {
      Info.setRestored(false);
      break;
    }
  }
}
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}

void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved
  // register in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. That order depends on the
    // PushPopSplitVariation, there are only two cases which we use with return
    // address signing:
    switch (STI.getPushPopSplitVariation(MF)) {
    case ARMSubtarget::SplitR7:
      // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
      CSI.insert(find_if(CSI,
                         [=](const auto &CS) {
                           Register Reg = CS.getReg();
                           return Reg == ARM::R10 || Reg == ARM::R11 ||
                                  Reg == ARM::R8 || Reg == ARM::R9 ||
                                  ARM::DPRRegClass.contains(Reg);
                         }),
                 CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::SplitR11AAPCSSignRA:
      // With SplitR11AAPCSSignRA, R12 will always be the highest-addressed CSR
      // on the stack.
      CSI.insert(CSI.begin(), CalleeSavedInfo(ARM::R12));
      break;
    case ARMSubtarget::NoSplit:
      assert(!MF.getTarget().Options.DisableFramePointerElim(MF) &&
             "ABI-required frame pointers need a CSR split when signing return "
             "address.");
      CSI.insert(find_if(CSI,
                         [=](const auto &CS) {
                           Register Reg = CS.getReg();
                           return Reg != ARM::LR;
                         }),
                 CalleeSavedInfo(ARM::R12));
      break;
    default:
      llvm_unreachable("Unexpected CSR split with return address signing");
    }
  }

  return false;
}
const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
  NumEntries = std::size(FixedSpillOffsets);
  return FixedSpillOffsets;
}
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
  unsigned PredReg = TII.getFramePred(*I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(*I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from
    // the stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
                 MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function. The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
//   * r4 holds the minimum stack size requested for this function call
//   * r5 holds the stack size of the arguments to the function
//   * the beginning of the function is 3 instructions after the call to
//     __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, or platforms other than
  // android/linux. Note that both thumb1 and thumb2 are supported for
  // android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MCContext &Context = MF.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());
3210 // The blocks will all be inserted before PrologueMBB using that order.
3211 // Therefore the block that should appear first in the CFG should appear
3212 // first in the list.
3213 MachineBasicBlock
*AddedBlocks
[] = {PrevStackMBB
, McrMBB
, GetMBB
, AllocMBB
,
3216 for (MachineBasicBlock
*B
: AddedBlocks
)
3217 BeforePrologueRegion
.insert(B
);
3219 for (const auto &LI
: PrologueMBB
.liveins()) {
3220 for (MachineBasicBlock
*PredBB
: BeforePrologueRegion
)
3221 PredBB
->addLiveIn(LI
);
3224 // Remove the newly added blocks from the list, since we know
3225 // we do not have to do the following updates for them.
3226 for (MachineBasicBlock
*B
: AddedBlocks
) {
3227 BeforePrologueRegion
.erase(B
);
3228 MF
.insert(PrologueMBB
.getIterator(), B
);
3231 for (MachineBasicBlock
*MBB
: BeforePrologueRegion
) {
3232 // Make sure the LiveIns are still sorted and unique.
3233 MBB
->sortUniqueLiveIns();
3234 // Replace the edges to PrologueMBB by edges to the sequences
3235 // we are about to add, but only update for immediate predecessors.
3236 if (MBB
->isSuccessor(&PrologueMBB
))
3237 MBB
->ReplaceUsesOfBlockWith(&PrologueMBB
, AddedBlocks
[0]);
  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers).
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  if (Thumb) {
3286 BuildMI(McrMBB
, DL
, TII
.get(ARM::tMOVr
), ScratchReg1
)
3288 .add(predOps(ARMCC::AL
));
3289 } else if (CompareStackPointer
) {
3290 BuildMI(McrMBB
, DL
, TII
.get(ARM::MOVr
), ScratchReg1
)
3292 .add(predOps(ARMCC::AL
))
  // sub SR1, sp, #StackSize
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }
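  // At this point SR1 holds the value to test against the limit: sp itself
  // when the frame fits in the guaranteed slack (CompareStackPointer), or
  // sp - AlignedStackSize otherwise.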
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
          .addExternalSymbol("__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
          .addConstantPoolIndex(CPI)
          .add(predOps(ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
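    // Note: Thumb1 has no MRC access to the TLS register used below, which
    // is why the limit is read through the global __STACK_LIMIT symbol
    // instead: load the symbol's address (from a constant pool, or directly
    // for execute-only code), then dereference it.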
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));
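    // CP15 c13/c0/#3 is TPIDRURO, the user read-only thread ID register,
    // which Linux and Android use as the thread pointer, so SR0 now holds
    // the base of the thread's control block.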
    // Use the last TLS slot on Android and a private field of the TCB on
    // Linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
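    // The limit is therefore loaded from thread-pointer + 4 * TlsOffset,
    // i.e. byte offset 252 (the 64th slot) on Android and byte offset 4 on
    // Linux.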
    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }
  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));
  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addMBB(PostStackMBB)
      .addImm(ARMCC::LS)
      .addReg(ARM::CPSR);
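  // LS is the unsigned lower-or-same condition, so the branch to
  // PostStackMBB fires when limit <= SR1, i.e. when enough stack remains;
  // only the failure case falls through into AllocMBB and the __morestack
  // call.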
  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack expects the requested stack size in SR0 (r4)
  // and the size of the stack arguments in SR1 (r5).

  // Pass the first argument to __morestack in Scratch Register #0:
  //   the amount of stack required.
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }
  // Pass the second argument to __morestack in Scratch Register #1:
  //   the amount of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }
  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }
  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }
  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }
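  // Two details above: a Thumb1 tPOP register list can only name r0-r7 and
  // PC, never LR, hence the pop-into-SR0-then-mov sequence (SR0's real value
  // is restored by the following pop); and t2LDR_POST encodes the
  // single-register pop "ldr lr, [sp], #4".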
  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the PostStackMBB block so we need to restore
  // the scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }
  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
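  // Returning here (instead of falling through to the real prologue) relies
  // on the split-stack runtime's protocol: __morestack re-invokes this
  // function on the freshly allocated segment, so by the time control
  // reaches this return the actual function body has already run, and all
  // that is left is to hand control back to the original caller.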
  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }
  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }
  // Organizing MBB lists
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);
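  // Sketch of the resulting check CFG (PostStackMBB leads into the
  // original prologue):
  //
  //   PrevStackMBB -> McrMBB -> GetMBB -+-> PostStackMBB -> PrologueMBB
  //                                     |        ^
  //                                     +-> AllocMBB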
#ifdef EXPENSIVE_CHECKS