1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the Thumb1 implementation of TargetFrameLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "Thumb1FrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMMachineFunctionInfo.h"
17 #include "ARMSubtarget.h"
18 #include "Thumb1InstrInfo.h"
19 #include "ThumbRegisterInfo.h"
20 #include "Utils/ARMBaseInfo.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/MC/MCContext.h"
38 #include "llvm/MC/MCDwarf.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/MathExtras.h"
50 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget
&sti
)
51 : ARMFrameLowering(sti
) {}
// Decides whether the outgoing-argument (call frame) area is reserved as part
// of the fixed stack frame of MF. The decision uses the maximum call frame
// size recorded in the frame info and whether the function has
// variable-sized objects.
// NOTE(review): the statement guarded by the size test and the closing brace
// are not visible in this extract.
53 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction
&MF
) const{
54 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
55 unsigned CFSize
= MFI
.getMaxCallFrameSize();
56 // It's not always a good idea to include the call frame as part of the
57 // stack frame. ARM (especially Thumb) has small immediate offset to
58 // address the stack frame. So a large call frame can cause poor codegen
59 // and may even make it impossible to scavenge a register.
60 if (CFSize
>= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
// For call frames below the threshold, the result is true exactly when the
// function has no variable-sized objects.
63 return !MFI
.hasVarSizedObjects();
// Emits, before MBBI in MBB, the instructions that add NumBytes to SP.
//   MBB, MBBI  - insertion block and position.
//   TII, dl    - instruction info and debug location for the new instructions.
//   MRI        - Thumb register info, used here for the constant-pool load.
//   NumBytes   - signed adjustment applied to SP.
//   ScratchReg - register that may hold the adjustment when it is too large
//                for immediates; ARM::NoRegister triggers a fatal error in
//                that case.
//   MIFlags    - flags applied to the emitted instructions.
// NOTE(review): the return-type line, the `else` lines and the tails of
// several calls are not visible in this extract.
67 emitPrologueEpilogueSPUpdate(MachineBasicBlock
&MBB
,
68 MachineBasicBlock::iterator
&MBBI
,
69 const TargetInstrInfo
&TII
, const DebugLoc
&dl
,
70 const ThumbRegisterInfo
&MRI
, int NumBytes
,
71 unsigned ScratchReg
, unsigned MIFlags
) {
72 // If it would take more than three instructions to adjust the stack pointer
73 // using tADDspi/tSUBspi, load an immediate instead.
74 if (std::abs(NumBytes
) > 508 * 3) {
75 // We use a different codepath here from the normal
76 // emitThumbRegPlusImmediate so we don't have to deal with register
77 // scavenging. (Scavenging could try to use the emergency spill slot
78 // before we've actually finished setting up the stack.)
79 if (ScratchReg
== ARM::NoRegister
)
80 report_fatal_error("Failed to emit Thumb1 stack adjustment");
81 MachineFunction
&MF
= *MBB
.getParent();
82 const ARMSubtarget
&ST
= MF
.getSubtarget
<ARMSubtarget
>();
// Execute-only subtargets materialize the value with t2MOVi32imm; the
// constant-pool load below is presumably the alternative branch.
83 if (ST
.genExecuteOnly()) {
84 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVi32imm
), ScratchReg
)
85 .addImm(NumBytes
).setMIFlags(MIFlags
);
87 MRI
.emitLoadConstPool(MBB
, MBBI
, dl
, ScratchReg
, 0, NumBytes
, ARMCC::AL
,
// Add the materialized value in ScratchReg to SP.
90 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tADDhirr
), ARM::SP
)
92 .addReg(ScratchReg
, RegState::Kill
)
93 .add(predOps(ARMCC::AL
))
97 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
// Small adjustments go through the generic SP = SP + NumBytes helper.
99 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::SP
, ARM::SP
, NumBytes
, TII
,
// Adjusts SP by NumBytes around a call by delegating to
// emitThumbRegPlusImmediate with SP as both destination and base register.
// MIFlags defaults to MachineInstr::NoFlags.
// NOTE(review): the trailing arguments of the call and the closing brace are
// not visible in this extract.
104 static void emitCallSPUpdate(MachineBasicBlock
&MBB
,
105 MachineBasicBlock::iterator
&MBBI
,
106 const TargetInstrInfo
&TII
, const DebugLoc
&dl
,
107 const ThumbRegisterInfo
&MRI
, int NumBytes
,
108 unsigned MIFlags
= MachineInstr::NoFlags
) {
109 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::SP
, ARM::SP
, NumBytes
, TII
,
// Lowers the call-frame pseudo instruction at I. When the call frame is not
// reserved, the pseudo is turned into an explicit SP adjustment: the frame
// size taken from the pseudo is rounded up to the stack alignment, then
// subtracted from SP for the *ADJCALLSTACKDOWN opcodes and added for the
// *ADJCALLSTACKUP opcodes.
// NOTE(review): the `else` line, the removal of the pseudo and the return
// statement are not visible in this extract.
114 MachineBasicBlock::iterator
Thumb1FrameLowering::
115 eliminateCallFramePseudoInstr(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
116 MachineBasicBlock::iterator I
) const {
117 const Thumb1InstrInfo
&TII
=
118 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
119 const ThumbRegisterInfo
*RegInfo
=
120 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
121 if (!hasReservedCallFrame(MF
)) {
122 // If we have alloca, convert as follows:
123 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
124 // ADJCALLSTACKUP -> add, sp, sp, amount
125 MachineInstr
&Old
= *I
;
126 DebugLoc dl
= Old
.getDebugLoc();
127 unsigned Amount
= TII
.getFrameSize(Old
);
129 // We need to keep the stack aligned properly. To do this, we round the
130 // amount of space needed for the outgoing arguments up to the next
131 // alignment boundary.
132 Amount
= alignTo(Amount
, getStackAlign());
134 // Replace the pseudo instruction with a new instruction...
135 unsigned Opc
= Old
.getOpcode();
136 if (Opc
== ARM::ADJCALLSTACKDOWN
|| Opc
== ARM::tADJCALLSTACKDOWN
) {
// Amount is unsigned; the negated value is passed to the int NumBytes
// parameter of emitCallSPUpdate.
137 emitCallSPUpdate(MBB
, I
, TII
, dl
, *RegInfo
, -Amount
);
139 assert(Opc
== ARM::ADJCALLSTACKUP
|| Opc
== ARM::tADJCALLSTACKUP
);
140 emitCallSPUpdate(MBB
, I
, TII
, dl
, *RegInfo
, Amount
);
// Emits the Thumb1 function prologue at the start of MBB. The visible steps:
//   1. Round the stack size up to a multiple of 4 and store it back.
//   2. Allocate the argument-register save area, if any, and describe it
//      with a def_cfa_offset CFI instruction.
//   3. Without a stack frame, allocate the remaining bytes and (per the
//      visible code) emit matching CFI.
//   4. Walk the callee-saved info to size the spill areas and record their
//      offsets in ARMFunctionInfo.
//   5. Emit CFI for the saved registers, set up the frame pointer, allocate
//      the locals, and optionally realign SP and set up the base pointer.
// NOTE(review): many lines are missing from this extract, including the
// declarations of `dl`, `CFAOffset` and some `CFIIndex` variables, the bodies
// of the callee-save loops and most closing braces. The comments added below
// describe only the visible statements.
147 void Thumb1FrameLowering::emitPrologue(MachineFunction
&MF
,
148 MachineBasicBlock
&MBB
) const {
149 MachineBasicBlock::iterator MBBI
= MBB
.begin();
150 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
151 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
152 MachineModuleInfo
&MMI
= MF
.getMMI();
153 const MCRegisterInfo
*MRI
= MMI
.getContext().getRegisterInfo();
154 const ThumbRegisterInfo
*RegInfo
=
155 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
156 const Thumb1InstrInfo
&TII
=
157 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
159 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
160 unsigned NumBytes
= MFI
.getStackSize();
161 assert(NumBytes
>= ArgRegsSaveSize
&&
162 "ArgRegsSaveSize is included in NumBytes");
163 const std::vector
<CalleeSavedInfo
> &CSI
= MFI
.getCalleeSavedInfo();
165 // Debug location must be unknown since the first debug location is used
166 // to determine the end of the prologue.
169 Register FramePtr
= RegInfo
->getFrameRegister(MF
);
170 unsigned BasePtr
= RegInfo
->getBaseRegister();
173 // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
// Round the stack size up to the next multiple of 4.
174 NumBytes
= (NumBytes
+ 3) & ~3;
175 MFI
.setStackSize(NumBytes
);
177 // Determine the sizes of each callee-save spill areas and record which frame
178 // belongs to which callee-save spill areas.
179 unsigned GPRCS1Size
= 0, GPRCS2Size
= 0, DPRCSSize
= 0;
180 int FramePtrSpillFI
= 0;
// Allocate the argument-register save area first. No scratch register is
// offered for this adjustment.
182 if (ArgRegsSaveSize
) {
183 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, -ArgRegsSaveSize
,
184 ARM::NoRegister
, MachineInstr::FrameSetup
);
185 CFAOffset
+= ArgRegsSaveSize
;
187 MF
.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset
));
188 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
189 .addCFIIndex(CFIIndex
)
190 .setMIFlags(MachineInstr::FrameSetup
);
// No stack frame: only the remaining bytes beyond the argument save area
// are allocated here.
193 if (!AFI
->hasStackFrame()) {
194 if (NumBytes
- ArgRegsSaveSize
!= 0) {
195 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
196 -(NumBytes
- ArgRegsSaveSize
),
197 ARM::NoRegister
, MachineInstr::FrameSetup
);
198 CFAOffset
+= NumBytes
- ArgRegsSaveSize
;
199 unsigned CFIIndex
= MF
.addFrameInst(
200 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset
));
201 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
202 .addCFIIndex(CFIIndex
)
203 .setMIFlags(MachineInstr::FrameSetup
);
// Walk the callee-saved registers. The per-register classification that
// presumably updates the spill-area sizes is missing from this extract.
208 for (unsigned i
= 0, e
= CSI
.size(); i
!= e
; ++i
) {
209 unsigned Reg
= CSI
[i
].getReg();
210 int FI
= CSI
[i
].getFrameIdx();
216 if (STI
.splitFramePushPop(MF
)) {
227 FramePtrSpillFI
= FI
;
235 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPUSH
) {
239 // Determine starting offsets of spill areas.
240 unsigned DPRCSOffset
= NumBytes
- ArgRegsSaveSize
- (GPRCS1Size
+ GPRCS2Size
+ DPRCSSize
);
241 unsigned GPRCS2Offset
= DPRCSOffset
+ DPRCSSize
;
242 unsigned GPRCS1Offset
= GPRCS2Offset
+ GPRCS2Size
;
243 bool HasFP
= hasFP(MF
);
245 AFI
->setFramePtrSpillOffset(MFI
.getObjectOffset(FramePtrSpillFI
) +
247 AFI
->setGPRCalleeSavedArea1Offset(GPRCS1Offset
);
248 AFI
->setGPRCalleeSavedArea2Offset(GPRCS2Offset
);
249 AFI
->setDPRCalleeSavedAreaOffset(DPRCSOffset
);
// From here on NumBytes is the space below the callee-save areas.
250 NumBytes
= DPRCSOffset
;
252 int FramePtrOffsetInBlock
= 0;
253 unsigned adjustedGPRCS1Size
= GPRCS1Size
;
// When only the first GPR area is in use, try to fold the SP adjustment
// into the preceding push; on success the folded bytes are counted as
// part of that area.
254 if (GPRCS1Size
> 0 && GPRCS2Size
== 0 &&
255 tryFoldSPUpdateIntoPushPop(STI
, MF
, &*std::prev(MBBI
), NumBytes
)) {
256 FramePtrOffsetInBlock
= NumBytes
;
257 adjustedGPRCS1Size
+= NumBytes
;
261 if (adjustedGPRCS1Size
) {
262 CFAOffset
+= adjustedGPRCS1Size
;
264 MF
.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset
));
265 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
266 .addCFIIndex(CFIIndex
)
267 .setMIFlags(MachineInstr::FrameSetup
);
// Emit an offset CFI record for saved registers. The register filter of
// this loop is missing from this extract.
269 for (std::vector
<CalleeSavedInfo
>::const_iterator I
= CSI
.begin(),
270 E
= CSI
.end(); I
!= E
; ++I
) {
271 unsigned Reg
= I
->getReg();
272 int FI
= I
->getFrameIdx();
279 if (STI
.splitFramePushPop(MF
))
291 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createOffset(
292 nullptr, MRI
->getDwarfRegNum(Reg
, true), MFI
.getObjectOffset(FI
)));
293 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
294 .addCFIIndex(CFIIndex
)
295 .setMIFlags(MachineInstr::FrameSetup
);
300 // Adjust FP so it points to the stack slot that contains the previous FP.
302 FramePtrOffsetInBlock
+=
303 MFI
.getObjectOffset(FramePtrSpillFI
) + GPRCS1Size
+ ArgRegsSaveSize
;
// The immediate operand is the byte offset divided by 4.
304 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tADDrSPi
), FramePtr
)
306 .addImm(FramePtrOffsetInBlock
/ 4)
307 .setMIFlags(MachineInstr::FrameSetup
)
308 .add(predOps(ARMCC::AL
));
309 if(FramePtrOffsetInBlock
) {
310 CFAOffset
-= FramePtrOffsetInBlock
;
311 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::cfiDefCfa(
312 nullptr, MRI
->getDwarfRegNum(FramePtr
, true), CFAOffset
));
313 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
314 .addCFIIndex(CFIIndex
)
315 .setMIFlags(MachineInstr::FrameSetup
);
318 MF
.addFrameInst(MCCFIInstruction::createDefCfaRegister(
319 nullptr, MRI
->getDwarfRegNum(FramePtr
, true)));
320 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
321 .addCFIIndex(CFIIndex
)
322 .setMIFlags(MachineInstr::FrameSetup
);
325 // If offset is > 508 then sp cannot be adjusted in a single instruction,
326 // try restoring from fp instead.
327 AFI
->setShouldRestoreSPFromFP(true);
330 // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
331 // and tMOVr instructions. We don't need to add any call frame information
332 // in-between these instructions, because they do not modify the high
335 MachineBasicBlock::iterator OldMBBI
= MBBI
;
336 // Skip a run of tMOVr instructions
337 while (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tMOVr
)
339 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPUSH
) {
342 // We have reached an instruction which is not a push, so the previous
343 // run of tMOVr instructions (which may have been empty) was not part of
344 // the prologue. Reset MBBI back to the last PUSH of the prologue.
350 // Emit call frame information for the callee-saved high registers.
351 for (auto &I
: CSI
) {
352 unsigned Reg
= I
.getReg();
353 int FI
= I
.getFrameIdx();
360 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createOffset(
361 nullptr, MRI
->getDwarfRegNum(Reg
, true), MFI
.getObjectOffset(FI
)));
362 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
363 .addCFIIndex(CFIIndex
)
364 .setMIFlags(MachineInstr::FrameSetup
);
373 // Insert it after all the callee-save spills.
375 // For a large stack frame, we might need a scratch register to store
376 // the size of the frame. We know all callee-save registers are free
377 // at this point in the prologue, so pick one.
378 unsigned ScratchRegister
= ARM::NoRegister
;
// Candidates are low callee-saved registers, excluding the frame pointer
// when the function has one.
379 for (auto &I
: CSI
) {
380 unsigned Reg
= I
.getReg();
381 if (isARMLowRegister(Reg
) && !(HasFP
&& Reg
== FramePtr
)) {
382 ScratchRegister
= Reg
;
386 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, -NumBytes
,
387 ScratchRegister
, MachineInstr::FrameSetup
);
389 CFAOffset
+= NumBytes
;
390 unsigned CFIIndex
= MF
.addFrameInst(
391 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset
));
392 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
393 .addCFIIndex(CFIIndex
)
394 .setMIFlags(MachineInstr::FrameSetup
);
398 if (STI
.isTargetELF() && HasFP
)
399 MFI
.setOffsetAdjustment(MFI
.getOffsetAdjustment() -
400 AFI
->getFramePtrSpillOffset());
402 AFI
->setGPRCalleeSavedArea1Size(GPRCS1Size
);
403 AFI
->setGPRCalleeSavedArea2Size(GPRCS2Size
);
404 AFI
->setDPRCalleeSavedAreaSize(DPRCSSize
);
// Dynamic stack realignment: clear the low Log2(MaxAlign) bits of SP by
// shifting right and then left through R4.
406 if (RegInfo
->hasStackRealignment(MF
)) {
407 const unsigned NrBitsToZero
= Log2(MFI
.getMaxAlign());
408 // Emit the following sequence, using R4 as a temporary, since we cannot use
409 // SP as a source or destination register for the shifts:
411 // lsrs r4, r4, #NrBitsToZero
412 // lsls r4, r4, #NrBitsToZero
414 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::R4
)
415 .addReg(ARM::SP
, RegState::Kill
)
416 .add(predOps(ARMCC::AL
));
418 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLSRri
), ARM::R4
)
420 .addReg(ARM::R4
, RegState::Kill
)
421 .addImm(NrBitsToZero
)
422 .add(predOps(ARMCC::AL
));
424 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLSLri
), ARM::R4
)
426 .addReg(ARM::R4
, RegState::Kill
)
427 .addImm(NrBitsToZero
)
428 .add(predOps(ARMCC::AL
));
430 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
431 .addReg(ARM::R4
, RegState::Kill
)
432 .add(predOps(ARMCC::AL
));
434 AFI
->setShouldRestoreSPFromFP(true);
437 // If we need a base pointer, set it up here. It's whatever the value
438 // of the stack pointer is at this point. Any variable size objects
439 // will be allocated after this, so we can still use the base pointer
440 // to reference locals.
441 if (RegInfo
->hasBasePointer(MF
))
442 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), BasePtr
)
444 .add(predOps(ARMCC::AL
));
446 // If the frame has variable sized objects then the epilogue must restore
447 // the sp from fp. We can assume there's an FP here since hasFP already
448 // checks for hasVarSizedObjects.
449 if (MFI
.hasVarSizedObjects())
450 AFI
->setShouldRestoreSPFromFP(true);
452 // In some cases, virtual registers have been introduced, e.g. by uses of
453 // emitThumbRegPlusImmInReg.
454 MF
.getProperties().reset(MachineFunctionProperties::Property::NoVRegs
);
// Classifies MI as a possible callee-saved-register restore. Three shapes
// are examined:
//   - tLDRspi whose operand 1 is a frame index and whose operand 0 is a
//     callee-saved register according to CSRegs;
//   - tPOP;
//   - tMOVr, which matches when the source is in tGPR (or is LR) and the
//     destination is in hGPR.
// NOTE(review): the return statements for the first two cases and the final
// fall-through return are not visible in this extract.
457 static bool isCSRestore(MachineInstr
&MI
, const MCPhysReg
*CSRegs
) {
458 if (MI
.getOpcode() == ARM::tLDRspi
&& MI
.getOperand(1).isFI() &&
459 isCalleeSavedRegister(MI
.getOperand(0).getReg(), CSRegs
))
461 else if (MI
.getOpcode() == ARM::tPOP
) {
463 } else if (MI
.getOpcode() == ARM::tMOVr
) {
464 Register Dst
= MI
.getOperand(0).getReg();
465 Register Src
= MI
.getOperand(1).getReg();
466 return ((ARM::tGPRRegClass
.contains(Src
) || Src
== ARM::LR
) &&
467 ARM::hGPRRegClass
.contains(Dst
));
// Emits the Thumb1 function epilogue before the first terminator of MBB.
// Visible behaviour:
//   - without a stack frame, SP is moved back by the stack size minus the
//     argument-register save area;
//   - otherwise MBBI is moved back over the callee-saved restores, the
//     callee-save area sizes are subtracted from NumBytes, and SP is either
//     recomputed from the frame pointer or adjusted by NumBytes, folding the
//     adjustment into a pop where possible;
//   - finally, if the function needs the pop special fix-up, it is emitted.
// NOTE(review): several `else` lines, loop bodies and closing braces are not
// visible in this extract.
472 void Thumb1FrameLowering::emitEpilogue(MachineFunction
&MF
,
473 MachineBasicBlock
&MBB
) const {
474 MachineBasicBlock::iterator MBBI
= MBB
.getFirstTerminator();
475 DebugLoc dl
= MBBI
!= MBB
.end() ? MBBI
->getDebugLoc() : DebugLoc();
476 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
477 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
478 const ThumbRegisterInfo
*RegInfo
=
479 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
480 const Thumb1InstrInfo
&TII
=
481 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
483 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
484 int NumBytes
= (int)MFI
.getStackSize();
485 assert((unsigned)NumBytes
>= ArgRegsSaveSize
&&
486 "ArgRegsSaveSize is included in NumBytes");
487 const MCPhysReg
*CSRegs
= RegInfo
->getCalleeSavedRegs(&MF
);
488 Register FramePtr
= RegInfo
->getFrameRegister(MF
);
// No stack frame: release everything except the argument-register save
// area with one SP update and no scratch register.
490 if (!AFI
->hasStackFrame()) {
491 if (NumBytes
- ArgRegsSaveSize
!= 0)
492 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
493 NumBytes
- ArgRegsSaveSize
, ARM::NoRegister
,
494 MachineInstr::NoFlags
);
496 // Unwind MBBI to point to first LDR / VLDRD.
497 if (MBBI
!= MBB
.begin()) {
500 while (MBBI
!= MBB
.begin() && isCSRestore(*MBBI
, CSRegs
));
501 if (!isCSRestore(*MBBI
, CSRegs
))
505 // Move SP to start of FP callee save spill area.
506 NumBytes
-= (AFI
->getGPRCalleeSavedArea1Size() +
507 AFI
->getGPRCalleeSavedArea2Size() +
508 AFI
->getDPRCalleeSavedAreaSize() +
511 if (AFI
->shouldRestoreSPFromFP()) {
512 NumBytes
= AFI
->getFramePtrSpillOffset() - NumBytes
;
513 // Reset SP based on frame pointer only if the stack frame extends beyond
514 // frame pointer stack slot, the target is ELF and the function has FP, or
515 // the target uses var sized objects.
// R4 is the temporary for the FP-relative computation, so it must not
// be a pristine register here.
517 assert(!MFI
.getPristineRegs(MF
).test(ARM::R4
) &&
518 "No scratch register to restore SP from FP!");
519 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::R4
, FramePtr
, -NumBytes
,
521 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
523 .add(predOps(ARMCC::AL
));
525 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
527 .add(predOps(ARMCC::AL
));
529 // For a large stack frame, we might need a scratch register to store
530 // the size of the frame. We know all callee-save registers are free
531 // at this point in the epilogue, so pick one.
532 unsigned ScratchRegister
= ARM::NoRegister
;
533 bool HasFP
= hasFP(MF
);
534 for (auto &I
: MFI
.getCalleeSavedInfo()) {
535 unsigned Reg
= I
.getReg();
536 if (isARMLowRegister(Reg
) && !(HasFP
&& Reg
== FramePtr
)) {
537 ScratchRegister
= Reg
;
// If the terminator is tBX_RET preceded by a tPOP, try to fold the SP
// update into that pop; otherwise try the instruction at MBBI. When
// folding fails, emit an explicit SP update.
541 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tBX_RET
&&
542 &MBB
.front() != &*MBBI
&& std::prev(MBBI
)->getOpcode() == ARM::tPOP
) {
543 MachineBasicBlock::iterator PMBBI
= std::prev(MBBI
);
544 if (!tryFoldSPUpdateIntoPushPop(STI
, MF
, &*PMBBI
, NumBytes
))
545 emitPrologueEpilogueSPUpdate(MBB
, PMBBI
, TII
, dl
, *RegInfo
, NumBytes
,
546 ScratchRegister
, MachineInstr::NoFlags
);
547 } else if (!tryFoldSPUpdateIntoPushPop(STI
, MF
, &*MBBI
, NumBytes
))
548 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, NumBytes
,
549 ScratchRegister
, MachineInstr::NoFlags
);
553 if (needPopSpecialFixUp(MF
)) {
554 bool Done
= emitPopSpecialFixUp(MBB
, /* DoIt */ true);
556 assert(Done
&& "Emission of the special fixup failed!?");
// Reports whether MBB can host the epilogue. When the function needs the pop
// special fix-up, the answer comes from a dry run of emitPopSpecialFixUp
// (DoIt = false). The const_cast is needed only because that helper takes a
// non-const block.
// NOTE(review): the value returned when no fix-up is needed is on a line not
// visible in this extract.
560 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock
&MBB
) const {
561 if (!needPopSpecialFixUp(*MBB
.getParent()))
564 MachineBasicBlock
*TmpMBB
= const_cast<MachineBasicBlock
*>(&MBB
);
565 return emitPopSpecialFixUp(*TmpMBB
, /* DoIt */ false);
// Determines whether restoring the callee-saved registers of MF needs the
// special pop fix-up. Two conditions are inspected: a non-zero
// argument-register save size, and LR appearing in the callee-saved info.
// NOTE(review): the return statements are not visible in this extract;
// presumably either condition yields true.
568 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction
&MF
) const {
569 ARMFunctionInfo
*AFI
=
570 const_cast<MachineFunction
*>(&MF
)->getInfo
<ARMFunctionInfo
>();
571 if (AFI
->getArgRegsSaveSize())
574 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
575 for (const CalleeSavedInfo
&CSI
: MF
.getFrameInfo().getCalleeSavedInfo())
576 if (CSI
.getReg() == ARM::LR
)
// Scans the registers set in GPRsNoLRSP for ones that UsedRegs reports as
// available, to serve as temporaries when restoring LR.
//   GPRsNoLRSP  - candidate register set.
//   PopFriendly - registers considered usable directly in a pop.
//   UsedRegs    - liveness state used for the availability query.
//   PopReg      - output: per the comments below, the first available
//                 pop-friendly register.
//   TmpReg      - output: per the comments below, an available register that
//                 is not pop-friendly.
// NOTE(review): the assignments to PopReg and TmpReg are on lines not visible
// in this extract.
582 static void findTemporariesForLR(const BitVector
&GPRsNoLRSP
,
583 const BitVector
&PopFriendly
,
584 const LivePhysRegs
&UsedRegs
, unsigned &PopReg
,
585 unsigned &TmpReg
, MachineRegisterInfo
&MRI
) {
587 for (auto Reg
: GPRsNoLRSP
.set_bits()) {
588 if (UsedRegs
.available(MRI
, Reg
)) {
589 // Remember the first pop-friendly register and exit.
590 if (PopFriendly
.test(Reg
)) {
595 // Otherwise, remember that the register will be available to
596 // save a pop-friendly register.
// Handles restoring LR in the epilogue of MBB. Judging by the call sites in
// this file, the second parameter (not visible here) is a flag named DoIt;
// with DoIt = false the function is used as a feasibility query.
// Visible strategy:
//   1. With v5T ops and no argument-register save area, try to restore
//      directly by building a tPOP_RET that defines PC.
//   2. Otherwise compute liveness before the terminator and look for a free
//      pop-friendly register (PopReg) and/or a temporary (TemporaryReg).
//   3. Restore LR through PopReg with either a tLDRspi or a tPOP followed by
//      a tMOVr into LR, saving and restoring PopReg through TemporaryReg
//      when no pop-friendly register was free.
// NOTE(review): many lines are missing from this extract, including the rest
// of the signature, the declaration of PopReg, several returns, `else` lines
// and closing braces. The comments added below cover only visible statements.
602 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock
&MBB
,
604 MachineFunction
&MF
= *MBB
.getParent();
605 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
606 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
607 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
608 const ThumbRegisterInfo
*RegInfo
=
609 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
611 // If MBBI is a return instruction, or is a tPOP followed by a return
612 // instruction in the successor BB, we may be able to directly restore
614 // This is only possible with v5T ops (v4T can't change the Thumb bit via
615 // a POP PC instruction), and only if we do not need to emit any SP update.
616 // Otherwise, we need a temporary register to pop the value
617 // and copy that value into LR.
618 auto MBBI
= MBB
.getFirstTerminator();
619 bool CanRestoreDirectly
= STI
.hasV5TOps() && !ArgRegsSaveSize
;
620 if (CanRestoreDirectly
) {
621 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() != ARM::tB
)
622 CanRestoreDirectly
= (MBBI
->getOpcode() == ARM::tBX_RET
||
623 MBBI
->getOpcode() == ARM::tPOP_RET
);
625 auto MBBI_prev
= MBBI
;
627 assert(MBBI_prev
->getOpcode() == ARM::tPOP
);
628 assert(MBB
.succ_size() == 1);
629 if ((*MBB
.succ_begin())->begin()->getOpcode() == ARM::tBX_RET
)
630 MBBI
= MBBI_prev
; // Replace the final tPOP with a tPOP_RET.
632 CanRestoreDirectly
= false;
636 if (CanRestoreDirectly
) {
637 if (!DoIt
|| MBBI
->getOpcode() == ARM::tPOP_RET
)
639 MachineInstrBuilder MIB
=
640 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
.get(ARM::tPOP_RET
))
641 .add(predOps(ARMCC::AL
));
642 // Copy implicit ops and popped registers, if any.
643 for (auto MO
: MBBI
->operands())
644 if (MO
.isReg() && (MO
.isImplicit() || MO
.isDef()))
646 MIB
.addReg(ARM::PC
, RegState::Define
);
647 // Erase the old instruction (tBX_RET or tPOP).
652 // Look for a temporary register to use.
653 // First, compute the liveness information.
654 const TargetRegisterInfo
&TRI
= *STI
.getRegisterInfo();
655 LivePhysRegs
UsedRegs(TRI
);
656 UsedRegs
.addLiveOuts(MBB
);
657 // The semantic of pristines changed recently and now,
658 // the callee-saved registers that are touched in the function
659 // are not part of the pristines set anymore.
660 // Add those callee-saved now.
661 const MCPhysReg
*CSRegs
= TRI
.getCalleeSavedRegs(&MF
);
// CSRegs is walked until its zero terminator.
662 for (unsigned i
= 0; CSRegs
[i
]; ++i
)
663 UsedRegs
.addReg(CSRegs
[i
]);
665 DebugLoc dl
= DebugLoc();
666 if (MBBI
!= MBB
.end()) {
667 dl
= MBBI
->getDebugLoc();
668 auto InstUpToMBBI
= MBB
.end();
669 while (InstUpToMBBI
!= MBBI
)
670 // The pre-decrement is on purpose here.
671 // We want to have the liveness right before MBBI.
672 UsedRegs
.stepBackward(*--InstUpToMBBI
);
675 // Look for a register that can be directly used in the POP.
677 // And some temporary register, just in case.
678 unsigned TemporaryReg
= 0;
679 BitVector PopFriendly
=
680 TRI
.getAllocatableSet(MF
, TRI
.getRegClass(ARM::tGPRRegClassID
));
681 // R7 may be used as a frame pointer, hence marked as not generally
682 // allocatable, however there's no reason to not use it as a temporary for
684 if (STI
.getFramePointerReg() == ARM::R7
)
685 PopFriendly
.set(ARM::R7
);
687 assert(PopFriendly
.any() && "No allocatable pop-friendly register?!");
688 // Rebuild the GPRs from the high registers because they are removed
689 // from the GPR reg class for thumb1.
690 BitVector GPRsNoLRSP
=
691 TRI
.getAllocatableSet(MF
, TRI
.getRegClass(ARM::hGPRRegClassID
));
692 GPRsNoLRSP
|= PopFriendly
;
693 GPRsNoLRSP
.reset(ARM::LR
);
694 GPRsNoLRSP
.reset(ARM::SP
);
695 GPRsNoLRSP
.reset(ARM::PC
);
696 findTemporariesForLR(GPRsNoLRSP
, PopFriendly
, UsedRegs
, PopReg
, TemporaryReg
,
699 // If we couldn't find a pop-friendly register, try restoring LR before
700 // popping the other callee-saved registers, so we could use one of them as a
702 bool UseLDRSP
= false;
703 if (!PopReg
&& MBBI
!= MBB
.begin()) {
704 auto PrevMBBI
= MBBI
;
706 if (PrevMBBI
->getOpcode() == ARM::tPOP
) {
// Recompute liveness as of before the pop and search again.
707 UsedRegs
.stepBackward(*PrevMBBI
);
708 findTemporariesForLR(GPRsNoLRSP
, PopFriendly
, UsedRegs
, PopReg
,
709 TemporaryReg
, MF
.getRegInfo());
// In query mode, finding neither register is reported to the caller rather
// than asserted (the return itself is not visible in this extract).
717 if (!DoIt
&& !PopReg
&& !TemporaryReg
)
720 assert((PopReg
|| TemporaryReg
) && "Cannot get LR");
723 assert(PopReg
&& "Do not know how to get LR");
724 // Load the LR via LDR tmp, [SP, #off]
725 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLDRspi
))
726 .addReg(PopReg
, RegState::Define
)
728 .addImm(MBBI
->getNumExplicitOperands() - 2)
729 .add(predOps(ARMCC::AL
));
730 // Move from the temporary register to the LR.
731 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
732 .addReg(ARM::LR
, RegState::Define
)
733 .addReg(PopReg
, RegState::Kill
)
734 .add(predOps(ARMCC::AL
));
735 // Advance past the pop instruction.
// Release the argument-register save area plus 4 more bytes.
// NOTE(review): the extra 4 bytes presumably cover the LR slot read by the
// tLDRspi above - confirm.
738 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
739 ArgRegsSaveSize
+ 4, ARM::NoRegister
,
740 MachineInstr::NoFlags
);
// No free pop-friendly register: save the first pop-friendly register in
// TemporaryReg so it can be borrowed as PopReg.
745 assert(!PopReg
&& "Unnecessary MOV is about to be inserted");
746 PopReg
= PopFriendly
.find_first();
747 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
748 .addReg(TemporaryReg
, RegState::Define
)
749 .addReg(PopReg
, RegState::Kill
)
750 .add(predOps(ARMCC::AL
));
753 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPOP_RET
) {
754 // We couldn't use the direct restoration above, so
755 // perform the opposite conversion: tPOP_RET to tPOP.
756 MachineInstrBuilder MIB
=
757 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
.get(ARM::tPOP
))
758 .add(predOps(ARMCC::AL
));
760 for (auto MO
: MBBI
->operands())
761 if (MO
.isReg() && (MO
.isImplicit() || MO
.isDef()) &&
762 MO
.getReg() != ARM::PC
) {
764 if (!MO
.isImplicit())
767 // Is there anything left to pop?
769 MBB
.erase(MIB
.getInstr());
770 // Erase the old instruction.
772 MBBI
= BuildMI(MBB
, MBB
.end(), dl
, TII
.get(ARM::tBX_RET
))
773 .add(predOps(ARMCC::AL
));
776 assert(PopReg
&& "Do not know how to get LR");
777 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tPOP
))
778 .add(predOps(ARMCC::AL
))
779 .addReg(PopReg
, RegState::Define
);
781 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, ArgRegsSaveSize
,
782 ARM::NoRegister
, MachineInstr::NoFlags
);
784 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
785 .addReg(ARM::LR
, RegState::Define
)
786 .addReg(PopReg
, RegState::Kill
)
787 .add(predOps(ARMCC::AL
));
// Restore the borrowed pop-friendly register from TemporaryReg.
790 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
791 .addReg(PopReg
, RegState::Define
)
792 .addReg(TemporaryReg
, RegState::Kill
)
793 .add(predOps(ARMCC::AL
));
798 using ARMRegSet
= std::bitset
<ARM::NUM_TARGET_REGS
>;
800 // Return the first iterator at or after CurrentReg which is present in
801 // EnabledRegs, or OrderEnd if no further registers are in that set. This does
802 // not advance the iterator first, so returns CurrentReg if it is in EnabledRegs.
// CurrentReg and OrderEnd point into an ordered array of register numbers.
// NOTE(review): the loop body and the return statement are not visible in
// this extract.
803 static const unsigned *findNextOrderedReg(const unsigned *CurrentReg
,
804 const ARMRegSet
&EnabledRegs
,
805 const unsigned *OrderEnd
) {
806 while (CurrentReg
!= OrderEnd
&& !EnabledRegs
[*CurrentReg
])
// Emits the callee-saved register spills before MI in MBB.
// Visible strategy:
//   1. Sort the registers in CSI into low registers plus LR (pushed
//      directly) and high registers (which must be copied first), and build
//      the set of low registers usable as copy targets.
//   2. Push the requested registers among r4-r7 and lr with a single tPUSH.
//   3. For the high registers, repeatedly move as many as possible into
//      available copy registers with tMOVr and push those copies, using as
//      many tPUSH instructions as needed.
// NOTE(review): several lines are missing from this extract, including the
// declaration of DL, the early-exit checks, some `else` lines, the
// assignment target on the line numbered 917, the closing braces and the
// final return.
811 bool Thumb1FrameLowering::spillCalleeSavedRegisters(
812 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MI
,
813 ArrayRef
<CalleeSavedInfo
> CSI
, const TargetRegisterInfo
*TRI
) const {
818 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
819 MachineFunction
&MF
= *MBB
.getParent();
820 const ARMBaseRegisterInfo
*RegInfo
= static_cast<const ARMBaseRegisterInfo
*>(
821 MF
.getSubtarget().getRegisterInfo());
823 ARMRegSet LoRegsToSave
; // r0-r7, lr
824 ARMRegSet HiRegsToSave
; // r8-r11
825 ARMRegSet CopyRegs
; // Registers which can be used after pushing
826 // LoRegs for saving HiRegs.
// CSI is walked from its last entry to its first.
828 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
829 unsigned Reg
= CSI
[i
-1].getReg();
831 if (ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) {
832 LoRegsToSave
[Reg
] = true;
833 } else if (ARM::hGPRRegClass
.contains(Reg
) && Reg
!= ARM::LR
) {
834 HiRegsToSave
[Reg
] = true;
836 llvm_unreachable("callee-saved register of unexpected class");
// A saved low register (or LR) can serve as a copy register once pushed,
// provided it is not live-in and is not the frame pointer.
839 if ((ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) &&
840 !MF
.getRegInfo().isLiveIn(Reg
) &&
841 !(hasFP(MF
) && Reg
== RegInfo
->getFrameRegister(MF
)))
842 CopyRegs
[Reg
] = true;
845 // Unused argument registers can be used for the high register saving.
846 for (unsigned ArgReg
: {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
})
847 if (!MF
.getRegInfo().isLiveIn(ArgReg
))
848 CopyRegs
[ArgReg
] = true;
850 // Push the low registers and lr
851 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
852 if (!LoRegsToSave
.none()) {
853 MachineInstrBuilder MIB
=
854 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tPUSH
)).add(predOps(ARMCC::AL
));
855 for (unsigned Reg
: {ARM::R4
, ARM::R5
, ARM::R6
, ARM::R7
, ARM::LR
}) {
856 if (LoRegsToSave
[Reg
]) {
// The pushed register is marked killed unless it is a function live-in.
857 bool isKill
= !MRI
.isLiveIn(Reg
);
858 if (isKill
&& !MRI
.isReserved(Reg
))
861 MIB
.addReg(Reg
, getKillRegState(isKill
));
864 MIB
.setMIFlags(MachineInstr::FrameSetup
);
867 // Push the high registers. There are no store instructions that can access
868 // these registers directly, so we have to move them to low registers, and
869 // push them. This might take multiple pushes, as it is possible for there to
870 // be fewer low registers available than high registers which need saving.
872 // These are in reverse order so that in the case where we need to use
873 // multiple PUSH instructions, the order of the registers on the stack still
874 // matches the unwind info. They need to be switched back to ascending order
875 // before adding to the PUSH instruction.
876 static const unsigned AllCopyRegs
[] = {ARM::LR
, ARM::R7
, ARM::R6
,
877 ARM::R5
, ARM::R4
, ARM::R3
,
878 ARM::R2
, ARM::R1
, ARM::R0
};
879 static const unsigned AllHighRegs
[] = {ARM::R11
, ARM::R10
, ARM::R9
, ARM::R8
};
881 const unsigned *AllCopyRegsEnd
= std::end(AllCopyRegs
);
882 const unsigned *AllHighRegsEnd
= std::end(AllHighRegs
);
884 // Find the first register to save.
885 const unsigned *HiRegToSave
= findNextOrderedReg(
886 std::begin(AllHighRegs
), HiRegsToSave
, AllHighRegsEnd
);
// Each iteration of the outer loop builds one tPUSH and restarts the search
// for copy registers from the beginning of AllCopyRegs.
888 while (HiRegToSave
!= AllHighRegsEnd
) {
889 // Find the first low register to use.
890 const unsigned *CopyReg
=
891 findNextOrderedReg(std::begin(AllCopyRegs
), CopyRegs
, AllCopyRegsEnd
);
893 // Create the PUSH, but don't insert it yet (the MOVs need to come first).
894 MachineInstrBuilder PushMIB
= BuildMI(MF
, DL
, TII
.get(ARM::tPUSH
))
895 .add(predOps(ARMCC::AL
))
896 .setMIFlags(MachineInstr::FrameSetup
);
898 SmallVector
<unsigned, 4> RegsToPush
;
899 while (HiRegToSave
!= AllHighRegsEnd
&& CopyReg
!= AllCopyRegsEnd
) {
900 if (HiRegsToSave
[*HiRegToSave
]) {
901 bool isKill
= !MRI
.isLiveIn(*HiRegToSave
);
902 if (isKill
&& !MRI
.isReserved(*HiRegToSave
))
903 MBB
.addLiveIn(*HiRegToSave
);
905 // Emit a MOV from the high reg to the low reg.
906 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tMOVr
))
907 .addReg(*CopyReg
, RegState::Define
)
908 .addReg(*HiRegToSave
, getKillRegState(isKill
))
909 .add(predOps(ARMCC::AL
))
910 .setMIFlags(MachineInstr::FrameSetup
);
912 // Record the register that must be added to the PUSH.
913 RegsToPush
.push_back(*CopyReg
);
915 CopyReg
= findNextOrderedReg(++CopyReg
, CopyRegs
, AllCopyRegsEnd
);
917 findNextOrderedReg(++HiRegToSave
, HiRegsToSave
, AllHighRegsEnd
);
921 // Add the low registers to the PUSH, in ascending order.
922 for (unsigned Reg
: llvm::reverse(RegsToPush
))
923 PushMIB
.addReg(Reg
, RegState::Kill
);
925 // Insert the PUSH instruction after the MOVs.
926 MBB
.insert(MI
, PushMIB
);
932 bool Thumb1FrameLowering::restoreCalleeSavedRegisters(
933 MachineBasicBlock
&MBB
, MachineBasicBlock::iterator MI
,
934 MutableArrayRef
<CalleeSavedInfo
> CSI
, const TargetRegisterInfo
*TRI
) const {
938 MachineFunction
&MF
= *MBB
.getParent();
939 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
940 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
941 const ARMBaseRegisterInfo
*RegInfo
= static_cast<const ARMBaseRegisterInfo
*>(
942 MF
.getSubtarget().getRegisterInfo());
944 bool isVarArg
= AFI
->getArgRegsSaveSize() > 0;
945 DebugLoc DL
= MI
!= MBB
.end() ? MI
->getDebugLoc() : DebugLoc();
947 ARMRegSet LoRegsToRestore
;
948 ARMRegSet HiRegsToRestore
;
949 // Low registers (r0-r7) which can be used to restore the high registers.
952 for (CalleeSavedInfo I
: CSI
) {
953 unsigned Reg
= I
.getReg();
955 if (ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) {
956 LoRegsToRestore
[Reg
] = true;
957 } else if (ARM::hGPRRegClass
.contains(Reg
) && Reg
!= ARM::LR
) {
958 HiRegsToRestore
[Reg
] = true;
960 llvm_unreachable("callee-saved register of unexpected class");
963 // If this is a low register not used as the frame pointer, we may want to
964 // use it for restoring the high registers.
965 if ((ARM::tGPRRegClass
.contains(Reg
)) &&
966 !(hasFP(MF
) && Reg
== RegInfo
->getFrameRegister(MF
)))
967 CopyRegs
[Reg
] = true;
970 // If this is a return block, we may be able to use some unused return value
971 // registers for restoring the high regs.
972 auto Terminator
= MBB
.getFirstTerminator();
973 if (Terminator
!= MBB
.end() && Terminator
->getOpcode() == ARM::tBX_RET
) {
974 CopyRegs
[ARM::R0
] = true;
975 CopyRegs
[ARM::R1
] = true;
976 CopyRegs
[ARM::R2
] = true;
977 CopyRegs
[ARM::R3
] = true;
978 for (auto Op
: Terminator
->implicit_operands()) {
980 CopyRegs
[Op
.getReg()] = false;
984 static const unsigned AllCopyRegs
[] = {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
,
985 ARM::R4
, ARM::R5
, ARM::R6
, ARM::R7
};
986 static const unsigned AllHighRegs
[] = {ARM::R8
, ARM::R9
, ARM::R10
, ARM::R11
};
988 const unsigned *AllCopyRegsEnd
= std::end(AllCopyRegs
);
989 const unsigned *AllHighRegsEnd
= std::end(AllHighRegs
);
991 // Find the first register to restore.
992 auto HiRegToRestore
= findNextOrderedReg(std::begin(AllHighRegs
),
993 HiRegsToRestore
, AllHighRegsEnd
);
995 while (HiRegToRestore
!= AllHighRegsEnd
) {
996 assert(!CopyRegs
.none());
997 // Find the first low register to use.
999 findNextOrderedReg(std::begin(AllCopyRegs
), CopyRegs
, AllCopyRegsEnd
);
1001 // Create the POP instruction.
1002 MachineInstrBuilder PopMIB
=
1003 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1005 while (HiRegToRestore
!= AllHighRegsEnd
&& CopyReg
!= AllCopyRegsEnd
) {
1006 // Add the low register to the POP.
1007 PopMIB
.addReg(*CopyReg
, RegState::Define
);
1009 // Create the MOV from low to high register.
1010 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tMOVr
))
1011 .addReg(*HiRegToRestore
, RegState::Define
)
1012 .addReg(*CopyReg
, RegState::Kill
)
1013 .add(predOps(ARMCC::AL
));
1015 CopyReg
= findNextOrderedReg(++CopyReg
, CopyRegs
, AllCopyRegsEnd
);
1017 findNextOrderedReg(++HiRegToRestore
, HiRegsToRestore
, AllHighRegsEnd
);
1021 MachineInstrBuilder MIB
=
1022 BuildMI(MF
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1024 bool NeedsPop
= false;
1025 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1026 CalleeSavedInfo
&Info
= CSI
[i
-1];
1027 unsigned Reg
= Info
.getReg();
1029 // High registers (excluding lr) have already been dealt with
1030 if (!(ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
))
1033 if (Reg
== ARM::LR
) {
1034 Info
.setRestored(false);
1035 if (!MBB
.succ_empty() ||
1036 MI
->getOpcode() == ARM::TCRETURNdi
||
1037 MI
->getOpcode() == ARM::TCRETURNri
)
1038 // LR may only be popped into PC, as part of return sequence.
1039 // If this isn't the return sequence, we'll need emitPopSpecialFixUp
1040 // to restore LR the hard way.
1041 // FIXME: if we don't pass any stack arguments it would be actually
1042 // advantageous *and* correct to do the conversion to an ordinary call
1043 // instruction here.
1045 // Special epilogue for vararg functions. See emitEpilogue
1048 // ARMv4T requires BX, see emitEpilogue
1049 if (!STI
.hasV5TOps())
1052 // CMSE entry functions must return via BXNS, see emitEpilogue.
1053 if (AFI
->isCmseNSEntryFunction())
1058 (*MIB
).setDesc(TII
.get(ARM::tPOP_RET
));
1059 if (MI
!= MBB
.end())
1060 MIB
.copyImplicitOps(*MI
);
1063 MIB
.addReg(Reg
, getDefRegState(true));
1067 // It's illegal to emit pop instruction without operands.
1069 MBB
.insert(MI
, &*MIB
);
1071 MF
.DeleteMachineInstr(MIB
);