1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the Thumb1 implementation of TargetFrameLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "Thumb1FrameLowering.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMBaseRegisterInfo.h"
16 #include "ARMMachineFunctionInfo.h"
17 #include "ARMSubtarget.h"
18 #include "Thumb1InstrInfo.h"
19 #include "ThumbRegisterInfo.h"
20 #include "Utils/ARMBaseInfo.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/CodeGen/LivePhysRegs.h"
25 #include "llvm/CodeGen/MachineBasicBlock.h"
26 #include "llvm/CodeGen/MachineFrameInfo.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineInstr.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineModuleInfo.h"
31 #include "llvm/CodeGen/MachineOperand.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/TargetInstrInfo.h"
34 #include "llvm/CodeGen/TargetOpcodes.h"
35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
36 #include "llvm/IR/DebugLoc.h"
37 #include "llvm/MC/MCContext.h"
38 #include "llvm/MC/MCDwarf.h"
39 #include "llvm/MC/MCRegisterInfo.h"
40 #include "llvm/Support/Compiler.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/MathExtras.h"
// Constructs the Thumb1 frame lowering object. The subtarget reference is
// forwarded unchanged to the ARMFrameLowering base-class constructor; the
// constructor body itself is empty.
50 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget
&sti
)
51 : ARMFrameLowering(sti
) {}
// Decides whether the call frame may be treated as a reserved part of the
// stack frame for MF.
//
// The maximum call frame size is read from the MachineFrameInfo and compared
// against half of the imm8 * 4 range (((1 << 8) - 1) * 4 / 2 == 510 bytes).
// The statement guarded by that comparison is not visible in this excerpt.
// The last visible statement returns true exactly when the function has no
// variable-sized stack objects.
53 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction
&MF
) const{
54 const MachineFrameInfo
&MFI
= MF
.getFrameInfo();
55 unsigned CFSize
= MFI
.getMaxCallFrameSize();
56 // It's not always a good idea to include the call frame as part of the
57 // stack frame. ARM (especially Thumb) has a small immediate offset to
58 // address the stack frame. So a large call frame can cause poor codegen
59 // and may even make it impossible to scavenge a register.
60 if (CFSize
>= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
63 return !MFI
.hasVarSizedObjects();
// Emits the instructions that adjust SP by NumBytes in a prologue or
// epilogue, inserting them before MBBI and tagging them with MIFlags.
//
// Large adjustments (|NumBytes| > 508 * 3 == 1524) materialize the amount in
// ScratchReg and add that register to SP with tADDhirr; in that case a
// ScratchReg of ARM::NoRegister is a fatal error. The remaining visible path
// calls emitThumbRegPlusImmediate with SP for both register operands.
//
// NOTE(review): the start of the signature (return type / linkage) and
// several interior lines, including the trailing arguments of two calls, are
// not visible in this excerpt.
67 emitPrologueEpilogueSPUpdate(MachineBasicBlock
&MBB
,
68 MachineBasicBlock::iterator
&MBBI
,
69 const TargetInstrInfo
&TII
, const DebugLoc
&dl
,
70 const ThumbRegisterInfo
&MRI
, int NumBytes
,
71 unsigned ScratchReg
, unsigned MIFlags
) {
72 // If it would take more than three instructions to adjust the stack pointer
73 // using tADDspi/tSUBspi, load an immediate instead.
74 if (std::abs(NumBytes
) > 508 * 3) {
75 // We use a different codepath here from the normal
76 // emitThumbRegPlusImmediate so we don't have to deal with register
77 // scavenging. (Scavenging could try to use the emergency spill slot
78 // before we've actually finished setting up the stack.)
// Without a caller-supplied scratch register the constant cannot be
// materialized, so this is treated as unrecoverable.
79 if (ScratchReg
== ARM::NoRegister
)
80 report_fatal_error("Failed to emit Thumb1 stack adjustment");
81 MachineFunction
&MF
= *MBB
.getParent();
82 const ARMSubtarget
&ST
= MF
.getSubtarget
<ARMSubtarget
>();
// Execute-only subtargets get the constant through a t2MOVi32imm into
// ScratchReg.
83 if (ST
.genExecuteOnly()) {
84 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::t2MOVi32imm
), ScratchReg
)
85 .addImm(NumBytes
).setMIFlags(MIFlags
);
// Constant-pool load of NumBytes into ScratchReg. Presumably this is the
// non-execute-only alternative; the line separating it from the branch
// above is not visible here.
87 MRI
.emitLoadConstPool(MBB
, MBBI
, dl
, ScratchReg
, 0, NumBytes
, ARMCC::AL
,
// SP = SP + ScratchReg; ScratchReg is marked killed by this use.
90 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tADDhirr
), ARM::SP
)
91 .addReg(ARM::SP
).addReg(ScratchReg
, RegState::Kill
)
92 .add(predOps(ARMCC::AL
));
95 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate
97 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::SP
, ARM::SP
, NumBytes
, TII
,
// Adjusts SP by NumBytes around a call site by delegating to
// emitThumbRegPlusImmediate, passing SP for both register operands.
// MIFlags defaults to MachineInstr::NoFlags.
//
// NOTE(review): the trailing arguments of the delegated call are not visible
// in this excerpt.
102 static void emitCallSPUpdate(MachineBasicBlock
&MBB
,
103 MachineBasicBlock::iterator
&MBBI
,
104 const TargetInstrInfo
&TII
, const DebugLoc
&dl
,
105 const ThumbRegisterInfo
&MRI
, int NumBytes
,
106 unsigned MIFlags
= MachineInstr::NoFlags
) {
107 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::SP
, ARM::SP
, NumBytes
, TII
,
// Lowers the call-frame pseudo instruction at I.
//
// When the call frame is not reserved (see hasReservedCallFrame), the frame
// size recorded on the pseudo is rounded up to the stack alignment and an
// explicit SP update is emitted through emitCallSPUpdate: a decrement for
// ADJCALLSTACKDOWN / tADJCALLSTACKDOWN, an increment for ADJCALLSTACKUP /
// tADJCALLSTACKUP (the latter pair is asserted).
//
// NOTE(review): some interior lines and the tail of the function (including
// how the pseudo is removed and what iterator is returned) are not visible in
// this excerpt.
112 MachineBasicBlock::iterator
Thumb1FrameLowering::
113 eliminateCallFramePseudoInstr(MachineFunction
&MF
, MachineBasicBlock
&MBB
,
114 MachineBasicBlock::iterator I
) const {
115 const Thumb1InstrInfo
&TII
=
116 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
117 const ThumbRegisterInfo
*RegInfo
=
118 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
119 if (!hasReservedCallFrame(MF
)) {
120 // If we have alloca, convert as follows:
121 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
122 // ADJCALLSTACKUP -> add, sp, sp, amount
123 MachineInstr
&Old
= *I
;
124 DebugLoc dl
= Old
.getDebugLoc();
125 unsigned Amount
= TII
.getFrameSize(Old
);
127 // We need to keep the stack aligned properly. To do this, we round the
128 // amount of space needed for the outgoing arguments up to the next
129 // alignment boundary.
130 Amount
= alignTo(Amount
, getStackAlignment());
132 // Replace the pseudo instruction with a new instruction...
133 unsigned Opc
= Old
.getOpcode();
134 if (Opc
== ARM::ADJCALLSTACKDOWN
|| Opc
== ARM::tADJCALLSTACKDOWN
) {
// Amount is unsigned; the negated value is converted to the int NumBytes
// parameter of emitCallSPUpdate.
135 emitCallSPUpdate(MBB
, I
, TII
, dl
, *RegInfo
, -Amount
);
137 assert(Opc
== ARM::ADJCALLSTACKUP
|| Opc
== ARM::tADJCALLSTACKUP
);
138 emitCallSPUpdate(MBB
, I
, TII
, dl
, *RegInfo
, Amount
);
145 void Thumb1FrameLowering::emitPrologue(MachineFunction
&MF
,
146 MachineBasicBlock
&MBB
) const {
147 MachineBasicBlock::iterator MBBI
= MBB
.begin();
148 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
149 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
150 MachineModuleInfo
&MMI
= MF
.getMMI();
151 const MCRegisterInfo
*MRI
= MMI
.getContext().getRegisterInfo();
152 const ThumbRegisterInfo
*RegInfo
=
153 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
154 const Thumb1InstrInfo
&TII
=
155 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
157 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
158 unsigned NumBytes
= MFI
.getStackSize();
159 assert(NumBytes
>= ArgRegsSaveSize
&&
160 "ArgRegsSaveSize is included in NumBytes");
161 const std::vector
<CalleeSavedInfo
> &CSI
= MFI
.getCalleeSavedInfo();
163 // Debug location must be unknown since the first debug location is used
164 // to determine the end of the prologue.
167 unsigned FramePtr
= RegInfo
->getFrameRegister(MF
);
168 unsigned BasePtr
= RegInfo
->getBaseRegister();
171 // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
172 NumBytes
= (NumBytes
+ 3) & ~3;
173 MFI
.setStackSize(NumBytes
);
175 // Determine the sizes of each callee-save spill areas and record which frame
176 // belongs to which callee-save spill areas.
177 unsigned GPRCS1Size
= 0, GPRCS2Size
= 0, DPRCSSize
= 0;
178 int FramePtrSpillFI
= 0;
180 if (ArgRegsSaveSize
) {
181 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, -ArgRegsSaveSize
,
182 ARM::NoRegister
, MachineInstr::FrameSetup
);
183 CFAOffset
-= ArgRegsSaveSize
;
184 unsigned CFIIndex
= MF
.addFrameInst(
185 MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset
));
186 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
187 .addCFIIndex(CFIIndex
)
188 .setMIFlags(MachineInstr::FrameSetup
);
191 if (!AFI
->hasStackFrame()) {
192 if (NumBytes
- ArgRegsSaveSize
!= 0) {
193 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
194 -(NumBytes
- ArgRegsSaveSize
),
195 ARM::NoRegister
, MachineInstr::FrameSetup
);
196 CFAOffset
-= NumBytes
- ArgRegsSaveSize
;
197 unsigned CFIIndex
= MF
.addFrameInst(
198 MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset
));
199 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
200 .addCFIIndex(CFIIndex
)
201 .setMIFlags(MachineInstr::FrameSetup
);
206 for (unsigned i
= 0, e
= CSI
.size(); i
!= e
; ++i
) {
207 unsigned Reg
= CSI
[i
].getReg();
208 int FI
= CSI
[i
].getFrameIdx();
214 if (STI
.splitFramePushPop(MF
)) {
225 FramePtrSpillFI
= FI
;
233 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPUSH
) {
237 // Determine starting offsets of spill areas.
238 unsigned DPRCSOffset
= NumBytes
- ArgRegsSaveSize
- (GPRCS1Size
+ GPRCS2Size
+ DPRCSSize
);
239 unsigned GPRCS2Offset
= DPRCSOffset
+ DPRCSSize
;
240 unsigned GPRCS1Offset
= GPRCS2Offset
+ GPRCS2Size
;
241 bool HasFP
= hasFP(MF
);
243 AFI
->setFramePtrSpillOffset(MFI
.getObjectOffset(FramePtrSpillFI
) +
245 AFI
->setGPRCalleeSavedArea1Offset(GPRCS1Offset
);
246 AFI
->setGPRCalleeSavedArea2Offset(GPRCS2Offset
);
247 AFI
->setDPRCalleeSavedAreaOffset(DPRCSOffset
);
248 NumBytes
= DPRCSOffset
;
250 int FramePtrOffsetInBlock
= 0;
251 unsigned adjustedGPRCS1Size
= GPRCS1Size
;
252 if (GPRCS1Size
> 0 && GPRCS2Size
== 0 &&
253 tryFoldSPUpdateIntoPushPop(STI
, MF
, &*std::prev(MBBI
), NumBytes
)) {
254 FramePtrOffsetInBlock
= NumBytes
;
255 adjustedGPRCS1Size
+= NumBytes
;
259 if (adjustedGPRCS1Size
) {
260 CFAOffset
-= adjustedGPRCS1Size
;
261 unsigned CFIIndex
= MF
.addFrameInst(
262 MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset
));
263 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
264 .addCFIIndex(CFIIndex
)
265 .setMIFlags(MachineInstr::FrameSetup
);
267 for (std::vector
<CalleeSavedInfo
>::const_iterator I
= CSI
.begin(),
268 E
= CSI
.end(); I
!= E
; ++I
) {
269 unsigned Reg
= I
->getReg();
270 int FI
= I
->getFrameIdx();
277 if (STI
.splitFramePushPop(MF
))
289 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createOffset(
290 nullptr, MRI
->getDwarfRegNum(Reg
, true), MFI
.getObjectOffset(FI
)));
291 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
292 .addCFIIndex(CFIIndex
)
293 .setMIFlags(MachineInstr::FrameSetup
);
298 // Adjust FP so it points to the stack slot that contains the previous FP.
300 FramePtrOffsetInBlock
+=
301 MFI
.getObjectOffset(FramePtrSpillFI
) + GPRCS1Size
+ ArgRegsSaveSize
;
302 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tADDrSPi
), FramePtr
)
304 .addImm(FramePtrOffsetInBlock
/ 4)
305 .setMIFlags(MachineInstr::FrameSetup
)
306 .add(predOps(ARMCC::AL
));
307 if(FramePtrOffsetInBlock
) {
308 CFAOffset
+= FramePtrOffsetInBlock
;
309 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createDefCfa(
310 nullptr, MRI
->getDwarfRegNum(FramePtr
, true), CFAOffset
));
311 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
312 .addCFIIndex(CFIIndex
)
313 .setMIFlags(MachineInstr::FrameSetup
);
316 MF
.addFrameInst(MCCFIInstruction::createDefCfaRegister(
317 nullptr, MRI
->getDwarfRegNum(FramePtr
, true)));
318 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
319 .addCFIIndex(CFIIndex
)
320 .setMIFlags(MachineInstr::FrameSetup
);
323 // If offset is > 508 then sp cannot be adjusted in a single instruction,
324 // try restoring from fp instead.
325 AFI
->setShouldRestoreSPFromFP(true);
328 // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
329 // and tMOVr instructions. We don't need to add any call frame information
330 // in-between these instructions, because they do not modify the high
333 MachineBasicBlock::iterator OldMBBI
= MBBI
;
334 // Skip a run of tMOVr instructions
335 while (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tMOVr
)
337 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPUSH
) {
340 // We have reached an instruction which is not a push, so the previous
341 // run of tMOVr instructions (which may have been empty) was not part of
342 // the prologue. Reset MBBI back to the last PUSH of the prologue.
348 // Emit call frame information for the callee-saved high registers.
349 for (auto &I
: CSI
) {
350 unsigned Reg
= I
.getReg();
351 int FI
= I
.getFrameIdx();
358 unsigned CFIIndex
= MF
.addFrameInst(MCCFIInstruction::createOffset(
359 nullptr, MRI
->getDwarfRegNum(Reg
, true), MFI
.getObjectOffset(FI
)));
360 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
361 .addCFIIndex(CFIIndex
)
362 .setMIFlags(MachineInstr::FrameSetup
);
371 // Insert it after all the callee-save spills.
373 // For a large stack frame, we might need a scratch register to store
374 // the size of the frame. We know all callee-save registers are free
375 // at this point in the prologue, so pick one.
376 unsigned ScratchRegister
= ARM::NoRegister
;
377 for (auto &I
: CSI
) {
378 unsigned Reg
= I
.getReg();
379 if (isARMLowRegister(Reg
) && !(HasFP
&& Reg
== FramePtr
)) {
380 ScratchRegister
= Reg
;
384 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, -NumBytes
,
385 ScratchRegister
, MachineInstr::FrameSetup
);
387 CFAOffset
-= NumBytes
;
388 unsigned CFIIndex
= MF
.addFrameInst(
389 MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset
));
390 BuildMI(MBB
, MBBI
, dl
, TII
.get(TargetOpcode::CFI_INSTRUCTION
))
391 .addCFIIndex(CFIIndex
)
392 .setMIFlags(MachineInstr::FrameSetup
);
396 if (STI
.isTargetELF() && HasFP
)
397 MFI
.setOffsetAdjustment(MFI
.getOffsetAdjustment() -
398 AFI
->getFramePtrSpillOffset());
400 AFI
->setGPRCalleeSavedArea1Size(GPRCS1Size
);
401 AFI
->setGPRCalleeSavedArea2Size(GPRCS2Size
);
402 AFI
->setDPRCalleeSavedAreaSize(DPRCSSize
);
404 if (RegInfo
->needsStackRealignment(MF
)) {
405 const unsigned NrBitsToZero
= countTrailingZeros(MFI
.getMaxAlignment());
406 // Emit the following sequence, using R4 as a temporary, since we cannot use
407 // SP as a source or destination register for the shifts:
409 // lsrs r4, r4, #NrBitsToZero
410 // lsls r4, r4, #NrBitsToZero
412 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::R4
)
413 .addReg(ARM::SP
, RegState::Kill
)
414 .add(predOps(ARMCC::AL
));
416 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLSRri
), ARM::R4
)
418 .addReg(ARM::R4
, RegState::Kill
)
419 .addImm(NrBitsToZero
)
420 .add(predOps(ARMCC::AL
));
422 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLSLri
), ARM::R4
)
424 .addReg(ARM::R4
, RegState::Kill
)
425 .addImm(NrBitsToZero
)
426 .add(predOps(ARMCC::AL
));
428 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
429 .addReg(ARM::R4
, RegState::Kill
)
430 .add(predOps(ARMCC::AL
));
432 AFI
->setShouldRestoreSPFromFP(true);
435 // If we need a base pointer, set it up here. It's whatever the value
436 // of the stack pointer is at this point. Any variable size objects
437 // will be allocated after this, so we can still use the base pointer
438 // to reference locals.
439 if (RegInfo
->hasBasePointer(MF
))
440 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), BasePtr
)
442 .add(predOps(ARMCC::AL
));
444 // If the frame has variable sized objects then the epilogue must restore
445 // the sp from fp. We can assume there's an FP here since hasFP already
446 // checks for hasVarSizedObjects.
447 if (MFI
.hasVarSizedObjects())
448 AFI
->setShouldRestoreSPFromFP(true);
450 // In some cases, virtual registers have been introduced, e.g. by uses of
451 // emitThumbRegPlusImmInReg.
452 MF
.getProperties().reset(MachineFunctionProperties::Property::NoVRegs
);
// Classifies MI as (part of) a callee-saved register restore sequence.
//
// Three instruction shapes are examined:
//   * tLDRspi whose operand 1 is a frame index and whose operand 0 is a
//     register listed in CSRegs,
//   * tPOP,
//   * tMOVr, which qualifies when the source register is in tGPR or is LR
//     and the destination register is in hGPR.
//
// NOTE(review): the statements executed for the first two shapes and the
// fall-through result are not visible in this excerpt.
455 static bool isCSRestore(MachineInstr
&MI
, const MCPhysReg
*CSRegs
) {
456 if (MI
.getOpcode() == ARM::tLDRspi
&& MI
.getOperand(1).isFI() &&
457 isCalleeSavedRegister(MI
.getOperand(0).getReg(), CSRegs
))
459 else if (MI
.getOpcode() == ARM::tPOP
) {
461 } else if (MI
.getOpcode() == ARM::tMOVr
) {
// Operand 0 is read as the destination and operand 1 as the source.
462 unsigned Dst
= MI
.getOperand(0).getReg();
463 unsigned Src
= MI
.getOperand(1).getReg();
464 return ((ARM::tGPRRegClass
.contains(Src
) || Src
== ARM::LR
) &&
465 ARM::hGPRRegClass
.contains(Dst
));
470 void Thumb1FrameLowering::emitEpilogue(MachineFunction
&MF
,
471 MachineBasicBlock
&MBB
) const {
472 MachineBasicBlock::iterator MBBI
= MBB
.getFirstTerminator();
473 DebugLoc dl
= MBBI
!= MBB
.end() ? MBBI
->getDebugLoc() : DebugLoc();
474 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
475 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
476 const ThumbRegisterInfo
*RegInfo
=
477 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
478 const Thumb1InstrInfo
&TII
=
479 *static_cast<const Thumb1InstrInfo
*>(STI
.getInstrInfo());
481 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
482 int NumBytes
= (int)MFI
.getStackSize();
483 assert((unsigned)NumBytes
>= ArgRegsSaveSize
&&
484 "ArgRegsSaveSize is included in NumBytes");
485 const MCPhysReg
*CSRegs
= RegInfo
->getCalleeSavedRegs(&MF
);
486 unsigned FramePtr
= RegInfo
->getFrameRegister(MF
);
488 if (!AFI
->hasStackFrame()) {
489 if (NumBytes
- ArgRegsSaveSize
!= 0)
490 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
491 NumBytes
- ArgRegsSaveSize
, ARM::NoRegister
,
492 MachineInstr::NoFlags
);
494 // Unwind MBBI to point to first LDR / VLDRD.
495 if (MBBI
!= MBB
.begin()) {
498 while (MBBI
!= MBB
.begin() && isCSRestore(*MBBI
, CSRegs
));
499 if (!isCSRestore(*MBBI
, CSRegs
))
503 // Move SP to start of FP callee save spill area.
504 NumBytes
-= (AFI
->getGPRCalleeSavedArea1Size() +
505 AFI
->getGPRCalleeSavedArea2Size() +
506 AFI
->getDPRCalleeSavedAreaSize() +
509 if (AFI
->shouldRestoreSPFromFP()) {
510 NumBytes
= AFI
->getFramePtrSpillOffset() - NumBytes
;
511 // Reset SP based on frame pointer only if the stack frame extends beyond
512 // frame pointer stack slot, the target is ELF and the function has FP, or
513 // the target uses var sized objects.
515 assert(!MFI
.getPristineRegs(MF
).test(ARM::R4
) &&
516 "No scratch register to restore SP from FP!");
517 emitThumbRegPlusImmediate(MBB
, MBBI
, dl
, ARM::R4
, FramePtr
, -NumBytes
,
519 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
521 .add(predOps(ARMCC::AL
));
523 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
), ARM::SP
)
525 .add(predOps(ARMCC::AL
));
527 // For a large stack frame, we might need a scratch register to store
528 // the size of the frame. We know all callee-save registers are free
529 // at this point in the epilogue, so pick one.
530 unsigned ScratchRegister
= ARM::NoRegister
;
531 bool HasFP
= hasFP(MF
);
532 for (auto &I
: MFI
.getCalleeSavedInfo()) {
533 unsigned Reg
= I
.getReg();
534 if (isARMLowRegister(Reg
) && !(HasFP
&& Reg
== FramePtr
)) {
535 ScratchRegister
= Reg
;
539 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tBX_RET
&&
540 &MBB
.front() != &*MBBI
&& std::prev(MBBI
)->getOpcode() == ARM::tPOP
) {
541 MachineBasicBlock::iterator PMBBI
= std::prev(MBBI
);
542 if (!tryFoldSPUpdateIntoPushPop(STI
, MF
, &*PMBBI
, NumBytes
))
543 emitPrologueEpilogueSPUpdate(MBB
, PMBBI
, TII
, dl
, *RegInfo
, NumBytes
,
544 ScratchRegister
, MachineInstr::NoFlags
);
545 } else if (!tryFoldSPUpdateIntoPushPop(STI
, MF
, &*MBBI
, NumBytes
))
546 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, NumBytes
,
547 ScratchRegister
, MachineInstr::NoFlags
);
551 if (needPopSpecialFixUp(MF
)) {
552 bool Done
= emitPopSpecialFixUp(MBB
, /* DoIt */ true);
554 assert(Done
&& "Emission of the special fixup failed!?");
// Reports whether MBB is usable as an epilogue block.
//
// If the enclosing function does not need the pop special fix-up, an early
// exit is taken (its statement is not visible in this excerpt). Otherwise the
// answer is whatever emitPopSpecialFixUp returns when called with
// DoIt == false.
558 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock
&MBB
) const {
559 if (!needPopSpecialFixUp(*MBB
.getParent()))
// emitPopSpecialFixUp takes a non-const block reference, hence the
// const_cast. NOTE(review): with DoIt == false the callee is presumably
// query-only and leaves MBB unmodified -- confirm against its full body.
562 MachineBasicBlock
*TmpMBB
= const_cast<MachineBasicBlock
*>(&MBB
);
563 return emitPopSpecialFixUp(*TmpMBB
, /* DoIt */ false);
// Determines whether the epilogue of MF needs the special pop fix-up.
//
// Two conditions are inspected: a non-zero argument-register save size, and
// LR appearing among the callee-saved info entries.
//
// NOTE(review): the statements executed when either condition holds, and the
// fall-through result, are not visible in this excerpt -- presumably each
// condition yields true; confirm against the full file.
566 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction
&MF
) const {
// getInfo is invoked through a non-const MachineFunction pointer, hence the
// const_cast on MF.
567 ARMFunctionInfo
*AFI
=
568 const_cast<MachineFunction
*>(&MF
)->getInfo
<ARMFunctionInfo
>();
569 if (AFI
->getArgRegsSaveSize())
572 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
573 for (const CalleeSavedInfo
&CSI
: MF
.getFrameInfo().getCalleeSavedInfo())
574 if (CSI
.getReg() == ARM::LR
)
// Searches for registers that can help restore LR.
//
// Every register set in GPRsNoLRSP is visited; registers contained in
// UsedRegs are ignored. A free register that is also set in PopFriendly is
// handled by the first branch, and any other free register by the second
// branch. Results are reported through by-reference output parameters
// (PopReg is visible here).
//
// NOTE(review): the final parameter of the signature and the assignments
// inside both branches are not visible in this excerpt.
580 static void findTemporariesForLR(const BitVector
&GPRsNoLRSP
,
581 const BitVector
&PopFriendly
,
582 const LivePhysRegs
&UsedRegs
, unsigned &PopReg
,
585 for (auto Reg
: GPRsNoLRSP
.set_bits()) {
586 if (!UsedRegs
.contains(Reg
)) {
587 // Remember the first pop-friendly register and exit.
588 if (PopFriendly
.test(Reg
)) {
593 // Otherwise, remember that the register will be available to
594 // save a pop-friendly register.
600 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock
&MBB
,
602 MachineFunction
&MF
= *MBB
.getParent();
603 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
604 unsigned ArgRegsSaveSize
= AFI
->getArgRegsSaveSize();
605 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
606 const ThumbRegisterInfo
*RegInfo
=
607 static_cast<const ThumbRegisterInfo
*>(STI
.getRegisterInfo());
609 // If MBBI is a return instruction, or is a tPOP followed by a return
610 // instruction in the successor BB, we may be able to directly restore
612 // This is only possible with v5T ops (v4T can't change the Thumb bit via
613 // a POP PC instruction), and only if we do not need to emit any SP update.
614 // Otherwise, we need a temporary register to pop the value
615 // and copy that value into LR.
616 auto MBBI
= MBB
.getFirstTerminator();
617 bool CanRestoreDirectly
= STI
.hasV5TOps() && !ArgRegsSaveSize
;
618 if (CanRestoreDirectly
) {
619 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() != ARM::tB
)
620 CanRestoreDirectly
= (MBBI
->getOpcode() == ARM::tBX_RET
||
621 MBBI
->getOpcode() == ARM::tPOP_RET
);
623 auto MBBI_prev
= MBBI
;
625 assert(MBBI_prev
->getOpcode() == ARM::tPOP
);
626 assert(MBB
.succ_size() == 1);
627 if ((*MBB
.succ_begin())->begin()->getOpcode() == ARM::tBX_RET
)
628 MBBI
= MBBI_prev
; // Replace the final tPOP with a tPOP_RET.
630 CanRestoreDirectly
= false;
634 if (CanRestoreDirectly
) {
635 if (!DoIt
|| MBBI
->getOpcode() == ARM::tPOP_RET
)
637 MachineInstrBuilder MIB
=
638 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
.get(ARM::tPOP_RET
))
639 .add(predOps(ARMCC::AL
));
640 // Copy implicit ops and popped registers, if any.
641 for (auto MO
: MBBI
->operands())
642 if (MO
.isReg() && (MO
.isImplicit() || MO
.isDef()))
644 MIB
.addReg(ARM::PC
, RegState::Define
);
645 // Erase the old instruction (tBX_RET or tPOP).
650 // Look for a temporary register to use.
651 // First, compute the liveness information.
652 const TargetRegisterInfo
&TRI
= *STI
.getRegisterInfo();
653 LivePhysRegs
UsedRegs(TRI
);
654 UsedRegs
.addLiveOuts(MBB
);
655 // The semantic of pristines changed recently and now,
656 // the callee-saved registers that are touched in the function
657 // are not part of the pristines set anymore.
658 // Add those callee-saved now.
659 const MCPhysReg
*CSRegs
= TRI
.getCalleeSavedRegs(&MF
);
660 for (unsigned i
= 0; CSRegs
[i
]; ++i
)
661 UsedRegs
.addReg(CSRegs
[i
]);
663 DebugLoc dl
= DebugLoc();
664 if (MBBI
!= MBB
.end()) {
665 dl
= MBBI
->getDebugLoc();
666 auto InstUpToMBBI
= MBB
.end();
667 while (InstUpToMBBI
!= MBBI
)
668 // The pre-decrement is on purpose here.
669 // We want to have the liveness right before MBBI.
670 UsedRegs
.stepBackward(*--InstUpToMBBI
);
673 // Look for a register that can be directly used in the POP.
675 // And some temporary register, just in case.
676 unsigned TemporaryReg
= 0;
677 BitVector PopFriendly
=
678 TRI
.getAllocatableSet(MF
, TRI
.getRegClass(ARM::tGPRRegClassID
));
679 // R7 may be used as a frame pointer, hence marked as not generally
680 // allocatable, however there's no reason to not use it as a temporary for
682 if (STI
.useR7AsFramePointer())
683 PopFriendly
.set(ARM::R7
);
685 assert(PopFriendly
.any() && "No allocatable pop-friendly register?!");
686 // Rebuild the GPRs from the high registers because they are removed
687 // from the GPR reg class for thumb1.
688 BitVector GPRsNoLRSP
=
689 TRI
.getAllocatableSet(MF
, TRI
.getRegClass(ARM::hGPRRegClassID
));
690 GPRsNoLRSP
|= PopFriendly
;
691 GPRsNoLRSP
.reset(ARM::LR
);
692 GPRsNoLRSP
.reset(ARM::SP
);
693 GPRsNoLRSP
.reset(ARM::PC
);
694 findTemporariesForLR(GPRsNoLRSP
, PopFriendly
, UsedRegs
, PopReg
, TemporaryReg
);
696 // If we couldn't find a pop-friendly register, try restoring LR before
697 // popping the other callee-saved registers, so we could use one of them as a
699 bool UseLDRSP
= false;
700 if (!PopReg
&& MBBI
!= MBB
.begin()) {
701 auto PrevMBBI
= MBBI
;
703 if (PrevMBBI
->getOpcode() == ARM::tPOP
) {
704 UsedRegs
.stepBackward(*PrevMBBI
);
705 findTemporariesForLR(GPRsNoLRSP
, PopFriendly
, UsedRegs
, PopReg
, TemporaryReg
);
713 if (!DoIt
&& !PopReg
&& !TemporaryReg
)
716 assert((PopReg
|| TemporaryReg
) && "Cannot get LR");
719 assert(PopReg
&& "Do not know how to get LR");
720 // Load the LR via LDR tmp, [SP, #off]
721 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tLDRspi
))
722 .addReg(PopReg
, RegState::Define
)
724 .addImm(MBBI
->getNumExplicitOperands() - 2)
725 .add(predOps(ARMCC::AL
));
726 // Move from the temporary register to the LR.
727 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
728 .addReg(ARM::LR
, RegState::Define
)
729 .addReg(PopReg
, RegState::Kill
)
730 .add(predOps(ARMCC::AL
));
731 // Advance past the pop instruction.
734 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
,
735 ArgRegsSaveSize
+ 4, ARM::NoRegister
,
736 MachineInstr::NoFlags
);
741 assert(!PopReg
&& "Unnecessary MOV is about to be inserted");
742 PopReg
= PopFriendly
.find_first();
743 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
744 .addReg(TemporaryReg
, RegState::Define
)
745 .addReg(PopReg
, RegState::Kill
)
746 .add(predOps(ARMCC::AL
));
749 if (MBBI
!= MBB
.end() && MBBI
->getOpcode() == ARM::tPOP_RET
) {
750 // We couldn't use the direct restoration above, so
751 // perform the opposite conversion: tPOP_RET to tPOP.
752 MachineInstrBuilder MIB
=
753 BuildMI(MBB
, MBBI
, MBBI
->getDebugLoc(), TII
.get(ARM::tPOP
))
754 .add(predOps(ARMCC::AL
));
756 for (auto MO
: MBBI
->operands())
757 if (MO
.isReg() && (MO
.isImplicit() || MO
.isDef()) &&
758 MO
.getReg() != ARM::PC
) {
760 if (!MO
.isImplicit())
763 // Is there anything left to pop?
765 MBB
.erase(MIB
.getInstr());
766 // Erase the old instruction.
768 MBBI
= BuildMI(MBB
, MBB
.end(), dl
, TII
.get(ARM::tBX_RET
))
769 .add(predOps(ARMCC::AL
));
772 assert(PopReg
&& "Do not know how to get LR");
773 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tPOP
))
774 .add(predOps(ARMCC::AL
))
775 .addReg(PopReg
, RegState::Define
);
777 emitPrologueEpilogueSPUpdate(MBB
, MBBI
, TII
, dl
, *RegInfo
, ArgRegsSaveSize
,
778 ARM::NoRegister
, MachineInstr::NoFlags
);
780 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
781 .addReg(ARM::LR
, RegState::Define
)
782 .addReg(PopReg
, RegState::Kill
)
783 .add(predOps(ARMCC::AL
));
786 BuildMI(MBB
, MBBI
, dl
, TII
.get(ARM::tMOVr
))
787 .addReg(PopReg
, RegState::Define
)
788 .addReg(TemporaryReg
, RegState::Kill
)
789 .add(predOps(ARMCC::AL
));
794 using ARMRegSet
= std::bitset
<ARM::NUM_TARGET_REGS
>;
796 // Return the first iterator at or after CurrentReg which is present in
797 // EnabledRegs, or OrderEnd if no further registers are in that set. This does
798 // not advance the iterator first, so returns CurrentReg if it is in EnabledRegs.
//
// CurrentReg and OrderEnd delimit a range of register numbers; each
// candidate is dereferenced and used as an index into the EnabledRegs bitset.
// NOTE(review): the loop body and the return statement are not visible in
// this excerpt.
799 static const unsigned *findNextOrderedReg(const unsigned *CurrentReg
,
800 const ARMRegSet
&EnabledRegs
,
801 const unsigned *OrderEnd
) {
// Keep scanning while not at the end and the current register is disabled.
802 while (CurrentReg
!= OrderEnd
&& !EnabledRegs
[*CurrentReg
])
807 bool Thumb1FrameLowering::
808 spillCalleeSavedRegisters(MachineBasicBlock
&MBB
,
809 MachineBasicBlock::iterator MI
,
810 const std::vector
<CalleeSavedInfo
> &CSI
,
811 const TargetRegisterInfo
*TRI
) const {
816 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
817 MachineFunction
&MF
= *MBB
.getParent();
818 const ARMBaseRegisterInfo
*RegInfo
= static_cast<const ARMBaseRegisterInfo
*>(
819 MF
.getSubtarget().getRegisterInfo());
821 ARMRegSet LoRegsToSave
; // r0-r7, lr
822 ARMRegSet HiRegsToSave
; // r8-r11
823 ARMRegSet CopyRegs
; // Registers which can be used after pushing
824 // LoRegs for saving HiRegs.
826 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
827 unsigned Reg
= CSI
[i
-1].getReg();
829 if (ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) {
830 LoRegsToSave
[Reg
] = true;
831 } else if (ARM::hGPRRegClass
.contains(Reg
) && Reg
!= ARM::LR
) {
832 HiRegsToSave
[Reg
] = true;
834 llvm_unreachable("callee-saved register of unexpected class");
837 if ((ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) &&
838 !MF
.getRegInfo().isLiveIn(Reg
) &&
839 !(hasFP(MF
) && Reg
== RegInfo
->getFrameRegister(MF
)))
840 CopyRegs
[Reg
] = true;
843 // Unused argument registers can be used for the high register saving.
844 for (unsigned ArgReg
: {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
})
845 if (!MF
.getRegInfo().isLiveIn(ArgReg
))
846 CopyRegs
[ArgReg
] = true;
848 // Push the low registers and lr
849 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
850 if (!LoRegsToSave
.none()) {
851 MachineInstrBuilder MIB
=
852 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tPUSH
)).add(predOps(ARMCC::AL
));
853 for (unsigned Reg
: {ARM::R4
, ARM::R5
, ARM::R6
, ARM::R7
, ARM::LR
}) {
854 if (LoRegsToSave
[Reg
]) {
855 bool isKill
= !MRI
.isLiveIn(Reg
);
856 if (isKill
&& !MRI
.isReserved(Reg
))
859 MIB
.addReg(Reg
, getKillRegState(isKill
));
862 MIB
.setMIFlags(MachineInstr::FrameSetup
);
865 // Push the high registers. There are no store instructions that can access
866 // these registers directly, so we have to move them to low registers, and
867 // push them. This might take multiple pushes, as it is possible for there to
868 // be fewer low registers available than high registers which need saving.
870 // These are in reverse order so that in the case where we need to use
871 // multiple PUSH instructions, the order of the registers on the stack still
872 // matches the unwind info. They need to be switched back to ascending order
873 // before adding to the PUSH instruction.
874 static const unsigned AllCopyRegs
[] = {ARM::LR
, ARM::R7
, ARM::R6
,
875 ARM::R5
, ARM::R4
, ARM::R3
,
876 ARM::R2
, ARM::R1
, ARM::R0
};
877 static const unsigned AllHighRegs
[] = {ARM::R11
, ARM::R10
, ARM::R9
, ARM::R8
};
879 const unsigned *AllCopyRegsEnd
= std::end(AllCopyRegs
);
880 const unsigned *AllHighRegsEnd
= std::end(AllHighRegs
);
882 // Find the first register to save.
883 const unsigned *HiRegToSave
= findNextOrderedReg(
884 std::begin(AllHighRegs
), HiRegsToSave
, AllHighRegsEnd
);
886 while (HiRegToSave
!= AllHighRegsEnd
) {
887 // Find the first low register to use.
888 const unsigned *CopyReg
=
889 findNextOrderedReg(std::begin(AllCopyRegs
), CopyRegs
, AllCopyRegsEnd
);
891 // Create the PUSH, but don't insert it yet (the MOVs need to come first).
892 MachineInstrBuilder PushMIB
= BuildMI(MF
, DL
, TII
.get(ARM::tPUSH
))
893 .add(predOps(ARMCC::AL
))
894 .setMIFlags(MachineInstr::FrameSetup
);
896 SmallVector
<unsigned, 4> RegsToPush
;
897 while (HiRegToSave
!= AllHighRegsEnd
&& CopyReg
!= AllCopyRegsEnd
) {
898 if (HiRegsToSave
[*HiRegToSave
]) {
899 bool isKill
= !MRI
.isLiveIn(*HiRegToSave
);
900 if (isKill
&& !MRI
.isReserved(*HiRegToSave
))
901 MBB
.addLiveIn(*HiRegToSave
);
903 // Emit a MOV from the high reg to the low reg.
904 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tMOVr
))
905 .addReg(*CopyReg
, RegState::Define
)
906 .addReg(*HiRegToSave
, getKillRegState(isKill
))
907 .add(predOps(ARMCC::AL
))
908 .setMIFlags(MachineInstr::FrameSetup
);
910 // Record the register that must be added to the PUSH.
911 RegsToPush
.push_back(*CopyReg
);
913 CopyReg
= findNextOrderedReg(++CopyReg
, CopyRegs
, AllCopyRegsEnd
);
915 findNextOrderedReg(++HiRegToSave
, HiRegsToSave
, AllHighRegsEnd
);
919 // Add the low registers to the PUSH, in ascending order.
920 for (unsigned Reg
: llvm::reverse(RegsToPush
))
921 PushMIB
.addReg(Reg
, RegState::Kill
);
923 // Insert the PUSH instruction after the MOVs.
924 MBB
.insert(MI
, PushMIB
);
930 bool Thumb1FrameLowering::
931 restoreCalleeSavedRegisters(MachineBasicBlock
&MBB
,
932 MachineBasicBlock::iterator MI
,
933 std::vector
<CalleeSavedInfo
> &CSI
,
934 const TargetRegisterInfo
*TRI
) const {
938 MachineFunction
&MF
= *MBB
.getParent();
939 ARMFunctionInfo
*AFI
= MF
.getInfo
<ARMFunctionInfo
>();
940 const TargetInstrInfo
&TII
= *STI
.getInstrInfo();
941 const ARMBaseRegisterInfo
*RegInfo
= static_cast<const ARMBaseRegisterInfo
*>(
942 MF
.getSubtarget().getRegisterInfo());
944 bool isVarArg
= AFI
->getArgRegsSaveSize() > 0;
945 DebugLoc DL
= MI
!= MBB
.end() ? MI
->getDebugLoc() : DebugLoc();
947 ARMRegSet LoRegsToRestore
;
948 ARMRegSet HiRegsToRestore
;
949 // Low registers (r0-r7) which can be used to restore the high registers.
952 for (CalleeSavedInfo I
: CSI
) {
953 unsigned Reg
= I
.getReg();
955 if (ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
) {
956 LoRegsToRestore
[Reg
] = true;
957 } else if (ARM::hGPRRegClass
.contains(Reg
) && Reg
!= ARM::LR
) {
958 HiRegsToRestore
[Reg
] = true;
960 llvm_unreachable("callee-saved register of unexpected class");
963 // If this is a low register not used as the frame pointer, we may want to
964 // use it for restoring the high registers.
965 if ((ARM::tGPRRegClass
.contains(Reg
)) &&
966 !(hasFP(MF
) && Reg
== RegInfo
->getFrameRegister(MF
)))
967 CopyRegs
[Reg
] = true;
970 // If this is a return block, we may be able to use some unused return value
971 // registers for restoring the high regs.
972 auto Terminator
= MBB
.getFirstTerminator();
973 if (Terminator
!= MBB
.end() && Terminator
->getOpcode() == ARM::tBX_RET
) {
974 CopyRegs
[ARM::R0
] = true;
975 CopyRegs
[ARM::R1
] = true;
976 CopyRegs
[ARM::R2
] = true;
977 CopyRegs
[ARM::R3
] = true;
978 for (auto Op
: Terminator
->implicit_operands()) {
980 CopyRegs
[Op
.getReg()] = false;
984 static const unsigned AllCopyRegs
[] = {ARM::R0
, ARM::R1
, ARM::R2
, ARM::R3
,
985 ARM::R4
, ARM::R5
, ARM::R6
, ARM::R7
};
986 static const unsigned AllHighRegs
[] = {ARM::R8
, ARM::R9
, ARM::R10
, ARM::R11
};
988 const unsigned *AllCopyRegsEnd
= std::end(AllCopyRegs
);
989 const unsigned *AllHighRegsEnd
= std::end(AllHighRegs
);
991 // Find the first register to restore.
992 auto HiRegToRestore
= findNextOrderedReg(std::begin(AllHighRegs
),
993 HiRegsToRestore
, AllHighRegsEnd
);
995 while (HiRegToRestore
!= AllHighRegsEnd
) {
996 assert(!CopyRegs
.none());
997 // Find the first low register to use.
999 findNextOrderedReg(std::begin(AllCopyRegs
), CopyRegs
, AllCopyRegsEnd
);
1001 // Create the POP instruction.
1002 MachineInstrBuilder PopMIB
=
1003 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1005 while (HiRegToRestore
!= AllHighRegsEnd
&& CopyReg
!= AllCopyRegsEnd
) {
1006 // Add the low register to the POP.
1007 PopMIB
.addReg(*CopyReg
, RegState::Define
);
1009 // Create the MOV from low to high register.
1010 BuildMI(MBB
, MI
, DL
, TII
.get(ARM::tMOVr
))
1011 .addReg(*HiRegToRestore
, RegState::Define
)
1012 .addReg(*CopyReg
, RegState::Kill
)
1013 .add(predOps(ARMCC::AL
));
1015 CopyReg
= findNextOrderedReg(++CopyReg
, CopyRegs
, AllCopyRegsEnd
);
1017 findNextOrderedReg(++HiRegToRestore
, HiRegsToRestore
, AllHighRegsEnd
);
1021 MachineInstrBuilder MIB
=
1022 BuildMI(MF
, DL
, TII
.get(ARM::tPOP
)).add(predOps(ARMCC::AL
));
1024 bool NeedsPop
= false;
1025 for (unsigned i
= CSI
.size(); i
!= 0; --i
) {
1026 CalleeSavedInfo
&Info
= CSI
[i
-1];
1027 unsigned Reg
= Info
.getReg();
1029 // High registers (excluding lr) have already been dealt with
1030 if (!(ARM::tGPRRegClass
.contains(Reg
) || Reg
== ARM::LR
))
1033 if (Reg
== ARM::LR
) {
1034 Info
.setRestored(false);
1035 if (!MBB
.succ_empty() ||
1036 MI
->getOpcode() == ARM::TCRETURNdi
||
1037 MI
->getOpcode() == ARM::TCRETURNri
)
1038 // LR may only be popped into PC, as part of return sequence.
1039 // If this isn't the return sequence, we'll need emitPopSpecialFixUp
1040 // to restore LR the hard way.
1041 // FIXME: if we don't pass any stack arguments it would be actually
1042 // advantageous *and* correct to do the conversion to an ordinary call
1043 // instruction here.
1045 // Special epilogue for vararg functions. See emitEpilogue
1048 // ARMv4T requires BX, see emitEpilogue
1049 if (!STI
.hasV5TOps())
1054 (*MIB
).setDesc(TII
.get(ARM::tPOP_RET
));
1055 if (MI
!= MBB
.end())
1056 MIB
.copyImplicitOps(*MI
);
1059 MIB
.addReg(Reg
, getDefRegState(true));
1063 // It's illegal to emit pop instruction without operands.
1065 MBB
.insert(MI
, &*MIB
);
1067 MF
.DeleteMachineInstr(MIB
);