[MIPS GlobalISel] Register bank select for G_STORE. Select i64 store
[llvm-complete.git] / lib / Target / PowerPC / PPCFrameLowering.cpp
blobebfb1ef7f49ba3214bc33ce5adae0e5ed3449ad2
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
29 using namespace llvm;
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37 cl::desc("Enable spills in prologue to vector registers."),
38 cl::init(false), cl::Hidden);
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50 if (STI.isDarwinABI())
51 return STI.isPPC64() ? 16 : 8;
52 // SVR4 ABI:
53 return STI.isPPC64() ? 16 : 4;
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57 return STI.isELFv2ABI() ? 24 : 40;
60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
61 // For the Darwin ABI:
62 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
63 // for saving the frame pointer (if needed.) While the published ABI has
64 // not used this slot since at least MacOSX 10.2, there is older code
65 // around that does use it, and that needs to continue to work.
66 if (STI.isDarwinABI())
67 return STI.isPPC64() ? -8U : -4U;
69 // SVR4 ABI: First slot in the general register save area.
70 return STI.isPPC64() ? -8U : -4U;
73 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
74 if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
75 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
77 // 32-bit SVR4 ABI:
78 return 8;
81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
82 if (STI.isDarwinABI())
83 return STI.isPPC64() ? -16U : -8U;
85 // SVR4 ABI: First slot in the general register save area.
86 return STI.isPPC64()
87 ? -16U
88 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
91 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
92 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
93 STI.getPlatformStackAlignment(), 0),
94 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
95 TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
96 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
97 LinkageSize(computeLinkageSize(Subtarget)),
98 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
102 unsigned &NumEntries) const {
103 if (Subtarget.isDarwinABI()) {
104 NumEntries = 1;
105 if (Subtarget.isPPC64()) {
106 static const SpillSlot darwin64Offsets = {PPC::X31, -8};
107 return &darwin64Offsets;
108 } else {
109 static const SpillSlot darwinOffsets = {PPC::R31, -4};
110 return &darwinOffsets;
114 // Early exit if not using the SVR4 ABI.
115 if (!Subtarget.isSVR4ABI()) {
116 NumEntries = 0;
117 return nullptr;
120 // Note that the offsets here overlap, but this is fixed up in
121 // processFunctionBeforeFrameFinalized.
123 static const SpillSlot Offsets[] = {
124 // Floating-point register save area offsets.
125 {PPC::F31, -8},
126 {PPC::F30, -16},
127 {PPC::F29, -24},
128 {PPC::F28, -32},
129 {PPC::F27, -40},
130 {PPC::F26, -48},
131 {PPC::F25, -56},
132 {PPC::F24, -64},
133 {PPC::F23, -72},
134 {PPC::F22, -80},
135 {PPC::F21, -88},
136 {PPC::F20, -96},
137 {PPC::F19, -104},
138 {PPC::F18, -112},
139 {PPC::F17, -120},
140 {PPC::F16, -128},
141 {PPC::F15, -136},
142 {PPC::F14, -144},
144 // General register save area offsets.
145 {PPC::R31, -4},
146 {PPC::R30, -8},
147 {PPC::R29, -12},
148 {PPC::R28, -16},
149 {PPC::R27, -20},
150 {PPC::R26, -24},
151 {PPC::R25, -28},
152 {PPC::R24, -32},
153 {PPC::R23, -36},
154 {PPC::R22, -40},
155 {PPC::R21, -44},
156 {PPC::R20, -48},
157 {PPC::R19, -52},
158 {PPC::R18, -56},
159 {PPC::R17, -60},
160 {PPC::R16, -64},
161 {PPC::R15, -68},
162 {PPC::R14, -72},
164 // CR save area offset. We map each of the nonvolatile CR fields
165 // to the slot for CR2, which is the first of the nonvolatile CR
166 // fields to be assigned, so that we only allocate one save slot.
167 // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
168 {PPC::CR2, -4},
170 // VRSAVE save area offset.
171 {PPC::VRSAVE, -4},
173 // Vector register save area
174 {PPC::V31, -16},
175 {PPC::V30, -32},
176 {PPC::V29, -48},
177 {PPC::V28, -64},
178 {PPC::V27, -80},
179 {PPC::V26, -96},
180 {PPC::V25, -112},
181 {PPC::V24, -128},
182 {PPC::V23, -144},
183 {PPC::V22, -160},
184 {PPC::V21, -176},
185 {PPC::V20, -192},
187 // SPE register save area (overlaps Vector save area).
188 {PPC::S31, -8},
189 {PPC::S30, -16},
190 {PPC::S29, -24},
191 {PPC::S28, -32},
192 {PPC::S27, -40},
193 {PPC::S26, -48},
194 {PPC::S25, -56},
195 {PPC::S24, -64},
196 {PPC::S23, -72},
197 {PPC::S22, -80},
198 {PPC::S21, -88},
199 {PPC::S20, -96},
200 {PPC::S19, -104},
201 {PPC::S18, -112},
202 {PPC::S17, -120},
203 {PPC::S16, -128},
204 {PPC::S15, -136},
205 {PPC::S14, -144}};
207 static const SpillSlot Offsets64[] = {
208 // Floating-point register save area offsets.
209 {PPC::F31, -8},
210 {PPC::F30, -16},
211 {PPC::F29, -24},
212 {PPC::F28, -32},
213 {PPC::F27, -40},
214 {PPC::F26, -48},
215 {PPC::F25, -56},
216 {PPC::F24, -64},
217 {PPC::F23, -72},
218 {PPC::F22, -80},
219 {PPC::F21, -88},
220 {PPC::F20, -96},
221 {PPC::F19, -104},
222 {PPC::F18, -112},
223 {PPC::F17, -120},
224 {PPC::F16, -128},
225 {PPC::F15, -136},
226 {PPC::F14, -144},
228 // General register save area offsets.
229 {PPC::X31, -8},
230 {PPC::X30, -16},
231 {PPC::X29, -24},
232 {PPC::X28, -32},
233 {PPC::X27, -40},
234 {PPC::X26, -48},
235 {PPC::X25, -56},
236 {PPC::X24, -64},
237 {PPC::X23, -72},
238 {PPC::X22, -80},
239 {PPC::X21, -88},
240 {PPC::X20, -96},
241 {PPC::X19, -104},
242 {PPC::X18, -112},
243 {PPC::X17, -120},
244 {PPC::X16, -128},
245 {PPC::X15, -136},
246 {PPC::X14, -144},
248 // VRSAVE save area offset.
249 {PPC::VRSAVE, -4},
251 // Vector register save area
252 {PPC::V31, -16},
253 {PPC::V30, -32},
254 {PPC::V29, -48},
255 {PPC::V28, -64},
256 {PPC::V27, -80},
257 {PPC::V26, -96},
258 {PPC::V25, -112},
259 {PPC::V24, -128},
260 {PPC::V23, -144},
261 {PPC::V22, -160},
262 {PPC::V21, -176},
263 {PPC::V20, -192}};
265 if (Subtarget.isPPC64()) {
266 NumEntries = array_lengthof(Offsets64);
268 return Offsets64;
269 } else {
270 NumEntries = array_lengthof(Offsets);
272 return Offsets;
276 /// RemoveVRSaveCode - We have found that this function does not need any code
277 /// to manipulate the VRSAVE register, even though it uses vector registers.
278 /// This can happen when the only registers used are known to be live in or out
279 /// of the function. Remove all of the VRSAVE related code from the function.
280 /// FIXME: The removal of the code results in a compile failure at -O0 when the
281 /// function contains a function call, as the GPR containing original VRSAVE
282 /// contents is spilled and reloaded around the call. Without the prolog code,
283 /// the spill instruction refers to an undefined register. This code needs
284 /// to account for all uses of that GPR.
285 static void RemoveVRSaveCode(MachineInstr &MI) {
286 MachineBasicBlock *Entry = MI.getParent();
287 MachineFunction *MF = Entry->getParent();
289 // We know that the MTVRSAVE instruction immediately follows MI. Remove it.
290 MachineBasicBlock::iterator MBBI = MI;
291 ++MBBI;
292 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
293 MBBI->eraseFromParent();
295 bool RemovedAllMTVRSAVEs = true;
296 // See if we can find and remove the MTVRSAVE instruction from all of the
297 // epilog blocks.
298 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
299 // If last instruction is a return instruction, add an epilogue
300 if (I->isReturnBlock()) {
301 bool FoundIt = false;
302 for (MBBI = I->end(); MBBI != I->begin(); ) {
303 --MBBI;
304 if (MBBI->getOpcode() == PPC::MTVRSAVE) {
305 MBBI->eraseFromParent(); // remove it.
306 FoundIt = true;
307 break;
310 RemovedAllMTVRSAVEs &= FoundIt;
314 // If we found and removed all MTVRSAVE instructions, remove the read of
315 // VRSAVE as well.
316 if (RemovedAllMTVRSAVEs) {
317 MBBI = MI;
318 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
319 --MBBI;
320 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
321 MBBI->eraseFromParent();
324 // Finally, nuke the UPDATE_VRSAVE.
325 MI.eraseFromParent();
328 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
329 // instruction selector. Based on the vector registers that have been used,
330 // transform this into the appropriate ORI instruction.
331 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
332 MachineFunction *MF = MI.getParent()->getParent();
333 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
334 DebugLoc dl = MI.getDebugLoc();
336 const MachineRegisterInfo &MRI = MF->getRegInfo();
337 unsigned UsedRegMask = 0;
338 for (unsigned i = 0; i != 32; ++i)
339 if (MRI.isPhysRegModified(VRRegNo[i]))
340 UsedRegMask |= 1 << (31-i);
342 // Live in and live out values already must be in the mask, so don't bother
343 // marking them.
344 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
345 unsigned RegNo = TRI->getEncodingValue(LI.first);
346 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg.
347 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked.
350 // Live out registers appear as use operands on return instructions.
351 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
352 UsedRegMask != 0 && BI != BE; ++BI) {
353 const MachineBasicBlock &MBB = *BI;
354 if (!MBB.isReturnBlock())
355 continue;
356 const MachineInstr &Ret = MBB.back();
357 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
358 const MachineOperand &MO = Ret.getOperand(I);
359 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
360 continue;
361 unsigned RegNo = TRI->getEncodingValue(MO.getReg());
362 UsedRegMask &= ~(1 << (31-RegNo));
366 // If no registers are used, turn this into a copy.
367 if (UsedRegMask == 0) {
368 // Remove all VRSAVE code.
369 RemoveVRSaveCode(MI);
370 return;
373 unsigned SrcReg = MI.getOperand(1).getReg();
374 unsigned DstReg = MI.getOperand(0).getReg();
376 if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
377 if (DstReg != SrcReg)
378 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
379 .addReg(SrcReg)
380 .addImm(UsedRegMask);
381 else
382 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
383 .addReg(SrcReg, RegState::Kill)
384 .addImm(UsedRegMask);
385 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
386 if (DstReg != SrcReg)
387 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
388 .addReg(SrcReg)
389 .addImm(UsedRegMask >> 16);
390 else
391 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
392 .addReg(SrcReg, RegState::Kill)
393 .addImm(UsedRegMask >> 16);
394 } else {
395 if (DstReg != SrcReg)
396 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
397 .addReg(SrcReg)
398 .addImm(UsedRegMask >> 16);
399 else
400 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
401 .addReg(SrcReg, RegState::Kill)
402 .addImm(UsedRegMask >> 16);
404 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
405 .addReg(DstReg, RegState::Kill)
406 .addImm(UsedRegMask & 0xFFFF);
409 // Remove the old UPDATE_VRSAVE instruction.
410 MI.eraseFromParent();
413 static bool spillsCR(const MachineFunction &MF) {
414 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
415 return FuncInfo->isCRSpilled();
418 static bool spillsVRSAVE(const MachineFunction &MF) {
419 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
420 return FuncInfo->isVRSAVESpilled();
423 static bool hasSpills(const MachineFunction &MF) {
424 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
425 return FuncInfo->hasSpills();
428 static bool hasNonRISpills(const MachineFunction &MF) {
429 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
430 return FuncInfo->hasNonRISpills();
433 /// MustSaveLR - Return true if this function requires that we save the LR
434 /// register onto the stack in the prolog and restore it in the epilog of the
435 /// function.
436 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
437 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
439 // We need a save/restore of LR if there is any def of LR (which is
440 // defined by calls, including the PIC setup sequence), or if there is
441 // some use of the LR stack slot (e.g. for builtin_return_address).
442 // (LR comes in 32 and 64 bit versions.)
443 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
444 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
447 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
448 /// call frame size. Update the MachineFunction object with the stack size.
449 unsigned
450 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
451 bool UseEstimate) const {
452 unsigned NewMaxCallFrameSize = 0;
453 unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
454 &NewMaxCallFrameSize);
455 MF.getFrameInfo().setStackSize(FrameSize);
456 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
457 return FrameSize;
460 /// determineFrameLayout - Determine the size of the frame and maximum call
461 /// frame size.
462 unsigned
463 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
464 bool UseEstimate,
465 unsigned *NewMaxCallFrameSize) const {
466 const MachineFrameInfo &MFI = MF.getFrameInfo();
467 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
469 // Get the number of bytes to allocate from the FrameInfo
470 unsigned FrameSize =
471 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
473 // Get stack alignments. The frame must be aligned to the greatest of these:
474 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
475 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
476 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
478 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
480 unsigned LR = RegInfo->getRARegister();
481 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
482 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
483 !MFI.adjustsStack() && // No calls.
484 !MustSaveLR(MF, LR) && // No need to save LR.
485 !FI->mustSaveTOC() && // No need to save TOC.
486 !RegInfo->hasBasePointer(MF); // No special alignment.
488 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
489 // code if all local vars are reg-allocated.
490 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
492 // Check whether we can skip adjusting the stack pointer (by using red zone)
493 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
494 // No need for frame
495 return 0;
498 // Get the maximum call frame size of all the calls.
499 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
501 // Maximum call frame needs to be at least big enough for linkage area.
502 unsigned minCallFrameSize = getLinkageSize();
503 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
505 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
506 // that allocations will be aligned.
507 if (MFI.hasVarSizedObjects())
508 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
510 // Update the new max call frame size if the caller passes in a valid pointer.
511 if (NewMaxCallFrameSize)
512 *NewMaxCallFrameSize = maxCallFrameSize;
514 // Include call frame size in total.
515 FrameSize += maxCallFrameSize;
517 // Make sure the frame is aligned.
518 FrameSize = (FrameSize + AlignMask) & ~AlignMask;
520 return FrameSize;
523 // hasFP - Return true if the specified function actually has a dedicated frame
524 // pointer register.
525 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
526 const MachineFrameInfo &MFI = MF.getFrameInfo();
527 // FIXME: This is pretty much broken by design: hasFP() might be called really
528 // early, before the stack layout was calculated and thus hasFP() might return
529 // true or false here depending on the time of call.
530 return (MFI.getStackSize()) && needsFP(MF);
533 // needsFP - Return true if the specified function should have a dedicated frame
534 // pointer register. This is true if the function has variable sized allocas or
535 // if frame pointer elimination is disabled.
536 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
537 const MachineFrameInfo &MFI = MF.getFrameInfo();
539 // Naked functions have no stack frame pushed, so we don't have a frame
540 // pointer.
541 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
542 return false;
544 return MF.getTarget().Options.DisableFramePointerElim(MF) ||
545 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
546 (MF.getTarget().Options.GuaranteedTailCallOpt &&
547 MF.getInfo<PPCFunctionInfo>()->hasFastCall());
550 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
551 bool is31 = needsFP(MF);
552 unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
553 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
555 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
556 bool HasBP = RegInfo->hasBasePointer(MF);
557 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
558 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
560 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
561 BI != BE; ++BI)
562 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
563 --MBBI;
564 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
565 MachineOperand &MO = MBBI->getOperand(I);
566 if (!MO.isReg())
567 continue;
569 switch (MO.getReg()) {
570 case PPC::FP:
571 MO.setReg(FPReg);
572 break;
573 case PPC::FP8:
574 MO.setReg(FP8Reg);
575 break;
576 case PPC::BP:
577 MO.setReg(BPReg);
578 break;
579 case PPC::BP8:
580 MO.setReg(BP8Reg);
581 break;
588 /* This function will do the following:
589 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
590 respectively (defaults recommended by the ABI) and return true
591 - If MBB is not an entry block, initialize the register scavenger and look
592 for available registers.
593 - If the defaults (R0/R12) are available, return true
594 - If TwoUniqueRegsRequired is set to true, it looks for two unique
595 registers. Otherwise, look for a single available register.
596 - If the required registers are found, set SR1 and SR2 and return true.
597 - If the required registers are not found, set SR2 or both SR1 and SR2 to
598 PPC::NoRegister and return false.
600 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
601 is not set, this function will attempt to find two different registers, but
602 still return true if only one register is available (and set SR1 == SR2).
604 bool
605 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
606 bool UseAtEnd,
607 bool TwoUniqueRegsRequired,
608 unsigned *SR1,
609 unsigned *SR2) const {
610 RegScavenger RS;
611 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
612 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
614 // Set the defaults for the two scratch registers.
615 if (SR1)
616 *SR1 = R0;
618 if (SR2) {
619 assert (SR1 && "Asking for the second scratch register but not the first?");
620 *SR2 = R12;
623 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
624 if ((UseAtEnd && MBB->isReturnBlock()) ||
625 (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
626 return true;
628 RS.enterBasicBlock(*MBB);
630 if (UseAtEnd && !MBB->empty()) {
631 // The scratch register will be used at the end of the block, so must
632 // consider all registers used within the block
634 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
635 // If no terminator, back iterator up to previous instruction.
636 if (MBBI == MBB->end())
637 MBBI = std::prev(MBBI);
639 if (MBBI != MBB->begin())
640 RS.forward(MBBI);
643 // If the two registers are available, we're all good.
644 // Note that we only return here if both R0 and R12 are available because
645 // although the function may not require two unique registers, it may benefit
646 // from having two so we should try to provide them.
647 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
648 return true;
650 // Get the list of callee-saved registers for the target.
651 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
652 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
654 // Get all the available registers in the block.
655 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
656 &PPC::GPRCRegClass);
658 // We shouldn't use callee-saved registers as scratch registers as they may be
659 // available when looking for a candidate block for shrink wrapping but not
660 // available when the actual prologue/epilogue is being emitted because they
661 // were added as live-in to the prologue block by PrologueEpilogueInserter.
662 for (int i = 0; CSRegs[i]; ++i)
663 BV.reset(CSRegs[i]);
665 // Set the first scratch register to the first available one.
666 if (SR1) {
667 int FirstScratchReg = BV.find_first();
668 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
671 // If there is another one available, set the second scratch register to that.
672 // Otherwise, set it to either PPC::NoRegister if this function requires two
673 // or to whatever SR1 is set to if this function doesn't require two.
674 if (SR2) {
675 int SecondScratchReg = BV.find_next(*SR1);
676 if (SecondScratchReg != -1)
677 *SR2 = SecondScratchReg;
678 else
679 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
682 // Now that we've done our best to provide both registers, double check
683 // whether we were unable to provide enough.
684 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
685 return false;
687 return true;
690 // We need a scratch register for spilling LR and for spilling CR. By default,
691 // we use two scratch registers to hide latency. However, if only one scratch
692 // register is available, we can adjust for that by not overlapping the spill
693 // code. However, if we need to realign the stack (i.e. have a base pointer)
694 // and the stack frame is large, we need two scratch registers.
695 bool
696 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
697 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
698 MachineFunction &MF = *(MBB->getParent());
699 bool HasBP = RegInfo->hasBasePointer(MF);
700 unsigned FrameSize = determineFrameLayout(MF);
701 int NegFrameSize = -FrameSize;
702 bool IsLargeFrame = !isInt<16>(NegFrameSize);
703 MachineFrameInfo &MFI = MF.getFrameInfo();
704 unsigned MaxAlign = MFI.getMaxAlignment();
705 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
707 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
710 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
711 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
713 return findScratchRegister(TmpMBB, false,
714 twoUniqueScratchRegsRequired(TmpMBB));
717 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
718 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
720 return findScratchRegister(TmpMBB, true);
723 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
724 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
725 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
727 // Abort if there is no register info or function info.
728 if (!RegInfo || !FI)
729 return false;
731 // Only move the stack update on ELFv2 ABI and PPC64.
732 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
733 return false;
735 // Check the frame size first and return false if it does not fit the
736 // requirements.
737 // We need a non-zero frame size as well as a frame that will fit in the red
738 // zone. This is because by moving the stack pointer update we are now storing
739 // to the red zone until the stack pointer is updated. If we get an interrupt
740 // inside the prologue but before the stack update we now have a number of
741 // stores to the red zone and those stores must all fit.
742 MachineFrameInfo &MFI = MF.getFrameInfo();
743 unsigned FrameSize = MFI.getStackSize();
744 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
745 return false;
747 // Frame pointers and base pointers complicate matters so don't do anything
748 // if we have them. For example having a frame pointer will sometimes require
749 // a copy of r1 into r31 and that makes keeping track of updates to r1 more
750 // difficult.
751 if (hasFP(MF) || RegInfo->hasBasePointer(MF))
752 return false;
754 // Calls to fast_cc functions use different rules for passing parameters on
755 // the stack from the ABI and using PIC base in the function imposes
756 // similar restrictions to using the base pointer. It is not generally safe
757 // to move the stack pointer update in these situations.
758 if (FI->hasFastCall() || FI->usesPICBase())
759 return false;
761 // Finally we can move the stack update if we do not require register
762 // scavenging. Register scavenging can introduce more spills and so
763 // may make the frame size larger than we have computed.
764 return !RegInfo->requiresFrameIndexScavenging(MF);
767 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
768 MachineBasicBlock &MBB) const {
769 MachineBasicBlock::iterator MBBI = MBB.begin();
770 MachineFrameInfo &MFI = MF.getFrameInfo();
771 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
772 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
774 MachineModuleInfo &MMI = MF.getMMI();
775 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
776 DebugLoc dl;
777 bool needsCFI = MMI.hasDebugInfo() ||
778 MF.getFunction().needsUnwindTableEntry();
780 // Get processor type.
781 bool isPPC64 = Subtarget.isPPC64();
782 // Get the ABI.
783 bool isSVR4ABI = Subtarget.isSVR4ABI();
784 bool isELFv2ABI = Subtarget.isELFv2ABI();
785 assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
786 "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
788 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it,
789 // process it.
790 if (!isSVR4ABI)
791 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
792 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
793 HandleVRSaveUpdate(*MBBI, TII);
794 break;
798 // Move MBBI back to the beginning of the prologue block.
799 MBBI = MBB.begin();
801 // Work out frame sizes.
802 unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
803 int NegFrameSize = -FrameSize;
804 if (!isInt<32>(NegFrameSize))
805 llvm_unreachable("Unhandled stack size!");
807 if (MFI.isFrameAddressTaken())
808 replaceFPWithRealFP(MF);
810 // Check if the link register (LR) must be saved.
811 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
812 bool MustSaveLR = FI->mustSaveLR();
813 bool MustSaveTOC = FI->mustSaveTOC();
814 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
815 bool MustSaveCR = !MustSaveCRs.empty();
816 // Do we have a frame pointer and/or base pointer for this function?
817 bool HasFP = hasFP(MF);
818 bool HasBP = RegInfo->hasBasePointer(MF);
819 bool HasRedZone = isPPC64 || !isSVR4ABI;
821 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
822 unsigned BPReg = RegInfo->getBaseRegister(MF);
823 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
824 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR;
825 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2;
826 unsigned ScratchReg = 0;
827 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
828 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
829 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
830 : PPC::MFLR );
831 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
832 : PPC::STW );
833 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
834 : PPC::STWU );
835 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
836 : PPC::STWUX);
837 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
838 : PPC::LIS );
839 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
840 : PPC::ORI );
841 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
842 : PPC::OR );
843 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
844 : PPC::SUBFC);
845 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
846 : PPC::SUBFIC);
848 // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
849 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
850 // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
851 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
852 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
853 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
855 // Using the same bool variable as below to suppress compiler warnings.
856 bool SingleScratchReg =
857 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
858 &ScratchReg, &TempReg);
859 assert(SingleScratchReg &&
860 "Required number of registers not available in this block");
862 SingleScratchReg = ScratchReg == TempReg;
864 int LROffset = getReturnSaveOffset();
866 int FPOffset = 0;
867 if (HasFP) {
868 if (isSVR4ABI) {
869 MachineFrameInfo &MFI = MF.getFrameInfo();
870 int FPIndex = FI->getFramePointerSaveIndex();
871 assert(FPIndex && "No Frame Pointer Save Slot!");
872 FPOffset = MFI.getObjectOffset(FPIndex);
873 } else {
874 FPOffset = getFramePointerSaveOffset();
878 int BPOffset = 0;
879 if (HasBP) {
880 if (isSVR4ABI) {
881 MachineFrameInfo &MFI = MF.getFrameInfo();
882 int BPIndex = FI->getBasePointerSaveIndex();
883 assert(BPIndex && "No Base Pointer Save Slot!");
884 BPOffset = MFI.getObjectOffset(BPIndex);
885 } else {
886 BPOffset = getBasePointerSaveOffset();
890 int PBPOffset = 0;
891 if (FI->usesPICBase()) {
892 MachineFrameInfo &MFI = MF.getFrameInfo();
893 int PBPIndex = FI->getPICBasePointerSaveIndex();
894 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
895 PBPOffset = MFI.getObjectOffset(PBPIndex);
898 // Get stack alignments.
899 unsigned MaxAlign = MFI.getMaxAlignment();
900 if (HasBP && MaxAlign > 1)
901 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
902 "Invalid alignment!");
904 // Frames of 32KB & larger require special handling because they cannot be
905 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
906 bool isLargeFrame = !isInt<16>(NegFrameSize);
908 assert((isPPC64 || !MustSaveCR) &&
909 "Prologue CR saving supported only in 64-bit mode");
911 // Check if we can move the stack update instruction (stdu) down the prologue
912 // past the callee saves. Hopefully this will avoid the situation where the
913 // saves are waiting for the update on the store with update to complete.
914 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
915 bool MovingStackUpdateDown = false;
917 // Check if we can move the stack update.
918 if (stackUpdateCanBeMoved(MF)) {
919 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
920 for (CalleeSavedInfo CSI : Info) {
921 int FrIdx = CSI.getFrameIdx();
922 // If the frame index is not negative the callee saved info belongs to a
923 // stack object that is not a fixed stack object. We ignore non-fixed
924 // stack objects because we won't move the stack update pointer past them.
925 if (FrIdx >= 0)
926 continue;
928 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
929 StackUpdateLoc++;
930 MovingStackUpdateDown = true;
931 } else {
932 // We need all of the Frame Indices to meet these conditions.
933 // If they do not, abort the whole operation.
934 StackUpdateLoc = MBBI;
935 MovingStackUpdateDown = false;
936 break;
940 // If the operation was not aborted then update the object offset.
941 if (MovingStackUpdateDown) {
942 for (CalleeSavedInfo CSI : Info) {
943 int FrIdx = CSI.getFrameIdx();
944 if (FrIdx < 0)
945 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
950 // If we need to spill the CR and the LR but we don't have two separate
951 // registers available, we must spill them one at a time
952 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
953 // In the ELFv2 ABI, we are not required to save all CR fields.
954 // If only one or two CR fields are clobbered, it is more efficient to use
955 // mfocrf to selectively save just those fields, because mfocrf has short
956 // latency compared to mfcr.
957 unsigned MfcrOpcode = PPC::MFCR8;
958 unsigned CrState = RegState::ImplicitKill;
959 if (isELFv2ABI && MustSaveCRs.size() == 1) {
960 MfcrOpcode = PPC::MFOCRF8;
961 CrState = RegState::Kill;
963 MachineInstrBuilder MIB =
964 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
965 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
966 MIB.addReg(MustSaveCRs[i], CrState);
967 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
968 .addReg(TempReg, getKillRegState(true))
969 .addImm(8)
970 .addReg(SPReg);
973 if (MustSaveLR)
974 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
976 if (MustSaveCR &&
977 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
978 // In the ELFv2 ABI, we are not required to save all CR fields.
979 // If only one or two CR fields are clobbered, it is more efficient to use
980 // mfocrf to selectively save just those fields, because mfocrf has short
981 // latency compared to mfcr.
982 unsigned MfcrOpcode = PPC::MFCR8;
983 unsigned CrState = RegState::ImplicitKill;
984 if (isELFv2ABI && MustSaveCRs.size() == 1) {
985 MfcrOpcode = PPC::MFOCRF8;
986 CrState = RegState::Kill;
988 MachineInstrBuilder MIB =
989 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
990 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
991 MIB.addReg(MustSaveCRs[i], CrState);
994 if (HasRedZone) {
995 if (HasFP)
996 BuildMI(MBB, MBBI, dl, StoreInst)
997 .addReg(FPReg)
998 .addImm(FPOffset)
999 .addReg(SPReg);
1000 if (FI->usesPICBase())
1001 BuildMI(MBB, MBBI, dl, StoreInst)
1002 .addReg(PPC::R30)
1003 .addImm(PBPOffset)
1004 .addReg(SPReg);
1005 if (HasBP)
1006 BuildMI(MBB, MBBI, dl, StoreInst)
1007 .addReg(BPReg)
1008 .addImm(BPOffset)
1009 .addReg(SPReg);
1012 if (MustSaveLR)
1013 BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1014 .addReg(ScratchReg, getKillRegState(true))
1015 .addImm(LROffset)
1016 .addReg(SPReg);
1018 if (MustSaveCR &&
1019 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1020 assert(HasRedZone && "A red zone is always available on PPC64");
1021 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1022 .addReg(TempReg, getKillRegState(true))
1023 .addImm(8)
1024 .addReg(SPReg);
1027 // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1028 if (!FrameSize)
1029 return;
1031 // Adjust stack pointer: r1 += NegFrameSize.
1032 // If there is a preferred stack alignment, align R1 now
1034 if (HasBP && HasRedZone) {
1035 // Save a copy of r1 as the base pointer.
1036 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1037 .addReg(SPReg)
1038 .addReg(SPReg);
1041 // Have we generated a STUX instruction to claim stack frame? If so,
1042 // the negated frame size will be placed in ScratchReg.
1043 bool HasSTUX = false;
1045 // This condition must be kept in sync with canUseAsPrologue.
1046 if (HasBP && MaxAlign > 1) {
1047 if (isPPC64)
1048 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1049 .addReg(SPReg)
1050 .addImm(0)
1051 .addImm(64 - Log2_32(MaxAlign));
1052 else // PPC32...
1053 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1054 .addReg(SPReg)
1055 .addImm(0)
1056 .addImm(32 - Log2_32(MaxAlign))
1057 .addImm(31);
1058 if (!isLargeFrame) {
1059 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1060 .addReg(ScratchReg, RegState::Kill)
1061 .addImm(NegFrameSize);
1062 } else {
1063 assert(!SingleScratchReg && "Only a single scratch reg available");
1064 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1065 .addImm(NegFrameSize >> 16);
1066 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1067 .addReg(TempReg, RegState::Kill)
1068 .addImm(NegFrameSize & 0xFFFF);
1069 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1070 .addReg(ScratchReg, RegState::Kill)
1071 .addReg(TempReg, RegState::Kill);
1074 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1075 .addReg(SPReg, RegState::Kill)
1076 .addReg(SPReg)
1077 .addReg(ScratchReg);
1078 HasSTUX = true;
1080 } else if (!isLargeFrame) {
1081 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1082 .addReg(SPReg)
1083 .addImm(NegFrameSize)
1084 .addReg(SPReg);
1086 } else {
1087 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1088 .addImm(NegFrameSize >> 16);
1089 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1090 .addReg(ScratchReg, RegState::Kill)
1091 .addImm(NegFrameSize & 0xFFFF);
1092 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1093 .addReg(SPReg, RegState::Kill)
1094 .addReg(SPReg)
1095 .addReg(ScratchReg);
1096 HasSTUX = true;
1099 // Save the TOC register after the stack pointer update if a prologue TOC
1100 // save is required for the function.
1101 if (MustSaveTOC) {
1102 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1103 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1104 .addReg(TOCReg, getKillRegState(true))
1105 .addImm(TOCSaveOffset)
1106 .addReg(SPReg);
1109 if (!HasRedZone) {
1110 assert(!isPPC64 && "A red zone is always available on PPC64");
1111 if (HasSTUX) {
1112 // The negated frame size is in ScratchReg, and the SPReg has been
1113 // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1114 // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1115 // the stack frame (i.e. the old SP), ideally, we would put the old
1116 // SP into a register and use it as the base for the stores. The
1117 // problem is that the only available register may be ScratchReg,
1118 // which could be R0, and R0 cannot be used as a base address.
1120 // First, set ScratchReg to the old SP. This may need to be modified
1121 // later.
1122 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1123 .addReg(ScratchReg, RegState::Kill)
1124 .addReg(SPReg);
1126 if (ScratchReg == PPC::R0) {
1127 // R0 cannot be used as a base register, but it can be used as an
1128 // index in a store-indexed.
1129 int LastOffset = 0;
1130 if (HasFP) {
1131 // R0 += (FPOffset-LastOffset).
1132 // Need addic, since addi treats R0 as 0.
1133 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1134 .addReg(ScratchReg)
1135 .addImm(FPOffset-LastOffset);
1136 LastOffset = FPOffset;
1137 // Store FP into *R0.
1138 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1139 .addReg(FPReg, RegState::Kill) // Save FP.
1140 .addReg(PPC::ZERO)
1141 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1143 if (FI->usesPICBase()) {
1144 // R0 += (PBPOffset-LastOffset).
1145 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1146 .addReg(ScratchReg)
1147 .addImm(PBPOffset-LastOffset);
1148 LastOffset = PBPOffset;
1149 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1150 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer.
1151 .addReg(PPC::ZERO)
1152 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1154 if (HasBP) {
1155 // R0 += (BPOffset-LastOffset).
1156 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1157 .addReg(ScratchReg)
1158 .addImm(BPOffset-LastOffset);
1159 LastOffset = BPOffset;
1160 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1161 .addReg(BPReg, RegState::Kill) // Save BP.
1162 .addReg(PPC::ZERO)
1163 .addReg(ScratchReg); // This will be the index (R0 is ok here).
1164 // BP = R0-LastOffset
1165 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1166 .addReg(ScratchReg, RegState::Kill)
1167 .addImm(-LastOffset);
1169 } else {
1170 // ScratchReg is not R0, so use it as the base register. It is
1171 // already set to the old SP, so we can use the offsets directly.
1173 // Now that the stack frame has been allocated, save all the necessary
1174 // registers using ScratchReg as the base address.
1175 if (HasFP)
1176 BuildMI(MBB, MBBI, dl, StoreInst)
1177 .addReg(FPReg)
1178 .addImm(FPOffset)
1179 .addReg(ScratchReg);
1180 if (FI->usesPICBase())
1181 BuildMI(MBB, MBBI, dl, StoreInst)
1182 .addReg(PPC::R30)
1183 .addImm(PBPOffset)
1184 .addReg(ScratchReg);
1185 if (HasBP) {
1186 BuildMI(MBB, MBBI, dl, StoreInst)
1187 .addReg(BPReg)
1188 .addImm(BPOffset)
1189 .addReg(ScratchReg);
1190 BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1191 .addReg(ScratchReg, RegState::Kill)
1192 .addReg(ScratchReg);
1195 } else {
1196 // The frame size is a known 16-bit constant (fitting in the immediate
1197 // field of STWU). To be here we have to be compiling for PPC32.
1198 // Since the SPReg has been decreased by FrameSize, add it back to each
1199 // offset.
1200 if (HasFP)
1201 BuildMI(MBB, MBBI, dl, StoreInst)
1202 .addReg(FPReg)
1203 .addImm(FrameSize + FPOffset)
1204 .addReg(SPReg);
1205 if (FI->usesPICBase())
1206 BuildMI(MBB, MBBI, dl, StoreInst)
1207 .addReg(PPC::R30)
1208 .addImm(FrameSize + PBPOffset)
1209 .addReg(SPReg);
1210 if (HasBP) {
1211 BuildMI(MBB, MBBI, dl, StoreInst)
1212 .addReg(BPReg)
1213 .addImm(FrameSize + BPOffset)
1214 .addReg(SPReg);
1215 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1216 .addReg(SPReg)
1217 .addImm(FrameSize);
1222 // Add Call Frame Information for the instructions we generated above.
1223 if (needsCFI) {
1224 unsigned CFIIndex;
1226 if (HasBP) {
1227 // Define CFA in terms of BP. Do this in preference to using FP/SP,
1228 // because if the stack needed aligning then CFA won't be at a fixed
1229 // offset from FP/SP.
1230 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1231 CFIIndex = MF.addFrameInst(
1232 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1233 } else {
1234 // Adjust the definition of CFA to account for the change in SP.
1235 assert(NegFrameSize);
1236 CFIIndex = MF.addFrameInst(
1237 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1239 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1240 .addCFIIndex(CFIIndex);
1242 if (HasFP) {
1243 // Describe where FP was saved, at a fixed offset from CFA.
1244 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1245 CFIIndex = MF.addFrameInst(
1246 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1247 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1248 .addCFIIndex(CFIIndex);
1251 if (FI->usesPICBase()) {
1252 // Describe where the PIC base pointer (R30) was saved, at a fixed offset from CFA.
1253 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1254 CFIIndex = MF.addFrameInst(
1255 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1256 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1257 .addCFIIndex(CFIIndex);
1260 if (HasBP) {
1261 // Describe where BP was saved, at a fixed offset from CFA.
1262 unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1263 CFIIndex = MF.addFrameInst(
1264 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1265 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1266 .addCFIIndex(CFIIndex);
1269 if (MustSaveLR) {
1270 // Describe where LR was saved, at a fixed offset from CFA.
1271 unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1272 CFIIndex = MF.addFrameInst(
1273 MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1274 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1275 .addCFIIndex(CFIIndex);
1279 // If there is a frame pointer, copy R1 into R31
1280 if (HasFP) {
1281 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1282 .addReg(SPReg)
1283 .addReg(SPReg);
1285 if (!HasBP && needsCFI) {
1286 // Change the definition of CFA from SP+offset to FP+offset, because SP
1287 // will change at every alloca.
1288 unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1289 unsigned CFIIndex = MF.addFrameInst(
1290 MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1292 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1293 .addCFIIndex(CFIIndex);
1297 if (needsCFI) {
1298 // Describe where callee saved registers were saved, at fixed offsets from
1299 // CFA.
1300 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1301 for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1302 unsigned Reg = CSI[I].getReg();
1303 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1305 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1306 // subregisters of CR2. We just need to emit a move of CR2.
1307 if (PPC::CRBITRCRegClass.contains(Reg))
1308 continue;
1310 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1311 continue;
1313 // For SVR4, don't emit a move for the CR spill slot if we haven't
1314 // spilled CRs.
1315 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1316 && !MustSaveCR)
1317 continue;
1319 // For 64-bit SVR4 when we have spilled CRs, the spill location
1320 // is SP+8, not a frame-relative slot.
1321 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1322 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1323 // the whole CR word. In the ELFv2 ABI, every CR that was
1324 // actually saved gets its own CFI record.
1325 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1326 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1327 nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
1328 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1329 .addCFIIndex(CFIIndex);
1330 continue;
1333 if (CSI[I].isSpilledToReg()) {
1334 unsigned SpilledReg = CSI[I].getDstReg();
1335 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1336 nullptr, MRI->getDwarfRegNum(Reg, true),
1337 MRI->getDwarfRegNum(SpilledReg, true)));
1338 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1339 .addCFIIndex(CFIRegister);
1340 } else {
1341 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1342 // We have changed the object offset above but we do not want to change
1343 // the actual offsets in the CFI instruction so we have to undo the
1344 // offset change here.
1345 if (MovingStackUpdateDown)
1346 Offset -= NegFrameSize;
1348 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1349 nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1350 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1351 .addCFIIndex(CFIIndex);
1357 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1358 MachineBasicBlock &MBB) const {
1359 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1360 DebugLoc dl;
1362 if (MBBI != MBB.end())
1363 dl = MBBI->getDebugLoc();
1365 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1366 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1368 // Get alignment info so we know how to restore the SP.
1369 const MachineFrameInfo &MFI = MF.getFrameInfo();
1371 // Get the number of bytes allocated from the FrameInfo.
1372 int FrameSize = MFI.getStackSize();
1374 // Get processor type.
1375 bool isPPC64 = Subtarget.isPPC64();
1376 // Get the ABI.
1377 bool isSVR4ABI = Subtarget.isSVR4ABI();
1379 // Check if the link register (LR) has been saved.
1380 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1381 bool MustSaveLR = FI->mustSaveLR();
1382 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1383 bool MustSaveCR = !MustSaveCRs.empty();
1384 // Do we have a frame pointer and/or base pointer for this function?
1385 bool HasFP = hasFP(MF);
1386 bool HasBP = RegInfo->hasBasePointer(MF);
1387 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1389 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
1390 unsigned BPReg = RegInfo->getBaseRegister(MF);
1391 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
1392 unsigned ScratchReg = 0;
1393 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1394 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1395 : PPC::MTLR );
1396 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1397 : PPC::LWZ );
1398 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1399 : PPC::LIS );
1400 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1401 : PPC::OR );
1402 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1403 : PPC::ORI );
1404 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1405 : PPC::ADDI );
1406 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1407 : PPC::ADD4 );
1409 int LROffset = getReturnSaveOffset();
1411 int FPOffset = 0;
1413 // Using the same bool variable as below to suppress compiler warnings.
1414 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1415 &TempReg);
1416 assert(SingleScratchReg &&
1417 "Could not find an available scratch register");
1419 SingleScratchReg = ScratchReg == TempReg;
1421 if (HasFP) {
1422 if (isSVR4ABI) {
1423 int FPIndex = FI->getFramePointerSaveIndex();
1424 assert(FPIndex && "No Frame Pointer Save Slot!");
1425 FPOffset = MFI.getObjectOffset(FPIndex);
1426 } else {
1427 FPOffset = getFramePointerSaveOffset();
1431 int BPOffset = 0;
1432 if (HasBP) {
1433 if (isSVR4ABI) {
1434 int BPIndex = FI->getBasePointerSaveIndex();
1435 assert(BPIndex && "No Base Pointer Save Slot!");
1436 BPOffset = MFI.getObjectOffset(BPIndex);
1437 } else {
1438 BPOffset = getBasePointerSaveOffset();
1442 int PBPOffset = 0;
1443 if (FI->usesPICBase()) {
1444 int PBPIndex = FI->getPICBasePointerSaveIndex();
1445 assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1446 PBPOffset = MFI.getObjectOffset(PBPIndex);
1449 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1451 if (IsReturnBlock) {
1452 unsigned RetOpcode = MBBI->getOpcode();
1453 bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
1454 RetOpcode == PPC::TCRETURNdi ||
1455 RetOpcode == PPC::TCRETURNai ||
1456 RetOpcode == PPC::TCRETURNri8 ||
1457 RetOpcode == PPC::TCRETURNdi8 ||
1458 RetOpcode == PPC::TCRETURNai8;
1460 if (UsesTCRet) {
1461 int MaxTCRetDelta = FI->getTailCallSPDelta();
1462 MachineOperand &StackAdjust = MBBI->getOperand(1);
1463 assert(StackAdjust.isImm() && "Expecting immediate value.");
1464 // Adjust stack pointer.
1465 int StackAdj = StackAdjust.getImm();
1466 int Delta = StackAdj - MaxTCRetDelta;
1467 assert((Delta >= 0) && "Delta must be positive");
1468 if (MaxTCRetDelta>0)
1469 FrameSize += (StackAdj +Delta);
1470 else
1471 FrameSize += StackAdj;
1475 // Frames of 32KB & larger require special handling because they cannot be
1476 // indexed into with a simple LD/LWZ immediate offset operand.
1477 bool isLargeFrame = !isInt<16>(FrameSize);
1479 // On targets without red zone, the SP needs to be restored last, so that
1480 // all live contents of the stack frame are upwards of the SP. This means
1481 // that we cannot restore SP just now, since there may be more registers
1482 // to restore from the stack frame (e.g. R31). If the frame size is not
1483 // a simple immediate value, we will need a spare register to hold the
1484 // restored SP. If the frame size is known and small, we can simply adjust
1485 // the offsets of the registers to be restored, and still use SP to restore
1486 // them. In such case, the final update of SP will be to add the frame
1487 // size to it.
1488 // To simplify the code, set RBReg to the base register used to restore
1489 // values from the stack, and set SPAdd to the value that needs to be added
1490 // to the SP at the end. The default values are as if red zone was present.
1491 unsigned RBReg = SPReg;
1492 unsigned SPAdd = 0;
1494 // Check if we can move the stack update instruction up the epilogue
1495 // past the callee saves. This will allow the move to LR instruction
1496 // to be executed before the restores of the callee saves which means
1497 // that the callee saves can hide the latency from the MTLR instrcution.
1498 MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1499 if (stackUpdateCanBeMoved(MF)) {
1500 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1501 for (CalleeSavedInfo CSI : Info) {
1502 int FrIdx = CSI.getFrameIdx();
1503 // If the frame index is not negative the callee saved info belongs to a
1504 // stack object that is not a fixed stack object. We ignore non-fixed
1505 // stack objects because we won't move the update of the stack pointer
1506 // past them.
1507 if (FrIdx >= 0)
1508 continue;
1510 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1511 StackUpdateLoc--;
1512 else {
1513 // Abort the operation as we can't update all CSR restores.
1514 StackUpdateLoc = MBBI;
1515 break;
1520 if (FrameSize) {
1521 // In the prologue, the loaded (or persistent) stack pointer value is
1522 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1523 // zone add this offset back now.
1525 // If this function contained a fastcc call and GuaranteedTailCallOpt is
1526 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1527 // call which invalidates the stack pointer value in SP(0). So we use the
1528 // value of R31 in this case.
1529 if (FI->hasFastCall()) {
1530 assert(HasFP && "Expecting a valid frame pointer.");
1531 if (!HasRedZone)
1532 RBReg = FPReg;
1533 if (!isLargeFrame) {
1534 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1535 .addReg(FPReg).addImm(FrameSize);
1536 } else {
1537 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1538 .addImm(FrameSize >> 16);
1539 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1540 .addReg(ScratchReg, RegState::Kill)
1541 .addImm(FrameSize & 0xFFFF);
1542 BuildMI(MBB, MBBI, dl, AddInst)
1543 .addReg(RBReg)
1544 .addReg(FPReg)
1545 .addReg(ScratchReg);
1547 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1548 if (HasRedZone) {
1549 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1550 .addReg(SPReg)
1551 .addImm(FrameSize);
1552 } else {
1553 // Make sure that adding FrameSize will not overflow the max offset
1554 // size.
1555 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1556 "Local offsets should be negative");
1557 SPAdd = FrameSize;
1558 FPOffset += FrameSize;
1559 BPOffset += FrameSize;
1560 PBPOffset += FrameSize;
1562 } else {
1563 // We don't want to use ScratchReg as a base register, because it
1564 // could happen to be R0. Use FP instead, but make sure to preserve it.
1565 if (!HasRedZone) {
1566 // If FP is not saved, copy it to ScratchReg.
1567 if (!HasFP)
1568 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1569 .addReg(FPReg)
1570 .addReg(FPReg);
1571 RBReg = FPReg;
1573 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1574 .addImm(0)
1575 .addReg(SPReg);
1578 assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1579 // If there is no red zone, ScratchReg may be needed for holding a useful
1580 // value (although not the base register). Make sure it is not overwritten
1581 // too early.
1583 assert((isPPC64 || !MustSaveCR) &&
1584 "Epilogue CR restoring supported only in 64-bit mode");
1586 // If we need to restore both the LR and the CR and we only have one
1587 // available scratch register, we must do them one at a time.
1588 if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1589 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1590 // is live here.
1591 assert(HasRedZone && "Expecting red zone");
1592 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1593 .addImm(8)
1594 .addReg(SPReg);
1595 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1596 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1597 .addReg(TempReg, getKillRegState(i == e-1));
1600 // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1601 // LR is stored in the caller's stack frame. ScratchReg will be needed
1602 // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1603 // a base register anyway, because it may happen to be R0.
1604 bool LoadedLR = false;
1605 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1606 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1607 .addImm(LROffset+SPAdd)
1608 .addReg(RBReg);
1609 LoadedLR = true;
1612 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1613 // This will only occur for PPC64.
1614 assert(isPPC64 && "Expecting 64-bit mode");
1615 assert(RBReg == SPReg && "Should be using SP as a base register");
1616 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1617 .addImm(8)
1618 .addReg(RBReg);
1621 if (HasFP) {
1622 // If there is red zone, restore FP directly, since SP has already been
1623 // restored. Otherwise, restore the value of FP into ScratchReg.
1624 if (HasRedZone || RBReg == SPReg)
1625 BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1626 .addImm(FPOffset)
1627 .addReg(SPReg);
1628 else
1629 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1630 .addImm(FPOffset)
1631 .addReg(RBReg);
1634 if (FI->usesPICBase())
1635 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1636 .addImm(PBPOffset)
1637 .addReg(RBReg);
1639 if (HasBP)
1640 BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1641 .addImm(BPOffset)
1642 .addReg(RBReg);
1644 // There is nothing more to be loaded from the stack, so now we can
1645 // restore SP: SP = RBReg + SPAdd.
1646 if (RBReg != SPReg || SPAdd != 0) {
1647 assert(!HasRedZone && "This should not happen with red zone");
1648 // If SPAdd is 0, generate a copy.
1649 if (SPAdd == 0)
1650 BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1651 .addReg(RBReg)
1652 .addReg(RBReg);
1653 else
1654 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1655 .addReg(RBReg)
1656 .addImm(SPAdd);
1658 assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1659 if (RBReg == FPReg)
1660 BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1661 .addReg(ScratchReg)
1662 .addReg(ScratchReg);
1664 // Now load the LR from the caller's stack frame.
1665 if (MustSaveLR && !LoadedLR)
1666 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1667 .addImm(LROffset)
1668 .addReg(SPReg);
1671 if (MustSaveCR &&
1672 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1673 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1674 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1675 .addReg(TempReg, getKillRegState(i == e-1));
1677 if (MustSaveLR)
1678 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1680 // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1681 // call optimization
1682 if (IsReturnBlock) {
1683 unsigned RetOpcode = MBBI->getOpcode();
1684 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1685 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1686 MF.getFunction().getCallingConv() == CallingConv::Fast) {
1687 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1688 unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1690 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1691 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1692 .addReg(SPReg).addImm(CallerAllocatedAmt);
1693 } else {
1694 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1695 .addImm(CallerAllocatedAmt >> 16);
1696 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1697 .addReg(ScratchReg, RegState::Kill)
1698 .addImm(CallerAllocatedAmt & 0xFFFF);
1699 BuildMI(MBB, MBBI, dl, AddInst)
1700 .addReg(SPReg)
1701 .addReg(FPReg)
1702 .addReg(ScratchReg);
1704 } else {
1705 createTailCallBranchInstr(MBB);
1710 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1711 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1713 // If we got this far a first terminator should exist.
1714 assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1716 DebugLoc dl = MBBI->getDebugLoc();
1717 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1719 // Create branch instruction for pseudo tail call return instruction
1720 unsigned RetOpcode = MBBI->getOpcode();
1721 if (RetOpcode == PPC::TCRETURNdi) {
1722 MBBI = MBB.getLastNonDebugInstr();
1723 MachineOperand &JumpTarget = MBBI->getOperand(0);
1724 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1725 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1726 } else if (RetOpcode == PPC::TCRETURNri) {
1727 MBBI = MBB.getLastNonDebugInstr();
1728 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1729 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1730 } else if (RetOpcode == PPC::TCRETURNai) {
1731 MBBI = MBB.getLastNonDebugInstr();
1732 MachineOperand &JumpTarget = MBBI->getOperand(0);
1733 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1734 } else if (RetOpcode == PPC::TCRETURNdi8) {
1735 MBBI = MBB.getLastNonDebugInstr();
1736 MachineOperand &JumpTarget = MBBI->getOperand(0);
1737 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1738 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1739 } else if (RetOpcode == PPC::TCRETURNri8) {
1740 MBBI = MBB.getLastNonDebugInstr();
1741 assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1742 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1743 } else if (RetOpcode == PPC::TCRETURNai8) {
1744 MBBI = MBB.getLastNonDebugInstr();
1745 MachineOperand &JumpTarget = MBBI->getOperand(0);
1746 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1750 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1751 BitVector &SavedRegs,
1752 RegScavenger *RS) const {
1753 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1755 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1757 // Save and clear the LR state.
1758 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1759 unsigned LR = RegInfo->getRARegister();
1760 FI->setMustSaveLR(MustSaveLR(MF, LR));
1761 SavedRegs.reset(LR);
1763 // Save R31 if necessary
1764 int FPSI = FI->getFramePointerSaveIndex();
1765 bool isPPC64 = Subtarget.isPPC64();
1766 bool isDarwinABI = Subtarget.isDarwinABI();
1767 MachineFrameInfo &MFI = MF.getFrameInfo();
1769 // If the frame pointer save index hasn't been defined yet.
1770 if (!FPSI && needsFP(MF)) {
1771 // Find out what the fix offset of the frame pointer save area.
1772 int FPOffset = getFramePointerSaveOffset();
1773 // Allocate the frame index for frame pointer save area.
1774 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1775 // Save the result.
1776 FI->setFramePointerSaveIndex(FPSI);
1779 int BPSI = FI->getBasePointerSaveIndex();
1780 if (!BPSI && RegInfo->hasBasePointer(MF)) {
1781 int BPOffset = getBasePointerSaveOffset();
1782 // Allocate the frame index for the base pointer save area.
1783 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1784 // Save the result.
1785 FI->setBasePointerSaveIndex(BPSI);
1788 // Reserve stack space for the PIC Base register (R30).
1789 // Only used in SVR4 32-bit.
1790 if (FI->usesPICBase()) {
1791 int PBPSI = MFI.CreateFixedObject(4, -8, true);
1792 FI->setPICBasePointerSaveIndex(PBPSI);
1795 // Make sure we don't explicitly spill r31, because, for example, we have
1796 // some inline asm which explicitly clobbers it, when we otherwise have a
1797 // frame pointer and are using r31's spill slot for the prologue/epilogue
1798 // code. Same goes for the base pointer and the PIC base register.
1799 if (needsFP(MF))
1800 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1801 if (RegInfo->hasBasePointer(MF))
1802 SavedRegs.reset(RegInfo->getBaseRegister(MF));
1803 if (FI->usesPICBase())
1804 SavedRegs.reset(PPC::R30);
1806 // Reserve stack space to move the linkage area to in case of a tail call.
1807 int TCSPDelta = 0;
1808 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1809 (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1810 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1813 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1814 // function uses CR 2, 3, or 4.
1815 if (!isPPC64 && !isDarwinABI &&
1816 (SavedRegs.test(PPC::CR2) ||
1817 SavedRegs.test(PPC::CR3) ||
1818 SavedRegs.test(PPC::CR4))) {
1819 int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1820 FI->setCRSpillFrameIndex(FrameIdx);
/// Finalize the offsets of the callee-saved spill slots for \p MF.
///
/// For non-SVR4 ABIs this only calls addScavengingSpillSlot. Otherwise it:
///  1. emits tail-call branches in return blocks other than the restore point
///     when a save point is set and the function has a tail call;
///  2. sorts the entries of MFI.getCalleeSavedInfo() by register class
///     (GPR/SPE4, G8R, F8, VRSAVE, VR/SPE; CR entries are ignored here);
///  3. walks the save areas in the order FP, general registers, CR, VRSAVE,
///     vector, adding the running non-positive LowerBound to each slot's
///     current object offset and then lowering LowerBound by the area's size;
///  4. calls addScavengingSpillSlot.
///
/// \param MF function whose frame objects are adjusted.
/// \param RS register scavenger, forwarded to addScavengingSpillSlot.
1824 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1825 RegScavenger *RS) const {
1826 // Early exit if not using the SVR4 ABI.
1827 if (!Subtarget.isSVR4ABI()) {
1828 addScavengingSpillSlot(MF, RS);
1829 return;
1832 // Get callee saved register information.
1833 MachineFrameInfo &MFI = MF.getFrameInfo();
1834 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1836 // If the function is shrink-wrapped, and if the function has a tail call, the
1837 // tail call might not be in the new RestoreBlock, so real branch instruction
1838 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1839 // RestoreBlock. So we handle this case here.
1840 if (MFI.getSavePoint() && MFI.hasTailCall()) {
1841 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1842 for (MachineBasicBlock &MBB : MF) {
1843 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1844 createTailCallBranchInstr(MBB);
1848 // Early exit if no callee saved registers are modified!
1849 if (CSI.empty() && !needsFP(MF)) {
1850 addScavengingSpillSlot(MF, RS);
1851 return;
// Smallest register value seen so far in each class. Each starts at the
// class's register 31 and is only ever lowered below; the vector minimum
// starts at S31 when the subtarget has SPE and at V31 otherwise.
1854 unsigned MinGPR = PPC::R31;
1855 unsigned MinG8R = PPC::X31;
1856 unsigned MinFPR = PPC::F31;
1857 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
// Which save areas are present in this function.
1859 bool HasGPSaveArea = false;
1860 bool HasG8SaveArea = false;
1861 bool HasFPSaveArea = false;
1862 bool HasVRSAVESaveArea = false;
1863 bool HasVRSaveArea = false;
// Callee-saved entries bucketed by register class.
1865 SmallVector<CalleeSavedInfo, 18> GPRegs;
1866 SmallVector<CalleeSavedInfo, 18> G8Regs;
1867 SmallVector<CalleeSavedInfo, 18> FPRegs;
1868 SmallVector<CalleeSavedInfo, 18> VRegs;
// Classify every callee-saved entry and update the per-class minimum.
1870 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1871 unsigned Reg = CSI[i].getReg();
1872 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1873 (Reg != PPC::X2 && Reg != PPC::R2)) &&
1874 "Not expecting to try to spill R2 in a function that must save TOC");
1875 if (PPC::GPRCRegClass.contains(Reg) ||
1876 PPC::SPE4RCRegClass.contains(Reg)) {
1877 HasGPSaveArea = true;
1879 GPRegs.push_back(CSI[i]);
1881 if (Reg < MinGPR) {
1882 MinGPR = Reg;
1884 } else if (PPC::G8RCRegClass.contains(Reg)) {
1885 HasG8SaveArea = true;
1887 G8Regs.push_back(CSI[i]);
1889 if (Reg < MinG8R) {
1890 MinG8R = Reg;
1892 } else if (PPC::F8RCRegClass.contains(Reg)) {
1893 HasFPSaveArea = true;
1895 FPRegs.push_back(CSI[i]);
1897 if (Reg < MinFPR) {
1898 MinFPR = Reg;
1900 } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1901 PPC::CRRCRegClass.contains(Reg)) {
1902 ; // do nothing, as we already know whether CRs are spilled
1903 } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1904 HasVRSAVESaveArea = true;
1905 } else if (PPC::VRRCRegClass.contains(Reg) ||
1906 PPC::SPERCRegClass.contains(Reg)) {
1907 // Altivec and SPE are mutually exclusive, but have the same stack
1908 // alignment requirements, so overload the save area for both cases.
1909 HasVRSaveArea = true;
1911 VRegs.push_back(CSI[i]);
1913 if (Reg < MinVR) {
1914 MinVR = Reg;
1916 } else {
1917 llvm_unreachable("Unknown RegisterClass!");
1921 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1922 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
// Running bound added to the offsets of the slots in the area currently being
// placed. It starts at 0 (or at the negative tail-call delta) and is only
// decreased from here on.
1924 int64_t LowerBound = 0;
1926 // Take into account stack space reserved for tail calls.
1927 int TCSPDelta = 0;
1928 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1929 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1930 LowerBound = TCSPDelta;
1933 // The Floating-point register save area is right below the back chain word
1934 // of the previous stack frame.
1935 if (HasFPSaveArea) {
1936 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1937 int FI = FPRegs[i].getFrameIdx();
1939 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
// The area covers encodings MinFPR..31 at 8 bytes per register.
1942 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1945 // Check whether the frame pointer register is allocated. If so, make sure it
1946 // is spilled to the correct offset.
1947 if (needsFP(MF)) {
1948 int FI = PFI->getFramePointerSaveIndex();
1949 assert(FI && "No Frame Pointer Save Slot!");
1950 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1951 // FP is R31/X31, so no need to update MinGPR/MinG8R.
1952 HasGPSaveArea = true;
// The PIC base pointer slot is shifted the same way; it forces the general
// register area to extend at least down to R30.
1955 if (PFI->usesPICBase()) {
1956 int FI = PFI->getPICBasePointerSaveIndex();
1957 assert(FI && "No PIC Base Pointer Save Slot!");
1958 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1960 MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1961 HasGPSaveArea = true;
// Likewise for the base pointer, which may be a 32-bit or a 64-bit register
// and lowers the matching minimum.
1964 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1965 if (RegInfo->hasBasePointer(MF)) {
1966 int FI = PFI->getBasePointerSaveIndex();
1967 assert(FI && "No Base Pointer Save Slot!");
1968 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1970 unsigned BP = RegInfo->getBaseRegister(MF);
1971 if (PPC::G8RCRegClass.contains(BP)) {
1972 MinG8R = std::min<unsigned>(MinG8R, BP);
1973 HasG8SaveArea = true;
1974 } else if (PPC::GPRCRegClass.contains(BP)) {
1975 MinGPR = std::min<unsigned>(MinGPR, BP);
1976 HasGPSaveArea = true;
1980 // General register save area starts right below the Floating-point
1981 // register save area.
1982 if (HasGPSaveArea || HasG8SaveArea) {
1983 // Move general register save area spill slots down, taking into account
1984 // the size of the Floating-point register save area.
1985 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
// Entries spilled to a register have no stack slot to move.
1986 if (!GPRegs[i].isSpilledToReg()) {
1987 int FI = GPRegs[i].getFrameIdx();
1988 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1992 // Likewise move the 64-bit general register (G8R) spill slots down by the
1993 // same bound.
1994 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1995 if (!G8Regs[i].isSpilledToReg()) {
1996 int FI = G8Regs[i].getFrameIdx();
1997 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
// The area spans from the lower of the two minimum encodings up to 31, at
// 8 bytes per register on PPC64 and 4 bytes otherwise.
2001 unsigned MinReg =
2002 std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2003 TRI->getEncodingValue(MinG8R));
2005 if (Subtarget.isPPC64()) {
2006 LowerBound -= (31 - MinReg + 1) * 8;
2007 } else {
2008 LowerBound -= (31 - MinReg + 1) * 4;
2012 // For 32-bit only, the CR save area is below the general register
2013 // save area. For 64-bit SVR4, the CR save area is addressed relative
2014 // to the stack pointer and hence does not need an adjustment here.
2015 // Only CR2 (the first nonvolatile spilled) has an associated frame
2016 // index so that we have a single uniform save area.
2017 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2018 // Adjust the frame index of the CR spill slot.
2019 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2020 unsigned Reg = CSI[i].getReg();
2022 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2023 // Leave Darwin logic as-is.
2024 || (!Subtarget.isSVR4ABI() &&
2025 (PPC::CRBITRCRegClass.contains(Reg) ||
2026 PPC::CRRCRegClass.contains(Reg)))) {
2027 int FI = CSI[i].getFrameIdx();
2029 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2033 LowerBound -= 4; // The CR save area is always 4 bytes long.
2036 if (HasVRSAVESaveArea) {
2037 // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2038 // which have the VRSAVE register class?
2039 // Adjust the frame index of the VRSAVE spill slot.
2040 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2041 unsigned Reg = CSI[i].getReg();
2043 if (PPC::VRSAVERCRegClass.contains(Reg)) {
2044 int FI = CSI[i].getFrameIdx();
2046 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2050 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2053 // Both Altivec and SPE have the same alignment and padding requirements
2054 // within the stack frame.
2055 if (HasVRSaveArea) {
2056 // Insert alignment padding, we need 16-byte alignment. Note: for positive
2057 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2058 // we are using negative number here (the stack grows downward). We should
2059 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2060 // is the alignment size ( n = 16 here) and y is the size after aligning.
2061 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2062 LowerBound &= ~(15);
2064 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2065 int FI = VRegs[i].getFrameIdx();
2067 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
// All save areas are placed; now add the scavenging slot(s) if required.
2071 addScavengingSpillSlot(MF, RS);
2074 void
2075 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2076 RegScavenger *RS) const {
2077 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2078 // a large stack, which will require scavenging a register to materialize a
2079 // large offset.
2081 // We need to have a scavenger spill slot for spills if the frame size is
2082 // large. In case there is no free register for large-offset addressing,
2083 // this slot is used for the necessary emergency spill. Also, we need the
2084 // slot for dynamic stack allocations.
2086 // The scavenger might be invoked if the frame offset does not fit into
2087 // the 16-bit immediate. We don't know the complete frame size here
2088 // because we've not yet computed callee-saved register spills or the
2089 // needed alignment padding.
2090 unsigned StackSize = determineFrameLayout(MF, true);
2091 MachineFrameInfo &MFI = MF.getFrameInfo();
2092 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2093 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2094 const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2095 const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2096 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2097 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2098 unsigned Size = TRI.getSpillSize(RC);
2099 unsigned Align = TRI.getSpillAlignment(RC);
2100 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2102 // Might we have over-aligned allocas?
2103 bool HasAlVars = MFI.hasVarSizedObjects() &&
2104 MFI.getMaxAlignment() > getStackAlignment();
2106 // These kinds of spills might need two registers.
2107 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2108 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2113 // This function checks if a callee saved gpr can be spilled to a volatile
2114 // vector register. This occurs for leaf functions when the option
2115 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2116 // which were not spilled to vectors, return false so the target independent
2117 // code can handle them by assigning a FrameIdx to a stack slot.
2118 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2119 MachineFunction &MF, const TargetRegisterInfo *TRI,
2120 std::vector<CalleeSavedInfo> &CSI) const {
2122 if (CSI.empty())
2123 return true; // Early exit if no callee saved registers are modified!
2125 // Early exit if cannot spill gprs to volatile vector registers.
2126 MachineFrameInfo &MFI = MF.getFrameInfo();
2127 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2128 return false;
2130 // Build a BitVector of VSRs that can be used for spilling GPRs.
2131 BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2132 BitVector BVCalleeSaved(TRI->getNumRegs());
2133 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2134 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2135 for (unsigned i = 0; CSRegs[i]; ++i)
2136 BVCalleeSaved.set(CSRegs[i]);
2138 for (unsigned Reg : BVAllocatable.set_bits()) {
2139 // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2140 // used in the function.
2141 if (BVCalleeSaved[Reg] ||
2142 (!PPC::F8RCRegClass.contains(Reg) &&
2143 !PPC::VFRCRegClass.contains(Reg)) ||
2144 (MF.getRegInfo().isPhysRegUsed(Reg)))
2145 BVAllocatable.reset(Reg);
2148 bool AllSpilledToReg = true;
2149 for (auto &CS : CSI) {
2150 if (BVAllocatable.none())
2151 return false;
2153 unsigned Reg = CS.getReg();
2154 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2155 AllSpilledToReg = false;
2156 continue;
2159 unsigned VolatileVFReg = BVAllocatable.find_first();
2160 if (VolatileVFReg < BVAllocatable.size()) {
2161 CS.setDstReg(VolatileVFReg);
2162 BVAllocatable.reset(VolatileVFReg);
2163 } else {
2164 AllSpilledToReg = false;
2167 return AllSpilledToReg;
/// Emit the spills for the callee-saved registers listed in \p CSI in front of
/// \p MI in \p MBB.
///
/// Per entry, in this order:
///  - VRSAVE is skipped unless the subtarget uses the Darwin ABI;
///  - the register is added as a live-in of \p MBB unless the function's
///    register info already reports it live-in;
///  - a CR2..CR4 field that follows an already emitted 32-bit CR spill is only
///    appended to that spill as an implicit killed operand;
///  - R2/X2 is skipped when the function info says the TOC must be saved;
///  - a CR field is recorded via addMustSaveCR on PPC64, or spilled through
///    MFCR into R12 followed by an STW to its frame index on 32-bit;
///  - any other register is either moved to its destination register with
///    MTVSRD (isSpilledToReg) or stored to its frame index.
///
/// \returns false without emitting anything when the subtarget is not SVR4,
/// true otherwise.
2171 bool
2172 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2173 MachineBasicBlock::iterator MI,
2174 const std::vector<CalleeSavedInfo> &CSI,
2175 const TargetRegisterInfo *TRI) const {
2177 // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2178 // Return false otherwise to maintain pre-existing behavior.
2179 if (!Subtarget.isSVR4ABI())
2180 return false;
2182 MachineFunction *MF = MBB.getParent();
2183 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2184 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2185 bool MustSaveTOC = FI->mustSaveTOC();
2186 DebugLoc DL;
// CRSpilled becomes true once the 32-bit MFCR spill has been emitted; CRMIB
// then refers to that MFCR so further CR fields can be attached to it.
2187 bool CRSpilled = false;
2188 MachineInstrBuilder CRMIB;
2190 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2191 unsigned Reg = CSI[i].getReg();
2192 // Only Darwin actually uses the VRSAVE register, but it can still appear
2193 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
2194 // Darwin, ignore it.
2195 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2196 continue;
2198 // CR2 through CR4 are the nonvolatile CR fields.
2199 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2201 // Add the callee-saved register as live-in; it's killed at the spill.
2202 // Do not do this for callee-saved registers that are live-in to the
2203 // function because they will already be marked live-in and this will be
2204 // adding it for a second time. It is an error to add the same register
2205 // to the set more than once.
2206 const MachineRegisterInfo &MRI = MF->getRegInfo();
2207 bool IsLiveIn = MRI.isLiveIn(Reg);
2208 if (!IsLiveIn)
2209 MBB.addLiveIn(Reg);
// A CR spill was already emitted for an earlier field: only record this field
// as an implicit killed operand of that MFCR.
2211 if (CRSpilled && IsCRField) {
2212 CRMIB.addReg(Reg, RegState::ImplicitKill);
2213 continue;
2216 // The actual spill will happen in the prologue.
2217 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2218 continue;
2220 // Insert the spill to the stack frame.
2221 if (IsCRField) {
2222 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2223 if (Subtarget.isPPC64()) {
2224 // The actual spill will happen at the start of the prologue.
2225 FuncInfo->addMustSaveCR(Reg);
2226 } else {
2227 CRSpilled = true;
2228 FuncInfo->setSpillsCR();
2230 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
2231 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2232 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2233 .addReg(Reg, RegState::ImplicitKill);
// Insert the MFCR, then store R12 (killed by the store) to the CR frame slot.
2235 MBB.insert(MI, CRMIB);
2236 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2237 .addReg(PPC::R12,
2238 getKillRegState(true)),
2239 CSI[i].getFrameIdx()));
2241 } else {
// Spill to the assigned destination register instead of a stack slot, and
// count it in the NumPESpillVSR statistic.
2242 if (CSI[i].isSpilledToReg()) {
2243 NumPESpillVSR++;
2244 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2245 .addReg(Reg, getKillRegState(true));
2246 } else {
2247 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2248 // Use !IsLiveIn for the kill flag.
2249 // We do not want to kill registers that are live in this function
2250 // before their use because they will become undefined registers.
2251 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2252 CSI[i].getFrameIdx(), RC, TRI);
2256 return true;
2259 static void
2260 restoreCRs(bool isPPC64, bool is31,
2261 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2262 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2263 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2265 MachineFunction *MF = MBB.getParent();
2266 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2267 DebugLoc DL;
2268 unsigned RestoreOp, MoveReg;
2270 if (isPPC64)
2271 // This is handled during epilogue generation.
2272 return;
2273 else {
2274 // 32-bit: FP-relative
2275 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2276 PPC::R12),
2277 CSI[CSIIndex].getFrameIdx()));
2278 RestoreOp = PPC::MTOCRF;
2279 MoveReg = PPC::R12;
2282 if (CR2Spilled)
2283 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2284 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2286 if (CR3Spilled)
2287 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2288 .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2290 if (CR4Spilled)
2291 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2292 .addReg(MoveReg, getKillRegState(true)));
2295 MachineBasicBlock::iterator PPCFrameLowering::
2296 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2297 MachineBasicBlock::iterator I) const {
2298 const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2299 if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2300 I->getOpcode() == PPC::ADJCALLSTACKUP) {
2301 // Add (actually subtract) back the amount the callee popped on return.
2302 if (int CalleeAmt = I->getOperand(1).getImm()) {
2303 bool is64Bit = Subtarget.isPPC64();
2304 CalleeAmt *= -1;
2305 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2306 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2307 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2308 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2309 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2310 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2311 const DebugLoc &dl = I->getDebugLoc();
2313 if (isInt<16>(CalleeAmt)) {
2314 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2315 .addReg(StackReg, RegState::Kill)
2316 .addImm(CalleeAmt);
2317 } else {
2318 MachineBasicBlock::iterator MBBI = I;
2319 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2320 .addImm(CalleeAmt >> 16);
2321 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2322 .addReg(TmpReg, RegState::Kill)
2323 .addImm(CalleeAmt & 0xFFFF);
2324 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2325 .addReg(StackReg, RegState::Kill)
2326 .addReg(TmpReg);
2330 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2331 return MBB.erase(I);
/// Emit the reloads for the callee-saved registers listed in \p CSI at \p MI
/// in \p MBB.
///
/// The insertion point is reset after every entry to just past the
/// instruction that preceded \p MI on entry (or to the block start), so each
/// reload lands in front of the ones emitted before it. VRSAVE (off Darwin)
/// and R2/X2 (when the TOC must be saved) are skipped. CR2..CR4 are only
/// noted; they are restored together by restoreCRs when the next non-CR
/// register is reached, or after the loop. Other registers are reloaded with
/// MFVSRD from their destination register (isSpilledToReg) or from their
/// frame index.
///
/// \returns false without emitting anything when the subtarget is not SVR4,
/// true otherwise.
2334 bool
2335 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2336 MachineBasicBlock::iterator MI,
2337 std::vector<CalleeSavedInfo> &CSI,
2338 const TargetRegisterInfo *TRI) const {
2340 // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2341 // Return false otherwise to maintain pre-existing behavior.
2342 if (!Subtarget.isSVR4ABI())
2343 return false;
2345 MachineFunction *MF = MBB.getParent();
2346 const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2347 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2348 bool MustSaveTOC = FI->mustSaveTOC();
// Which nonvolatile CR fields have been seen but not yet restored, and the
// CSI index of the CR2 entry (the one that carries the spill slot).
2349 bool CR2Spilled = false;
2350 bool CR3Spilled = false;
2351 bool CR4Spilled = false;
2352 unsigned CSIIndex = 0;
2354 // Initialize insertion-point logic; we will be restoring in reverse
2355 // order of spill.
2356 MachineBasicBlock::iterator I = MI, BeforeI = I;
2357 bool AtStart = I == MBB.begin();
// BeforeI is the instruction preceding the insertion point; it is only
// meaningful when the insertion point is not the start of the block.
2359 if (!AtStart)
2360 --BeforeI;
2362 for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2363 unsigned Reg = CSI[i].getReg();
2365 // Only Darwin actually uses the VRSAVE register, but it can still appear
2366 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on
2367 // Darwin, ignore it.
2368 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2369 continue;
// R2/X2 is not reloaded here when the function info says the TOC must be saved.
2371 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2372 continue;
2374 if (Reg == PPC::CR2) {
2375 CR2Spilled = true;
2376 // The spill slot is associated only with CR2, which is the
2377 // first nonvolatile spilled. Save it here.
2378 CSIIndex = i;
2379 continue;
2380 } else if (Reg == PPC::CR3) {
2381 CR3Spilled = true;
2382 continue;
2383 } else if (Reg == PPC::CR4) {
2384 CR4Spilled = true;
2385 continue;
2386 } else {
2387 // When we first encounter a non-CR register after seeing at
2388 // least one CR register, restore all spilled CRs together.
2389 if ((CR2Spilled || CR3Spilled || CR4Spilled)
2390 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2391 bool is31 = needsFP(*MF);
2392 restoreCRs(Subtarget.isPPC64(), is31,
2393 CR2Spilled, CR3Spilled, CR4Spilled,
2394 MBB, I, CSI, CSIIndex);
// Clear the flags so the pending CR fields are not restored a second time.
2395 CR2Spilled = CR3Spilled = CR4Spilled = false;
// Reload from the destination register the value was spilled to, and count
// it in the NumPEReloadVSR statistic.
2398 if (CSI[i].isSpilledToReg()) {
2399 DebugLoc DL;
2400 NumPEReloadVSR++;
2401 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2402 .addReg(CSI[i].getDstReg(), getKillRegState(true));
2403 } else {
2404 // Default behavior for non-CR saves.
2405 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2406 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2407 assert(I != MBB.begin() &&
2408 "loadRegFromStackSlot didn't insert any code!");
2412 // Insert in reverse order.
2413 if (AtStart)
2414 I = MBB.begin();
2415 else {
2416 I = BeforeI;
2417 ++I;
2421 // If we haven't yet restored the CRs, do so now.
2422 if (CR2Spilled || CR3Spilled || CR4Spilled) {
2423 bool is31 = needsFP(*MF);
2424 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2425 MBB, I, CSI, CSIIndex);
2428 return true;
2431 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2432 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2433 return false;
2434 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2435 MF.getSubtarget<PPCSubtarget>().isPPC64());