//===-- X86FrameLowering.cpp - X86 Frame Information ---------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86TargetMachine.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/ADT/SmallSet.h"

using namespace llvm;

// FIXME: completely move here.
extern cl::opt<bool> ForceStackAlign;

bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo &MMI = MF.getMMI();
  const TargetRegisterInfo *RI = TM.getRegisterInfo();

  return (DisableFramePointerElim(MF) ||
          RI->needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          MMI.callsUnwindInit());
}

static unsigned getSUBriOpcode(unsigned is64Bit, int64_t Imm) {
  if (is64Bit) {
    if (isInt<8>(Imm))
      return X86::SUB64ri8;
    return X86::SUB64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::SUB32ri8;
    return X86::SUB32ri;
  }
}

static unsigned getADDriOpcode(unsigned is64Bit, int64_t Imm) {
  if (is64Bit) {
    if (isInt<8>(Imm))
      return X86::ADD64ri8;
    return X86::ADD64ri32;
  } else {
    if (isInt<8>(Imm))
      return X86::ADD32ri8;
    return X86::ADD32ri;
  }
}

/// findDeadCallerSavedReg - Return a caller-saved register that isn't live
/// when it reaches the "return" instruction. We can then pop a stack object
/// to this register without worrying about clobbering it.
static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator &MBBI,
                                       const TargetRegisterInfo &TRI,
                                       bool Is64Bit) {
  const MachineFunction *MF = MBB.getParent();
  const Function *F = MF->getFunction();
  if (!F || MF->getMMI().callsEHReturn())
    return 0;

  static const unsigned CallerSavedRegs32Bit[] = {
    X86::EAX, X86::EDX, X86::ECX, 0
  };

  static const unsigned CallerSavedRegs64Bit[] = {
    X86::RAX, X86::RDX, X86::RCX, X86::RSI, X86::RDI,
    X86::R8,  X86::R9,  X86::R10, X86::R11, 0
  };

  unsigned Opc = MBBI->getOpcode();
  switch (Opc) {
  default: return 0;
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    SmallSet<unsigned, 8> Uses;
    for (unsigned i = 0, e = MBBI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (const unsigned *AsI = TRI.getOverlaps(Reg); *AsI; ++AsI)
        Uses.insert(*AsI);
    }

    const unsigned *CS = Is64Bit ? CallerSavedRegs64Bit : CallerSavedRegs32Bit;
    for (; *CS; ++CS)
      if (!Uses.count(*CS))
        return *CS;
  }
  }

  return 0;
}

/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes,
                  bool Is64Bit, const TargetInstrInfo &TII,
                  const TargetRegisterInfo &TRI) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc = isSub ?
    getSUBriOpcode(Is64Bit, Offset) :
    getADDriOpcode(Is64Bit, Offset);
  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = MBB.findDebugLoc(MBBI);

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    if (ThisVal == (Is64Bit ? 8 : 4)) {
      // Use push / pop instead.
      unsigned Reg = isSub
        ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64Bit);
      if (Reg) {
        unsigned Opc = isSub
          ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
          : (Is64Bit ? X86::POP64r : X86::POP32r);
        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc))
          .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub));
        if (isSub)
          MI->setFlag(MachineInstr::FrameSetup);
        Offset -= ThisVal;
        continue;
      }
    }

    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr)
        .addImm(ThisVal);
    if (isSub)
      MI->setFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
    Offset -= ThisVal;
  }
}
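
// Illustration (a sketch, not emitted verbatim; the actual register comes
// from findDeadCallerSavedReg): adjusting the stack by exactly one slot can
// use a one-byte push/pop instead of an ADD/SUB, e.g. on x86-64
//   subq $8, %rsp    ->    pushq %rax
//   addq $8, %rsp    ->    popq  %rcx   (any dead caller-saved register)
// Larger adjustments are split into chunks of at most 2^31 - 1 bytes so that
// each ADD/SUB immediate fits in 32 bits.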

/// mergeSPUpdatesUp - Merge a stack adjustment into the instruction just
/// before the iterator, if that instruction is an ADD/SUB of the stack
/// pointer.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}

/// mergeSPUpdatesDown - Merge a stack adjustment into the instruction just
/// after the iterator, if that instruction is an ADD/SUB of the stack
/// pointer.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  // FIXME: THIS ISN'T RUN!!!
  return;

  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = llvm::next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
    MBBI = NI;
  }
}

/// mergeSPUpdates - Check the instruction before/after the passed
/// instruction. If it is an ADD/SUB of the stack pointer, it is deleted and
/// the stack adjustment is returned as a positive value for ADD and a
/// negative value for SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
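
// Example of the merge above (illustrative numbers only): with
// doMergeWithPrevious == true, a prologue that would otherwise contain
//   subl $16, %esp
//   subl $24, %esp
// deletes the first SUB and returns -16; the caller folds that into its own
// pending adjustment and emits a single "subl $40, %esp".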

static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    unsigned Reg = II->first;

    if (Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}

void X86FrameLowering::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                 MCSymbol *Label,
                                                 unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo &MMI = MF.getMMI();

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = TM.getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate the number of bytes used for storing the return address.
  int stackGrowth = -TD->getPointerSize();

  // FIXME: This is a dirty hack. The code itself is a mess right now. It
  // should be rewritten from scratch and generalized sometime.

  // Determine maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // subsequently used.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    // another bug. I.e., one where we generate a prolog like this:
    //
    //     pushl  %ebp
    //     movl   %esp, %ebp
    //     pushl  %ebp
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(Label, CSDst, CSSrc));
  }
}

/// emitPrologue - Push callee-saved registers onto the stack, which
/// automatically adjusts the stack pointer. Adjust the stack pointer to
/// allocate space for local variables. Also emit labels used by the exception
/// handler to generate the exception handling frames.
void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB.
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineModuleInfo &MMI = MF.getMMI();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool needsFrameMoves = MMI.hasDebugInfo() ||
    Fn->needsUnwindTableEntry();
  uint64_t MaxAlign  = MFI->getMaxAlignment(); // Desired stack alignment.
  uint64_t StackSize = MFI->getStackSize();    // Number of bytes to allocate.
  bool HasFP = hasFP(MF);
  bool Is64Bit = STI.is64Bit();
  bool IsWin64 = STI.isTargetWin64();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();
  DebugLoc DL;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum SlotSize.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else if (MaxAlign < SlotSize)
      MaxAlign = SlotSize;
  }

  // Add RETADDR move area to callee saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta);

  // If this is x86-64, the Red Zone is not disabled, the function is a leaf
  // that uses up to 128 bytes of stack space, and it has no frame pointer,
  // calls, or dynamic allocas, then we do not need to adjust the stack
  // pointer (we fit in the Red Zone).
  if (Is64Bit && !Fn->hasFnAttr(Attribute::NoRedZone) &&
      !RegInfo->needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->adjustsStack() &&                      // No calls.
      !IsWin64) {                                  // Win64 has no Red Zone.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  }
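
  // Illustrative example (numbers assumed, not from a specific function): a
  // 64-bit leaf needing 40 bytes of locals keeps StackSize at 0 here and
  // addresses its locals below %rsp, e.g. "movl %edi, -40(%rsp)", instead of
  // emitting "subq $40, %rsp" / "addq $40, %rsp" around the body.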

  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL,
              TII.get(getSUBriOpcode(Is64Bit, -TailCallReturnAddrDelta)),
              StackPtr)
        .addReg(StackPtr)
        .addImm(-TailCallReturnAddrDelta)
        .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }

  // Mapping for machine moves:
  //
  //   DST: VirtualFP AND
  //        SRC: VirtualFP              => DW_CFA_def_cfa_offset
  //        ELSE                        => DW_CFA_def_cfa
  //
  //   SRC: VirtualFP AND
  //        DST: Register               => DW_CFA_def_cfa_register
  //
  //   ELSE
  //        OFFSET < 0                  => DW_CFA_offset_extended_sf
  //        REG < 64                    => DW_CFA_offset + Reg
  //        ELSE                        => DW_CFA_offset_extended
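  //
  // For example (a sketch of the mapping, not output of this function): a
  // machine move whose destination is VirtualFP and whose source is
  // (VirtualFP, -8) is emitted as DW_CFA_def_cfa_offset 8 (the CFA sits 8
  // bytes above the stack pointer), while a move whose source is VirtualFP
  // and whose destination is the EBP register becomes
  // DW_CFA_def_cfa_register %ebp.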

  std::vector<MachineMove> &Moves = MMI.getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  uint64_t NumBytes = 0;
  int stackGrowth = -TD->getPointerSize();

  if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1) / MaxAlign * MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot.
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark the place where EBP/RBP was saved.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP, 2 * stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabel, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Update EBP with the new base value...
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
      .addReg(StackPtr)
      .setMIFlag(MachineInstr::FrameSetup);

    if (needsFrameMoves) {
      // Mark effective beginning of when frame pointer becomes valid.
      MCSymbol *FrameLabel = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(FrameLabel);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabel, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign the stack.
    if (RegInfo->needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool PushedRegs = false;
  int StackOffset = 2 * stackGrowth;

  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    PushedRegs = true;
    ++MBBI;

    if (!HasFP && needsFrameMoves) {
      // Mark callee-saved push instruction.
      MCSymbol *Label = MMI.getContext().CreateTempSymbol();
      BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

      // Define the current CFA rule to use the provided offset.
      unsigned Ptr = StackSize ?
        MachineLocation::VirtualFP : StackPtr;
      MachineLocation SPDst(Ptr);
      MachineLocation SPSrc(Ptr, StackOffset);
      Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      StackOffset += stackGrowth;
    }
  }

  DL = MBB.findDebugLoc(MBBI);

  // If there is a SUB32ri of ESP immediately before this instruction, merge
  // the two. This can be the case when tail call elimination is enabled and
  // the callee has more arguments than the caller.
  NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

  // If there is an ADD32ri or SUB32ri of ESP immediately after this
  // instruction, merge the two instructions.
  mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

  // Adjust stack pointer: ESP -= numbytes.
  //
  // Windows and cygwin/mingw require a prologue helper routine when allocating
  // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw
  // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the
  // stack and adjust the stack pointer in one go. The 64-bit version of
  // __chkstk is only responsible for probing the stack. The 64-bit prologue is
  // responsible for adjusting the stack pointer. Touching the stack at 4K
  // increments is necessary to ensure that the guard pages used by the OS
  // virtual memory manager are allocated in correct sequence.
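  //
  // A sketch of the 32-bit sequence this produces for a large frame (the
  // symbol name depends on the target, as selected below):
  //   movl  $NumBytes, %eax
  //   call  __chkstk        ; probes and adjusts %esp itself
  // On 64-bit MSVC targets the call only probes, so emitSPUpdate runs
  // afterwards to actually subtract NumBytes from %rsp.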
  if (NumBytes >= 4096 && STI.isTargetCOFF() && !STI.isTargetEnvMacho()) {
    const char *StackProbeSymbol;
    bool isSPUpdateNeeded = false;

    if (Is64Bit) {
      if (STI.isTargetCygMing())
        StackProbeSymbol = "___chkstk";
      else {
        StackProbeSymbol = "__chkstk";
        isSPUpdateNeeded = true;
      }
    } else if (STI.isTargetCygMing())
      StackProbeSymbol = "_alloca";
    else
      StackProbeSymbol = "_chkstk";

    // Check whether EAX is livein for this function.
    bool isEAXAlive = isEAXLiveIn(MF);

    if (isEAXAlive) {
      // Sanity check that EAX is not livein for this function.
      // It should not be, so throw an assert.
      assert(!Is64Bit && "EAX is livein in x64 case!");

      // Save EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);
    }

    if (Is64Bit) {
      // Handle the 64-bit Windows ABI case where we need to call __chkstk.
      // Function prologue is responsible for adjusting the stack pointer.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX)
        .addImm(NumBytes);
    } else {
      // Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
      // We'll also use 4 already allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(isEAXAlive ? NumBytes - 4 : NumBytes);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::W64ALLOCA : X86::CALLpcrel32))
      .addExternalSymbol(StackProbeSymbol)
      .addReg(StackPtr, RegState::Define | RegState::Implicit)
      .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit);

    // MSVC x64's __chkstk needs to adjust %rsp.
    // FIXME: %rax preserves the offset and should be available.
    if (isSPUpdateNeeded)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                   TII, *RegInfo);

    if (isEAXAlive) {
      // Restore EAX.
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes)
    emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
                 TII, *RegInfo);

  if (((!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
    // Mark end of stack pointer adjustment.
    MCSymbol *Label = MMI.getContext().CreateTempSymbol();
    BuildMI(MBB, MBBI, DL, TII.get(X86::PROLOG_LABEL)).addSym(Label);

    if (!HasFP && NumBytes) {
      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              -StackSize + stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      } else {
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(Label, SPDst, SPSrc));
      }
    }

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    if (PushedRegs)
      emitCalleeSavedFrameMoves(MF, Label, HasFP ? FramePtr : StackPtr);
  }
}
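
// For reference, a hand-written sketch (not generated output) of the 32-bit
// frame-pointer prologue assembled by the logic above:
//   pushl %ebp
//   movl  %esp, %ebp
//   pushl %esi          ; callee-saved registers
//   subl  $NN, %esp     ; local area, NN taken from MachineFrameInfo
// with the matching teardown emitted by emitEpilogue below.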

void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  const X86InstrInfo &TII = *TM.getInstrInfo();
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  assert(MBBI != MBB.end() && "Returning block has no instructions");
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();
  bool Is64Bit = STI.is64Bit();
  unsigned StackAlign = getStackAlignment();
  unsigned SlotSize = RegInfo->getSlotSize();
  unsigned FramePtr = RegInfo->getFrameRegister(MF);
  unsigned StackPtr = RegInfo->getStackRegister();

  switch (RetOpcode) {
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
    break;  // These are ok.
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign  = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  // If we're forcing a stack realignment we can't rely on just the frame
  // info, we need to know the ABI stack alignment as well in case we
  // have a call out. Otherwise just make sure we have some alignment - we'll
  // go with the minimum.
  if (ForceStackAlign) {
    if (MFI->hasCalls())
      MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
    else
      MaxAlign = MaxAlign ? MaxAlign : 4;
  }

  if (hasFP(MF)) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (RegInfo->needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::POP32r && Opc != X86::POP64r && Opc != X86::DBG_VALUE &&
        !PI->getDesc().isTerminator())
      break;

    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last callee-saved
  // slot before popping them off! The same applies when the stack was
  // realigned.
  if (RegInfo->needsStackRealignment(MF)) {
    // We cannot use an LEA here, because the stack pointer was realigned; we
    // need to deallocate the local frame first.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI =
        addRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                     FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else {
      BuildMI(MBB, MBBI, DL,
              TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(FramePtr);
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII, *RegInfo);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNmi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
             RetOpcode == X86::TCRETURNmi64) {
    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
    // Tail call return: adjust the stack pointer and jump to callee.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII, *RegInfo);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
                                       ? X86::TAILJMPd : X86::TAILJMPd64));
      if (JumpTarget.isGlobal())
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
      MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
                                       ? X86::TAILJMPm : X86::TAILJMPm64));
      for (unsigned i = 0; i != 5; ++i)
        MIB.addOperand(MBBI->getOperand(i));
    } else if (RetOpcode == X86::TCRETURNri64) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    } else {
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr)).
        addReg(JumpTarget.getReg(), RegState::Kill);
    }

    MachineInstr *NewMI = prior(MBBI);
    for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i)
      NewMI->addOperand(MBBI->getOperand(i));

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = MBB.getLastNonDebugInstr();

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII, *RegInfo);
  }
}

void
X86FrameLowering::getInitialFrameState(std::vector<MachineMove> &Moves) const {
  // Calculate the number of bytes used for storing the return address.
  int stackGrowth = (STI.is64Bit() ? -8 : -4);
  const X86RegisterInfo *RI = TM.getRegisterInfo();

  // Initial state of the frame pointer is esp+stackGrowth.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(RI->getStackRegister(), stackGrowth);
  Moves.push_back(MachineMove(0, Dst, Src));

  // Add return address to move list.
  MachineLocation CSDst(RI->getStackRegister(), stackGrowth);
  MachineLocation CSSrc(RI->getRARegister());
  Moves.push_back(MachineMove(0, CSDst, CSSrc));
}
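
// In concrete terms (32-bit case, as a sketch): immediately after the "call"
// that entered the function, the CFA is %esp+4 and the return address sits at
// %esp, i.e. at CFA-4; the two moves above encode exactly that initial
// unwind state.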

int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF,
                                          int FI) const {
  const X86RegisterInfo *RI =
    static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (RI->needsStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      Offset += RI->getSlotSize();
    } else {
      unsigned Align = MFI->getObjectAlignment(FI);
      assert((-(Offset + StackSize)) % Align == 0);
      (void)Align; // Only used by the assert above.
      return Offset + StackSize;
    }
    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += RI->getSlotSize();

    // Skip the RETADDR move area.
    const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}
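
// Note on the offsets computed above: callers interpret the returned value as
// a displacement from the frame register (%ebp/%rbp) when one exists; in the
// realigned and frame-pointer-free paths, the "+ StackSize" terms rebase the
// same object offset onto the stack pointer instead.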

bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                 MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();

  unsigned SlotSize = STI.is64Bit() ? 8 : 4;
  unsigned FPReg = TRI->getFrameRegister(MF);
  unsigned CalleeFrameSize = 0;

  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  // Push GPRs. It increases frame size.
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    if (Reg == FPReg)
      // X86RegisterInfo::emitPrologue will handle spilling of frame register.
      continue;
    CalleeFrameSize += SlotSize;
    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill)
      .setMIFlag(MachineInstr::FrameSetup);
  }

  X86FI->setCalleeSavedFrameSize(CalleeFrameSize);

  // Spill XMM regs. X86 has no push/pop for XMM registers, so spill them to
  // the stack frame instead. Note that only the Win64 ABI might spill XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(),
                            RC, TRI);
  }

  return true;
}

bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                                 MachineBasicBlock::iterator MI,
                                        const std::vector<CalleeSavedInfo> &CSI,
                                          const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  DebugLoc DL = MBB.findDebugLoc(MI);

  MachineFunction &MF = *MBB.getParent();
  const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo();

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(),
                             RC, TRI);
  }

  // POP GPRs.
  unsigned FPReg = TRI->getFrameRegister(MF);
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;
    if (Reg == FPReg)
      // X86RegisterInfo::emitEpilogue will handle restoring of frame register.
      continue;
    BuildMI(MBB, MI, DL, TII.get(Opc), Reg);
  }

  return true;
}

void
X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const X86RegisterInfo *RegInfo = TM.getRegisterInfo();
  unsigned SlotSize = RegInfo->getSlotSize();

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // Create the RETURNADDR area below the incoming return-address slot.
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           (-1U*SlotSize)+TailCallReturnAddrDelta, true);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering();

    // Create a frame entry for the EBP register that must be saved.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta,
                                          true);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
    (void)FrameIdx; // Only used by the assert above.
  }
}

uint32_t X86FrameLowering::
getCompactUnwindEncoding(ArrayRef<MCCFIInstruction> Instrs,
                         int DataAlignmentFactor, bool IsEH) const {
  uint32_t Encoding = 0;
  int CFAOffset = 0;
  const TargetRegisterInfo *TRI = TM.getRegisterInfo();
  SmallVector<unsigned, 8> SavedRegs;
  int FramePointerReg = -1;

  for (ArrayRef<MCCFIInstruction>::const_iterator
         I = Instrs.begin(), E = Instrs.end(); I != E; ++I) {
    const MCCFIInstruction &Inst = *I;
    MCSymbol *Label = Inst.getLabel();

    // Ignore invalid labels.
    if (Label && !Label->isDefined()) continue;

    unsigned Operation = Inst.getOperation();
    if (Operation != MCCFIInstruction::Move &&
        Operation != MCCFIInstruction::RelMove)
      // FIXME: We can't handle this frame just yet.
      return 0;

    const MachineLocation &Dst = Inst.getDestination();
    const MachineLocation &Src = Inst.getSource();
    const bool IsRelative = (Operation == MCCFIInstruction::RelMove);

    if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
      if (Src.getReg() == MachineLocation::VirtualFP) {
        // DW_CFA_def_cfa_offset
        if (IsRelative)
          CFAOffset += Src.getOffset();
        else
          CFAOffset = -Src.getOffset();
      } // else DW_CFA_def_cfa
    } else {
      if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
        // DW_CFA_def_cfa_register
        FramePointerReg = Dst.getReg();
      } else {
        unsigned Reg = Src.getReg();
        int Offset = Dst.getOffset();
        if (IsRelative)
          Offset -= CFAOffset;
        Offset /= DataAlignmentFactor;

        if (Offset < 0) {
          // FIXME: Handle?
          // DW_CFA_offset_extended_sf
          return 0;
        } else if (Reg < 64) {
          // DW_CFA_offset + Reg
          SavedRegs.push_back(Reg);
        } else {
          // FIXME: Handle?
          // DW_CFA_offset_extended
          return 0;
        }
      }
    }
  }

  // Check if the offset is too big.
  if ((CFAOffset & 0xFF) != CFAOffset)
    return 0;

  // Bail if there are too many registers to encode.
  unsigned NumRegsToEncode = SavedRegs.size() - (FramePointerReg != -1 ? 1 : 0);
  if (NumRegsToEncode > 5) return 0;

  if (TRI->getLLVMRegNum(FramePointerReg, IsEH) != X86::EBP &&
      TRI->getLLVMRegNum(FramePointerReg, IsEH) != X86::RBP)
    // FIXME: Handle frameless version!
    return 0;

  Encoding |= 1 << 24;                  // EBP/RBP-based frame.
  Encoding |= (CFAOffset & 0xFF) << 16; // Size of the stack frame.
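
  // Layout of the EBP-frame encoding built here (as this function assumes
  // it): bit 24 selects the frame-pointer mode, bits 16-23 hold the CFA
  // offset (the stack-frame size), and the loop below packs up to five saved
  // registers as 3-bit compact-unwind register numbers starting at bit 0.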

  unsigned Idx = 0;
  for (SmallVectorImpl<unsigned>::iterator
         I = SavedRegs.begin(), E = SavedRegs.end(); I != E; ++I) {
    if (*I == unsigned(FramePointerReg)) continue;

    int CURegNum = TRI->getCompactUnwindRegNum(*I, IsEH);
    if (CURegNum == -1) return 0;

    Encoding |= (CURegNum & 0x7) << (Idx++ * 3);
  }

  return Encoding;
}