//===- X86RegisterInfo.cpp - X86 Register Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetRegisterInfo class.
// This file is responsible for the frame pointer elimination optimization
// on X86.
//
//===----------------------------------------------------------------------===//
#include "X86RegisterInfo.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLocation.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;
static cl::opt<bool>
StrictIndexRegclass("strict-index-regclass",
                    cl::desc("Use a special register class to avoid letting SP "
                             "be used as an index"));
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
                                 const TargetInstrInfo &tii)
  : X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKDOWN64 :
                         X86::ADJCALLSTACKDOWN32,
                       tm.getSubtarget<X86Subtarget>().is64Bit() ?
                         X86::ADJCALLSTACKUP64 :
                         X86::ADJCALLSTACKUP32),
    TM(tm), TII(tii) {
  // Cache some information.
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  Is64Bit = Subtarget->is64Bit();
  IsWin64 = Subtarget->isTargetWin64();
  StackAlign = TM.getFrameInfo()->getStackAlignment();
  SlotSize = Is64Bit ? 8 : 4;
  StackPtr = Is64Bit ? X86::RSP : X86::ESP;
  FramePtr = Is64Bit ? X86::RBP : X86::EBP;
}
// getDwarfRegNum - This function maps LLVM register identifiers to the
// Dwarf specific numbering, used in debug info and exception tables.
int X86RegisterInfo::getDwarfRegNum(unsigned RegNo, bool isEH) const {
  const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>();
  unsigned Flavour = DWARFFlavour::X86_64;

  if (!Subtarget->is64Bit()) {
    if (Subtarget->isTargetDarwin()) {
      if (isEH)
        Flavour = DWARFFlavour::X86_32_DarwinEH;
      else
        Flavour = DWARFFlavour::X86_32_Generic;
    } else if (Subtarget->isTargetCygMing()) {
      // Unsupported by now, just a quick fallback.
      Flavour = DWARFFlavour::X86_32_Generic;
    } else {
      Flavour = DWARFFlavour::X86_32_Generic;
    }
  }

  return X86GenRegisterInfo::getDwarfRegNumFull(RegNo, Flavour);
}
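// Note (editorial sketch, not part of the upstream logic): the same LLVM
// register can map to different DWARF numbers depending on the flavour
// chosen above; 32-bit Darwin EH tables historically used a numbering in
// which some registers (notably EBP/ESP) differ from the generic 32-bit
// numbering, which is why the flavour must be picked before the
// getDwarfRegNumFull() lookup.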
// getX86RegNum - This function maps LLVM register identifiers to their X86
// specific numbering, which is used in various places encoding instructions.
unsigned X86RegisterInfo::getX86RegNum(unsigned RegNo) {
  switch(RegNo) {
  case X86::RAX: case X86::EAX: case X86::AX: case X86::AL: return N86::EAX;
  case X86::RCX: case X86::ECX: case X86::CX: case X86::CL: return N86::ECX;
  case X86::RDX: case X86::EDX: case X86::DX: case X86::DL: return N86::EDX;
  case X86::RBX: case X86::EBX: case X86::BX: case X86::BL: return N86::EBX;
  case X86::RSP: case X86::ESP: case X86::SP: case X86::SPL: case X86::AH:
    return N86::ESP;
  case X86::RBP: case X86::EBP: case X86::BP: case X86::BPL: case X86::CH:
    return N86::EBP;
  case X86::RSI: case X86::ESI: case X86::SI: case X86::SIL: case X86::DH:
    return N86::ESI;
  case X86::RDI: case X86::EDI: case X86::DI: case X86::DIL: case X86::BH:
    return N86::EDI;

  case X86::R8:  case X86::R8D:  case X86::R8W:  case X86::R8B:
    return N86::EAX;
  case X86::R9:  case X86::R9D:  case X86::R9W:  case X86::R9B:
    return N86::ECX;
  case X86::R10: case X86::R10D: case X86::R10W: case X86::R10B:
    return N86::EDX;
  case X86::R11: case X86::R11D: case X86::R11W: case X86::R11B:
    return N86::EBX;
  case X86::R12: case X86::R12D: case X86::R12W: case X86::R12B:
    return N86::ESP;
  case X86::R13: case X86::R13D: case X86::R13W: case X86::R13B:
    return N86::EBP;
  case X86::R14: case X86::R14D: case X86::R14W: case X86::R14B:
    return N86::ESI;
  case X86::R15: case X86::R15D: case X86::R15W: case X86::R15B:
    return N86::EDI;

  case X86::ST0: case X86::ST1: case X86::ST2: case X86::ST3:
  case X86::ST4: case X86::ST5: case X86::ST6: case X86::ST7:
    return RegNo-X86::ST0;

  case X86::XMM0: case X86::XMM8: case X86::MM0:
    return 0;
  case X86::XMM1: case X86::XMM9: case X86::MM1:
    return 1;
  case X86::XMM2: case X86::XMM10: case X86::MM2:
    return 2;
  case X86::XMM3: case X86::XMM11: case X86::MM3:
    return 3;
  case X86::XMM4: case X86::XMM12: case X86::MM4:
    return 4;
  case X86::XMM5: case X86::XMM13: case X86::MM5:
    return 5;
  case X86::XMM6: case X86::XMM14: case X86::MM6:
    return 6;
  case X86::XMM7: case X86::XMM15: case X86::MM7:
    return 7;

  default:
    assert(isVirtualRegister(RegNo) && "Unknown physical register!");
    llvm_unreachable("Register allocator hasn't allocated reg correctly yet!");
    return 0;
  }
}
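// A short sketch of why R8-R15 reuse the numbers of EAX-EDI above: the ModRM
// and SIB bytes only carry 3 bits per register field, so e.g. R9 encodes as
// 001b (the same value as RCX/ECX) and the distinguishing fourth bit travels
// separately in the REX prefix. That REX bit is the code emitter's problem,
// not this function's.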
const TargetRegisterClass *
X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
                                          const TargetRegisterClass *B,
                                          unsigned SubIdx) const {
  switch (SubIdx) {
  default: return 0;
  case 1:
    // 8-bit
    if (B == &X86::GR8RegClass) {
      if (A->getSize() == 2 || A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR8_ABCD_LRegClass || B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    } else if (B == &X86::GR8_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_NOREXRegClass;
      else if (A == &X86::GR16_ABCDRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case 2:
    // 8-bit hi
    if (B == &X86::GR8_ABCD_HRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
      else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
               A == &X86::GR16_NOREXRegClass)
        return &X86::GR16_ABCDRegClass;
    }
    break;
  case 3:
    // 16-bit
    if (B == &X86::GR16RegClass) {
      if (A->getSize() == 4 || A->getSize() == 8)
        return A;
    } else if (B == &X86::GR16_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_ABCDRegClass ||
               A == &X86::GR32_NOREXRegClass || A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_ABCDRegClass;
    } else if (B == &X86::GR16_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
      else if (A == &X86::GR32RegClass || A == &X86::GR32_NOREXRegClass ||
               A == &X86::GR32_NOSPRegClass)
        return &X86::GR32_NOREXRegClass;
      else if (A == &X86::GR32_ABCDRegClass)
        return &X86::GR32_ABCDRegClass;
    }
    break;
  case 4:
    // 32-bit
    if (B == &X86::GR32RegClass || B == &X86::GR32_NOSPRegClass) {
      if (A->getSize() == 8)
        return A;
    } else if (B == &X86::GR32_ABCDRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_ABCDRegClass ||
          A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass ||
          A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_ABCDRegClass;
    } else if (B == &X86::GR32_NOREXRegClass) {
      if (A == &X86::GR64RegClass || A == &X86::GR64_NOREXRegClass ||
          A == &X86::GR64_NOSPRegClass || A == &X86::GR64_NOREX_NOSPRegClass)
        return &X86::GR64_NOREXRegClass;
      else if (A == &X86::GR64_ABCDRegClass)
        return &X86::GR64_ABCDRegClass;
    }
    break;
  }
  return 0;
}
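// Usage sketch (assuming the sub-index numbering used above, where 1 is the
// low 8-bit sub-register): getMatchingSuperRegClass(&X86::GR32RegClass,
// &X86::GR8_NOREXRegClass, 1) yields &X86::GR32_NOREXRegClass, i.e. the
// largest subclass of GR32 whose low byte can be taken without needing a
// REX prefix.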
const TargetRegisterClass *X86RegisterInfo::
getPointerRegClass(unsigned Kind) const {
  switch (Kind) {
  default: llvm_unreachable("Unexpected Kind in getPointerRegClass!");
  case 0: // Normal GPRs.
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64RegClass;
    return &X86::GR32RegClass;
  case 1: // Normal GPRs except the stack pointer (for encoding reasons).
    if (!StrictIndexRegclass) {
      if (TM.getSubtarget<X86Subtarget>().is64Bit())
        return &X86::GR64RegClass;
      return &X86::GR32RegClass;
    }
    if (TM.getSubtarget<X86Subtarget>().is64Bit())
      return &X86::GR64_NOSPRegClass;
    return &X86::GR32_NOSPRegClass;
  }
}
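// The "encoding reasons" above, spelled out: in the SIB byte the index field
// value 100b (ESP/RSP) means "no index register", so the stack pointer can
// never be encoded as an index. Kind == 1 therefore hands out the *_NOSP
// register classes when -strict-index-regclass is set.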
const TargetRegisterClass *
X86RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (RC == &X86::CCRRegClass) {
    if (Is64Bit)
      return &X86::GR64RegClass;
    else
      return &X86::GR32RegClass;
  }
  return RC;
}
const unsigned *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  bool callsEHReturn = false;

  if (MF) {
    const MachineFrameInfo *MFI = MF->getFrameInfo();
    const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
    callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
  }

  static const unsigned CalleeSavedRegs32Bit[] = {
    X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
  };

  static const unsigned CalleeSavedRegs32EHRet[] = {
    X86::EAX, X86::EDX, X86::ESI, X86::EDI, X86::EBX, X86::EBP, 0
  };

  static const unsigned CalleeSavedRegs64Bit[] = {
    X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegs64EHRet[] = {
    X86::RAX, X86::RDX, X86::RBX, X86::R12,
    X86::R13, X86::R14, X86::R15, X86::RBP, 0
  };

  static const unsigned CalleeSavedRegsWin64[] = {
    X86::RBX,   X86::RBP,   X86::RDI,   X86::RSI,
    X86::R12,   X86::R13,   X86::R14,   X86::R15,
    X86::XMM6,  X86::XMM7,  X86::XMM8,  X86::XMM9,
    X86::XMM10, X86::XMM11, X86::XMM12, X86::XMM13,
    X86::XMM14, X86::XMM15, 0
  };

  if (Is64Bit) {
    if (IsWin64)
      return CalleeSavedRegsWin64;
    return (callsEHReturn ? CalleeSavedRegs64EHRet : CalleeSavedRegs64Bit);
  }
  return (callsEHReturn ? CalleeSavedRegs32EHRet : CalleeSavedRegs32Bit);
}
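// Note: the Win64 list above is noticeably longer because the Win64 ABI makes
// XMM6-XMM15 callee-saved in addition to the GPRs, unlike the SysV x86-64 ABI
// where all XMM registers are caller-saved.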
const TargetRegisterClass * const*
X86RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
  bool callsEHReturn = false;

  if (MF) {
    const MachineFrameInfo *MFI = MF->getFrameInfo();
    const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
    callsEHReturn = (MMI ? MMI->callsEHReturn() : false);
  }

  static const TargetRegisterClass * const CalleeSavedRegClasses32Bit[] = {
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass, 0
  };

  static const TargetRegisterClass * const CalleeSavedRegClasses32EHRet[] = {
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass,
    &X86::GR32RegClass, &X86::GR32RegClass, 0
  };

  static const TargetRegisterClass * const CalleeSavedRegClasses64Bit[] = {
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass, 0
  };

  static const TargetRegisterClass * const CalleeSavedRegClasses64EHRet[] = {
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass,
    &X86::GR64RegClass, &X86::GR64RegClass, 0
  };

  static const TargetRegisterClass * const CalleeSavedRegClassesWin64[] = {
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::GR64RegClass,  &X86::GR64RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass,
    &X86::VR128RegClass, &X86::VR128RegClass, 0
  };

  if (Is64Bit) {
    if (IsWin64)
      return CalleeSavedRegClassesWin64;
    return (callsEHReturn ?
            CalleeSavedRegClasses64EHRet : CalleeSavedRegClasses64Bit);
  }
  return (callsEHReturn ?
          CalleeSavedRegClasses32EHRet : CalleeSavedRegClasses32Bit);
}
BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());

  // Set the stack-pointer register and its aliases as reserved.
  Reserved.set(X86::RSP);
  Reserved.set(X86::ESP);
  Reserved.set(X86::SP);
  Reserved.set(X86::SPL);

  // Set the frame-pointer register and its aliases as reserved if needed.
  if (hasFP(MF)) {
    Reserved.set(X86::RBP);
    Reserved.set(X86::EBP);
    Reserved.set(X86::BP);
    Reserved.set(X86::BPL);
  }

  // Mark the x87 stack registers as reserved, since they don't
  // behave normally with respect to liveness. We don't fully
  // model the effects of x87 stack pushes and pops after
  // stackification.
  Reserved.set(X86::ST0);
  Reserved.set(X86::ST1);
  Reserved.set(X86::ST2);
  Reserved.set(X86::ST3);
  Reserved.set(X86::ST4);
  Reserved.set(X86::ST5);
  Reserved.set(X86::ST6);
  Reserved.set(X86::ST7);

  return Reserved;
}
//===----------------------------------------------------------------------===//
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//

static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
  unsigned MaxAlign = 0;

  for (int i = FFI->getObjectIndexBegin(),
         e = FFI->getObjectIndexEnd(); i != e; ++i) {
    if (FFI->isDeadObjectIndex(i))
      continue;

    unsigned Align = FFI->getObjectAlignment(i);
    MaxAlign = std::max(MaxAlign, Align);
  }

  return MaxAlign;
}
// hasFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  const MachineModuleInfo *MMI = MFI->getMachineModuleInfo();

  return (NoFramePointerElim ||
          needsStackRealignment(MF) ||
          MFI->hasVarSizedObjects() ||
          MFI->isFrameAddressTaken() ||
          MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
          (MMI && MMI->callsUnwindInit()));
}

bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // FIXME: Currently we don't support stack realignment for functions with
  //        variable-sized allocas.
  return (RealignStack &&
          (MFI->getMaxAlignment() > StackAlign &&
           !MFI->hasVarSizedObjects()));
}

bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
  return !MF.getFrameInfo()->hasVarSizedObjects();
}

bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
                                           int &FrameIdx) const {
  if (Reg == FramePtr && hasFP(MF)) {
    FrameIdx = MF.getFrameInfo()->getObjectIndexBegin();
    return true;
  }
  return false;
}
int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
  uint64_t StackSize = MFI->getStackSize();

  if (needsStackRealignment(MF)) {
    if (FI < 0)
      // Skip the saved EBP.
      Offset += SlotSize;
    else {
      unsigned Align = MFI->getObjectAlignment(FI);
      assert((-(Offset + StackSize)) % Align == 0);
      Align = 0;  // Keep 'Align' referenced in release builds.
      return Offset + StackSize;
    }

    // FIXME: Support tail calls
  } else {
    if (!hasFP(MF))
      return Offset + StackSize;

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0) Offset -= TailCallReturnAddrDelta;
  }

  return Offset;
}
void X86RegisterInfo::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  if (!hasReservedCallFrame(MF)) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'.
    // TODO: consider using push / pop instead of sub + store / add.
    MachineInstr *Old = I;
    uint64_t Amount = Old->getOperand(0).getImm();
    if (Amount != 0) {
      // We need to keep the stack aligned properly. To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = (Amount+StackAlign-1)/StackAlign*StackAlign;

      MachineInstr *New = 0;
      if (Old->getOpcode() == getCallFrameSetupOpcode()) {
        New = BuildMI(MF, Old->getDebugLoc(),
                      TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
                      StackPtr).addReg(StackPtr).addImm(Amount);
      } else {
        assert(Old->getOpcode() == getCallFrameDestroyOpcode());
        // Factor out the amount the callee already popped.
        uint64_t CalleeAmt = Old->getOperand(1).getImm();
        Amount -= CalleeAmt;
        if (Amount) {
          unsigned Opc = (Amount < 128) ?
            (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
            (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri);
          New = BuildMI(MF, Old->getDebugLoc(), TII.get(Opc), StackPtr)
            .addReg(StackPtr).addImm(Amount);
        }
      }

      if (New) {
        // The EFLAGS implicit def is dead.
        New->getOperand(3).setIsDead();

        // Replace the pseudo instruction with a new instruction...
        MBB.insert(I, New);
      }
    }
  } else if (I->getOpcode() == getCallFrameDestroyOpcode()) {
    // If we are performing frame pointer elimination and if the callee pops
    // something off the stack pointer, add it back. We do this until we have
    // more advanced stack pointer tracking ability.
    if (uint64_t CalleeAmt = I->getOperand(1).getImm()) {
      unsigned Opc = (CalleeAmt < 128) ?
        (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
        (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri);
      MachineInstr *Old = I;
      MachineInstr *New =
        BuildMI(MF, Old->getDebugLoc(), TII.get(Opc),
                StackPtr).addReg(StackPtr).addImm(CalleeAmt);

      // The EFLAGS implicit def is dead.
      New->getOperand(3).setIsDead();
      MBB.insert(I, New);
    }
  }

  MBB.erase(I);
}
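// A quick arithmetic sketch of the rounding used above: with StackAlign == 16
// and Amount == 20, (20 + 16 - 1) / 16 * 16 == 35 / 16 * 16 == 2 * 16 == 32,
// so a call needing 20 bytes of outgoing arguments reserves 32 bytes and the
// stack pointer stays 16-byte aligned across the call sequence.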
void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                          int SPAdj, RegScavenger *RS) const {
  assert(SPAdj == 0 && "Unexpected");

  unsigned i = 0;
  MachineInstr &MI = *II;
  MachineFunction &MF = *MI.getParent()->getParent();
  while (!MI.getOperand(i).isFI()) {
    ++i;
    assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
  }

  int FrameIndex = MI.getOperand(i).getIndex();

  unsigned BasePtr;
  if (needsStackRealignment(MF))
    BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
  else
    BasePtr = (hasFP(MF) ? FramePtr : StackPtr);

  // This must be part of a four operand memory reference. Replace the
  // FrameIndex with the base register. Add an offset to the offset.
  MI.getOperand(i).ChangeToRegister(BasePtr, false);

  // Now add the frame object offset to the offset from EBP.
  if (MI.getOperand(i+3).isImm()) {
    // Offset is a 32-bit integer.
    int Offset = getFrameIndexOffset(MF, FrameIndex) +
      (int)(MI.getOperand(i+3).getImm());

    MI.getOperand(i+3).ChangeToImmediate(Offset);
  } else {
    // Offset is symbolic. This is extremely rare.
    uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
      (uint64_t)MI.getOperand(i+3).getOffset();
    MI.getOperand(i+3).setOffset(Offset);
  }
}
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                      RegScavenger *RS) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Calculate and set max stack object alignment early, so we can decide
  // whether we will need stack realignment (and thus FP).
  unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
                               calculateMaxStackAlignment(MFI));

  MFI->setMaxAlignment(MaxAlign);

  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0) {
    // Create a RETURNADDR area below the incoming return address, so the
    // return address can be moved there before a tail call whose callee
    // needs more argument stack than the caller provides.
    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
                           (-1*SlotSize)+TailCallReturnAddrDelta);
  }

  if (hasFP(MF)) {
    assert((TailCallReturnAddrDelta <= 0) &&
           "The Delta should always be zero or negative");
    const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();

    // Create a frame entry for the EBP register that must be saved.
    int FrameIdx = MFI->CreateFixedObject(SlotSize,
                                          -(int)SlotSize +
                                          TFI.getOffsetOfLocalArea() +
                                          TailCallReturnAddrDelta);
    assert(FrameIdx == MFI->getObjectIndexBegin() &&
           "Slot for EBP register must be last in order to be found!");
  }
}
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
static
void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                  unsigned StackPtr, int64_t NumBytes, bool Is64Bit,
                  const TargetInstrInfo &TII) {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  unsigned Opc = isSub
    ? ((Offset < 128) ?
       (Is64Bit ? X86::SUB64ri8 : X86::SUB32ri8) :
       (Is64Bit ? X86::SUB64ri32 : X86::SUB32ri))
    : ((Offset < 128) ?
       (Is64Bit ? X86::ADD64ri8 : X86::ADD32ri8) :
       (Is64Bit ? X86::ADD64ri32 : X86::ADD32ri));
  uint64_t Chunk = (1LL << 31) - 1;
  DebugLoc DL = (MBBI != MBB.end() ? MBBI->getDebugLoc() :
                 DebugLoc::getUnknownLoc());

  while (Offset) {
    uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(StackPtr).addImm(ThisVal);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();

    Offset -= ThisVal;
  }
}
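// The Chunk limit above exists because the largest immediate an ADD/SUB on
// the stack pointer can carry is a signed 32-bit value; an adjustment of,
// say, 3 GiB is therefore split into a (2^31 - 1)-byte update followed by
// the remainder, rather than emitted as a single instruction.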
// mergeSPUpdatesUp - Merge a stack adjustment with the instruction just
// before the iterator.
static
void mergeSPUpdatesUp(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
                      unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.begin()) return;

  MachineBasicBlock::iterator PI = prior(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += PI->getOperand(2).getImm();
    MBB.erase(PI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= PI->getOperand(2).getImm();
    MBB.erase(PI);
  }
}
// mergeSPUpdatesDown - Merge a stack adjustment with the instruction just
// after the iterator.
static
void mergeSPUpdatesDown(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator &MBBI,
                        unsigned StackPtr, uint64_t *NumBytes = NULL) {
  if (MBBI == MBB.end()) return;

  MachineBasicBlock::iterator NI = next(MBBI);
  if (NI == MBB.end()) return;

  unsigned Opc = NI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes -= NI->getOperand(2).getImm();
    MBB.erase(NI);
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             NI->getOperand(0).getReg() == StackPtr) {
    if (NumBytes)
      *NumBytes += NI->getOperand(2).getImm();
    MBB.erase(NI);
  }
}
/// mergeSPUpdates - Checks the instruction before/after the passed
/// instruction. If it is an ADD/SUB instruction, it is deleted and its
/// stack adjustment is returned: a positive value for an ADD, a negative
/// value for a SUB.
static int mergeSPUpdates(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI,
                          unsigned StackPtr,
                          bool doMergeWithPrevious) {
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  int Offset = 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
  MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
  unsigned Opc = PI->getOpcode();
  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr) {
    Offset += PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    Offset -= PI->getOperand(2).getImm();
    MBB.erase(PI);
    if (!doMergeWithPrevious) MBBI = NI;
  }

  return Offset;
}
void X86RegisterInfo::emitCalleeSavedFrameMoves(MachineFunction &MF,
                                                unsigned LabelId,
                                                unsigned FramePtr) const {
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  if (!MMI) return;

  // Add callee saved registers to move list.
  const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
  if (CSI.empty()) return;

  std::vector<MachineMove> &Moves = MMI->getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  bool HasFP = hasFP(MF);

  // Calculate the number of bytes used for return address storing.
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  // FIXME: This is a dirty hack. The code itself is pretty messy right now.
  // It should be rewritten from scratch and generalized at some point.

  // Determine the maximum offset (minimum due to stack growth).
  int64_t MaxOffset = 0;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I)
    MaxOffset = std::min(MaxOffset,
                         MFI->getObjectOffset(I->getFrameIdx()));

  // Calculate offsets.
  int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth;
  for (std::vector<CalleeSavedInfo>::const_iterator
         I = CSI.begin(), E = CSI.end(); I != E; ++I) {
    int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
    unsigned Reg = I->getReg();
    Offset = MaxOffset - Offset + saveAreaOffset;

    // Don't output a new machine move if we're re-saving the frame
    // pointer. This happens when the PrologEpilogInserter has inserted an extra
    // "PUSH" of the frame pointer -- the "emitPrologue" method automatically
    // generates one when frame pointers are used. If we generate a "machine
    // move" for this extra "PUSH", the linker will lose track of the fact that
    // the frame pointer should have the value of the first "PUSH" when it's
    // trying to unwind.
    //
    // FIXME: This looks inelegant. It's possibly correct, but it's covering up
    //        another bug. I.e., one where we generate a prolog like this:
    //
    //   pushl  %ebp
    //   movl   %esp, %ebp
    //   pushl  %ebp
    //   pushl  %esi
    //   ...
    //
    // The immediate re-push of EBP is unnecessary. At the least, it's an
    // optimization bug. EBP can be used as a scratch register in certain
    // cases, but probably not when we have a frame pointer.
    if (HasFP && FramePtr == Reg)
      continue;

    MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
    MachineLocation CSSrc(Reg);
    Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
  }
}
void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
  MachineBasicBlock &MBB = MF.front();   // Prolog goes in entry BB.
  MachineFrameInfo *MFI = MF.getFrameInfo();
  const Function *Fn = MF.getFunction();
  const X86Subtarget *Subtarget = &MF.getTarget().getSubtarget<X86Subtarget>();
  MachineModuleInfo *MMI = MFI->getMachineModuleInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = MBB.begin();
  bool needsFrameMoves = (MMI && MMI->hasDebugInfo()) ||
                          !Fn->doesNotThrow() ||
                          UnwindTablesMandatory;
  bool HasFP = hasFP(MF);
  DebugLoc DL;

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();

  // Get the desired stack alignment.
  uint64_t MaxAlign = MFI->getMaxAlignment();

  // Add the RETADDR move area to the callee-saved frame size.
  int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
  if (TailCallReturnAddrDelta < 0)
    X86FI->setCalleeSavedFrameSize(
      X86FI->getCalleeSavedFrameSize() + (-TailCallReturnAddrDelta));

  // If this is x86-64 and the Red Zone is not disabled, if we are a leaf
  // function, and use up to 128 bytes of stack space, don't have a frame
  // pointer, calls, or dynamic alloca then we do not need to adjust the
  // stack pointer (we fit in the Red Zone).
  bool DisableRedZone = Fn->hasFnAttr(Attribute::NoRedZone);
  if (Is64Bit && !DisableRedZone &&
      !needsStackRealignment(MF) &&
      !MFI->hasVarSizedObjects() &&                // No dynamic alloca.
      !MFI->hasCalls() &&                          // No calls.
      !Subtarget->isTargetWin64()) {               // Win64 has no Red Zone.
    uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
    if (HasFP) MinSize += SlotSize;
    StackSize = std::max(MinSize,
                         StackSize > 128 ? StackSize - 128 : 0);
    MFI->setStackSize(StackSize);
  } else if (Subtarget->isTargetWin64()) {
    // We need to always allocate 32 bytes as register spill area.
    // FIXME: we might reuse these 32 bytes for leaf functions.
    StackSize += 32;
    MFI->setStackSize(StackSize);
  }

  // Insert stack pointer adjustment for later moving of return addr. Only
  // applies to tail call optimized functions where the callee argument stack
  // size is bigger than the caller's.
  if (TailCallReturnAddrDelta < 0) {
    MachineInstr *MI =
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::SUB64ri32 : X86::SUB32ri),
              StackPtr).addReg(StackPtr).addImm(-TailCallReturnAddrDelta);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }

  std::vector<MachineMove> &Moves = MMI->getFrameMoves();
  const TargetData *TD = MF.getTarget().getTargetData();
  int stackGrowth =
    (MF.getTarget().getFrameInfo()->getStackGrowthDirection() ==
     TargetFrameInfo::StackGrowsUp ?
     TD->getPointerSize() : -TD->getPointerSize());

  uint64_t NumBytes = 0;
  if (HasFP) {
    // Calculate the required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize();

    // Get the offset of the stack slot for the EBP register, which is
    // guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
    // Update the frame offset adjustment.
    MFI->setOffsetAdjustment(-NumBytes);

    // Save EBP/RBP into the appropriate stack slot...
    BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r))
      .addReg(FramePtr, RegState::Kill);

    if (needsFrameMoves) {
      // Mark the effective beginning of when the frame pointer becomes valid.
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);

      // Define the current CFA rule to use the provided offset.
      if (StackSize) {
        MachineLocation SPDst(MachineLocation::VirtualFP);
        MachineLocation SPSrc(MachineLocation::VirtualFP,
                              HasFP ? 2 * stackGrowth :
                                      -StackSize + stackGrowth);
        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
      } else {
        // FIXME: Verify & implement for FP
        MachineLocation SPDst(StackPtr);
        MachineLocation SPSrc(StackPtr, stackGrowth);
        Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
      }

      // Change the rule for the FramePtr to be an "offset" rule.
      MachineLocation FPDst(MachineLocation::VirtualFP, 2 * stackGrowth);
      MachineLocation FPSrc(FramePtr);
      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
    }

    // Update EBP with the new base value...
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr)
      .addReg(StackPtr);

    if (needsFrameMoves) {
      unsigned FrameLabelId = MMI->NextLabelID();
      BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(FrameLabelId);

      // Define the current CFA to use the EBP/RBP register.
      MachineLocation FPDst(FramePtr);
      MachineLocation FPSrc(MachineLocation::VirtualFP);
      Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
    }

    // Mark the FramePtr as live-in in every block except the entry.
    for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
         I != E; ++I)
      I->addLiveIn(FramePtr);

    // Realign the stack, if needed.
    if (needsStackRealignment(MF)) {
      MachineInstr *MI =
        BuildMI(MBB, MBBI, DL,
                TII.get(Is64Bit ? X86::AND64ri32 : X86::AND32ri),
                StackPtr).addReg(StackPtr).addImm(-MaxAlign);

      // The EFLAGS implicit def is dead.
      MI->getOperand(3).setIsDead();
    }
  } else {
    NumBytes = StackSize - X86FI->getCalleeSavedFrameSize();
  }

  // Skip the callee-saved push instructions.
  bool RegsSaved = false;
  while (MBBI != MBB.end() &&
         (MBBI->getOpcode() == X86::PUSH32r ||
          MBBI->getOpcode() == X86::PUSH64r)) {
    RegsSaved = true;
    ++MBBI;
  }

  if (RegsSaved && needsFrameMoves) {
    // Mark the end of the callee-saved push instructions.
    unsigned LabelId = MMI->NextLabelID();
    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);

    // Emit DWARF info specifying the offsets of the callee-saved registers.
    emitCalleeSavedFrameMoves(MF, LabelId, HasFP ? FramePtr : StackPtr);
  }

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // Adjust the stack pointer: ESP -= NumBytes.
  if (NumBytes >= 4096 && Subtarget->isTargetCygMing()) {
    // Check whether EAX is live-in for this function.
    bool isEAXAlive = false;
    for (MachineRegisterInfo::livein_iterator
           II = MF.getRegInfo().livein_begin(),
           EE = MF.getRegInfo().livein_end(); (II != EE) && !isEAXAlive; ++II) {
      unsigned Reg = II->first;
      isEAXAlive = (Reg == X86::EAX || Reg == X86::AX ||
                    Reg == X86::AH || Reg == X86::AL);
    }

    // The function prologue calls _alloca to probe the stack when allocating
    // more than 4k bytes in one go. Touching the stack at 4K increments is
    // necessary to ensure that the guard pages used by the OS virtual memory
    // manager are allocated in the correct sequence.
    if (!isEAXAlive) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca");
    } else {
      // Save EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r))
        .addReg(X86::EAX, RegState::Kill);

      // Allocate NumBytes-4 bytes on the stack. We'll also use the 4 already
      // allocated bytes for EAX.
      BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addImm(NumBytes - 4);
      BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("_alloca");

      // Restore EAX.
      MachineInstr *MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm),
                                              X86::EAX),
                                      StackPtr, false, NumBytes - 4);
      MBB.insert(MBBI, MI);
    }
  } else if (NumBytes) {
    // If there is a SUB32ri of ESP immediately before this instruction, merge
    // the two. This can be the case when tail call elimination is enabled and
    // the callee has more arguments than the caller.
    NumBytes -= mergeSPUpdates(MBB, MBBI, StackPtr, true);

    // If there is an ADD32ri or SUB32ri of ESP immediately after this
    // instruction, merge the two instructions.
    mergeSPUpdatesDown(MBB, MBBI, StackPtr, &NumBytes);

    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, TII);
  }

  if (!HasFP && needsFrameMoves) {
    // Mark the end of the stack pointer adjustment.
    unsigned LabelId = MMI->NextLabelID();
    BuildMI(MBB, MBBI, DL, TII.get(X86::DBG_LABEL)).addImm(LabelId);

    // Define the current CFA rule to use the provided offset.
    if (StackSize) {
      MachineLocation SPDst(MachineLocation::VirtualFP);
      MachineLocation SPSrc(MachineLocation::VirtualFP,
                            -StackSize + stackGrowth);
      Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
    } else {
      // FIXME: Verify & implement for FP
      MachineLocation SPDst(StackPtr);
      MachineLocation SPSrc(StackPtr, stackGrowth);
      Moves.push_back(MachineMove(LabelId, SPDst, SPSrc));
    }
  }
}
void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
                                   MachineBasicBlock &MBB) const {
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator MBBI = prior(MBB.end());
  unsigned RetOpcode = MBBI->getOpcode();
  DebugLoc DL = MBBI->getDebugLoc();

  switch (RetOpcode) {
  case X86::RET:
  case X86::RETI:
  case X86::TCRETURNdi:
  case X86::TCRETURNri:
  case X86::TCRETURNri64:
  case X86::TCRETURNdi64:
  case X86::EH_RETURN:
  case X86::EH_RETURN64:
  case X86::TAILJMPd:
  case X86::TAILJMPr:
  case X86::TAILJMPm: break;  // These are ok.
  default:
    llvm_unreachable("Can only insert epilog into returning blocks");
  }

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI->getStackSize();
  uint64_t MaxAlign = MFI->getMaxAlignment();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t NumBytes = 0;

  if (hasFP(MF)) {
    // Calculate the required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    if (needsStackRealignment(MF))
      FrameSize = (FrameSize + MaxAlign - 1)/MaxAlign*MaxAlign;

    NumBytes = FrameSize - CSSize;

    // Pop EBP.
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::POP64r : X86::POP32r), FramePtr);
  } else {
    NumBytes = StackSize - CSSize;
  }

  // Skip the callee-saved pop instructions.
  MachineBasicBlock::iterator LastCSPop = MBBI;
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = prior(MBBI);
    unsigned Opc = PI->getOpcode();
    if (Opc != X86::POP32r && Opc != X86::POP64r &&
        !PI->getDesc().isTerminator())
      break;
    --MBBI;
  }

  DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI->hasVarSizedObjects())
    mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);

  // If dynamic alloca is used, then reset ESP to point to the last
  // callee-saved slot before popping them off! The same applies when the
  // stack was realigned.
  if (needsStackRealignment(MF)) {
    // We cannot use LEA here, because the stack pointer was realigned. We
    // need to deallocate the local frame first.
    if (CSSize) {
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
      MBBI = prior(LastCSPop);
    }

    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(FramePtr);
  } else if (MFI->hasVarSizedObjects()) {
    if (CSSize) {
      unsigned Opc = Is64Bit ? X86::LEA64r : X86::LEA32r;
      MachineInstr *MI = addLeaRegOffset(BuildMI(MF, DL, TII.get(Opc), StackPtr),
                                         FramePtr, false, -CSSize);
      MBB.insert(MBBI, MI);
    } else
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
              StackPtr).addReg(FramePtr);
  } else {
    // Adjust the stack pointer back: ESP += NumBytes.
    if (NumBytes)
      emitSPUpdate(MBB, MBBI, StackPtr, NumBytes, Is64Bit, TII);
  }

  // We're returning from function via eh_return.
  if (RetOpcode == X86::EH_RETURN || RetOpcode == X86::EH_RETURN64) {
    MBBI = prior(MBB.end());
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    BuildMI(MBB, MBBI, DL,
            TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
            StackPtr).addReg(DestAddr.getReg());
  // Tail call return: adjust the stack pointer and jump to callee.
  } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
    MBBI = prior(MBB.end());
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    MachineOperand &StackAdjust = MBBI->getOperand(1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust the stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, MBBI, StackPtr, true);
      emitSPUpdate(MBB, MBBI, StackPtr, Offset, Is64Bit, TII);
    }

    // Jump to label or value in register.
    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64)
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (RetOpcode == X86::TCRETURNri64)
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
    else
      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr), JumpTarget.getReg());

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);
  } else if ((RetOpcode == X86::RET || RetOpcode == X86::RETI) &&
             (X86FI->getTCReturnAddrDelta() < 0)) {
    // Add the return addr area delta back since we are not tail calling.
    int delta = -1*X86FI->getTCReturnAddrDelta();
    MBBI = prior(MBB.end());

    // Check for possible merge with preceding ADD instruction.
    delta += mergeSPUpdates(MBB, MBBI, StackPtr, true);
    emitSPUpdate(MBB, MBBI, StackPtr, delta, Is64Bit, TII);
  }
}
unsigned X86RegisterInfo::getRARegister() const {
  if (Is64Bit)
    return X86::RIP;  // Should have dwarf #16.
  else
    return X86::EIP;  // Should have dwarf #8.
}
unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
  return hasFP(MF) ? FramePtr : StackPtr;
}
void X86RegisterInfo::getInitialFrameState(std::vector<MachineMove> &Moves)
                                                                  const {
  // Calculate the number of bytes used for return address storing.
  int stackGrowth = (Is64Bit ? -8 : -4);

  // Initial state of the frame pointer is esp+stackGrowth.
  MachineLocation Dst(MachineLocation::VirtualFP);
  MachineLocation Src(StackPtr, stackGrowth);
  Moves.push_back(MachineMove(0, Dst, Src));

  // Add the return address to the move list.
  MachineLocation CSDst(StackPtr, stackGrowth);
  MachineLocation CSSrc(getRARegister());
  Moves.push_back(MachineMove(0, CSDst, CSSrc));
}
unsigned X86RegisterInfo::getEHExceptionRegister() const {
  llvm_unreachable("What is the exception register");
  return 0;
}

unsigned X86RegisterInfo::getEHHandlerRegister() const {
  llvm_unreachable("What is the exception handler register");
  return 0;
}
// getX86SubSuperRegister - X86 utility function. It returns the sub or super
// register of a specific X86 register.
unsigned getX86SubSuperRegister(unsigned Reg, MVT VT, bool High) {
  switch (VT.getSimpleVT()) {
  default: return Reg;
  case MVT::i8:
    if (High) {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AH;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DH;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CH;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BH;
      }
    } else {
      switch (Reg) {
      default: return 0;
      case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
        return X86::AL;
      case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
        return X86::DL;
      case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
        return X86::CL;
      case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
        return X86::BL;
      case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
        return X86::SIL;
      case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
        return X86::DIL;
      case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
        return X86::BPL;
      case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
        return X86::SPL;
      case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
        return X86::R8B;
      case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
        return X86::R9B;
      case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
        return X86::R10B;
      case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
        return X86::R11B;
      case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
        return X86::R12B;
      case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
        return X86::R13B;
      case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
        return X86::R14B;
      case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
        return X86::R15B;
      }
    }
  case MVT::i16:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::AX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::DX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::CX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::BX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::SI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::DI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::BP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::SP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8W;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9W;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10W;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11W;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12W;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13W;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14W;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15W;
    }
  case MVT::i32:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::EAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::EDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::ECX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::EBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::ESI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::EDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::EBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::ESP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8D;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9D;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10D;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11D;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12D;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13D;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14D;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15D;
    }
  case MVT::i64:
    switch (Reg) {
    default: return Reg;
    case X86::AH: case X86::AL: case X86::AX: case X86::EAX: case X86::RAX:
      return X86::RAX;
    case X86::DH: case X86::DL: case X86::DX: case X86::EDX: case X86::RDX:
      return X86::RDX;
    case X86::CH: case X86::CL: case X86::CX: case X86::ECX: case X86::RCX:
      return X86::RCX;
    case X86::BH: case X86::BL: case X86::BX: case X86::EBX: case X86::RBX:
      return X86::RBX;
    case X86::SIL: case X86::SI: case X86::ESI: case X86::RSI:
      return X86::RSI;
    case X86::DIL: case X86::DI: case X86::EDI: case X86::RDI:
      return X86::RDI;
    case X86::BPL: case X86::BP: case X86::EBP: case X86::RBP:
      return X86::RBP;
    case X86::SPL: case X86::SP: case X86::ESP: case X86::RSP:
      return X86::RSP;
    case X86::R8B: case X86::R8W: case X86::R8D: case X86::R8:
      return X86::R8;
    case X86::R9B: case X86::R9W: case X86::R9D: case X86::R9:
      return X86::R9;
    case X86::R10B: case X86::R10W: case X86::R10D: case X86::R10:
      return X86::R10;
    case X86::R11B: case X86::R11W: case X86::R11D: case X86::R11:
      return X86::R11;
    case X86::R12B: case X86::R12W: case X86::R12D: case X86::R12:
      return X86::R12;
    case X86::R13B: case X86::R13W: case X86::R13D: case X86::R13:
      return X86::R13;
    case X86::R14B: case X86::R14W: case X86::R14D: case X86::R14:
      return X86::R14;
    case X86::R15B: case X86::R15W: case X86::R15D: case X86::R15:
      return X86::R15;
    }
  }

  return Reg;
}
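// Usage sketch: getX86SubSuperRegister(X86::EAX, MVT::i8, /*High=*/true)
// returns X86::AH, while getX86SubSuperRegister(X86::AL, MVT::i64,
// /*High=*/false) returns X86::RAX; the same helper walks both down to
// sub-registers and up to super-registers depending on the requested
// value type.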
#include "X86GenRegisterInfo.inc"

namespace {
  struct VISIBILITY_HIDDEN MSAC : public MachineFunctionPass {
    static char ID;
    MSAC() : MachineFunctionPass(&ID) {}

    virtual bool runOnMachineFunction(MachineFunction &MF) {
      MachineFrameInfo *FFI = MF.getFrameInfo();
      MachineRegisterInfo &RI = MF.getRegInfo();

      // Calculate the max stack alignment of all already allocated stack
      // objects.
      unsigned MaxAlign = calculateMaxStackAlignment(FFI);

      // Be over-conservative: scan over all vreg defs and find whether vector
      // registers are used. If yes, there is a probability that a vector
      // register will be spilled and thus the stack needs to be aligned
      // properly.
      for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
           RegNum < RI.getLastVirtReg(); ++RegNum)
        MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());

      if (FFI->getMaxAlignment() == MaxAlign)
        return false;

      FFI->setMaxAlignment(MaxAlign);
      return true;
    }

    virtual const char *getPassName() const {
      return "X86 Maximal Stack Alignment Calculator";
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
    }
  };

  char MSAC::ID = 0;
}

FunctionPass*
llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
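// Note (editorial sketch): the MSAC pass is meant to run before register
// allocation; a target wires it in by adding
// createX86MaxStackAlignmentCalculatorPass() to its pass pipeline (the X86
// target machine does this), after which emitPrologue() can rely on
// MFI->getMaxAlignment() when deciding whether the stack must be realigned.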