1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the pass which converts floating point instructions from
11 // pseudo registers into register stack instructions. This pass uses live
12 // variable information to indicate where the FPn registers are used and their
// lifetimes.
15 // The x87 hardware tracks liveness of the stack registers, so it is necessary
16 // to implement exact liveness tracking between basic blocks. The CFG edges are
17 // partitioned into bundles where the same FP registers must be live in
18 // identical stack positions. Instructions are inserted at the end of each basic
19 // block to rearrange the live registers to match the outgoing bundle.
21 // This approach avoids splitting critical edges at the potential cost of more
22 // live register shuffling instructions when critical edges are present.
24 //===----------------------------------------------------------------------===//
26 #define DEBUG_TYPE "x86-codegen"
28 #include "X86InstrInfo.h"
29 #include "llvm/ADT/DepthFirstIterator.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/CodeGen/EdgeBundles.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstrBuilder.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/InlineAsm.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/raw_ostream.h"
44 #include "llvm/Target/TargetInstrInfo.h"
45 #include "llvm/Target/TargetMachine.h"
49 STATISTIC(NumFXCH
, "Number of fxch instructions inserted");
50 STATISTIC(NumFP
, "Number of floating point instructions");
53 struct FPS
: public MachineFunctionPass
{
// Constructor. Hands the pass ID to MachineFunctionPass, makes sure the
// EdgeBundles pass is initialized in the global PassRegistry, and zero-fills
// the Stack[] and RegMap[] model arrays.
55 FPS() : MachineFunctionPass(ID
) {
56 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
57 // This is really only to keep valgrind quiet.
58 // The logic in isLive() is too much for it.
// (isLive() reads RegMap[] and then indexes Stack[] with the result, so both
// arrays are cleared.)
59 memset(Stack
, 0, sizeof(Stack
));
60 memset(RegMap
, 0, sizeof(RegMap
));
// Declare the analyses this pass depends on and keeps valid: EdgeBundles is
// required; MachineLoopInfo and MachineDominators are marked preserved by ID.
// The base class implementation is called last so it can add its own entries.
63 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
65 AU
.addRequired
<EdgeBundles
>();
66 AU
.addPreservedID(MachineLoopInfoID
);
67 AU
.addPreservedID(MachineDominatorsID
);
68 MachineFunctionPass::getAnalysisUsage(AU
);
71 virtual bool runOnMachineFunction(MachineFunction
&MF
);
// Human-readable name of this pass; always the fixed string literal below.
73 virtual const char *getPassName() const { return "X86 FP Stackifier"; }
76 const TargetInstrInfo
*TII
; // Machine instruction info.
78 // Two CFG edges are related if they leave the same block, or enter the same
79 // block. The transitive closure of an edge under this relation is a
80 // LiveBundle. It represents a set of CFG edges where the live FP stack
81 // registers must be allocated identically in the x87 stack.
83 // A LiveBundle is usually all the edges leaving a block, or all the edges
84 // entering a block, but it can contain more edges if critical edges are
// present.
87 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
88 // but the exact mapping of FP registers to stack slots is fixed later.
90 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
93 // Number of pre-assigned live registers in FixStack. This is 0 when the
94 // stack order has not yet been fixed.
97 // Assigned stack order for live-in registers.
98 // FixStack[i] == getStackEntry(i) for all i < FixCount.
99 unsigned char FixStack
[8];
101 LiveBundle() : Mask(0), FixCount(0) {}
103 // Have the live registers been assigned a stack order yet?
// True when Mask is zero (no live registers, so nothing to order) or when
// FixCount is non-zero (an order has already been recorded).
104 bool isFixed() const { return !Mask
|| FixCount
; }
107 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
108 // with no live FP registers.
109 SmallVector
<LiveBundle
, 8> LiveBundles
;
111 // The edge bundle analysis provides indices into the LiveBundles vector.
112 EdgeBundles
*Bundles
;
114 // Return a bitmask of FP registers in block's live-in list.
115 unsigned calcLiveInMask(MachineBasicBlock
*MBB
) {
117 for (MachineBasicBlock::livein_iterator I
= MBB
->livein_begin(),
118 E
= MBB
->livein_end(); I
!= E
; ++I
) {
119 unsigned Reg
= *I
- X86::FP0
;
126 // Partition all the CFG edges into LiveBundles.
127 void bundleCFG(MachineFunction
&MF
);
129 MachineBasicBlock
*MBB
; // Current basic block
131 // The hardware keeps track of how many FP registers are live, so we have
132 // to model that exactly. Usually, each live register corresponds to an
133 // FP<n> register, but when dealing with calls, returns, and inline
134 // assembly, it is sometimes necessary to have live scratch registers.
135 unsigned Stack
[8]; // FP<n> Registers in each stack slot...
136 unsigned StackTop
; // The current top of the FP stack.
139 NumFPRegs
= 16 // Including scratch pseudo-registers.
142 // For each live FP<n> register, point to its Stack[] entry.
143 // The first entries correspond to FP0-FP6, the rest are scratch registers
144 // used when we need slightly different live registers than what the
145 // register allocator thinks.
146 unsigned RegMap
[NumFPRegs
];
148 // Pending fixed registers - Inline assembly needs FP registers to appear
149 // in fixed stack slot positions. This is handled by copying FP registers
150 // to ST registers before the instruction, and copying back after the
// instruction.
153 // This is modeled with pending ST registers. NumPendingSTs is the number
154 // of ST registers (ST0-STn) we are tracking. PendingST[n] points to an FP
155 // register that holds the ST value. The ST registers are not moved into
156 // place until immediately before the instruction that needs them.
158 // It can happen that we need an ST register to be live when no FP register
161 // %ST0 = COPY %FP4<kill>
163 // When that happens, we allocate a scratch FP register to hold the ST
164 // value. That means every register in PendingST must be live.
166 unsigned NumPendingSTs
;
167 unsigned char PendingST
[8];
169 // Set up our stack model to match the incoming registers to MBB.
170 void setupBlockStack();
172 // Shuffle live registers to match the expectations of successor blocks.
173 void finishBlockStack();
175 void dumpStack() const {
176 dbgs() << "Stack contents:";
177 for (unsigned i
= 0; i
!= StackTop
; ++i
) {
178 dbgs() << " FP" << Stack
[i
];
179 assert(RegMap
[Stack
[i
]] == i
&& "Stack[] doesn't match RegMap[]!");
181 for (unsigned i
= 0; i
!= NumPendingSTs
; ++i
)
182 dbgs() << ", ST" << i
<< " in FP" << unsigned(PendingST
[i
]);
186 /// getSlot - Return the stack slot number a particular register number is
/// currently mapped to. This is a plain RegMap[] lookup: RegNo must be below
/// NumFPRegs (checked by assert only), and the returned slot is not compared
/// against StackTop here - isLive() performs that check.
188 unsigned getSlot(unsigned RegNo
) const {
189 assert(RegNo
< NumFPRegs
&& "Regno out of range!");
190 return RegMap
[RegNo
];
193 /// isLive - Is RegNo currently live in the stack?
/// A register counts as live only if its RegMap[] slot lies below StackTop
/// AND that Stack[] slot points back at the same register. The second test
/// rejects RegMap[] entries that no longer correspond to the stack contents.
194 bool isLive(unsigned RegNo
) const {
195 unsigned Slot
= getSlot(RegNo
);
196 return Slot
< StackTop
&& Stack
[Slot
] == RegNo
;
199 /// getScratchReg - Return an FP register that is not currently in use.
200 unsigned getScratchReg() {
201 for (int i
= NumFPRegs
- 1; i
>= 8; --i
)
204 llvm_unreachable("Ran out of scratch FP registers");
207 /// isScratchReg - Returns true if RegNo is a scratch FP register.
/// The accepted range is 9 .. NumFPRegs-1 (lower bound is exclusive).
/// NOTE(review): getScratchReg() scans candidates down to index 8 inclusive,
/// so register 8 can be handed out as a scratch register yet is rejected
/// here - confirm whether `>= 8` was intended.
208 bool isScratchReg(unsigned RegNo
) {
209 return RegNo
> 8 && RegNo
< NumFPRegs
;
212 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
213 unsigned getStackEntry(unsigned STi
) const {
215 report_fatal_error("Access past stack top!");
216 return Stack
[StackTop
-1-STi
];
219 /// getSTReg - Return the X86::ST(i) register which contains the specified
220 /// FP<RegNo> register.
/// Slots are counted from the bottom of the modelled stack while ST(i) is
/// counted from the top, hence the StackTop - 1 - slot conversion: the
/// register in slot StackTop-1 maps to X86::ST0. No liveness check is made
/// here; the arithmetic is unsigned.
221 unsigned getSTReg(unsigned RegNo
) const {
222 return StackTop
- 1 - getSlot(RegNo
) + llvm::X86::ST0
;
225 // pushReg - Push the specified FP<n> register onto the stack.
226 void pushReg(unsigned Reg
) {
227 assert(Reg
< NumFPRegs
&& "Register number out of range!");
229 report_fatal_error("Stack overflow!");
230 Stack
[StackTop
] = Reg
;
231 RegMap
[Reg
] = StackTop
++;
// isAtTop - True when RegNo's slot is the topmost occupied slot of the
// modelled stack (slot StackTop-1).
234 bool isAtTop(unsigned RegNo
) const { return getSlot(RegNo
) == StackTop
-1; }
// moveToTop - Make RegNo the top-of-stack register, both in the model
// (RegMap[] / Stack[]) and in the emitted code (an XCH_F inserted before I).
// Does nothing if RegNo is already on top.
//   RegNo - FP register number to bring to the top.
//   I     - insertion point; may be MBB->end(), in which case an empty
//           DebugLoc is used instead of the instruction's own.
235 void moveToTop(unsigned RegNo
, MachineBasicBlock::iterator I
) {
236 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
237 if (isAtTop(RegNo
)) return;
// Capture the ST register of RegNo before the model is updated; this is the
// operand of the exchange instruction emitted below.
239 unsigned STReg
= getSTReg(RegNo
);
240 unsigned RegOnTop
= getStackEntry(0);
242 // Swap the slots the regs are in.
243 std::swap(RegMap
[RegNo
], RegMap
[RegOnTop
]);
245 // Swap stack slot contents.
// After the RegMap swap, RegMap[RegOnTop] holds RegNo's old slot; it must be
// a valid (occupied) slot before it is used as an index into Stack[].
246 if (RegMap
[RegOnTop
] >= StackTop
)
247 report_fatal_error("Access past stack top!");
248 std::swap(Stack
[RegMap
[RegOnTop
]], Stack
[StackTop
-1]);
250 // Emit an fxch to update the runtime processor's version of the state.
251 BuildMI(*MBB
, I
, dl
, TII
->get(X86::XCH_F
)).addReg(STReg
);
// duplicateToTop - Push a copy of RegNo onto the stack under the name AsReg.
// The model is updated with pushReg(AsReg) and an LD_Frr of RegNo's ST
// register is inserted before I.
//   RegNo - FP register whose value is duplicated.
//   AsReg - FP register number that names the new top-of-stack entry.
//   I     - insertion point; compared against MBB->end() to pick a DebugLoc.
255 void duplicateToTop(unsigned RegNo
, unsigned AsReg
, MachineInstr
*I
) {
256 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
// STReg is computed before the push, i.e. relative to the stack depth the
// load instruction will see when it executes.
257 unsigned STReg
= getSTReg(RegNo
);
258 pushReg(AsReg
); // New register on top of stack
260 BuildMI(*MBB
, I
, dl
, TII
->get(X86::LD_Frr
)).addReg(STReg
);
263 /// popStackAfter - Pop the current value off of the top of the FP stack
264 /// after the specified instruction.
265 void popStackAfter(MachineBasicBlock::iterator
&I
);
267 /// freeStackSlotAfter - Free the specified register from the register
268 /// stack, so that it is no longer in a register. If the register is
269 /// currently at the top of the stack, we just pop the current instruction,
270 /// otherwise we store the current top-of-stack into the specified slot,
271 /// then pop the top of stack.
272 void freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned Reg
);
274 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
276 MachineBasicBlock::iterator
277 freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
);
279 /// Adjust the live registers to be the set in Mask.
280 void adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
);
282 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
283 /// st(0), FP reg FixStack[1] is st(1) etc.
284 void shuffleStackTop(const unsigned char *FixStack
, unsigned FixCount
,
285 MachineBasicBlock::iterator I
);
287 bool processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&MBB
);
289 void handleZeroArgFP(MachineBasicBlock::iterator
&I
);
290 void handleOneArgFP(MachineBasicBlock::iterator
&I
);
291 void handleOneArgFPRW(MachineBasicBlock::iterator
&I
);
292 void handleTwoArgFP(MachineBasicBlock::iterator
&I
);
293 void handleCompareFP(MachineBasicBlock::iterator
&I
);
294 void handleCondMovFP(MachineBasicBlock::iterator
&I
);
295 void handleSpecialFP(MachineBasicBlock::iterator
&I
);
297 // Check if a COPY instruction is using FP registers.
// Operand 0 is read as the destination and operand 1 as the source; the copy
// counts as an FP copy if either register belongs to X86::RFP80RegClass.
// Both operands are accessed with getReg() unconditionally, so MI is expected
// to have two register operands (processBasicBlock only calls this after
// MI->isCopy() succeeded).
298 bool isFPCopy(MachineInstr
*MI
) {
299 unsigned DstReg
= MI
->getOperand(0).getReg();
300 unsigned SrcReg
= MI
->getOperand(1).getReg();
302 return X86::RFP80RegClass
.contains(DstReg
) ||
303 X86::RFP80RegClass
.contains(SrcReg
);
// Factory for the FP stackifier pass: returns a freshly heap-allocated FPS
// instance as a FunctionPass pointer. The caller receives the raw pointer;
// NOTE(review): ownership is presumably taken by the pass manager - confirm.
309 FunctionPass
*llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
311 /// getFPReg - Return the X86::FPx register number for the specified operand.
312 /// For example, this returns 3 for X86::FP3.
/// The operand must be a register in the range X86::FP0 .. X86::FP6; both
/// conditions are enforced by assert only. The result is the offset of the
/// register from X86::FP0.
313 static unsigned getFPReg(const MachineOperand
&MO
) {
314 assert(MO
.isReg() && "Expected an FP register!");
315 unsigned Reg
= MO
.getReg();
316 assert(Reg
>= X86::FP0
&& Reg
<= X86::FP6
&& "Expected FP register!");
317 return Reg
- X86::FP0
;
320 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
321 /// register references into FP stack references.
323 bool FPS::runOnMachineFunction(MachineFunction
&MF
) {
324 // We only need to run this pass if there are any FP registers used in this
325 // function. If it is all integer, there is nothing for us to do!
326 bool FPIsUsed
= false;
328 assert(X86::FP6
== X86::FP0
+6 && "Register enums aren't sorted right!");
329 for (unsigned i
= 0; i
<= 6; ++i
)
330 if (MF
.getRegInfo().isPhysRegUsed(X86::FP0
+i
)) {
336 if (!FPIsUsed
) return false;
338 Bundles
= &getAnalysis
<EdgeBundles
>();
339 TII
= MF
.getTarget().getInstrInfo();
341 // Prepare cross-MBB liveness.
346 // Process the function in depth first order so that we process at least one
347 // of the predecessors for every reachable block in the function.
348 SmallPtrSet
<MachineBasicBlock
*, 8> Processed
;
349 MachineBasicBlock
*Entry
= MF
.begin();
351 bool Changed
= false;
352 for (df_ext_iterator
<MachineBasicBlock
*, SmallPtrSet
<MachineBasicBlock
*, 8> >
353 I
= df_ext_begin(Entry
, Processed
), E
= df_ext_end(Entry
, Processed
);
355 Changed
|= processBasicBlock(MF
, **I
);
357 // Process any unreachable blocks in arbitrary order now.
358 if (MF
.size() != Processed
.size())
359 for (MachineFunction::iterator BB
= MF
.begin(), E
= MF
.end(); BB
!= E
; ++BB
)
360 if (Processed
.insert(BB
))
361 Changed
|= processBasicBlock(MF
, *BB
);
368 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and
369 /// live-out sets for the FP registers. Consistent means that the set of
370 /// registers live-out from a block is identical to the live-in set of all
371 /// successors. This is not enforced by the normal live-in lists since
372 /// registers may be implicitly defined, or not used by all successors.
373 void FPS::bundleCFG(MachineFunction
&MF
) {
374 assert(LiveBundles
.empty() && "Stale data in LiveBundles");
375 LiveBundles
.resize(Bundles
->getNumBundles());
377 // Gather the actual live-in masks for all MBBs.
378 for (MachineFunction::iterator I
= MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
379 MachineBasicBlock
*MBB
= I
;
380 const unsigned Mask
= calcLiveInMask(MBB
);
383 // Update MBB ingoing bundle mask.
384 LiveBundles
[Bundles
->getBundle(MBB
->getNumber(), false)].Mask
|= Mask
;
388 /// processBasicBlock - Loop over all of the instructions in the basic block,
389 /// transforming FP instructions into their stack form.
391 bool FPS::processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&BB
) {
392 bool Changed
= false;
398 for (MachineBasicBlock::iterator I
= BB
.begin(); I
!= BB
.end(); ++I
) {
399 MachineInstr
*MI
= I
;
400 uint64_t Flags
= MI
->getDesc().TSFlags
;
402 unsigned FPInstClass
= Flags
& X86II::FPTypeMask
;
403 if (MI
->isInlineAsm())
404 FPInstClass
= X86II::SpecialFP
;
406 if (MI
->isCopy() && isFPCopy(MI
))
407 FPInstClass
= X86II::SpecialFP
;
409 if (FPInstClass
== X86II::NotFP
)
410 continue; // Efficiently ignore non-fp insts!
412 MachineInstr
*PrevMI
= 0;
416 ++NumFP
; // Keep track of # of pseudo instrs
417 DEBUG(dbgs() << "\nFPInst:\t" << *MI
);
419 // Get dead variables list now because the MI pointer may be deleted as part
421 SmallVector
<unsigned, 8> DeadRegs
;
422 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
423 const MachineOperand
&MO
= MI
->getOperand(i
);
424 if (MO
.isReg() && MO
.isDead())
425 DeadRegs
.push_back(MO
.getReg());
428 switch (FPInstClass
) {
429 case X86II::ZeroArgFP
: handleZeroArgFP(I
); break;
430 case X86II::OneArgFP
: handleOneArgFP(I
); break; // fstp ST(0)
431 case X86II::OneArgFPRW
: handleOneArgFPRW(I
); break; // ST(0) = fsqrt(ST(0))
432 case X86II::TwoArgFP
: handleTwoArgFP(I
); break;
433 case X86II::CompareFP
: handleCompareFP(I
); break;
434 case X86II::CondMovFP
: handleCondMovFP(I
); break;
435 case X86II::SpecialFP
: handleSpecialFP(I
); break;
436 default: llvm_unreachable("Unknown FP Type!");
439 // Check to see if any of the values defined by this instruction are dead
440 // after definition. If so, pop them.
441 for (unsigned i
= 0, e
= DeadRegs
.size(); i
!= e
; ++i
) {
442 unsigned Reg
= DeadRegs
[i
];
443 if (Reg
>= X86::FP0
&& Reg
<= X86::FP6
) {
444 DEBUG(dbgs() << "Register FP#" << Reg
-X86::FP0
<< " is dead!\n");
445 freeStackSlotAfter(I
, Reg
-X86::FP0
);
449 // Print out all of the instructions expanded to if -debug
451 MachineBasicBlock::iterator
PrevI(PrevMI
);
453 dbgs() << "Just deleted pseudo instruction\n";
455 MachineBasicBlock::iterator Start
= I
;
456 // Rewind to first instruction newly inserted.
457 while (Start
!= BB
.begin() && prior(Start
) != PrevI
) --Start
;
458 dbgs() << "Inserted instructions:\n\t";
459 Start
->print(dbgs(), &MF
.getTarget());
460 while (++Start
!= llvm::next(I
)) {}
473 /// setupBlockStack - Use the live bundles to set up our model of the stack
474 /// to match predecessors' live out stack.
475 void FPS::setupBlockStack() {
476 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB
->getNumber()
477 << " derived from " << MBB
->getName() << ".\n");
479 // Get the live-in bundle for MBB.
480 const LiveBundle
&Bundle
=
481 LiveBundles
[Bundles
->getBundle(MBB
->getNumber(), false)];
484 DEBUG(dbgs() << "Block has no FP live-ins.\n");
488 // Depth-first iteration should ensure that we always have an assigned stack.
489 assert(Bundle
.isFixed() && "Reached block before any predecessors");
491 // Push the fixed live-in registers.
492 for (unsigned i
= Bundle
.FixCount
; i
> 0; --i
) {
493 MBB
->addLiveIn(X86::ST0
+i
-1);
494 DEBUG(dbgs() << "Live-in st(" << (i
-1) << "): %FP"
495 << unsigned(Bundle
.FixStack
[i
-1]) << '\n');
496 pushReg(Bundle
.FixStack
[i
-1]);
499 // Kill off unwanted live-ins. This can happen with a critical edge.
500 // FIXME: We could keep these live registers around as zombies. They may need
501 // to be revived at the end of a short block. It might save a few instrs.
502 adjustLiveRegs(calcLiveInMask(MBB
), MBB
->begin());
506 /// finishBlockStack - Revive live-outs that are implicitly defined out of
507 /// MBB. Shuffle live registers to match the expected fixed stack of any
508 /// predecessors, and ensure that all predecessors are expecting the same
510 void FPS::finishBlockStack() {
511 // The RET handling below takes care of return blocks for us.
512 if (MBB
->succ_empty())
515 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB
->getNumber()
516 << " derived from " << MBB
->getName() << ".\n");
518 // Get MBB's live-out bundle.
519 unsigned BundleIdx
= Bundles
->getBundle(MBB
->getNumber(), true);
520 LiveBundle
&Bundle
= LiveBundles
[BundleIdx
];
522 // We may need to kill and define some registers to match successors.
523 // FIXME: This can probably be combined with the shuffle below.
524 MachineBasicBlock::iterator Term
= MBB
->getFirstTerminator();
525 adjustLiveRegs(Bundle
.Mask
, Term
);
528 DEBUG(dbgs() << "No live-outs.\n");
532 // Has the stack order been fixed yet?
533 DEBUG(dbgs() << "LB#" << BundleIdx
<< ": ");
534 if (Bundle
.isFixed()) {
535 DEBUG(dbgs() << "Shuffling stack to match.\n");
536 shuffleStackTop(Bundle
.FixStack
, Bundle
.FixCount
, Term
);
538 // Not fixed yet, we get to choose.
539 DEBUG(dbgs() << "Fixing stack order now.\n");
540 Bundle
.FixCount
= StackTop
;
541 for (unsigned i
= 0; i
< StackTop
; ++i
)
542 Bundle
.FixStack
[i
] = getStackEntry(i
);
547 //===----------------------------------------------------------------------===//
548 // Efficient Lookup Table Support
549 //===----------------------------------------------------------------------===//
// Order two table entries by their `from` field only; this is the ordering
// TableIsSorted() checks between adjacent entries.
555 bool operator<(const TableEntry
&TE
) const { return from
< TE
.from
; }
556 friend bool operator<(const TableEntry
&TE
, unsigned V
) {
559 friend bool LLVM_ATTRIBUTE_USED
operator<(unsigned V
,
560 const TableEntry
&TE
) {
// TableIsSorted - Check that every entry compares strictly less than its
// successor (via TableEntry::operator<), returning false at the first pair
// that does not. Because the comparison is strict, two adjacent entries that
// compare equal also make the check fail.
// NOTE(review): NumEntries is unsigned, so NumEntries-1 wraps around when
// NumEntries is 0 and the loop would read far past the table - confirm that
// no caller passes an empty table.
567 static bool TableIsSorted(const TableEntry
*Table
, unsigned NumEntries
) {
568 for (unsigned i
= 0; i
!= NumEntries
-1; ++i
)
569 if (!(Table
[i
] < Table
[i
+1])) return false;
// Lookup - Binary-search the first N entries of Table for an entry whose
// `from` field equals Opcode. std::lower_bound requires the range to be
// ordered with respect to the comparison used, which is why the tables are
// checked with ASSERT_SORTED before being searched.
//   Table  - pointer to the first entry.
//   N      - number of entries to search.
//   Opcode - key to look for.
// A hit is an in-range position whose `from` equals Opcode. Callers such as
// getConcreteOpcode() treat a result of -1 as "not found".
574 static int Lookup(const TableEntry
*Table
, unsigned N
, unsigned Opcode
) {
575 const TableEntry
*I
= std::lower_bound(Table
, Table
+N
, Opcode
);
576 if (I
!= Table
+N
&& I
->from
== Opcode
)
582 #define ASSERT_SORTED(TABLE)
584 #define ASSERT_SORTED(TABLE) \
585 { static bool TABLE##Checked = false; \
586 if (!TABLE##Checked) { \
587 assert(TableIsSorted(TABLE, array_lengthof(TABLE)) && \
588 "All lookup tables must be sorted for efficient access!"); \
589 TABLE##Checked = true; \
594 //===----------------------------------------------------------------------===//
595 // Register File -> Register Stack Mapping Methods
596 //===----------------------------------------------------------------------===//
598 // OpcodeTable - Sorted map of register instructions to their stack version.
599 // The first element is a register file pseudo instruction, the second is the
600 // concrete X86 instruction which uses the register stack.
602 static const TableEntry OpcodeTable
[] = {
603 { X86::ABS_Fp32
, X86::ABS_F
},
604 { X86::ABS_Fp64
, X86::ABS_F
},
605 { X86::ABS_Fp80
, X86::ABS_F
},
606 { X86::ADD_Fp32m
, X86::ADD_F32m
},
607 { X86::ADD_Fp64m
, X86::ADD_F64m
},
608 { X86::ADD_Fp64m32
, X86::ADD_F32m
},
609 { X86::ADD_Fp80m32
, X86::ADD_F32m
},
610 { X86::ADD_Fp80m64
, X86::ADD_F64m
},
611 { X86::ADD_FpI16m32
, X86::ADD_FI16m
},
612 { X86::ADD_FpI16m64
, X86::ADD_FI16m
},
613 { X86::ADD_FpI16m80
, X86::ADD_FI16m
},
614 { X86::ADD_FpI32m32
, X86::ADD_FI32m
},
615 { X86::ADD_FpI32m64
, X86::ADD_FI32m
},
616 { X86::ADD_FpI32m80
, X86::ADD_FI32m
},
617 { X86::CHS_Fp32
, X86::CHS_F
},
618 { X86::CHS_Fp64
, X86::CHS_F
},
619 { X86::CHS_Fp80
, X86::CHS_F
},
620 { X86::CMOVBE_Fp32
, X86::CMOVBE_F
},
621 { X86::CMOVBE_Fp64
, X86::CMOVBE_F
},
622 { X86::CMOVBE_Fp80
, X86::CMOVBE_F
},
623 { X86::CMOVB_Fp32
, X86::CMOVB_F
},
624 { X86::CMOVB_Fp64
, X86::CMOVB_F
},
625 { X86::CMOVB_Fp80
, X86::CMOVB_F
},
626 { X86::CMOVE_Fp32
, X86::CMOVE_F
},
627 { X86::CMOVE_Fp64
, X86::CMOVE_F
},
628 { X86::CMOVE_Fp80
, X86::CMOVE_F
},
629 { X86::CMOVNBE_Fp32
, X86::CMOVNBE_F
},
630 { X86::CMOVNBE_Fp64
, X86::CMOVNBE_F
},
631 { X86::CMOVNBE_Fp80
, X86::CMOVNBE_F
},
632 { X86::CMOVNB_Fp32
, X86::CMOVNB_F
},
633 { X86::CMOVNB_Fp64
, X86::CMOVNB_F
},
634 { X86::CMOVNB_Fp80
, X86::CMOVNB_F
},
635 { X86::CMOVNE_Fp32
, X86::CMOVNE_F
},
636 { X86::CMOVNE_Fp64
, X86::CMOVNE_F
},
637 { X86::CMOVNE_Fp80
, X86::CMOVNE_F
},
638 { X86::CMOVNP_Fp32
, X86::CMOVNP_F
},
639 { X86::CMOVNP_Fp64
, X86::CMOVNP_F
},
640 { X86::CMOVNP_Fp80
, X86::CMOVNP_F
},
641 { X86::CMOVP_Fp32
, X86::CMOVP_F
},
642 { X86::CMOVP_Fp64
, X86::CMOVP_F
},
643 { X86::CMOVP_Fp80
, X86::CMOVP_F
},
644 { X86::COS_Fp32
, X86::COS_F
},
645 { X86::COS_Fp64
, X86::COS_F
},
646 { X86::COS_Fp80
, X86::COS_F
},
647 { X86::DIVR_Fp32m
, X86::DIVR_F32m
},
648 { X86::DIVR_Fp64m
, X86::DIVR_F64m
},
649 { X86::DIVR_Fp64m32
, X86::DIVR_F32m
},
650 { X86::DIVR_Fp80m32
, X86::DIVR_F32m
},
651 { X86::DIVR_Fp80m64
, X86::DIVR_F64m
},
652 { X86::DIVR_FpI16m32
, X86::DIVR_FI16m
},
653 { X86::DIVR_FpI16m64
, X86::DIVR_FI16m
},
654 { X86::DIVR_FpI16m80
, X86::DIVR_FI16m
},
655 { X86::DIVR_FpI32m32
, X86::DIVR_FI32m
},
656 { X86::DIVR_FpI32m64
, X86::DIVR_FI32m
},
657 { X86::DIVR_FpI32m80
, X86::DIVR_FI32m
},
658 { X86::DIV_Fp32m
, X86::DIV_F32m
},
659 { X86::DIV_Fp64m
, X86::DIV_F64m
},
660 { X86::DIV_Fp64m32
, X86::DIV_F32m
},
661 { X86::DIV_Fp80m32
, X86::DIV_F32m
},
662 { X86::DIV_Fp80m64
, X86::DIV_F64m
},
663 { X86::DIV_FpI16m32
, X86::DIV_FI16m
},
664 { X86::DIV_FpI16m64
, X86::DIV_FI16m
},
665 { X86::DIV_FpI16m80
, X86::DIV_FI16m
},
666 { X86::DIV_FpI32m32
, X86::DIV_FI32m
},
667 { X86::DIV_FpI32m64
, X86::DIV_FI32m
},
668 { X86::DIV_FpI32m80
, X86::DIV_FI32m
},
669 { X86::ILD_Fp16m32
, X86::ILD_F16m
},
670 { X86::ILD_Fp16m64
, X86::ILD_F16m
},
671 { X86::ILD_Fp16m80
, X86::ILD_F16m
},
672 { X86::ILD_Fp32m32
, X86::ILD_F32m
},
673 { X86::ILD_Fp32m64
, X86::ILD_F32m
},
674 { X86::ILD_Fp32m80
, X86::ILD_F32m
},
675 { X86::ILD_Fp64m32
, X86::ILD_F64m
},
676 { X86::ILD_Fp64m64
, X86::ILD_F64m
},
677 { X86::ILD_Fp64m80
, X86::ILD_F64m
},
678 { X86::ISTT_Fp16m32
, X86::ISTT_FP16m
},
679 { X86::ISTT_Fp16m64
, X86::ISTT_FP16m
},
680 { X86::ISTT_Fp16m80
, X86::ISTT_FP16m
},
681 { X86::ISTT_Fp32m32
, X86::ISTT_FP32m
},
682 { X86::ISTT_Fp32m64
, X86::ISTT_FP32m
},
683 { X86::ISTT_Fp32m80
, X86::ISTT_FP32m
},
684 { X86::ISTT_Fp64m32
, X86::ISTT_FP64m
},
685 { X86::ISTT_Fp64m64
, X86::ISTT_FP64m
},
686 { X86::ISTT_Fp64m80
, X86::ISTT_FP64m
},
687 { X86::IST_Fp16m32
, X86::IST_F16m
},
688 { X86::IST_Fp16m64
, X86::IST_F16m
},
689 { X86::IST_Fp16m80
, X86::IST_F16m
},
690 { X86::IST_Fp32m32
, X86::IST_F32m
},
691 { X86::IST_Fp32m64
, X86::IST_F32m
},
692 { X86::IST_Fp32m80
, X86::IST_F32m
},
693 { X86::IST_Fp64m32
, X86::IST_FP64m
},
694 { X86::IST_Fp64m64
, X86::IST_FP64m
},
695 { X86::IST_Fp64m80
, X86::IST_FP64m
},
696 { X86::LD_Fp032
, X86::LD_F0
},
697 { X86::LD_Fp064
, X86::LD_F0
},
698 { X86::LD_Fp080
, X86::LD_F0
},
699 { X86::LD_Fp132
, X86::LD_F1
},
700 { X86::LD_Fp164
, X86::LD_F1
},
701 { X86::LD_Fp180
, X86::LD_F1
},
702 { X86::LD_Fp32m
, X86::LD_F32m
},
703 { X86::LD_Fp32m64
, X86::LD_F32m
},
704 { X86::LD_Fp32m80
, X86::LD_F32m
},
705 { X86::LD_Fp64m
, X86::LD_F64m
},
706 { X86::LD_Fp64m80
, X86::LD_F64m
},
707 { X86::LD_Fp80m
, X86::LD_F80m
},
708 { X86::MUL_Fp32m
, X86::MUL_F32m
},
709 { X86::MUL_Fp64m
, X86::MUL_F64m
},
710 { X86::MUL_Fp64m32
, X86::MUL_F32m
},
711 { X86::MUL_Fp80m32
, X86::MUL_F32m
},
712 { X86::MUL_Fp80m64
, X86::MUL_F64m
},
713 { X86::MUL_FpI16m32
, X86::MUL_FI16m
},
714 { X86::MUL_FpI16m64
, X86::MUL_FI16m
},
715 { X86::MUL_FpI16m80
, X86::MUL_FI16m
},
716 { X86::MUL_FpI32m32
, X86::MUL_FI32m
},
717 { X86::MUL_FpI32m64
, X86::MUL_FI32m
},
718 { X86::MUL_FpI32m80
, X86::MUL_FI32m
},
719 { X86::SIN_Fp32
, X86::SIN_F
},
720 { X86::SIN_Fp64
, X86::SIN_F
},
721 { X86::SIN_Fp80
, X86::SIN_F
},
722 { X86::SQRT_Fp32
, X86::SQRT_F
},
723 { X86::SQRT_Fp64
, X86::SQRT_F
},
724 { X86::SQRT_Fp80
, X86::SQRT_F
},
725 { X86::ST_Fp32m
, X86::ST_F32m
},
726 { X86::ST_Fp64m
, X86::ST_F64m
},
727 { X86::ST_Fp64m32
, X86::ST_F32m
},
728 { X86::ST_Fp80m32
, X86::ST_F32m
},
729 { X86::ST_Fp80m64
, X86::ST_F64m
},
730 { X86::ST_FpP80m
, X86::ST_FP80m
},
731 { X86::SUBR_Fp32m
, X86::SUBR_F32m
},
732 { X86::SUBR_Fp64m
, X86::SUBR_F64m
},
733 { X86::SUBR_Fp64m32
, X86::SUBR_F32m
},
734 { X86::SUBR_Fp80m32
, X86::SUBR_F32m
},
735 { X86::SUBR_Fp80m64
, X86::SUBR_F64m
},
736 { X86::SUBR_FpI16m32
, X86::SUBR_FI16m
},
737 { X86::SUBR_FpI16m64
, X86::SUBR_FI16m
},
738 { X86::SUBR_FpI16m80
, X86::SUBR_FI16m
},
739 { X86::SUBR_FpI32m32
, X86::SUBR_FI32m
},
740 { X86::SUBR_FpI32m64
, X86::SUBR_FI32m
},
741 { X86::SUBR_FpI32m80
, X86::SUBR_FI32m
},
742 { X86::SUB_Fp32m
, X86::SUB_F32m
},
743 { X86::SUB_Fp64m
, X86::SUB_F64m
},
744 { X86::SUB_Fp64m32
, X86::SUB_F32m
},
745 { X86::SUB_Fp80m32
, X86::SUB_F32m
},
746 { X86::SUB_Fp80m64
, X86::SUB_F64m
},
747 { X86::SUB_FpI16m32
, X86::SUB_FI16m
},
748 { X86::SUB_FpI16m64
, X86::SUB_FI16m
},
749 { X86::SUB_FpI16m80
, X86::SUB_FI16m
},
750 { X86::SUB_FpI32m32
, X86::SUB_FI32m
},
751 { X86::SUB_FpI32m64
, X86::SUB_FI32m
},
752 { X86::SUB_FpI32m80
, X86::SUB_FI32m
},
753 { X86::TST_Fp32
, X86::TST_F
},
754 { X86::TST_Fp64
, X86::TST_F
},
755 { X86::TST_Fp80
, X86::TST_F
},
756 { X86::UCOM_FpIr32
, X86::UCOM_FIr
},
757 { X86::UCOM_FpIr64
, X86::UCOM_FIr
},
758 { X86::UCOM_FpIr80
, X86::UCOM_FIr
},
759 { X86::UCOM_Fpr32
, X86::UCOM_Fr
},
760 { X86::UCOM_Fpr64
, X86::UCOM_Fr
},
761 { X86::UCOM_Fpr80
, X86::UCOM_Fr
},
// getConcreteOpcode - Translate a register-file pseudo opcode into its
// register-stack counterpart by searching OpcodeTable. The table's ordering
// is verified through ASSERT_SORTED before the search, and a missing entry
// (Lookup result of -1) is caught by assert only.
764 static unsigned getConcreteOpcode(unsigned Opcode
) {
765 ASSERT_SORTED(OpcodeTable
);
766 int Opc
= Lookup(OpcodeTable
, array_lengthof(OpcodeTable
), Opcode
);
767 assert(Opc
!= -1 && "FP Stack instruction not in OpcodeTable!");
771 //===----------------------------------------------------------------------===//
773 //===----------------------------------------------------------------------===//
775 // PopTable - Sorted map of instructions to their popping version. The first
776 // element is an instruction, the second is the version which pops.
778 static const TableEntry PopTable
[] = {
779 { X86::ADD_FrST0
, X86::ADD_FPrST0
},
781 { X86::DIVR_FrST0
, X86::DIVR_FPrST0
},
782 { X86::DIV_FrST0
, X86::DIV_FPrST0
},
784 { X86::IST_F16m
, X86::IST_FP16m
},
785 { X86::IST_F32m
, X86::IST_FP32m
},
787 { X86::MUL_FrST0
, X86::MUL_FPrST0
},
789 { X86::ST_F32m
, X86::ST_FP32m
},
790 { X86::ST_F64m
, X86::ST_FP64m
},
791 { X86::ST_Frr
, X86::ST_FPrr
},
793 { X86::SUBR_FrST0
, X86::SUBR_FPrST0
},
794 { X86::SUB_FrST0
, X86::SUB_FPrST0
},
796 { X86::UCOM_FIr
, X86::UCOM_FIPr
},
798 { X86::UCOM_FPr
, X86::UCOM_FPPr
},
799 { X86::UCOM_Fr
, X86::UCOM_FPr
},
802 /// popStackAfter - Pop the current value off of the top of the FP stack after
803 /// the specified instruction. This attempts to be sneaky and combine the pop
804 /// into the instruction itself if possible. The iterator is left pointing to
805 /// the last instruction, be it a new pop instruction inserted, or the old
806 /// instruction if it was modified in place.
808 void FPS::popStackAfter(MachineBasicBlock::iterator
&I
) {
809 MachineInstr
* MI
= I
;
810 DebugLoc dl
= MI
->getDebugLoc();
811 ASSERT_SORTED(PopTable
);
813 report_fatal_error("Cannot pop empty stack!");
814 RegMap
[Stack
[--StackTop
]] = ~0; // Update state
816 // Check to see if there is a popping version of this instruction...
817 int Opcode
= Lookup(PopTable
, array_lengthof(PopTable
), I
->getOpcode());
819 I
->setDesc(TII
->get(Opcode
));
820 if (Opcode
== X86::UCOM_FPPr
)
822 } else { // Insert an explicit pop
823 I
= BuildMI(*MBB
, ++I
, dl
, TII
->get(X86::ST_FPrr
)).addReg(X86::ST0
);
827 /// freeStackSlotAfter - Free the specified register from the register stack, so
828 /// that it is no longer in a register. If the register is currently at the top
829 /// of the stack, we just pop the current instruction, otherwise we store the
830 /// current top-of-stack into the specified slot, then pop the top of stack.
831 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned FPRegNo
) {
832 if (getStackEntry(0) == FPRegNo
) { // already at the top of stack? easy.
837 // Otherwise, store the top of stack into the dead slot, killing the operand
838 // without having to add in an explicit xchg then pop.
840 I
= freeStackSlotBefore(++I
, FPRegNo
);
843 /// freeStackSlotBefore - Free the specified register without trying any
/// folding: an explicit ST_FPrr targeting the register's ST slot is inserted
/// before I and the iterator to that new instruction is returned.
///
/// Model update: the current top-of-stack register takes over FPRegNo's
/// slot, FPRegNo's RegMap[] entry is invalidated (~0), and the stack shrinks
/// by one with the vacated top slot set to ~0. The inserted instruction
/// carries an empty DebugLoc.
845 MachineBasicBlock::iterator
846 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
) {
// Read the ST register and slot before any of the bookkeeping below changes
// them.
847 unsigned STReg
= getSTReg(FPRegNo
);
848 unsigned OldSlot
= getSlot(FPRegNo
);
849 unsigned TopReg
= Stack
[StackTop
-1];
850 Stack
[OldSlot
] = TopReg
;
851 RegMap
[TopReg
] = OldSlot
;
// Invalidate FPRegNo after remapping TopReg, so the case where FPRegNo is
// itself on top (TopReg == FPRegNo) still ends with RegMap[FPRegNo] == ~0.
852 RegMap
[FPRegNo
] = ~0;
853 Stack
[--StackTop
] = ~0;
854 return BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::ST_FPrr
)).addReg(STReg
);
857 /// adjustLiveRegs - Kill and revive registers such that exactly the FP
858 /// registers with a bit in Mask are live.
859 void FPS::adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
) {
860 unsigned Defs
= Mask
;
862 for (unsigned i
= 0; i
< StackTop
; ++i
) {
863 unsigned RegNo
= Stack
[i
];
864 if (!(Defs
& (1 << RegNo
)))
865 // This register is live, but we don't want it.
866 Kills
|= (1 << RegNo
);
868 // We don't need to imp-def this live register.
869 Defs
&= ~(1 << RegNo
);
871 assert((Kills
& Defs
) == 0 && "Register needs killing and def'ing?");
873 // Produce implicit-defs for free by using killed registers.
874 while (Kills
&& Defs
) {
875 unsigned KReg
= CountTrailingZeros_32(Kills
);
876 unsigned DReg
= CountTrailingZeros_32(Defs
);
877 DEBUG(dbgs() << "Renaming %FP" << KReg
<< " as imp %FP" << DReg
<< "\n");
878 std::swap(Stack
[getSlot(KReg
)], Stack
[getSlot(DReg
)]);
879 std::swap(RegMap
[KReg
], RegMap
[DReg
]);
880 Kills
&= ~(1 << KReg
);
881 Defs
&= ~(1 << DReg
);
884 // Kill registers by popping.
885 if (Kills
&& I
!= MBB
->begin()) {
886 MachineBasicBlock::iterator I2
= llvm::prior(I
);
888 unsigned KReg
= getStackEntry(0);
889 if (!(Kills
& (1 << KReg
)))
891 DEBUG(dbgs() << "Popping %FP" << KReg
<< "\n");
893 Kills
&= ~(1 << KReg
);
897 // Manually kill the rest.
899 unsigned KReg
= CountTrailingZeros_32(Kills
);
900 DEBUG(dbgs() << "Killing %FP" << KReg
<< "\n");
901 freeStackSlotBefore(I
, KReg
);
902 Kills
&= ~(1 << KReg
);
905 // Load zeros for all the imp-defs.
907 unsigned DReg
= CountTrailingZeros_32(Defs
);
908 DEBUG(dbgs() << "Defining %FP" << DReg
<< " as 0\n");
909 BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::LD_F0
));
911 Defs
&= ~(1 << DReg
);
914 // Now we should have the correct registers live.
916 assert(StackTop
== CountPopulation_32(Mask
) && "Live count mismatch");
919 /// shuffleStackTop - emit fxch instructions before I to shuffle the top
920 /// FixCount entries into the order given by FixStack.
921 /// FIXME: Is there a better algorithm than insertion sort?
922 void FPS::shuffleStackTop(const unsigned char *FixStack
,
924 MachineBasicBlock::iterator I
) {
925 // Move items into place, starting from the desired stack bottom.
927 // Old register at position FixCount.
928 unsigned OldReg
= getStackEntry(FixCount
);
929 // Desired register at position FixCount.
930 unsigned Reg
= FixStack
[FixCount
];
933 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
936 moveToTop(OldReg
, I
);
942 //===----------------------------------------------------------------------===//
943 // Instruction transformation implementation
944 //===----------------------------------------------------------------------===//
946 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
948 void FPS::handleZeroArgFP(MachineBasicBlock::iterator
&I
) {
949 MachineInstr
*MI
= I
;
950 unsigned DestReg
= getFPReg(MI
->getOperand(0));
952 // Change from the pseudo instruction to the concrete instruction.
953 MI
->RemoveOperand(0); // Remove the explicit ST(0) operand
954 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
956 // Result gets pushed on the stack.
960 /// handleOneArgFP - fst <mem>, ST(0)
962 void FPS::handleOneArgFP(MachineBasicBlock::iterator
&I
) {
963 MachineInstr
*MI
= I
;
964 unsigned NumOps
= MI
->getDesc().getNumOperands();
965 assert((NumOps
== X86::AddrNumOperands
+ 1 || NumOps
== 1) &&
966 "Can only handle fst* & ftst instructions!");
968 // Is this the last use of the source register?
969 unsigned Reg
= getFPReg(MI
->getOperand(NumOps
-1));
970 bool KillsSrc
= MI
->killsRegister(X86::FP0
+Reg
);
972 // FISTP64m is strange because there isn't a non-popping version.
973 // If we have one _and_ we don't want to pop the operand, duplicate the value
974 // on the stack instead of moving it. This ensures that popping the value is
976 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
979 (MI
->getOpcode() == X86::IST_Fp64m32
||
980 MI
->getOpcode() == X86::ISTT_Fp16m32
||
981 MI
->getOpcode() == X86::ISTT_Fp32m32
||
982 MI
->getOpcode() == X86::ISTT_Fp64m32
||
983 MI
->getOpcode() == X86::IST_Fp64m64
||
984 MI
->getOpcode() == X86::ISTT_Fp16m64
||
985 MI
->getOpcode() == X86::ISTT_Fp32m64
||
986 MI
->getOpcode() == X86::ISTT_Fp64m64
||
987 MI
->getOpcode() == X86::IST_Fp64m80
||
988 MI
->getOpcode() == X86::ISTT_Fp16m80
||
989 MI
->getOpcode() == X86::ISTT_Fp32m80
||
990 MI
->getOpcode() == X86::ISTT_Fp64m80
||
991 MI
->getOpcode() == X86::ST_FpP80m
)) {
992 duplicateToTop(Reg
, getScratchReg(), I
);
994 moveToTop(Reg
, I
); // Move to the top of the stack...
997 // Convert from the pseudo instruction to the concrete instruction.
998 MI
->RemoveOperand(NumOps
-1); // Remove explicit ST(0) operand
999 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1001 if (MI
->getOpcode() == X86::IST_FP64m
||
1002 MI
->getOpcode() == X86::ISTT_FP16m
||
1003 MI
->getOpcode() == X86::ISTT_FP32m
||
1004 MI
->getOpcode() == X86::ISTT_FP64m
||
1005 MI
->getOpcode() == X86::ST_FP80m
) {
1007 report_fatal_error("Stack empty??");
1009 } else if (KillsSrc
) { // Last use of operand?
1015 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
1016 /// replace the value with a newly computed value. These instructions may have
1017 /// non-fp operands after their FP operands.
1021 /// R1 = fadd R2, [mem]
1023 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator
&I
) {
1024 MachineInstr
*MI
= I
;
1026 unsigned NumOps
= MI
->getDesc().getNumOperands();
1027 assert(NumOps
>= 2 && "FPRW instructions must have 2 ops!!");
1030 // Is this the last use of the source register?
1031 unsigned Reg
= getFPReg(MI
->getOperand(1));
1032 bool KillsSrc
= MI
->killsRegister(X86::FP0
+Reg
);
1035 // If this is the last use of the source register, just make sure it's on
1036 // the top of the stack.
1039 report_fatal_error("Stack cannot be empty!");
1041 pushReg(getFPReg(MI
->getOperand(0)));
1043 // If this is not the last use of the source register, _copy_ it to the top
1045 duplicateToTop(Reg
, getFPReg(MI
->getOperand(0)), I
);
1048 // Change from the pseudo instruction to the concrete instruction.
1049 MI
->RemoveOperand(1); // Drop the source operand.
1050 MI
->RemoveOperand(0); // Drop the destination operand.
1051 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1055 //===----------------------------------------------------------------------===//
1056 // Define tables of various ways to map pseudo instructions
1059 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1060 static const TableEntry ForwardST0Table
[] = {
1061 { X86::ADD_Fp32
, X86::ADD_FST0r
},
1062 { X86::ADD_Fp64
, X86::ADD_FST0r
},
1063 { X86::ADD_Fp80
, X86::ADD_FST0r
},
1064 { X86::DIV_Fp32
, X86::DIV_FST0r
},
1065 { X86::DIV_Fp64
, X86::DIV_FST0r
},
1066 { X86::DIV_Fp80
, X86::DIV_FST0r
},
1067 { X86::MUL_Fp32
, X86::MUL_FST0r
},
1068 { X86::MUL_Fp64
, X86::MUL_FST0r
},
1069 { X86::MUL_Fp80
, X86::MUL_FST0r
},
1070 { X86::SUB_Fp32
, X86::SUB_FST0r
},
1071 { X86::SUB_Fp64
, X86::SUB_FST0r
},
1072 { X86::SUB_Fp80
, X86::SUB_FST0r
},
1075 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1076 static const TableEntry ReverseST0Table
[] = {
1077 { X86::ADD_Fp32
, X86::ADD_FST0r
}, // commutative
1078 { X86::ADD_Fp64
, X86::ADD_FST0r
}, // commutative
1079 { X86::ADD_Fp80
, X86::ADD_FST0r
}, // commutative
1080 { X86::DIV_Fp32
, X86::DIVR_FST0r
},
1081 { X86::DIV_Fp64
, X86::DIVR_FST0r
},
1082 { X86::DIV_Fp80
, X86::DIVR_FST0r
},
1083 { X86::MUL_Fp32
, X86::MUL_FST0r
}, // commutative
1084 { X86::MUL_Fp64
, X86::MUL_FST0r
}, // commutative
1085 { X86::MUL_Fp80
, X86::MUL_FST0r
}, // commutative
1086 { X86::SUB_Fp32
, X86::SUBR_FST0r
},
1087 { X86::SUB_Fp64
, X86::SUBR_FST0r
},
1088 { X86::SUB_Fp80
, X86::SUBR_FST0r
},
1091 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1092 static const TableEntry ForwardSTiTable
[] = {
1093 { X86::ADD_Fp32
, X86::ADD_FrST0
}, // commutative
1094 { X86::ADD_Fp64
, X86::ADD_FrST0
}, // commutative
1095 { X86::ADD_Fp80
, X86::ADD_FrST0
}, // commutative
1096 { X86::DIV_Fp32
, X86::DIVR_FrST0
},
1097 { X86::DIV_Fp64
, X86::DIVR_FrST0
},
1098 { X86::DIV_Fp80
, X86::DIVR_FrST0
},
1099 { X86::MUL_Fp32
, X86::MUL_FrST0
}, // commutative
1100 { X86::MUL_Fp64
, X86::MUL_FrST0
}, // commutative
1101 { X86::MUL_Fp80
, X86::MUL_FrST0
}, // commutative
1102 { X86::SUB_Fp32
, X86::SUBR_FrST0
},
1103 { X86::SUB_Fp64
, X86::SUBR_FrST0
},
1104 { X86::SUB_Fp80
, X86::SUBR_FrST0
},
1107 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1108 static const TableEntry ReverseSTiTable
[] = {
1109 { X86::ADD_Fp32
, X86::ADD_FrST0
},
1110 { X86::ADD_Fp64
, X86::ADD_FrST0
},
1111 { X86::ADD_Fp80
, X86::ADD_FrST0
},
1112 { X86::DIV_Fp32
, X86::DIV_FrST0
},
1113 { X86::DIV_Fp64
, X86::DIV_FrST0
},
1114 { X86::DIV_Fp80
, X86::DIV_FrST0
},
1115 { X86::MUL_Fp32
, X86::MUL_FrST0
},
1116 { X86::MUL_Fp64
, X86::MUL_FrST0
},
1117 { X86::MUL_Fp80
, X86::MUL_FrST0
},
1118 { X86::SUB_Fp32
, X86::SUB_FrST0
},
1119 { X86::SUB_Fp64
, X86::SUB_FrST0
},
1120 { X86::SUB_Fp80
, X86::SUB_FrST0
},
1124 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1125 /// instructions which need to be simplified and possibly transformed.
1127 /// Result: ST(0) = fsub ST(0), ST(i)
1128 /// ST(i) = fsub ST(0), ST(i)
1129 /// ST(0) = fsubr ST(0), ST(i)
1130 /// ST(i) = fsubr ST(0), ST(i)
1132 void FPS::handleTwoArgFP(MachineBasicBlock::iterator
&I
) {
1133 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1134 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1135 MachineInstr
*MI
= I
;
1137 unsigned NumOperands
= MI
->getDesc().getNumOperands();
1138 assert(NumOperands
== 3 && "Illegal TwoArgFP instruction!");
1139 unsigned Dest
= getFPReg(MI
->getOperand(0));
1140 unsigned Op0
= getFPReg(MI
->getOperand(NumOperands
-2));
1141 unsigned Op1
= getFPReg(MI
->getOperand(NumOperands
-1));
1142 bool KillsOp0
= MI
->killsRegister(X86::FP0
+Op0
);
1143 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
1144 DebugLoc dl
= MI
->getDebugLoc();
1146 unsigned TOS
= getStackEntry(0);
1148 // One of our operands must be on the top of the stack. If neither is yet, we
1149 // need to move one.
1150 if (Op0
!= TOS
&& Op1
!= TOS
) { // No operand at TOS?
1151 // We can choose to move either operand to the top of the stack. If one of
1152 // the operands is killed by this instruction, we want that one so that we
1153 // can update right on top of the old version.
1155 moveToTop(Op0
, I
); // Move dead operand to TOS.
1157 } else if (KillsOp1
) {
1161 // All of the operands are live after this instruction executes, so we
1162 // cannot update on top of any operand. Because of this, we must
1163 // duplicate one of the stack elements to the top. It doesn't matter
1164 // which one we pick.
1166 duplicateToTop(Op0
, Dest
, I
);
1170 } else if (!KillsOp0
&& !KillsOp1
) {
1171 // If we DO have one of our operands at the top of the stack, but we don't
1172 // have a dead operand, we must duplicate one of the operands to a new slot
1174 duplicateToTop(Op0
, Dest
, I
);
1179 // Now we know that one of our operands is on the top of the stack, and at
1180 // least one of our operands is killed by this instruction.
1181 assert((TOS
== Op0
|| TOS
== Op1
) && (KillsOp0
|| KillsOp1
) &&
1182 "Stack conditions not set up right!");
1184 // We decide which form to use based on what is on the top of the stack, and
1185 // which operand is killed by this instruction.
1186 const TableEntry
*InstTable
;
1187 bool isForward
= TOS
== Op0
;
1188 bool updateST0
= (TOS
== Op0
&& !KillsOp1
) || (TOS
== Op1
&& !KillsOp0
);
1191 InstTable
= ForwardST0Table
;
1193 InstTable
= ReverseST0Table
;
1196 InstTable
= ForwardSTiTable
;
1198 InstTable
= ReverseSTiTable
;
1201 int Opcode
= Lookup(InstTable
, array_lengthof(ForwardST0Table
),
1203 assert(Opcode
!= -1 && "Unknown TwoArgFP pseudo instruction!");
1205 // NotTOS - The register which is not on the top of stack...
1206 unsigned NotTOS
= (TOS
== Op0
) ? Op1
: Op0
;
1208 // Replace the old instruction with a new instruction
1210 I
= BuildMI(*MBB
, I
, dl
, TII
->get(Opcode
)).addReg(getSTReg(NotTOS
));
1212 // If both operands are killed, pop one off of the stack in addition to
1213 // overwriting the other one.
1214 if (KillsOp0
&& KillsOp1
&& Op0
!= Op1
) {
1215 assert(!updateST0
&& "Should have updated other operand!");
1216 popStackAfter(I
); // Pop the top of stack
1219 // Update stack information so that we know the destination register is now on
1221 unsigned UpdatedSlot
= getSlot(updateST0
? TOS
: NotTOS
);
1222 assert(UpdatedSlot
< StackTop
&& Dest
< 7);
1223 Stack
[UpdatedSlot
] = Dest
;
1224 RegMap
[Dest
] = UpdatedSlot
;
1225 MBB
->getParent()->DeleteMachineInstr(MI
); // Remove the old instruction
1228 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1229 /// register arguments and no explicit destinations.
1231 void FPS::handleCompareFP(MachineBasicBlock::iterator
&I
) {
1232 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1233 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1234 MachineInstr
*MI
= I
;
1236 unsigned NumOperands
= MI
->getDesc().getNumOperands();
1237 assert(NumOperands
== 2 && "Illegal FUCOM* instruction!");
1238 unsigned Op0
= getFPReg(MI
->getOperand(NumOperands
-2));
1239 unsigned Op1
= getFPReg(MI
->getOperand(NumOperands
-1));
1240 bool KillsOp0
= MI
->killsRegister(X86::FP0
+Op0
);
1241 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
1243 // Make sure the first operand is on the top of stack, the other one can be
1247 // Change from the pseudo instruction to the concrete instruction.
1248 MI
->getOperand(0).setReg(getSTReg(Op1
));
1249 MI
->RemoveOperand(1);
1250 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1252 // If any of the operands are killed by this instruction, free them.
1253 if (KillsOp0
) freeStackSlotAfter(I
, Op0
);
1254 if (KillsOp1
&& Op0
!= Op1
) freeStackSlotAfter(I
, Op1
);
1257 /// handleCondMovFP - Handle two address conditional move instructions. These
1258 /// instructions move a st(i) register to st(0) iff a condition is true. These
1259 /// instructions require that the first operand is at the top of the stack, but
1260 /// otherwise don't modify the stack at all.
1261 void FPS::handleCondMovFP(MachineBasicBlock::iterator
&I
) {
1262 MachineInstr
*MI
= I
;
1264 unsigned Op0
= getFPReg(MI
->getOperand(0));
1265 unsigned Op1
= getFPReg(MI
->getOperand(2));
1266 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
1268 // The first operand *must* be on the top of the stack.
1271 // Change the second operand to the stack register that the operand is in.
1272 // Change from the pseudo instruction to the concrete instruction.
1273 MI
->RemoveOperand(0);
1274 MI
->RemoveOperand(1);
1275 MI
->getOperand(0).setReg(getSTReg(Op1
));
1276 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1278 // If we kill the second operand, make sure to pop it from the stack.
1279 if (Op0
!= Op1
&& KillsOp1
) {
1280 // Get this value off of the register stack.
1281 freeStackSlotAfter(I
, Op1
);
1286 /// handleSpecialFP - Handle special instructions which behave unlike other
1287 /// floating point instructions. This is primarily intended for use by pseudo
1290 void FPS::handleSpecialFP(MachineBasicBlock::iterator
&I
) {
1291 MachineInstr
*MI
= I
;
1292 switch (MI
->getOpcode()) {
1293 default: llvm_unreachable("Unknown SpecialFP instruction!");
1294 case TargetOpcode::COPY
: {
1295 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1296 const MachineOperand
&MO1
= MI
->getOperand(1);
1297 const MachineOperand
&MO0
= MI
->getOperand(0);
1298 unsigned DstST
= MO0
.getReg() - X86::ST0
;
1299 unsigned SrcST
= MO1
.getReg() - X86::ST0
;
1300 bool KillsSrc
= MI
->killsRegister(MO1
.getReg());
1302 // ST = COPY FP. Set up a pending ST register.
1304 unsigned SrcFP
= getFPReg(MO1
);
1305 assert(isLive(SrcFP
) && "Cannot copy dead register");
1306 assert(!MO0
.isDead() && "Cannot copy to dead ST register");
1308 // Unallocated STs are marked as the nonexistent FP255.
1309 while (NumPendingSTs
<= DstST
)
1310 PendingST
[NumPendingSTs
++] = NumFPRegs
;
1312 // STi could still be live from a previous inline asm.
1313 if (isScratchReg(PendingST
[DstST
])) {
1314 DEBUG(dbgs() << "Clobbering old ST in FP" << unsigned(PendingST
[DstST
])
1316 freeStackSlotBefore(MI
, PendingST
[DstST
]);
1319 // When the source is killed, allocate a scratch FP register.
1321 unsigned Slot
= getSlot(SrcFP
);
1322 unsigned SR
= getScratchReg();
1323 PendingST
[DstST
] = SR
;
1327 PendingST
[DstST
] = SrcFP
;
1331 // FP = COPY ST. Extract fixed stack value.
1332 // Any instruction defining ST registers must have assigned them to a
1333 // scratch register.
1335 unsigned DstFP
= getFPReg(MO0
);
1336 assert(!isLive(DstFP
) && "Cannot copy ST to live FP register");
1337 assert(NumPendingSTs
> SrcST
&& "Cannot copy from dead ST register");
1338 unsigned SrcFP
= PendingST
[SrcST
];
1339 assert(isScratchReg(SrcFP
) && "Expected ST in a scratch register");
1340 assert(isLive(SrcFP
) && "Scratch holding ST is dead");
1342 // DstFP steals the stack slot from SrcFP.
1343 unsigned Slot
= getSlot(SrcFP
);
1344 Stack
[Slot
] = DstFP
;
1345 RegMap
[DstFP
] = Slot
;
1347 // Always treat the ST as killed.
1348 PendingST
[SrcST
] = NumFPRegs
;
1349 while (NumPendingSTs
&& PendingST
[NumPendingSTs
- 1] == NumFPRegs
)
1355 unsigned DstFP
= getFPReg(MO0
);
1356 unsigned SrcFP
= getFPReg(MO1
);
1357 assert(isLive(SrcFP
) && "Cannot copy dead register");
1359 // If the input operand is killed, we can just change the owner of the
1360 // incoming stack slot into the result.
1361 unsigned Slot
= getSlot(SrcFP
);
1362 Stack
[Slot
] = DstFP
;
1363 RegMap
[DstFP
] = Slot
;
1365 // For COPY we just duplicate the specified value to a new stack slot.
1366 // This could be made better, but would require substantial changes.
1367 duplicateToTop(SrcFP
, DstFP
, I
);
1372 case X86::FpPOP_RETVAL
: {
1373 // The FpPOP_RETVAL instruction is used after calls that return a value on
1374 // the floating point stack. We cannot model this with ST defs since CALL
1375 // instructions have fixed clobber lists. This instruction is interpreted
1376 // to mean that there is one more live register on the stack than we
1379 // This means that StackTop does not match the hardware stack between a
1380 // call and the FpPOP_RETVAL instructions. We do tolerate FP instructions
1381 // between CALL and FpPOP_RETVAL as long as they don't overflow the
1383 unsigned DstFP
= getFPReg(MI
->getOperand(0));
1385 // Move existing stack elements up to reflect reality.
1386 assert(StackTop
< 8 && "Stack overflowed before FpPOP_RETVAL");
1388 std::copy_backward(Stack
, Stack
+ StackTop
, Stack
+ StackTop
+ 1);
1389 for (unsigned i
= 0; i
!= NumFPRegs
; ++i
)
1394 // DstFP is the new bottom of the stack.
1398 // DstFP will be killed by processBasicBlock if this was a dead def.
1402 case TargetOpcode::INLINEASM
: {
1403 // The inline asm MachineInstr currently only *uses* FP registers for the
1404 // 'f' constraint. These should be turned into the current ST(x) register
1405 // in the machine instr.
1407 // There are special rules for x87 inline assembly. The compiler must know
1408 // exactly how many registers are popped and pushed implicitly by the asm.
1409 // Otherwise it is not possible to restore the stack state after the inline
1412 // There are 3 kinds of input operands:
1414 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1415 // popped input operand must be in a fixed stack slot, and it is either
1416 // tied to an output operand, or in the clobber list. The MI has ST use
1417 // and def operands for these inputs.
1419 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1420 // preserved by the inline asm. The fixed stack slots must be STn-STm
1421 // following the popped inputs. A fixed input operand cannot be tied to
1422 // an output or appear in the clobber list. The MI has ST use operands
1423 // and no defs for these inputs.
1425 // 3. Preserved inputs. These inputs use the "f" constraint which is
1426 // represented as an FP register. The inline asm won't change these
1429 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1430 // registers do not count as output operands. The inline asm changes the
1431 // stack as if it popped all the popped inputs and then pushed all the
1434 // Scan the assembly for ST registers used, defined and clobbered. We can
1435 // only tell clobbers from defs by looking at the asm descriptor.
1436 unsigned STUses
= 0, STDefs
= 0, STClobbers
= 0, STDeadDefs
= 0;
1437 unsigned NumOps
= 0;
1438 for (unsigned i
= InlineAsm::MIOp_FirstOperand
, e
= MI
->getNumOperands();
1439 i
!= e
&& MI
->getOperand(i
).isImm(); i
+= 1 + NumOps
) {
1440 unsigned Flags
= MI
->getOperand(i
).getImm();
1441 NumOps
= InlineAsm::getNumOperandRegisters(Flags
);
1444 const MachineOperand
&MO
= MI
->getOperand(i
+ 1);
1447 unsigned STReg
= MO
.getReg() - X86::ST0
;
1451 switch (InlineAsm::getKind(Flags
)) {
1452 case InlineAsm::Kind_RegUse
:
1453 STUses
|= (1u << STReg
);
1455 case InlineAsm::Kind_RegDef
:
1456 case InlineAsm::Kind_RegDefEarlyClobber
:
1457 STDefs
|= (1u << STReg
);
1459 STDeadDefs
|= (1u << STReg
);
1461 case InlineAsm::Kind_Clobber
:
1462 STClobbers
|= (1u << STReg
);
1469 if (STUses
&& !isMask_32(STUses
))
1470 MI
->emitError("fixed input regs must be last on the x87 stack");
1471 unsigned NumSTUses
= CountTrailingOnes_32(STUses
);
1473 // Defs must be contiguous from the stack top. ST0-STn.
1474 if (STDefs
&& !isMask_32(STDefs
)) {
1475 MI
->emitError("output regs must be last on the x87 stack");
1476 STDefs
= NextPowerOf2(STDefs
) - 1;
1478 unsigned NumSTDefs
= CountTrailingOnes_32(STDefs
);
1480 // So must the clobbered stack slots. ST0-STm, m >= n.
1481 if (STClobbers
&& !isMask_32(STDefs
| STClobbers
))
1482 MI
->emitError("clobbers must be last on the x87 stack");
1484 // Popped inputs are the ones that are also clobbered or defined.
1485 unsigned STPopped
= STUses
& (STDefs
| STClobbers
);
1486 if (STPopped
&& !isMask_32(STPopped
))
1487 MI
->emitError("implicitly popped regs must be last on the x87 stack");
1488 unsigned NumSTPopped
= CountTrailingOnes_32(STPopped
);
1490 DEBUG(dbgs() << "Asm uses " << NumSTUses
<< " fixed regs, pops "
1491 << NumSTPopped
<< ", and defines " << NumSTDefs
<< " regs.\n");
1493 // Scan the instruction for FP uses corresponding to "f" constraints.
1494 // Collect FP registers to kill after the instruction.
1495 // Always kill all the scratch regs.
1496 unsigned FPKills
= ((1u << NumFPRegs
) - 1) & ~0xff;
1497 unsigned FPUsed
= 0;
1498 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1499 MachineOperand
&Op
= MI
->getOperand(i
);
1500 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
1503 MI
->emitError("illegal \"f\" output constraint");
1504 unsigned FPReg
= getFPReg(Op
);
1505 FPUsed
|= 1U << FPReg
;
1507 // If we kill this operand, make sure to pop it from the stack after the
1508 // asm. We just remember it for now, and pop them all off at the end in
1511 FPKills
|= 1U << FPReg
;
1514 // The popped inputs will be killed by the instruction, so duplicate them
1515 // if the FP register needs to be live after the instruction, or if it is
1516 // used in the instruction itself. We effectively treat the popped inputs
1517 // as early clobbers.
1518 for (unsigned i
= 0; i
< NumSTPopped
; ++i
) {
1519 if ((FPKills
& ~FPUsed
) & (1u << PendingST
[i
]))
1521 unsigned SR
= getScratchReg();
1522 duplicateToTop(PendingST
[i
], SR
, I
);
1523 DEBUG(dbgs() << "Duplicating ST" << i
<< " in FP"
1524 << unsigned(PendingST
[i
]) << " to avoid clobbering it.\n");
1528 // Make sure we have a unique live register for every fixed use. Some of
1529 // them could be undef uses, and we need to emit LD_F0 instructions.
1530 for (unsigned i
= 0; i
< NumSTUses
; ++i
) {
1531 if (i
< NumPendingSTs
&& PendingST
[i
] < NumFPRegs
) {
1532 // Check for shared assignments.
1533 for (unsigned j
= 0; j
< i
; ++j
) {
1534 if (PendingST
[j
] != PendingST
[i
])
1536 // STi and STj are in the same register, create a copy.
1537 unsigned SR
= getScratchReg();
1538 duplicateToTop(PendingST
[i
], SR
, I
);
1539 DEBUG(dbgs() << "Duplicating ST" << i
<< " in FP"
1540 << unsigned(PendingST
[i
])
1541 << " to avoid collision with ST" << j
<< '\n');
1546 unsigned SR
= getScratchReg();
1547 DEBUG(dbgs() << "Emitting LD_F0 for ST" << i
<< " in FP" << SR
<< '\n');
1548 BuildMI(*MBB
, I
, MI
->getDebugLoc(), TII
->get(X86::LD_F0
));
1551 if (NumPendingSTs
== i
)
1554 assert(NumPendingSTs
>= NumSTUses
&& "Fixed registers should be assigned");
1556 // Now we can rearrange the live registers to match what was requested.
1557 shuffleStackTop(PendingST
, NumPendingSTs
, I
);
1558 DEBUG({dbgs() << "Before asm: "; dumpStack();});
1560 // With the stack layout fixed, rewrite the FP registers.
1561 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1562 MachineOperand
&Op
= MI
->getOperand(i
);
1563 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
1565 unsigned FPReg
= getFPReg(Op
);
1566 Op
.setReg(getSTReg(FPReg
));
1569 // Simulate the inline asm popping its inputs and pushing its outputs.
1570 StackTop
-= NumSTPopped
;
1572 // Hold the fixed output registers in scratch FP registers. They will be
1573 // transferred to real FP registers by copies.
1575 for (unsigned i
= 0; i
< NumSTDefs
; ++i
) {
1576 unsigned SR
= getScratchReg();
1578 FPKills
&= ~(1u << SR
);
1580 for (unsigned i
= 0; i
< NumSTDefs
; ++i
)
1581 PendingST
[NumPendingSTs
++] = getStackEntry(i
);
1582 DEBUG({dbgs() << "After asm: "; dumpStack();});
1584 // If any of the ST defs were dead, pop them immediately. Our caller only
1585 // handles dead FP defs.
1586 MachineBasicBlock::iterator InsertPt
= MI
;
1587 for (unsigned i
= 0; STDefs
& (1u << i
); ++i
) {
1588 if (!(STDeadDefs
& (1u << i
)))
1590 freeStackSlotAfter(InsertPt
, PendingST
[i
]);
1591 PendingST
[i
] = NumFPRegs
;
1593 while (NumPendingSTs
&& PendingST
[NumPendingSTs
- 1] == NumFPRegs
)
1596 // If this asm kills any FP registers (is the last use of them) we must
1597 // explicitly emit pop instructions for them. Do this now after the asm has
1598 // executed so that the ST(x) numbers are not off (which would happen if we
1599 // did this inline with operand rewriting).
1601 // Note: this might be a non-optimal pop sequence. We might be able to do
1602 // better by trying to pop in stack order or something.
1604 unsigned FPReg
= CountTrailingZeros_32(FPKills
);
1606 freeStackSlotAfter(InsertPt
, FPReg
);
1607 FPKills
&= ~(1U << FPReg
);
1609 // Don't delete the inline asm!
1615 // If RET has an FP register use operand, pass the first one in ST(0) and
1616 // the second one in ST(1).
1618 // Find the register operands.
1619 unsigned FirstFPRegOp
= ~0U, SecondFPRegOp
= ~0U;
1620 unsigned LiveMask
= 0;
1622 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1623 MachineOperand
&Op
= MI
->getOperand(i
);
1624 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
1626 // FP Register uses must be kills unless there are two uses of the same
1627 // register, in which case only one will be a kill.
1628 assert(Op
.isUse() &&
1629 (Op
.isKill() || // Marked kill.
1630 getFPReg(Op
) == FirstFPRegOp
|| // Second instance.
1631 MI
->killsRegister(Op
.getReg())) && // Later use is marked kill.
1632 "Ret only defs operands, and values aren't live beyond it");
1634 if (FirstFPRegOp
== ~0U)
1635 FirstFPRegOp
= getFPReg(Op
);
1637 assert(SecondFPRegOp
== ~0U && "More than two fp operands!");
1638 SecondFPRegOp
= getFPReg(Op
);
1640 LiveMask
|= (1 << getFPReg(Op
));
1642 // Remove the operand so that later passes don't see it.
1643 MI
->RemoveOperand(i
);
1647 // We may have been carrying spurious live-ins, so make sure only the returned
1648 // registers are left live.
1649 adjustLiveRegs(LiveMask
, MI
);
1650 if (!LiveMask
) return; // Quick check to see if any are possible.
1652 // There are only four possibilities here:
1653 // 1) we are returning a single FP value. In this case, it has to be in
1654 // ST(0) already, so just declare success by removing the value from the
1656 if (SecondFPRegOp
== ~0U) {
1657 // Assert that the top of stack contains the right FP register.
1658 assert(StackTop
== 1 && FirstFPRegOp
== getStackEntry(0) &&
1659 "Top of stack not the right register for RET!");
1661 // Ok, everything is good, mark the value as not being on the stack
1662 // anymore so that our assertion about the stack being empty at end of
1663 // block doesn't fire.
1668 // Otherwise, we are returning two values:
1669 // 2) If returning the same value for both, we only have one thing in the FP
1670 // stack. Consider: RET FP1, FP1
1671 if (StackTop
== 1) {
1672 assert(FirstFPRegOp
== SecondFPRegOp
&& FirstFPRegOp
== getStackEntry(0)&&
1673 "Stack misconfiguration for RET!");
1675 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1676 // register to hold it.
1677 unsigned NewReg
= getScratchReg();
1678 duplicateToTop(FirstFPRegOp
, NewReg
, MI
);
1679 FirstFPRegOp
= NewReg
;
1682 /// Okay we know we have two different FPx operands now:
1683 assert(StackTop
== 2 && "Must have two values live!");
1685 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1686 /// in ST(1). In this case, emit an fxch.
1687 if (getStackEntry(0) == SecondFPRegOp
) {
1688 assert(getStackEntry(1) == FirstFPRegOp
&& "Unknown regs live");
1689 moveToTop(FirstFPRegOp
, MI
);
1692 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1693 /// ST(1). Just remove both from our understanding of the stack and return.
1694 assert(getStackEntry(0) == FirstFPRegOp
&& "Unknown regs live");
1695 assert(getStackEntry(1) == SecondFPRegOp
&& "Unknown regs live");
1700 I
= MBB
->erase(I
); // Remove the pseudo instruction
1702 // We want to leave I pointing to the previous instruction, but what if we
1703 // just erased the first instruction?
1704 if (I
== MBB
->begin()) {
1705 DEBUG(dbgs() << "Inserting dummy KILL\n");
1706 I
= BuildMI(*MBB
, I
, DebugLoc(), TII
->get(TargetOpcode::KILL
));