1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the pass which converts floating point instructions from
11 // pseudo registers into register stack instructions. This pass uses live
12 // variable information to indicate where the FPn registers are used and their
15 // The x87 hardware tracks liveness of the stack registers, so it is necessary
16 // to implement exact liveness tracking between basic blocks. The CFG edges are
17 // partitioned into bundles where the same FP registers must be live in
18 // identical stack positions. Instructions are inserted at the end of each basic
19 // block to rearrange the live registers to match the outgoing bundle.
21 // This approach avoids splitting critical edges at the potential cost of more
22 // live register shuffling instructions when critical edges are present.
24 //===----------------------------------------------------------------------===//
26 #define DEBUG_TYPE "x86-codegen"
28 #include "X86InstrInfo.h"
29 #include "llvm/ADT/DepthFirstIterator.h"
30 #include "llvm/ADT/DenseMap.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/CodeGen/EdgeBundles.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstrBuilder.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/ErrorHandling.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/Target/TargetInstrInfo.h"
44 #include "llvm/Target/TargetMachine.h"
48 STATISTIC(NumFXCH
, "Number of fxch instructions inserted");
49 STATISTIC(NumFP
, "Number of floating point instructions");
52 struct FPS
: public MachineFunctionPass
{
54 FPS() : MachineFunctionPass(ID
) {
55 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
56 // This is really only to keep valgrind quiet.
57 // The logic in isLive() is too much for it.
58 memset(Stack
, 0, sizeof(Stack
));
59 memset(RegMap
, 0, sizeof(RegMap
));
62 virtual void getAnalysisUsage(AnalysisUsage
&AU
) const {
64 AU
.addRequired
<EdgeBundles
>();
65 AU
.addPreservedID(MachineLoopInfoID
);
66 AU
.addPreservedID(MachineDominatorsID
);
67 MachineFunctionPass::getAnalysisUsage(AU
);
70 virtual bool runOnMachineFunction(MachineFunction
&MF
);
72 virtual const char *getPassName() const { return "X86 FP Stackifier"; }
75 const TargetInstrInfo
*TII
; // Machine instruction info.
77 // Two CFG edges are related if they leave the same block, or enter the same
78 // block. The transitive closure of an edge under this relation is a
79 // LiveBundle. It represents a set of CFG edges where the live FP stack
80 // registers must be allocated identically in the x87 stack.
82 // A LiveBundle is usually all the edges leaving a block, or all the edges
83 // entering a block, but it can contain more edges if critical edges are
86 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
87 // but the exact mapping of FP registers to stack slots is fixed later.
89 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
92 // Number of pre-assigned live registers in FixStack. This is 0 when the
93 // stack order has not yet been fixed.
96 // Assigned stack order for live-in registers.
97 // FixStack[i] == getStackEntry(i) for all i < FixCount.
98 unsigned char FixStack
[8];
100 LiveBundle() : Mask(0), FixCount(0) {}
102 // Have the live registers been assigned a stack order yet?
103 bool isFixed() const { return !Mask
|| FixCount
; }
106 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
107 // with no live FP registers.
108 SmallVector
<LiveBundle
, 8> LiveBundles
;
110 // The edge bundle analysis provides indices into the LiveBundles vector.
111 EdgeBundles
*Bundles
;
113 // Return a bitmask of FP registers in block's live-in list.
114 unsigned calcLiveInMask(MachineBasicBlock
*MBB
) {
116 for (MachineBasicBlock::livein_iterator I
= MBB
->livein_begin(),
117 E
= MBB
->livein_end(); I
!= E
; ++I
) {
118 unsigned Reg
= *I
- X86::FP0
;
125 // Partition all the CFG edges into LiveBundles.
126 void bundleCFG(MachineFunction
&MF
);
128 MachineBasicBlock
*MBB
; // Current basic block
129 unsigned Stack
[8]; // FP<n> Registers in each stack slot...
130 unsigned RegMap
[8]; // Track which stack slot contains each register
131 unsigned StackTop
; // The current top of the FP stack.
133 // Set up our stack model to match the incoming registers to MBB.
134 void setupBlockStack();
136 // Shuffle live registers to match the expectations of successor blocks.
137 void finishBlockStack();
139 void dumpStack() const {
140 dbgs() << "Stack contents:";
141 for (unsigned i
= 0; i
!= StackTop
; ++i
) {
142 dbgs() << " FP" << Stack
[i
];
143 assert(RegMap
[Stack
[i
]] == i
&& "Stack[] doesn't match RegMap[]!");
148 /// getSlot - Return the stack slot number a particular register number is
150 unsigned getSlot(unsigned RegNo
) const {
151 assert(RegNo
< 8 && "Regno out of range!");
152 return RegMap
[RegNo
];
155 /// isLive - Is RegNo currently live in the stack?
156 bool isLive(unsigned RegNo
) const {
157 unsigned Slot
= getSlot(RegNo
);
158 return Slot
< StackTop
&& Stack
[Slot
] == RegNo
;
161 /// getScratchReg - Return an FP register that is not currently in use.
162 unsigned getScratchReg() {
163 for (int i
= 7; i
>= 0; --i
)
166 llvm_unreachable("Ran out of scratch FP registers");
169 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
170 unsigned getStackEntry(unsigned STi
) const {
172 report_fatal_error("Access past stack top!");
173 return Stack
[StackTop
-1-STi
];
176 /// getSTReg - Return the X86::ST(i) register which contains the specified
177 /// FP<RegNo> register.
178 unsigned getSTReg(unsigned RegNo
) const {
179 return StackTop
- 1 - getSlot(RegNo
) + llvm::X86::ST0
;
182 // pushReg - Push the specified FP<n> register onto the stack.
183 void pushReg(unsigned Reg
) {
184 assert(Reg
< 8 && "Register number out of range!");
186 report_fatal_error("Stack overflow!");
187 Stack
[StackTop
] = Reg
;
188 RegMap
[Reg
] = StackTop
++;
191 bool isAtTop(unsigned RegNo
) const { return getSlot(RegNo
) == StackTop
-1; }
192 void moveToTop(unsigned RegNo
, MachineBasicBlock::iterator I
) {
193 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
194 if (isAtTop(RegNo
)) return;
196 unsigned STReg
= getSTReg(RegNo
);
197 unsigned RegOnTop
= getStackEntry(0);
199 // Swap the slots the regs are in.
200 std::swap(RegMap
[RegNo
], RegMap
[RegOnTop
]);
202 // Swap stack slot contents.
203 if (RegMap
[RegOnTop
] >= StackTop
)
204 report_fatal_error("Access past stack top!");
205 std::swap(Stack
[RegMap
[RegOnTop
]], Stack
[StackTop
-1]);
207 // Emit an fxch to update the runtime processors version of the state.
208 BuildMI(*MBB
, I
, dl
, TII
->get(X86::XCH_F
)).addReg(STReg
);
212 void duplicateToTop(unsigned RegNo
, unsigned AsReg
, MachineInstr
*I
) {
213 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
214 unsigned STReg
= getSTReg(RegNo
);
215 pushReg(AsReg
); // New register on top of stack
217 BuildMI(*MBB
, I
, dl
, TII
->get(X86::LD_Frr
)).addReg(STReg
);
220 /// popStackAfter - Pop the current value off of the top of the FP stack
221 /// after the specified instruction.
222 void popStackAfter(MachineBasicBlock::iterator
&I
);
224 /// freeStackSlotAfter - Free the specified register from the register
225 /// stack, so that it is no longer in a register. If the register is
226 /// currently at the top of the stack, we just pop the current instruction,
227 /// otherwise we store the current top-of-stack into the specified slot,
228 /// then pop the top of stack.
229 void freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned Reg
);
231 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
233 MachineBasicBlock::iterator
234 freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
);
236 /// Adjust the live registers to be the set in Mask.
237 void adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
);
239 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
240 /// st(0), FP reg FixStack[1] is st(1) etc.
241 void shuffleStackTop(const unsigned char *FixStack
, unsigned FixCount
,
242 MachineBasicBlock::iterator I
);
244 bool processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&MBB
);
246 void handleZeroArgFP(MachineBasicBlock::iterator
&I
);
247 void handleOneArgFP(MachineBasicBlock::iterator
&I
);
248 void handleOneArgFPRW(MachineBasicBlock::iterator
&I
);
249 void handleTwoArgFP(MachineBasicBlock::iterator
&I
);
250 void handleCompareFP(MachineBasicBlock::iterator
&I
);
251 void handleCondMovFP(MachineBasicBlock::iterator
&I
);
252 void handleSpecialFP(MachineBasicBlock::iterator
&I
);
254 bool translateCopy(MachineInstr
*);
259 FunctionPass
*llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
261 /// getFPReg - Return the X86::FPx register number for the specified operand.
262 /// For example, this returns 3 for X86::FP3.
263 static unsigned getFPReg(const MachineOperand
&MO
) {
264 assert(MO
.isReg() && "Expected an FP register!");
265 unsigned Reg
= MO
.getReg();
266 assert(Reg
>= X86::FP0
&& Reg
<= X86::FP6
&& "Expected FP register!");
267 return Reg
- X86::FP0
;
270 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
271 /// register references into FP stack references.
273 bool FPS::runOnMachineFunction(MachineFunction
&MF
) {
274 // We only need to run this pass if there are any FP registers used in this
275 // function. If it is all integer, there is nothing for us to do!
276 bool FPIsUsed
= false;
278 assert(X86::FP6
== X86::FP0
+6 && "Register enums aren't sorted right!");
279 for (unsigned i
= 0; i
<= 6; ++i
)
280 if (MF
.getRegInfo().isPhysRegUsed(X86::FP0
+i
)) {
286 if (!FPIsUsed
) return false;
288 Bundles
= &getAnalysis
<EdgeBundles
>();
289 TII
= MF
.getTarget().getInstrInfo();
291 // Prepare cross-MBB liveness.
296 // Process the function in depth first order so that we process at least one
297 // of the predecessors for every reachable block in the function.
298 SmallPtrSet
<MachineBasicBlock
*, 8> Processed
;
299 MachineBasicBlock
*Entry
= MF
.begin();
301 bool Changed
= false;
302 for (df_ext_iterator
<MachineBasicBlock
*, SmallPtrSet
<MachineBasicBlock
*, 8> >
303 I
= df_ext_begin(Entry
, Processed
), E
= df_ext_end(Entry
, Processed
);
305 Changed
|= processBasicBlock(MF
, **I
);
307 // Process any unreachable blocks in arbitrary order now.
308 if (MF
.size() != Processed
.size())
309 for (MachineFunction::iterator BB
= MF
.begin(), E
= MF
.end(); BB
!= E
; ++BB
)
310 if (Processed
.insert(BB
))
311 Changed
|= processBasicBlock(MF
, *BB
);
318 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and
319 /// live-out sets for the FP registers. Consistent means that the set of
320 /// registers live-out from a block is identical to the live-in set of all
321 /// successors. This is not enforced by the normal live-in lists since
322 /// registers may be implicitly defined, or not used by all successors.
323 void FPS::bundleCFG(MachineFunction
&MF
) {
324 assert(LiveBundles
.empty() && "Stale data in LiveBundles");
325 LiveBundles
.resize(Bundles
->getNumBundles());
327 // Gather the actual live-in masks for all MBBs.
328 for (MachineFunction::iterator I
= MF
.begin(), E
= MF
.end(); I
!= E
; ++I
) {
329 MachineBasicBlock
*MBB
= I
;
330 const unsigned Mask
= calcLiveInMask(MBB
);
333 // Update MBB ingoing bundle mask.
334 LiveBundles
[Bundles
->getBundle(MBB
->getNumber(), false)].Mask
|= Mask
;
338 /// processBasicBlock - Loop over all of the instructions in the basic block,
339 /// transforming FP instructions into their stack form.
341 bool FPS::processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&BB
) {
342 bool Changed
= false;
347 for (MachineBasicBlock::iterator I
= BB
.begin(); I
!= BB
.end(); ++I
) {
348 MachineInstr
*MI
= I
;
349 uint64_t Flags
= MI
->getDesc().TSFlags
;
351 unsigned FPInstClass
= Flags
& X86II::FPTypeMask
;
352 if (MI
->isInlineAsm())
353 FPInstClass
= X86II::SpecialFP
;
355 if (MI
->isCopy() && translateCopy(MI
))
356 FPInstClass
= X86II::SpecialFP
;
358 if (FPInstClass
== X86II::NotFP
)
359 continue; // Efficiently ignore non-fp insts!
361 MachineInstr
*PrevMI
= 0;
365 ++NumFP
; // Keep track of # of pseudo instrs
366 DEBUG(dbgs() << "\nFPInst:\t" << *MI
);
368 // Get dead variables list now because the MI pointer may be deleted as part
370 SmallVector
<unsigned, 8> DeadRegs
;
371 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
372 const MachineOperand
&MO
= MI
->getOperand(i
);
373 if (MO
.isReg() && MO
.isDead())
374 DeadRegs
.push_back(MO
.getReg());
377 switch (FPInstClass
) {
378 case X86II::ZeroArgFP
: handleZeroArgFP(I
); break;
379 case X86II::OneArgFP
: handleOneArgFP(I
); break; // fstp ST(0)
380 case X86II::OneArgFPRW
: handleOneArgFPRW(I
); break; // ST(0) = fsqrt(ST(0))
381 case X86II::TwoArgFP
: handleTwoArgFP(I
); break;
382 case X86II::CompareFP
: handleCompareFP(I
); break;
383 case X86II::CondMovFP
: handleCondMovFP(I
); break;
384 case X86II::SpecialFP
: handleSpecialFP(I
); break;
385 default: llvm_unreachable("Unknown FP Type!");
388 // Check to see if any of the values defined by this instruction are dead
389 // after definition. If so, pop them.
390 for (unsigned i
= 0, e
= DeadRegs
.size(); i
!= e
; ++i
) {
391 unsigned Reg
= DeadRegs
[i
];
392 if (Reg
>= X86::FP0
&& Reg
<= X86::FP6
) {
393 DEBUG(dbgs() << "Register FP#" << Reg
-X86::FP0
<< " is dead!\n");
394 freeStackSlotAfter(I
, Reg
-X86::FP0
);
398 // Print out all of the instructions expanded to if -debug
400 MachineBasicBlock::iterator
PrevI(PrevMI
);
402 dbgs() << "Just deleted pseudo instruction\n";
404 MachineBasicBlock::iterator Start
= I
;
405 // Rewind to first instruction newly inserted.
406 while (Start
!= BB
.begin() && prior(Start
) != PrevI
) --Start
;
407 dbgs() << "Inserted instructions:\n\t";
408 Start
->print(dbgs(), &MF
.getTarget());
409 while (++Start
!= llvm::next(I
)) {}
422 /// setupBlockStack - Use the live bundles to set up our model of the stack
423 /// to match predecessors' live out stack.
424 void FPS::setupBlockStack() {
425 DEBUG(dbgs() << "\nSetting up live-ins for BB#" << MBB
->getNumber()
426 << " derived from " << MBB
->getName() << ".\n");
428 // Get the live-in bundle for MBB.
429 const LiveBundle
&Bundle
=
430 LiveBundles
[Bundles
->getBundle(MBB
->getNumber(), false)];
433 DEBUG(dbgs() << "Block has no FP live-ins.\n");
437 // Depth-first iteration should ensure that we always have an assigned stack.
438 assert(Bundle
.isFixed() && "Reached block before any predecessors");
440 // Push the fixed live-in registers.
441 for (unsigned i
= Bundle
.FixCount
; i
> 0; --i
) {
442 MBB
->addLiveIn(X86::ST0
+i
-1);
443 DEBUG(dbgs() << "Live-in st(" << (i
-1) << "): %FP"
444 << unsigned(Bundle
.FixStack
[i
-1]) << '\n');
445 pushReg(Bundle
.FixStack
[i
-1]);
448 // Kill off unwanted live-ins. This can happen with a critical edge.
449 // FIXME: We could keep these live registers around as zombies. They may need
450 // to be revived at the end of a short block. It might save a few instrs.
451 adjustLiveRegs(calcLiveInMask(MBB
), MBB
->begin());
455 /// finishBlockStack - Revive live-outs that are implicitly defined out of
456 /// MBB. Shuffle live registers to match the expected fixed stack of any
457 /// predecessors, and ensure that all predecessors are expecting the same
459 void FPS::finishBlockStack() {
460 // The RET handling below takes care of return blocks for us.
461 if (MBB
->succ_empty())
464 DEBUG(dbgs() << "Setting up live-outs for BB#" << MBB
->getNumber()
465 << " derived from " << MBB
->getName() << ".\n");
467 // Get MBB's live-out bundle.
468 unsigned BundleIdx
= Bundles
->getBundle(MBB
->getNumber(), true);
469 LiveBundle
&Bundle
= LiveBundles
[BundleIdx
];
471 // We may need to kill and define some registers to match successors.
472 // FIXME: This can probably be combined with the shuffle below.
473 MachineBasicBlock::iterator Term
= MBB
->getFirstTerminator();
474 adjustLiveRegs(Bundle
.Mask
, Term
);
477 DEBUG(dbgs() << "No live-outs.\n");
481 // Has the stack order been fixed yet?
482 DEBUG(dbgs() << "LB#" << BundleIdx
<< ": ");
483 if (Bundle
.isFixed()) {
484 DEBUG(dbgs() << "Shuffling stack to match.\n");
485 shuffleStackTop(Bundle
.FixStack
, Bundle
.FixCount
, Term
);
487 // Not fixed yet, we get to choose.
488 DEBUG(dbgs() << "Fixing stack order now.\n");
489 Bundle
.FixCount
= StackTop
;
490 for (unsigned i
= 0; i
< StackTop
; ++i
)
491 Bundle
.FixStack
[i
] = getStackEntry(i
);
//===----------------------------------------------------------------------===//
// Efficient Lookup Table Support
//===----------------------------------------------------------------------===//

namespace {
  /// TableEntry - One row of a sorted opcode translation table: opcode 'from'
  /// maps to opcode 'to'. Rows are ordered by 'from' only.
  struct TableEntry {
    unsigned from;
    unsigned to;
    bool operator<(const TableEntry &TE) const { return from < TE.from; }
    // Mixed comparisons let std::lower_bound search with a bare opcode.
    friend bool operator<(const TableEntry &TE, unsigned V) {
      return TE.from < V;
    }
    friend bool LLVM_ATTRIBUTE_USED operator<(unsigned V,
                                              const TableEntry &TE) {
      return V < TE.from;
    }
  };
}

#ifndef NDEBUG
/// TableIsSorted - Return true if the 'from' keys of Table are strictly
/// increasing. Tables with zero or one entry are trivially sorted.
static bool TableIsSorted(const TableEntry *Table, unsigned NumEntries) {
  // Start at 1 and look backwards so that NumEntries == 0 cannot underflow.
  for (unsigned i = 1; i < NumEntries; ++i)
    if (!(Table[i-1] < Table[i])) return false;
  return true;
}
#endif

/// Lookup - Binary-search the N-entry sorted Table for Opcode. Returns the
/// mapped opcode, or -1 if Opcode has no entry.
static int Lookup(const TableEntry *Table, unsigned N, unsigned Opcode) {
  const TableEntry *I = std::lower_bound(Table, Table+N, Opcode);
  if (I != Table+N && I->from == Opcode)
    return I->to;
  return -1;
}

// ASSERT_SORTED - In asserts builds, verify once per table that it is sorted.
#ifdef NDEBUG
#define ASSERT_SORTED(TABLE)
#else
#define ASSERT_SORTED(TABLE)                                              \
  { static bool TABLE##Checked = false;                                   \
    if (!TABLE##Checked) {                                                \
       assert(TableIsSorted(TABLE, array_lengthof(TABLE)) &&              \
              "All lookup tables must be sorted for efficient access!");  \
       TABLE##Checked = true;                                             \
    }                                                                     \
  }
#endif
543 //===----------------------------------------------------------------------===//
544 // Register File -> Register Stack Mapping Methods
545 //===----------------------------------------------------------------------===//
547 // OpcodeTable - Sorted map of register instructions to their stack version.
548 // The first element is a register file pseudo instruction, the second is the
549 // concrete X86 instruction which uses the register stack.
551 static const TableEntry OpcodeTable
[] = {
552 { X86::ABS_Fp32
, X86::ABS_F
},
553 { X86::ABS_Fp64
, X86::ABS_F
},
554 { X86::ABS_Fp80
, X86::ABS_F
},
555 { X86::ADD_Fp32m
, X86::ADD_F32m
},
556 { X86::ADD_Fp64m
, X86::ADD_F64m
},
557 { X86::ADD_Fp64m32
, X86::ADD_F32m
},
558 { X86::ADD_Fp80m32
, X86::ADD_F32m
},
559 { X86::ADD_Fp80m64
, X86::ADD_F64m
},
560 { X86::ADD_FpI16m32
, X86::ADD_FI16m
},
561 { X86::ADD_FpI16m64
, X86::ADD_FI16m
},
562 { X86::ADD_FpI16m80
, X86::ADD_FI16m
},
563 { X86::ADD_FpI32m32
, X86::ADD_FI32m
},
564 { X86::ADD_FpI32m64
, X86::ADD_FI32m
},
565 { X86::ADD_FpI32m80
, X86::ADD_FI32m
},
566 { X86::CHS_Fp32
, X86::CHS_F
},
567 { X86::CHS_Fp64
, X86::CHS_F
},
568 { X86::CHS_Fp80
, X86::CHS_F
},
569 { X86::CMOVBE_Fp32
, X86::CMOVBE_F
},
570 { X86::CMOVBE_Fp64
, X86::CMOVBE_F
},
571 { X86::CMOVBE_Fp80
, X86::CMOVBE_F
},
572 { X86::CMOVB_Fp32
, X86::CMOVB_F
},
573 { X86::CMOVB_Fp64
, X86::CMOVB_F
},
574 { X86::CMOVB_Fp80
, X86::CMOVB_F
},
575 { X86::CMOVE_Fp32
, X86::CMOVE_F
},
576 { X86::CMOVE_Fp64
, X86::CMOVE_F
},
577 { X86::CMOVE_Fp80
, X86::CMOVE_F
},
578 { X86::CMOVNBE_Fp32
, X86::CMOVNBE_F
},
579 { X86::CMOVNBE_Fp64
, X86::CMOVNBE_F
},
580 { X86::CMOVNBE_Fp80
, X86::CMOVNBE_F
},
581 { X86::CMOVNB_Fp32
, X86::CMOVNB_F
},
582 { X86::CMOVNB_Fp64
, X86::CMOVNB_F
},
583 { X86::CMOVNB_Fp80
, X86::CMOVNB_F
},
584 { X86::CMOVNE_Fp32
, X86::CMOVNE_F
},
585 { X86::CMOVNE_Fp64
, X86::CMOVNE_F
},
586 { X86::CMOVNE_Fp80
, X86::CMOVNE_F
},
587 { X86::CMOVNP_Fp32
, X86::CMOVNP_F
},
588 { X86::CMOVNP_Fp64
, X86::CMOVNP_F
},
589 { X86::CMOVNP_Fp80
, X86::CMOVNP_F
},
590 { X86::CMOVP_Fp32
, X86::CMOVP_F
},
591 { X86::CMOVP_Fp64
, X86::CMOVP_F
},
592 { X86::CMOVP_Fp80
, X86::CMOVP_F
},
593 { X86::COS_Fp32
, X86::COS_F
},
594 { X86::COS_Fp64
, X86::COS_F
},
595 { X86::COS_Fp80
, X86::COS_F
},
596 { X86::DIVR_Fp32m
, X86::DIVR_F32m
},
597 { X86::DIVR_Fp64m
, X86::DIVR_F64m
},
598 { X86::DIVR_Fp64m32
, X86::DIVR_F32m
},
599 { X86::DIVR_Fp80m32
, X86::DIVR_F32m
},
600 { X86::DIVR_Fp80m64
, X86::DIVR_F64m
},
601 { X86::DIVR_FpI16m32
, X86::DIVR_FI16m
},
602 { X86::DIVR_FpI16m64
, X86::DIVR_FI16m
},
603 { X86::DIVR_FpI16m80
, X86::DIVR_FI16m
},
604 { X86::DIVR_FpI32m32
, X86::DIVR_FI32m
},
605 { X86::DIVR_FpI32m64
, X86::DIVR_FI32m
},
606 { X86::DIVR_FpI32m80
, X86::DIVR_FI32m
},
607 { X86::DIV_Fp32m
, X86::DIV_F32m
},
608 { X86::DIV_Fp64m
, X86::DIV_F64m
},
609 { X86::DIV_Fp64m32
, X86::DIV_F32m
},
610 { X86::DIV_Fp80m32
, X86::DIV_F32m
},
611 { X86::DIV_Fp80m64
, X86::DIV_F64m
},
612 { X86::DIV_FpI16m32
, X86::DIV_FI16m
},
613 { X86::DIV_FpI16m64
, X86::DIV_FI16m
},
614 { X86::DIV_FpI16m80
, X86::DIV_FI16m
},
615 { X86::DIV_FpI32m32
, X86::DIV_FI32m
},
616 { X86::DIV_FpI32m64
, X86::DIV_FI32m
},
617 { X86::DIV_FpI32m80
, X86::DIV_FI32m
},
618 { X86::ILD_Fp16m32
, X86::ILD_F16m
},
619 { X86::ILD_Fp16m64
, X86::ILD_F16m
},
620 { X86::ILD_Fp16m80
, X86::ILD_F16m
},
621 { X86::ILD_Fp32m32
, X86::ILD_F32m
},
622 { X86::ILD_Fp32m64
, X86::ILD_F32m
},
623 { X86::ILD_Fp32m80
, X86::ILD_F32m
},
624 { X86::ILD_Fp64m32
, X86::ILD_F64m
},
625 { X86::ILD_Fp64m64
, X86::ILD_F64m
},
626 { X86::ILD_Fp64m80
, X86::ILD_F64m
},
627 { X86::ISTT_Fp16m32
, X86::ISTT_FP16m
},
628 { X86::ISTT_Fp16m64
, X86::ISTT_FP16m
},
629 { X86::ISTT_Fp16m80
, X86::ISTT_FP16m
},
630 { X86::ISTT_Fp32m32
, X86::ISTT_FP32m
},
631 { X86::ISTT_Fp32m64
, X86::ISTT_FP32m
},
632 { X86::ISTT_Fp32m80
, X86::ISTT_FP32m
},
633 { X86::ISTT_Fp64m32
, X86::ISTT_FP64m
},
634 { X86::ISTT_Fp64m64
, X86::ISTT_FP64m
},
635 { X86::ISTT_Fp64m80
, X86::ISTT_FP64m
},
636 { X86::IST_Fp16m32
, X86::IST_F16m
},
637 { X86::IST_Fp16m64
, X86::IST_F16m
},
638 { X86::IST_Fp16m80
, X86::IST_F16m
},
639 { X86::IST_Fp32m32
, X86::IST_F32m
},
640 { X86::IST_Fp32m64
, X86::IST_F32m
},
641 { X86::IST_Fp32m80
, X86::IST_F32m
},
642 { X86::IST_Fp64m32
, X86::IST_FP64m
},
643 { X86::IST_Fp64m64
, X86::IST_FP64m
},
644 { X86::IST_Fp64m80
, X86::IST_FP64m
},
645 { X86::LD_Fp032
, X86::LD_F0
},
646 { X86::LD_Fp064
, X86::LD_F0
},
647 { X86::LD_Fp080
, X86::LD_F0
},
648 { X86::LD_Fp132
, X86::LD_F1
},
649 { X86::LD_Fp164
, X86::LD_F1
},
650 { X86::LD_Fp180
, X86::LD_F1
},
651 { X86::LD_Fp32m
, X86::LD_F32m
},
652 { X86::LD_Fp32m64
, X86::LD_F32m
},
653 { X86::LD_Fp32m80
, X86::LD_F32m
},
654 { X86::LD_Fp64m
, X86::LD_F64m
},
655 { X86::LD_Fp64m80
, X86::LD_F64m
},
656 { X86::LD_Fp80m
, X86::LD_F80m
},
657 { X86::MUL_Fp32m
, X86::MUL_F32m
},
658 { X86::MUL_Fp64m
, X86::MUL_F64m
},
659 { X86::MUL_Fp64m32
, X86::MUL_F32m
},
660 { X86::MUL_Fp80m32
, X86::MUL_F32m
},
661 { X86::MUL_Fp80m64
, X86::MUL_F64m
},
662 { X86::MUL_FpI16m32
, X86::MUL_FI16m
},
663 { X86::MUL_FpI16m64
, X86::MUL_FI16m
},
664 { X86::MUL_FpI16m80
, X86::MUL_FI16m
},
665 { X86::MUL_FpI32m32
, X86::MUL_FI32m
},
666 { X86::MUL_FpI32m64
, X86::MUL_FI32m
},
667 { X86::MUL_FpI32m80
, X86::MUL_FI32m
},
668 { X86::SIN_Fp32
, X86::SIN_F
},
669 { X86::SIN_Fp64
, X86::SIN_F
},
670 { X86::SIN_Fp80
, X86::SIN_F
},
671 { X86::SQRT_Fp32
, X86::SQRT_F
},
672 { X86::SQRT_Fp64
, X86::SQRT_F
},
673 { X86::SQRT_Fp80
, X86::SQRT_F
},
674 { X86::ST_Fp32m
, X86::ST_F32m
},
675 { X86::ST_Fp64m
, X86::ST_F64m
},
676 { X86::ST_Fp64m32
, X86::ST_F32m
},
677 { X86::ST_Fp80m32
, X86::ST_F32m
},
678 { X86::ST_Fp80m64
, X86::ST_F64m
},
679 { X86::ST_FpP80m
, X86::ST_FP80m
},
680 { X86::SUBR_Fp32m
, X86::SUBR_F32m
},
681 { X86::SUBR_Fp64m
, X86::SUBR_F64m
},
682 { X86::SUBR_Fp64m32
, X86::SUBR_F32m
},
683 { X86::SUBR_Fp80m32
, X86::SUBR_F32m
},
684 { X86::SUBR_Fp80m64
, X86::SUBR_F64m
},
685 { X86::SUBR_FpI16m32
, X86::SUBR_FI16m
},
686 { X86::SUBR_FpI16m64
, X86::SUBR_FI16m
},
687 { X86::SUBR_FpI16m80
, X86::SUBR_FI16m
},
688 { X86::SUBR_FpI32m32
, X86::SUBR_FI32m
},
689 { X86::SUBR_FpI32m64
, X86::SUBR_FI32m
},
690 { X86::SUBR_FpI32m80
, X86::SUBR_FI32m
},
691 { X86::SUB_Fp32m
, X86::SUB_F32m
},
692 { X86::SUB_Fp64m
, X86::SUB_F64m
},
693 { X86::SUB_Fp64m32
, X86::SUB_F32m
},
694 { X86::SUB_Fp80m32
, X86::SUB_F32m
},
695 { X86::SUB_Fp80m64
, X86::SUB_F64m
},
696 { X86::SUB_FpI16m32
, X86::SUB_FI16m
},
697 { X86::SUB_FpI16m64
, X86::SUB_FI16m
},
698 { X86::SUB_FpI16m80
, X86::SUB_FI16m
},
699 { X86::SUB_FpI32m32
, X86::SUB_FI32m
},
700 { X86::SUB_FpI32m64
, X86::SUB_FI32m
},
701 { X86::SUB_FpI32m80
, X86::SUB_FI32m
},
702 { X86::TST_Fp32
, X86::TST_F
},
703 { X86::TST_Fp64
, X86::TST_F
},
704 { X86::TST_Fp80
, X86::TST_F
},
705 { X86::UCOM_FpIr32
, X86::UCOM_FIr
},
706 { X86::UCOM_FpIr64
, X86::UCOM_FIr
},
707 { X86::UCOM_FpIr80
, X86::UCOM_FIr
},
708 { X86::UCOM_Fpr32
, X86::UCOM_Fr
},
709 { X86::UCOM_Fpr64
, X86::UCOM_Fr
},
710 { X86::UCOM_Fpr80
, X86::UCOM_Fr
},
713 static unsigned getConcreteOpcode(unsigned Opcode
) {
714 ASSERT_SORTED(OpcodeTable
);
715 int Opc
= Lookup(OpcodeTable
, array_lengthof(OpcodeTable
), Opcode
);
716 assert(Opc
!= -1 && "FP Stack instruction not in OpcodeTable!");
720 //===----------------------------------------------------------------------===//
722 //===----------------------------------------------------------------------===//
724 // PopTable - Sorted map of instructions to their popping version. The first
725 // element is an instruction, the second is the version which pops.
727 static const TableEntry PopTable
[] = {
728 { X86::ADD_FrST0
, X86::ADD_FPrST0
},
730 { X86::DIVR_FrST0
, X86::DIVR_FPrST0
},
731 { X86::DIV_FrST0
, X86::DIV_FPrST0
},
733 { X86::IST_F16m
, X86::IST_FP16m
},
734 { X86::IST_F32m
, X86::IST_FP32m
},
736 { X86::MUL_FrST0
, X86::MUL_FPrST0
},
738 { X86::ST_F32m
, X86::ST_FP32m
},
739 { X86::ST_F64m
, X86::ST_FP64m
},
740 { X86::ST_Frr
, X86::ST_FPrr
},
742 { X86::SUBR_FrST0
, X86::SUBR_FPrST0
},
743 { X86::SUB_FrST0
, X86::SUB_FPrST0
},
745 { X86::UCOM_FIr
, X86::UCOM_FIPr
},
747 { X86::UCOM_FPr
, X86::UCOM_FPPr
},
748 { X86::UCOM_Fr
, X86::UCOM_FPr
},
751 /// popStackAfter - Pop the current value off of the top of the FP stack after
752 /// the specified instruction. This attempts to be sneaky and combine the pop
753 /// into the instruction itself if possible. The iterator is left pointing to
754 /// the last instruction, be it a new pop instruction inserted, or the old
755 /// instruction if it was modified in place.
757 void FPS::popStackAfter(MachineBasicBlock::iterator
&I
) {
758 MachineInstr
* MI
= I
;
759 DebugLoc dl
= MI
->getDebugLoc();
760 ASSERT_SORTED(PopTable
);
762 report_fatal_error("Cannot pop empty stack!");
763 RegMap
[Stack
[--StackTop
]] = ~0; // Update state
765 // Check to see if there is a popping version of this instruction...
766 int Opcode
= Lookup(PopTable
, array_lengthof(PopTable
), I
->getOpcode());
768 I
->setDesc(TII
->get(Opcode
));
769 if (Opcode
== X86::UCOM_FPPr
)
771 } else { // Insert an explicit pop
772 I
= BuildMI(*MBB
, ++I
, dl
, TII
->get(X86::ST_FPrr
)).addReg(X86::ST0
);
776 /// freeStackSlotAfter - Free the specified register from the register stack, so
777 /// that it is no longer in a register. If the register is currently at the top
778 /// of the stack, we just pop the current instruction, otherwise we store the
779 /// current top-of-stack into the specified slot, then pop the top of stack.
780 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned FPRegNo
) {
781 if (getStackEntry(0) == FPRegNo
) { // already at the top of stack? easy.
786 // Otherwise, store the top of stack into the dead slot, killing the operand
787 // without having to add in an explicit xchg then pop.
789 I
= freeStackSlotBefore(++I
, FPRegNo
);
792 /// freeStackSlotBefore - Free the specified register without trying any
794 MachineBasicBlock::iterator
795 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
) {
796 unsigned STReg
= getSTReg(FPRegNo
);
797 unsigned OldSlot
= getSlot(FPRegNo
);
798 unsigned TopReg
= Stack
[StackTop
-1];
799 Stack
[OldSlot
] = TopReg
;
800 RegMap
[TopReg
] = OldSlot
;
801 RegMap
[FPRegNo
] = ~0;
802 Stack
[--StackTop
] = ~0;
803 return BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::ST_FPrr
)).addReg(STReg
);
806 /// adjustLiveRegs - Kill and revive registers such that exactly the FP
807 /// registers with a bit in Mask are live.
808 void FPS::adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
) {
809 unsigned Defs
= Mask
;
811 for (unsigned i
= 0; i
< StackTop
; ++i
) {
812 unsigned RegNo
= Stack
[i
];
813 if (!(Defs
& (1 << RegNo
)))
814 // This register is live, but we don't want it.
815 Kills
|= (1 << RegNo
);
817 // We don't need to imp-def this live register.
818 Defs
&= ~(1 << RegNo
);
820 assert((Kills
& Defs
) == 0 && "Register needs killing and def'ing?");
822 // Produce implicit-defs for free by using killed registers.
823 while (Kills
&& Defs
) {
824 unsigned KReg
= CountTrailingZeros_32(Kills
);
825 unsigned DReg
= CountTrailingZeros_32(Defs
);
826 DEBUG(dbgs() << "Renaming %FP" << KReg
<< " as imp %FP" << DReg
<< "\n");
827 std::swap(Stack
[getSlot(KReg
)], Stack
[getSlot(DReg
)]);
828 std::swap(RegMap
[KReg
], RegMap
[DReg
]);
829 Kills
&= ~(1 << KReg
);
830 Defs
&= ~(1 << DReg
);
833 // Kill registers by popping.
834 if (Kills
&& I
!= MBB
->begin()) {
835 MachineBasicBlock::iterator I2
= llvm::prior(I
);
837 unsigned KReg
= getStackEntry(0);
838 if (!(Kills
& (1 << KReg
)))
840 DEBUG(dbgs() << "Popping %FP" << KReg
<< "\n");
842 Kills
&= ~(1 << KReg
);
846 // Manually kill the rest.
848 unsigned KReg
= CountTrailingZeros_32(Kills
);
849 DEBUG(dbgs() << "Killing %FP" << KReg
<< "\n");
850 freeStackSlotBefore(I
, KReg
);
851 Kills
&= ~(1 << KReg
);
854 // Load zeros for all the imp-defs.
856 unsigned DReg
= CountTrailingZeros_32(Defs
);
857 DEBUG(dbgs() << "Defining %FP" << DReg
<< " as 0\n");
858 BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::LD_F0
));
860 Defs
&= ~(1 << DReg
);
863 // Now we should have the correct registers live.
865 assert(StackTop
== CountPopulation_32(Mask
) && "Live count mismatch");
868 /// shuffleStackTop - emit fxch instructions before I to shuffle the top
869 /// FixCount entries into the order given by FixStack.
870 /// FIXME: Is there a better algorithm than insertion sort?
871 void FPS::shuffleStackTop(const unsigned char *FixStack
,
873 MachineBasicBlock::iterator I
) {
874 // Move items into place, starting from the desired stack bottom.
876 // Old register at position FixCount.
877 unsigned OldReg
= getStackEntry(FixCount
);
878 // Desired register at position FixCount.
879 unsigned Reg
= FixStack
[FixCount
];
882 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
884 moveToTop(OldReg
, I
);
890 //===----------------------------------------------------------------------===//
891 // Instruction transformation implementation
892 //===----------------------------------------------------------------------===//
894 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
896 void FPS::handleZeroArgFP(MachineBasicBlock::iterator
&I
) {
897 MachineInstr
*MI
= I
;
898 unsigned DestReg
= getFPReg(MI
->getOperand(0));
900 // Change from the pseudo instruction to the concrete instruction.
901 MI
->RemoveOperand(0); // Remove the explicit ST(0) operand
902 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
904 // Result gets pushed on the stack.
908 /// handleOneArgFP - fst <mem>, ST(0)
///
/// The FP source is the last operand of the pseudo instruction. It is removed
/// before the instruction is switched to its concrete opcode. Whether the
/// source register is killed by this instruction (KillsSrc) is tested again
/// in the final else-if below.
910 void FPS::handleOneArgFP(MachineBasicBlock::iterator
&I
) {
911 MachineInstr
*MI
= I
;
912 unsigned NumOps
= MI
->getDesc().getNumOperands();
// Either a memory address plus the FP source, or the FP source alone.
913 assert((NumOps
== X86::AddrNumOperands
+ 1 || NumOps
== 1) &&
914 "Can only handle fst* & ftst instructions!");
916 // Is this the last use of the source register?
917 unsigned Reg
= getFPReg(MI
->getOperand(NumOps
-1));
918 bool KillsSrc
= MI
->killsRegister(X86::FP0
+Reg
);
920 // FISTP64m is strange because there isn't a non-popping version.
921 // If we have one _and_ we don't want to pop the operand, duplicate the value
922 // on the stack instead of moving it. This ensures that popping the value is
924 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
// The opcodes tested here are the pseudo forms; this check runs before the
// setDesc call further down.
927 (MI
->getOpcode() == X86::IST_Fp64m32
||
928 MI
->getOpcode() == X86::ISTT_Fp16m32
||
929 MI
->getOpcode() == X86::ISTT_Fp32m32
||
930 MI
->getOpcode() == X86::ISTT_Fp64m32
||
931 MI
->getOpcode() == X86::IST_Fp64m64
||
932 MI
->getOpcode() == X86::ISTT_Fp16m64
||
933 MI
->getOpcode() == X86::ISTT_Fp32m64
||
934 MI
->getOpcode() == X86::ISTT_Fp64m64
||
935 MI
->getOpcode() == X86::IST_Fp64m80
||
936 MI
->getOpcode() == X86::ISTT_Fp16m80
||
937 MI
->getOpcode() == X86::ISTT_Fp32m80
||
938 MI
->getOpcode() == X86::ISTT_Fp64m80
||
939 MI
->getOpcode() == X86::ST_FpP80m
)) {
// The copy is given a scratch register so the original value stays tracked.
940 duplicateToTop(Reg
, getScratchReg(), I
);
942 moveToTop(Reg
, I
); // Move to the top of the stack...
945 // Convert from the pseudo instruction to the concrete instruction.
946 MI
->RemoveOperand(NumOps
-1); // Remove explicit ST(0) operand
947 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
// From here on getOpcode() is compared against the opcodes installed by the
// setDesc call above, not the pseudo opcodes tested earlier.
949 if (MI
->getOpcode() == X86::IST_FP64m
||
950 MI
->getOpcode() == X86::ISTT_FP16m
||
951 MI
->getOpcode() == X86::ISTT_FP32m
||
952 MI
->getOpcode() == X86::ISTT_FP64m
||
953 MI
->getOpcode() == X86::ST_FP80m
) {
955 report_fatal_error("Stack empty??");
957 } else if (KillsSrc
) { // Last use of operand?
963 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
964 /// replace the value with a newly computed value. These instructions may have
965 /// non-fp operands after their FP operands.
969 /// R1 = fadd R2, [mem]
///
/// Operand 0 is the FP destination and operand 1 the FP source. Both are
/// removed from the instruction before it is switched to the opcode returned
/// by getConcreteOpcode.
971 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator
&I
) {
972 MachineInstr
*MI
= I
;
974 unsigned NumOps
= MI
->getDesc().getNumOperands();
975 assert(NumOps
>= 2 && "FPRW instructions must have 2 ops!!");
978 // Is this the last use of the source register?
979 unsigned Reg
= getFPReg(MI
->getOperand(1));
980 bool KillsSrc
= MI
->killsRegister(X86::FP0
+Reg
);
983 // If this is the last use of the source register, just make sure it's on
984 // the top of the stack.
987 report_fatal_error("Stack cannot be empty!");
// Record the destination register (operand 0) via pushReg.
989 pushReg(getFPReg(MI
->getOperand(0)));
991 // If this is not the last use of the source register, _copy_ it to the top
// The copy is registered under the destination register (operand 0).
993 duplicateToTop(Reg
, getFPReg(MI
->getOperand(0)), I
);
996 // Change from the pseudo instruction to the concrete instruction.
// Operand 1 is removed first so that index 0 still refers to the destination
// when the second removal runs.
997 MI
->RemoveOperand(1); // Drop the source operand.
998 MI
->RemoveOperand(0); // Drop the destination operand.
999 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1003 //===----------------------------------------------------------------------===//
1004 // Define tables of various ways to map pseudo instructions
1007 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
// Each entry pairs a pseudo opcode (first field) with the register-stack
// opcode it maps to (second field). handleTwoArgFP applies ASSERT_SORTED to
// this table, so keep the entries in sorted order (presumably by the first
// field - confirm against the TableEntry definition).
1008 static const TableEntry ForwardST0Table
[] = {
1009 { X86::ADD_Fp32
, X86::ADD_FST0r
},
1010 { X86::ADD_Fp64
, X86::ADD_FST0r
},
1011 { X86::ADD_Fp80
, X86::ADD_FST0r
},
1012 { X86::DIV_Fp32
, X86::DIV_FST0r
},
1013 { X86::DIV_Fp64
, X86::DIV_FST0r
},
1014 { X86::DIV_Fp80
, X86::DIV_FST0r
},
1015 { X86::MUL_Fp32
, X86::MUL_FST0r
},
1016 { X86::MUL_Fp64
, X86::MUL_FST0r
},
1017 { X86::MUL_Fp80
, X86::MUL_FST0r
},
1018 { X86::SUB_Fp32
, X86::SUB_FST0r
},
1019 { X86::SUB_Fp64
, X86::SUB_FST0r
},
1020 { X86::SUB_Fp80
, X86::SUB_FST0r
},
1023 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
// Same pseudo opcodes as the forward ST(0) table. The entries marked
// commutative reuse the forward opcode, while DIV and SUB map to the
// DIVR/SUBR forms.
1024 static const TableEntry ReverseST0Table
[] = {
1025 { X86::ADD_Fp32
, X86::ADD_FST0r
}, // commutative
1026 { X86::ADD_Fp64
, X86::ADD_FST0r
}, // commutative
1027 { X86::ADD_Fp80
, X86::ADD_FST0r
}, // commutative
1028 { X86::DIV_Fp32
, X86::DIVR_FST0r
},
1029 { X86::DIV_Fp64
, X86::DIVR_FST0r
},
1030 { X86::DIV_Fp80
, X86::DIVR_FST0r
},
1031 { X86::MUL_Fp32
, X86::MUL_FST0r
}, // commutative
1032 { X86::MUL_Fp64
, X86::MUL_FST0r
}, // commutative
1033 { X86::MUL_Fp80
, X86::MUL_FST0r
}, // commutative
1034 { X86::SUB_Fp32
, X86::SUBR_FST0r
},
1035 { X86::SUB_Fp64
, X86::SUBR_FST0r
},
1036 { X86::SUB_Fp80
, X86::SUBR_FST0r
},
1039 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
// Entries map the pseudo opcode (first field) to the FrST0 opcode (second
// field) used when the result is written to ST(i). Note that DIV and SUB map
// to the DIVR/SUBR opcodes in this table.
1040 static const TableEntry ForwardSTiTable
[] = {
1041 { X86::ADD_Fp32
, X86::ADD_FrST0
}, // commutative
1042 { X86::ADD_Fp64
, X86::ADD_FrST0
}, // commutative
1043 { X86::ADD_Fp80
, X86::ADD_FrST0
}, // commutative
1044 { X86::DIV_Fp32
, X86::DIVR_FrST0
},
1045 { X86::DIV_Fp64
, X86::DIVR_FrST0
},
1046 { X86::DIV_Fp80
, X86::DIVR_FrST0
},
1047 { X86::MUL_Fp32
, X86::MUL_FrST0
}, // commutative
1048 { X86::MUL_Fp64
, X86::MUL_FrST0
}, // commutative
1049 { X86::MUL_Fp80
, X86::MUL_FrST0
}, // commutative
1050 { X86::SUB_Fp32
, X86::SUBR_FrST0
},
1051 { X86::SUB_Fp64
, X86::SUBR_FrST0
},
1052 { X86::SUB_Fp80
, X86::SUBR_FrST0
},
1055 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
// Entries map the pseudo opcode (first field) to the FrST0 opcode (second
// field). Here DIV and SUB map to the plain DIV/SUB FrST0 opcodes.
1056 static const TableEntry ReverseSTiTable
[] = {
1057 { X86::ADD_Fp32
, X86::ADD_FrST0
},
1058 { X86::ADD_Fp64
, X86::ADD_FrST0
},
1059 { X86::ADD_Fp80
, X86::ADD_FrST0
},
1060 { X86::DIV_Fp32
, X86::DIV_FrST0
},
1061 { X86::DIV_Fp64
, X86::DIV_FrST0
},
1062 { X86::DIV_Fp80
, X86::DIV_FrST0
},
1063 { X86::MUL_Fp32
, X86::MUL_FrST0
},
1064 { X86::MUL_Fp64
, X86::MUL_FrST0
},
1065 { X86::MUL_Fp80
, X86::MUL_FrST0
},
1066 { X86::SUB_Fp32
, X86::SUB_FrST0
},
1067 { X86::SUB_Fp64
, X86::SUB_FrST0
},
1068 { X86::SUB_Fp80
, X86::SUB_FrST0
},
1072 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1073 /// instructions which need to be simplified and possibly transformed.
1075 /// Result: ST(0) = fsub ST(0), ST(i)
1076 /// ST(i) = fsub ST(0), ST(i)
1077 /// ST(0) = fsubr ST(0), ST(i)
1078 /// ST(i) = fsubr ST(0), ST(i)
///
/// Operand 0 is the FP destination (Dest); the last two operands are the FP
/// sources (Op0 and Op1). The pseudo instruction is replaced by a newly built
/// instruction whose opcode is looked up in one of the four tables above, and
/// the old instruction is deleted at the end.
1080 void FPS::handleTwoArgFP(MachineBasicBlock::iterator
&I
) {
1081 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1082 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1083 MachineInstr
*MI
= I
;
1085 unsigned NumOperands
= MI
->getDesc().getNumOperands();
1086 assert(NumOperands
== 3 && "Illegal TwoArgFP instruction!");
1087 unsigned Dest
= getFPReg(MI
->getOperand(0));
1088 unsigned Op0
= getFPReg(MI
->getOperand(NumOperands
-2));
1089 unsigned Op1
= getFPReg(MI
->getOperand(NumOperands
-1));
1090 bool KillsOp0
= MI
->killsRegister(X86::FP0
+Op0
);
1091 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
// Captured now because MI is deleted at the end of this function.
1092 DebugLoc dl
= MI
->getDebugLoc();
1094 unsigned TOS
= getStackEntry(0);
1096 // One of our operands must be on the top of the stack. If neither is yet, we
1097 // need to move one.
1098 if (Op0
!= TOS
&& Op1
!= TOS
) { // No operand at TOS?
1099 // We can choose to move either operand to the top of the stack. If one of
1100 // the operands is killed by this instruction, we want that one so that we
1101 // can update right on top of the old version.
1103 moveToTop(Op0
, I
); // Move dead operand to TOS.
1105 } else if (KillsOp1
) {
1109 // All of the operands are live after this instruction executes, so we
1110 // cannot update on top of any operand. Because of this, we must
1111 // duplicate one of the stack elements to the top. It doesn't matter
1112 // which one we pick.
1114 duplicateToTop(Op0
, Dest
, I
);
1118 } else if (!KillsOp0
&& !KillsOp1
) {
1119 // If we DO have one of our operands at the top of the stack, but we don't
1120 // have a dead operand, we must duplicate one of the operands to a new slot
1122 duplicateToTop(Op0
, Dest
, I
);
1127 // Now we know that one of our operands is on the top of the stack, and at
1128 // least one of our operands is killed by this instruction.
1129 assert((TOS
== Op0
|| TOS
== Op1
) && (KillsOp0
|| KillsOp1
) &&
1130 "Stack conditions not set up right!");
1132 // We decide which form to use based on what is on the top of the stack, and
1133 // which operand is killed by this instruction.
1134 const TableEntry
*InstTable
;
// isForward: Op0 is the operand on top of the stack.
1135 bool isForward
= TOS
== Op0
;
// updateST0: the operand on top of the stack is paired with another operand
// that is not killed by this instruction.
1136 bool updateST0
= (TOS
== Op0
&& !KillsOp1
) || (TOS
== Op1
&& !KillsOp0
);
1139 InstTable
= ForwardST0Table
;
1141 InstTable
= ReverseST0Table
;
1144 InstTable
= ForwardSTiTable
;
1146 InstTable
= ReverseSTiTable
;
// The length passed to Lookup is always that of ForwardST0Table, whichever
// table was selected. All four tables above have the same number of entries
// (12), so any table gaining or losing an entry must be mirrored in the rest.
1149 int Opcode
= Lookup(InstTable
, array_lengthof(ForwardST0Table
),
1151 assert(Opcode
!= -1 && "Unknown TwoArgFP pseudo instruction!");
1153 // NotTOS - The register which is not on the top of stack...
1154 unsigned NotTOS
= (TOS
== Op0
) ? Op1
: Op0
;
1156 // Replace the old instruction with a new instruction
// The only register operand added to the new instruction is the stack
// register of the operand that is not on top of the stack.
1158 I
= BuildMI(*MBB
, I
, dl
, TII
->get(Opcode
)).addReg(getSTReg(NotTOS
));
1160 // If both operands are killed, pop one off of the stack in addition to
1161 // overwriting the other one.
1162 if (KillsOp0
&& KillsOp1
&& Op0
!= Op1
) {
1163 assert(!updateST0
&& "Should have updated other operand!");
1164 popStackAfter(I
); // Pop the top of stack
1167 // Update stack information so that we know the destination register is now on
1169 unsigned UpdatedSlot
= getSlot(updateST0
? TOS
: NotTOS
);
1170 assert(UpdatedSlot
< StackTop
&& Dest
< 7);
// Keep both directions of the mapping consistent: the slot now holds Dest,
// and Dest maps back to that slot.
1171 Stack
[UpdatedSlot
] = Dest
;
1172 RegMap
[Dest
] = UpdatedSlot
;
1173 MBB
->getParent()->DeleteMachineInstr(MI
); // Remove the old instruction
1176 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1177 /// register arguments and no explicit destinations.
///
/// The instruction is rewritten in place: operand 0 is changed to the stack
/// register currently holding Op1, operand 1 is removed, and the descriptor
/// is switched to the opcode returned by getConcreteOpcode. Operands that
/// the instruction kills are then released with freeStackSlotAfter.
1179 void FPS::handleCompareFP(MachineBasicBlock::iterator
&I
) {
// NOTE(review): none of the four tables checked here is referenced in the
// visible remainder of this function - confirm whether these checks are
// needed.
1180 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1181 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1182 MachineInstr
*MI
= I
;
1184 unsigned NumOperands
= MI
->getDesc().getNumOperands();
1185 assert(NumOperands
== 2 && "Illegal FUCOM* instruction!");
1186 unsigned Op0
= getFPReg(MI
->getOperand(NumOperands
-2));
1187 unsigned Op1
= getFPReg(MI
->getOperand(NumOperands
-1));
1188 bool KillsOp0
= MI
->killsRegister(X86::FP0
+Op0
);
1189 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
1191 // Make sure the first operand is on the top of stack, the other one can be
1195 // Change from the pseudo instruction to the concrete instruction.
1196 MI
->getOperand(0).setReg(getSTReg(Op1
));
1197 MI
->RemoveOperand(1);
1198 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1200 // If any of the operands are killed by this instruction, free them.
1201 if (KillsOp0
) freeStackSlotAfter(I
, Op0
);
// The Op0 != Op1 test skips the second free when both operands name the same
// register (presumably so that it is not released twice).
1202 if (KillsOp1
&& Op0
!= Op1
) freeStackSlotAfter(I
, Op1
);
1205 /// handleCondMovFP - Handle two address conditional move instructions. These
1206 /// instructions move a st(i) register to st(0) iff a condition is true. These
1207 /// instructions require that the first operand is at the top of the stack, but
1208 /// otherwise don't modify the stack at all.
///
/// Op0 is read from operand 0 and Op1 from operand 2 of the pseudo
/// instruction. Op1 is released with freeStackSlotAfter when this instruction
/// kills it and it differs from Op0.
1209 void FPS::handleCondMovFP(MachineBasicBlock::iterator
&I
) {
1210 MachineInstr
*MI
= I
;
1212 unsigned Op0
= getFPReg(MI
->getOperand(0));
1213 unsigned Op1
= getFPReg(MI
->getOperand(2));
1214 bool KillsOp1
= MI
->killsRegister(X86::FP0
+Op1
);
1216 // The first operand *must* be on the top of the stack.
1219 // Change the second operand to the stack register that the operand is in.
1220 // Change from the pseudo instruction to the concrete instruction.
// Index bookkeeping (assuming RemoveOperand shifts later operands down):
// removing operand 0 renumbers the original operands 1 and 2 to 0 and 1, and
// the second call then removes the original operand 2. The operand left at
// index 0 is therefore the original operand 1, which is the one rewritten to
// the stack register holding Op1.
1221 MI
->RemoveOperand(0);
1222 MI
->RemoveOperand(1);
1223 MI
->getOperand(0).setReg(getSTReg(Op1
));
1224 MI
->setDesc(TII
->get(getConcreteOpcode(MI
->getOpcode())));
1226 // If we kill the second operand, make sure to pop it from the stack.
1227 if (Op0
!= Op1
&& KillsOp1
) {
1228 // Get this value off of the register stack.
1229 freeStackSlotAfter(I
, Op1
);
1234 /// handleSpecialFP - Handle special instructions which behave unlike other
1235 /// floating point instructions. This is primarily intended for use by pseudo
///
/// Dispatches on the opcode of the instruction at I. The case labels visible
/// below cover FpGET_ST0_*, FpGET_ST1_*, FpSET_ST0_*, FpSET_ST1_*, the MOV_Fp*
/// copies and INLINEASM; later comments in the body also describe the handling
/// of RET. An opcode without a case reaches llvm_unreachable. At the end the
/// pseudo instruction is erased from the block and I is updated.
1238 void FPS::handleSpecialFP(MachineBasicBlock::iterator
&I
) {
1239 MachineInstr
*MI
= I
;
1240 switch (MI
->getOpcode()) {
1241 default: llvm_unreachable("Unknown SpecialFP instruction!");
1242 case X86::FpGET_ST0_32
:// Appears immediately after a call returning FP type!
1243 case X86::FpGET_ST0_64
:// Appears immediately after a call returning FP type!
1244 case X86::FpGET_ST0_80
:// Appears immediately after a call returning FP type!
1245 assert(StackTop
== 0 && "Stack should be empty after a call!");
// Operand 0 is the FP register that receives the value; record it via pushReg.
1246 pushReg(getFPReg(MI
->getOperand(0)));
1248 case X86::FpGET_ST1_32
:// Appears immediately after a call returning FP type!
1249 case X86::FpGET_ST1_64
:// Appears immediately after a call returning FP type!
1250 case X86::FpGET_ST1_80
:{// Appears immediately after a call returning FP type!
1251 // FpGET_ST1 should occur right after a FpGET_ST0 for a call or inline asm.
1252 // The pattern we expect is:
1257 // At this point, we've pushed FP1 on the top of stack, so it should be
1258 // present if it isn't dead. If it was dead, we already emitted a pop to
1259 // remove it from the stack and StackTop = 0.
1261 // Push FP4 as top of stack next.
1262 pushReg(getFPReg(MI
->getOperand(0)));
1264 // If StackTop was 0 before we pushed our operand, then ST(0) must have been
1265 // dead. In this case, the ST(1) value is the only thing that is live, so
1266 // it should be on the TOS (after the pop that was emitted) and is. Just
1267 // continue in this case.
1271 // Because pushReg just pushed ST(1) as TOS, we now have to swap the two top
1272 // elements so that our accounting is correct.
1273 unsigned RegOnTop
= getStackEntry(0);
1274 unsigned RegNo
= getStackEntry(1);
1276 // Swap the slots the regs are in.
1277 std::swap(RegMap
[RegNo
], RegMap
[RegOnTop
]);
1279 // Swap stack slot contents.
1280 if (RegMap
[RegOnTop
] >= StackTop
)
1281 report_fatal_error("Access past stack top!");
1282 std::swap(Stack
[RegMap
[RegOnTop
]], Stack
[StackTop
-1]);
1285 case X86::FpSET_ST0_32
:
1286 case X86::FpSET_ST0_64
:
1287 case X86::FpSET_ST0_80
: {
1288 // FpSET_ST0_80 is generated by copyRegToReg for setting up inline asm
1289 // arguments that use an st constraint. We expect a sequence of
1290 // instructions: Fp_SET_ST0 Fp_SET_ST1? INLINEASM
1291 unsigned Op0
= getFPReg(MI
->getOperand(0));
1293 if (!MI
->killsRegister(X86::FP0
+ Op0
)) {
1294 // Duplicate Op0 into a temporary on the stack top.
1295 duplicateToTop(Op0
, getScratchReg(), I
);
1297 // Op0 is killed, so just swap it into position.
1300 --StackTop
; // "Forget" we have something on the top of stack!
1303 case X86::FpSET_ST1_32
:
1304 case X86::FpSET_ST1_64
:
1305 case X86::FpSET_ST1_80
: {
1306 // Set up st(1) for inline asm. We are assuming that st(0) has already been
1307 // set up by FpSET_ST0, and our StackTop is off by one because of it.
1308 unsigned Op0
= getFPReg(MI
->getOperand(0));
1309 // Restore the actual StackTop from before Fp_SET_ST0.
1310 // Note we can't handle Fp_SET_ST1 without a preceding Fp_SET_ST0, and we
1311 // are not enforcing the constraint.
1313 unsigned RegOnTop
= getStackEntry(0); // This reg must remain in st(0).
1314 if (!MI
->killsRegister(X86::FP0
+ Op0
)) {
// Op0 stays live: copy it to a scratch register, then bring RegOnTop back to
// the top.
1315 duplicateToTop(Op0
, getScratchReg(), I
);
1316 moveToTop(RegOnTop
, I
);
1317 } else if (getSTReg(Op0
) != X86::ST1
) {
1318 // We have the wrong value at st(1). Shuffle! Untested!
1319 moveToTop(getStackEntry(1), I
);
1321 moveToTop(RegOnTop
, I
);
1323 assert(StackTop
>= 2 && "Too few live registers");
1324 StackTop
-= 2; // "Forget" both st(0) and st(1).
1327 case X86::MOV_Fp3232
:
1328 case X86::MOV_Fp3264
:
1329 case X86::MOV_Fp6432
:
1330 case X86::MOV_Fp6464
:
1331 case X86::MOV_Fp3280
:
1332 case X86::MOV_Fp6480
:
1333 case X86::MOV_Fp8032
:
1334 case X86::MOV_Fp8064
:
1335 case X86::MOV_Fp8080
: {
// Operand 1 is the source and operand 0 the destination of the copy.
1336 const MachineOperand
&MO1
= MI
->getOperand(1);
1337 unsigned SrcReg
= getFPReg(MO1
);
1339 const MachineOperand
&MO0
= MI
->getOperand(0);
1340 unsigned DestReg
= getFPReg(MO0
);
1341 if (MI
->killsRegister(X86::FP0
+SrcReg
)) {
1342 // If the input operand is killed, we can just change the owner of the
1343 // incoming stack slot into the result.
1344 unsigned Slot
= getSlot(SrcReg
);
1345 assert(Slot
< 7 && DestReg
< 7 && "FpMOV operands invalid!");
// No instruction is emitted on this path; only the bookkeeping arrays change.
1346 Stack
[Slot
] = DestReg
;
1347 RegMap
[DestReg
] = Slot
;
1350 // For FMOV we just duplicate the specified value to a new stack slot.
1351 // This could be made better, but would require substantial changes.
1352 duplicateToTop(SrcReg
, DestReg
, I
);
1356 case TargetOpcode::INLINEASM
: {
1357 // The inline asm MachineInstr currently only *uses* FP registers for the
1358 // 'f' constraint. These should be turned into the current ST(x) register
1359 // in the machine instr. Also, any kills should be explicitly popped after
1362 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1363 MachineOperand
&Op
= MI
->getOperand(i
);
// Only register operands in the range FP0..FP6 are of interest here.
1364 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
1366 assert(Op
.isUse() && "Only handle inline asm uses right now");
1368 unsigned FPReg
= getFPReg(Op
);
1369 Op
.setReg(getSTReg(FPReg
));
1371 // If we kill this operand, make sure to pop it from the stack after the
1372 // asm. We just remember it for now, and pop them all off at the end in
// Kills is used as a bitmask with one bit per FP register number.
1375 Kills
|= 1U << FPReg
;
1378 // If this asm kills any FP registers (is the last use of them) we must
1379 // explicitly emit pop instructions for them. Do this now after the asm has
1380 // executed so that the ST(x) numbers are not off (which would happen if we
1381 // did this inline with operand rewriting).
1383 // Note: this might be a non-optimal pop sequence. We might be able to do
1384 // better by trying to pop in stack order or something.
1385 MachineBasicBlock::iterator InsertPt
= MI
;
// Take the lowest set bit of Kills, free that register, then clear the bit.
1387 unsigned FPReg
= CountTrailingZeros_32(Kills
);
1388 freeStackSlotAfter(InsertPt
, FPReg
);
1389 Kills
&= ~(1U << FPReg
);
1391 // Don't delete the inline asm!
1397 // If RET has an FP register use operand, pass the first one in ST(0) and
1398 // the second one in ST(1).
1400 // Find the register operands.
// ~0U marks "no such operand seen yet" for both slots.
1401 unsigned FirstFPRegOp
= ~0U, SecondFPRegOp
= ~0U;
1402 unsigned LiveMask
= 0;
1404 for (unsigned i
= 0, e
= MI
->getNumOperands(); i
!= e
; ++i
) {
1405 MachineOperand
&Op
= MI
->getOperand(i
);
1406 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
1408 // FP Register uses must be kills unless there are two uses of the same
1409 // register, in which case only one will be a kill.
1410 assert(Op
.isUse() &&
1411 (Op
.isKill() || // Marked kill.
1412 getFPReg(Op
) == FirstFPRegOp
|| // Second instance.
1413 MI
->killsRegister(Op
.getReg())) && // Later use is marked kill.
1414 "Ret only defs operands, and values aren't live beyond it");
1416 if (FirstFPRegOp
== ~0U)
1417 FirstFPRegOp
= getFPReg(Op
);
1419 assert(SecondFPRegOp
== ~0U && "More than two fp operands!");
1420 SecondFPRegOp
= getFPReg(Op
);
// LiveMask gets one bit per FP register found among the operands; it is
// passed to adjustLiveRegs after the loop.
1422 LiveMask
|= (1 << getFPReg(Op
));
1424 // Remove the operand so that later passes don't see it.
1425 MI
->RemoveOperand(i
);
1429 // We may have been carrying spurious live-ins, so make sure only the returned
1430 // registers are left live.
1431 adjustLiveRegs(LiveMask
, MI
);
1432 if (!LiveMask
) return; // Quick check to see if any are possible.
1434 // There are only four possibilities here:
1435 // 1) we are returning a single FP value. In this case, it has to be in
1436 // ST(0) already, so just declare success by removing the value from the
1438 if (SecondFPRegOp
== ~0U) {
1439 // Assert that the top of stack contains the right FP register.
1440 assert(StackTop
== 1 && FirstFPRegOp
== getStackEntry(0) &&
1441 "Top of stack not the right register for RET!");
1443 // Ok, everything is good, mark the value as not being on the stack
1444 // anymore so that our assertion about the stack being empty at end of
1445 // block doesn't fire.
1450 // Otherwise, we are returning two values:
1451 // 2) If returning the same value for both, we only have one thing in the FP
1452 // stack. Consider: RET FP1, FP1
1453 if (StackTop
== 1) {
1454 assert(FirstFPRegOp
== SecondFPRegOp
&& FirstFPRegOp
== getStackEntry(0)&&
1455 "Stack misconfiguration for RET!");
1457 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1458 // register to hold it.
1459 unsigned NewReg
= getScratchReg();
1460 duplicateToTop(FirstFPRegOp
, NewReg
, MI
);
// From here on the duplicate plays the role of the first returned value.
1461 FirstFPRegOp
= NewReg
;
1464 /// Okay we know we have two different FPx operands now:
1465 assert(StackTop
== 2 && "Must have two values live!");
1467 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1468 /// in ST(1). In this case, emit an fxch.
1469 if (getStackEntry(0) == SecondFPRegOp
) {
1470 assert(getStackEntry(1) == FirstFPRegOp
&& "Unknown regs live");
1471 moveToTop(FirstFPRegOp
, MI
);
1474 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1475 /// ST(1). Just remove both from our understanding of the stack and return.
1476 assert(getStackEntry(0) == FirstFPRegOp
&& "Unknown regs live");
1477 assert(getStackEntry(1) == SecondFPRegOp
&& "Unknown regs live");
1482 I
= MBB
->erase(I
); // Remove the pseudo instruction
1484 // We want to leave I pointing to the previous instruction, but what if we
1485 // just erased the first instruction?
1486 if (I
== MBB
->begin()) {
1487 DEBUG(dbgs() << "Inserting dummy KILL\n");
// A KILL pseudo with no operands is inserted so that I refers to a real
// instruction.
1488 I
= BuildMI(*MBB
, I
, DebugLoc(), TII
->get(TargetOpcode::KILL
));
1493 // Translate a COPY instruction to a pseudo-op that handleSpecialFP understands.
1494 bool FPS::translateCopy(MachineInstr
*MI
) {
1495 unsigned DstReg
= MI
->getOperand(0).getReg();
1496 unsigned SrcReg
= MI
->getOperand(1).getReg();
1498 if (DstReg
== X86::ST0
) {
1499 MI
->setDesc(TII
->get(X86::FpSET_ST0_80
));
1500 MI
->RemoveOperand(0);
1503 if (DstReg
== X86::ST1
) {
1504 MI
->setDesc(TII
->get(X86::FpSET_ST1_80
));
1505 MI
->RemoveOperand(0);
1508 if (SrcReg
== X86::ST0
) {
1509 MI
->setDesc(TII
->get(X86::FpGET_ST0_80
));
1512 if (SrcReg
== X86::ST1
) {
1513 MI
->setDesc(TII
->get(X86::FpGET_ST1_80
));
1516 if (X86::RFP80RegClass
.contains(DstReg
, SrcReg
)) {
1517 MI
->setDesc(TII
->get(X86::MOV_Fp8080
));