1 //===-- X86FloatingPoint.cpp - Floating point Reg -> Stack converter ------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines the pass which converts floating point instructions from
11 // pseudo registers into register stack instructions. This pass uses live
12 variable information to indicate where the FPn registers are used and their
lifetimes.
15 // The x87 hardware tracks liveness of the stack registers, so it is necessary
16 // to implement exact liveness tracking between basic blocks. The CFG edges are
17 // partitioned into bundles where the same FP registers must be live in
18 // identical stack positions. Instructions are inserted at the end of each basic
19 // block to rearrange the live registers to match the outgoing bundle.
21 // This approach avoids splitting critical edges at the potential cost of more
22 // live register shuffling instructions when critical edges are present.
24 //===----------------------------------------------------------------------===//
27 #include "X86InstrInfo.h"
28 #include "llvm/ADT/DepthFirstIterator.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/SmallPtrSet.h"
31 #include "llvm/ADT/SmallSet.h"
32 #include "llvm/ADT/SmallVector.h"
33 #include "llvm/ADT/Statistic.h"
34 #include "llvm/CodeGen/EdgeBundles.h"
35 #include "llvm/CodeGen/LivePhysRegs.h"
36 #include "llvm/CodeGen/MachineFunctionPass.h"
37 #include "llvm/CodeGen/MachineInstrBuilder.h"
38 #include "llvm/CodeGen/MachineRegisterInfo.h"
39 #include "llvm/CodeGen/Passes.h"
40 #include "llvm/CodeGen/TargetInstrInfo.h"
41 #include "llvm/CodeGen/TargetSubtargetInfo.h"
42 #include "llvm/Config/llvm-config.h"
43 #include "llvm/IR/InlineAsm.h"
44 #include "llvm/Support/Debug.h"
45 #include "llvm/Support/ErrorHandling.h"
46 #include "llvm/Support/raw_ostream.h"
47 #include "llvm/Target/TargetMachine.h"
52 #define DEBUG_TYPE "x86-codegen"
54 STATISTIC(NumFXCH
, "Number of fxch instructions inserted");
55 STATISTIC(NumFP
, "Number of floating point instructions");
58 const unsigned ScratchFPReg
= 7;
60 struct FPS
: public MachineFunctionPass
{
// Constructor: register the EdgeBundles analysis this pass requires, and
// zero-initialize the stack model arrays so reads of not-yet-assigned
// entries stay deterministic.
62 FPS() : MachineFunctionPass(ID
) {
63 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
64 // This is really only to keep valgrind quiet.
65 // The logic in isLive() is too much for it.
66 memset(Stack
, 0, sizeof(Stack
));
67 memset(RegMap
, 0, sizeof(RegMap
));
// Declare pass dependencies: EdgeBundles is required to partition CFG edges
// into live bundles; loop and dominator analyses are preserved since this
// pass only rewrites FP instructions inside blocks.
70 void getAnalysisUsage(AnalysisUsage
&AU
) const override
{
72 AU
.addRequired
<EdgeBundles
>();
73 AU
.addPreservedID(MachineLoopInfoID
);
74 AU
.addPreservedID(MachineDominatorsID
);
75 MachineFunctionPass::getAnalysisUsage(AU
);
78 bool runOnMachineFunction(MachineFunction
&MF
) override
;
// This pass runs after register allocation: it requires that no virtual
// registers remain in the function.
80 MachineFunctionProperties
getRequiredProperties() const override
{
81 return MachineFunctionProperties().set(
82 MachineFunctionProperties::Property::NoVRegs
);
// Human-readable pass name shown in -debug-pass output.
85 StringRef
getPassName() const override
{ return "X86 FP Stackifier"; }
88 const TargetInstrInfo
*TII
; // Machine instruction info.
90 // Two CFG edges are related if they leave the same block, or enter the same
91 // block. The transitive closure of an edge under this relation is a
92 // LiveBundle. It represents a set of CFG edges where the live FP stack
93 // registers must be allocated identically in the x87 stack.
95 // A LiveBundle is usually all the edges leaving a block, or all the edges
96 // entering a block, but it can contain more edges if critical edges are
99 // The set of live FP registers in a LiveBundle is calculated by bundleCFG,
100 // but the exact mapping of FP registers to stack slots is fixed later.
102 // Bit mask of live FP registers. Bit 0 = FP0, bit 1 = FP1, &c.
105 // Number of pre-assigned live registers in FixStack. This is 0 when the
106 // stack order has not yet been fixed.
109 // Assigned stack order for live-in registers.
110 // FixStack[i] == getStackEntry(i) for all i < FixCount.
111 unsigned char FixStack
[8];
113 LiveBundle() : Mask(0), FixCount(0) {}
115 // Have the live registers been assigned a stack order yet?
116 bool isFixed() const { return !Mask
|| FixCount
; }
119 // Numbered LiveBundle structs. LiveBundles[0] is used for all CFG edges
120 // with no live FP registers.
121 SmallVector
<LiveBundle
, 8> LiveBundles
;
123 // The edge bundle analysis provides indices into the LiveBundles vector.
124 EdgeBundles
*Bundles
;
126 // Return a bitmask of FP registers in block's live-in list.
// Bit i is set iff FP<i> appears in MBB's live-in list. When RemoveFPs is
// true, the FP registers are additionally stripped from that list (the
// stackifier takes over their liveness tracking from here on).
// NOTE(review): the accumulator declaration and the return statement are
// not visible in this extract — presumably a local `Mask` initialized to 0
// and returned at the end; confirm against the full source.
127 static unsigned calcLiveInMask(MachineBasicBlock
*MBB
, bool RemoveFPs
) {
// No ++ in the loop header: the iterator is advanced manually below, since
// removeLiveIn returns the next iterator when an element is erased.
129 for (MachineBasicBlock::livein_iterator I
= MBB
->livein_begin();
130 I
!= MBB
->livein_end(); ) {
131 MCPhysReg Reg
= I
->PhysReg
;
132 static_assert(X86::FP6
- X86::FP0
== 6, "sequential regnums");
133 if (Reg
>= X86::FP0
&& Reg
<= X86::FP6
) {
134 Mask
|= 1 << (Reg
- X86::FP0
);
136 I
= MBB
->removeLiveIn(I
);
145 // Partition all the CFG edges into LiveBundles.
146 void bundleCFGRecomputeKillFlags(MachineFunction
&MF
);
148 MachineBasicBlock
*MBB
; // Current basic block
150 // The hardware keeps track of how many FP registers are live, so we have
151 // to model that exactly. Usually, each live register corresponds to an
152 // FP<n> register, but when dealing with calls, returns, and inline
153 // assembly, it is sometimes necessary to have live scratch registers.
154 unsigned Stack
[8]; // FP<n> Registers in each stack slot...
155 unsigned StackTop
; // The current top of the FP stack.
158 NumFPRegs
= 8 // Including scratch pseudo-registers.
161 // For each live FP<n> register, point to its Stack[] entry.
162 // The first entries correspond to FP0-FP6, the rest are scratch registers
163 // used when we need slightly different live registers than what the
164 // register allocator thinks.
165 unsigned RegMap
[NumFPRegs
];
167 // Set up our stack model to match the incoming registers to MBB.
168 void setupBlockStack();
170 // Shuffle live registers to match the expectations of successor blocks.
171 void finishBlockStack();
173 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
174 void dumpStack() const {
175 dbgs() << "Stack contents:";
176 for (unsigned i
= 0; i
!= StackTop
; ++i
) {
177 dbgs() << " FP" << Stack
[i
];
178 assert(RegMap
[Stack
[i
]] == i
&& "Stack[] doesn't match RegMap[]!");
183 /// getSlot - Return the stack slot number a particular register number is
/// currently held in (an index into Stack[]).
185 unsigned getSlot(unsigned RegNo
) const {
186 assert(RegNo
< NumFPRegs
&& "Regno out of range!");
187 return RegMap
[RegNo
];
190 /// isLive - Is RegNo currently live in the stack?
// Both checks are needed: RegMap entries of dead registers hold stale
// values (or ~0), so verify the round-trip Stack[RegMap[RegNo]] == RegNo.
191 bool isLive(unsigned RegNo
) const {
192 unsigned Slot
= getSlot(RegNo
);
193 return Slot
< StackTop
&& Stack
[Slot
] == RegNo
;
196 /// getStackEntry - Return the X86::FP<n> register in register ST(i).
// ST(i) counts down from the top of the stack, hence StackTop-1-STi.
// NOTE(review): the guarding condition for the fatal error (presumably
// STi >= StackTop) is not visible in this extract — confirm upstream.
197 unsigned getStackEntry(unsigned STi
) const {
199 report_fatal_error("Access past stack top!");
200 return Stack
[StackTop
-1-STi
];
203 /// getSTReg - Return the X86::ST(i) register which contains the specified
204 /// FP<RegNo> register.
// Inverse of the slot mapping: slot StackTop-1 is ST(0), slot 0 is the
// deepest entry.
205 unsigned getSTReg(unsigned RegNo
) const {
206 return StackTop
- 1 - getSlot(RegNo
) + X86::ST0
;
209 // pushReg - Push the specified FP<n> register onto the stack.
// Updates the model only (Stack[] and RegMap[]); the caller is responsible
// for emitting the machine instruction that performs the hardware push.
// NOTE(review): the overflow guard's condition (presumably StackTop >= 8)
// is not visible in this extract — confirm upstream.
210 void pushReg(unsigned Reg
) {
211 assert(Reg
< NumFPRegs
&& "Register number out of range!");
213 report_fatal_error("Stack overflow!");
214 Stack
[StackTop
] = Reg
;
215 RegMap
[Reg
] = StackTop
++;
218 // popReg - Pop a register from the stack.
221 report_fatal_error("Cannot pop empty stack!");
222 RegMap
[Stack
[--StackTop
]] = ~0; // Update state
// isAtTop - True iff FP<RegNo> currently occupies ST(0), i.e. the highest
// stack slot StackTop-1.
225 bool isAtTop(unsigned RegNo
) const { return getSlot(RegNo
) == StackTop
-1; }
// moveToTop - Bring FP<RegNo> to ST(0) by emitting an fxch before I, and
// mirror the exchange in the Stack[]/RegMap[] model. No-op when the
// register is already on top.
226 void moveToTop(unsigned RegNo
, MachineBasicBlock::iterator I
) {
// Use an empty DebugLoc when inserting at end-of-block.
227 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
228 if (isAtTop(RegNo
)) return;
230 unsigned STReg
= getSTReg(RegNo
);
231 unsigned RegOnTop
= getStackEntry(0);
233 // Swap the slots the regs are in.
234 std::swap(RegMap
[RegNo
], RegMap
[RegOnTop
]);
236 // Swap stack slot contents.
237 if (RegMap
[RegOnTop
] >= StackTop
)
238 report_fatal_error("Access past stack top!");
239 std::swap(Stack
[RegMap
[RegOnTop
]], Stack
[StackTop
-1]);
241 // Emit an fxch to update the runtime processors version of the state.
242 BuildMI(*MBB
, I
, dl
, TII
->get(X86::XCH_F
)).addReg(STReg
);
// duplicateToTop - Duplicate FP<RegNo> onto the top of the stack under the
// new model name AsReg, by emitting an "fld st(i)" before I. STReg is
// captured before pushReg updates the model: the emitted operand must use
// the pre-push ST numbering, matching the hardware's evaluation order.
246 void duplicateToTop(unsigned RegNo
, unsigned AsReg
,
247 MachineBasicBlock::iterator I
) {
248 DebugLoc dl
= I
== MBB
->end() ? DebugLoc() : I
->getDebugLoc();
249 unsigned STReg
= getSTReg(RegNo
);
250 pushReg(AsReg
); // New register on top of stack
252 BuildMI(*MBB
, I
, dl
, TII
->get(X86::LD_Frr
)).addReg(STReg
);
255 /// popStackAfter - Pop the current value off of the top of the FP stack
256 /// after the specified instruction.
257 void popStackAfter(MachineBasicBlock::iterator
&I
);
259 /// freeStackSlotAfter - Free the specified register from the register
260 /// stack, so that it is no longer in a register. If the register is
261 /// currently at the top of the stack, we just pop the current instruction,
262 /// otherwise we store the current top-of-stack into the specified slot,
263 /// then pop the top of stack.
264 void freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned Reg
);
266 /// freeStackSlotBefore - Just the pop, no folding. Return the inserted
268 MachineBasicBlock::iterator
269 freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
);
271 /// Adjust the live registers to be the set in Mask.
272 void adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
);
274 /// Shuffle the top FixCount stack entries such that FP reg FixStack[0] is
275 /// st(0), FP reg FixStack[1] is st(1) etc.
276 void shuffleStackTop(const unsigned char *FixStack
, unsigned FixCount
,
277 MachineBasicBlock::iterator I
);
279 bool processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&MBB
);
281 void handleCall(MachineBasicBlock::iterator
&I
);
282 void handleReturn(MachineBasicBlock::iterator
&I
);
283 void handleZeroArgFP(MachineBasicBlock::iterator
&I
);
284 void handleOneArgFP(MachineBasicBlock::iterator
&I
);
285 void handleOneArgFPRW(MachineBasicBlock::iterator
&I
);
286 void handleTwoArgFP(MachineBasicBlock::iterator
&I
);
287 void handleCompareFP(MachineBasicBlock::iterator
&I
);
288 void handleCondMovFP(MachineBasicBlock::iterator
&I
);
289 void handleSpecialFP(MachineBasicBlock::iterator
&I
);
291 // Check if a COPY instruction is using FP registers.
// True when either the destination or the source operand belongs to the
// RFP80 register class — such copies must be handled as SpecialFP.
292 static bool isFPCopy(MachineInstr
&MI
) {
293 unsigned DstReg
= MI
.getOperand(0).getReg();
294 unsigned SrcReg
= MI
.getOperand(1).getReg();
296 return X86::RFP80RegClass
.contains(DstReg
) ||
297 X86::RFP80RegClass
.contains(SrcReg
);
300 void setKillFlags(MachineBasicBlock
&MBB
) const;
// Factory entry point used by the X86 target to add this pass to the
// codegen pipeline.
305 FunctionPass
*llvm::createX86FloatingPointStackifierPass() { return new FPS(); }
307 /// getFPReg - Return the X86::FPx register number for the specified operand.
308 /// For example, this returns 3 for X86::FP3.
// Relies on FP0..FP6 being sequential in the register enum (asserted
// elsewhere in this file via static_assert).
309 static unsigned getFPReg(const MachineOperand
&MO
) {
310 assert(MO
.isReg() && "Expected an FP register!");
311 unsigned Reg
= MO
.getReg();
312 assert(Reg
>= X86::FP0
&& Reg
<= X86::FP6
&& "Expected FP register!");
313 return Reg
- X86::FP0
;
316 /// runOnMachineFunction - Loop over all of the basic blocks, transforming FP
317 /// register references into FP stack references.
319 bool FPS::runOnMachineFunction(MachineFunction
&MF
) {
320 // We only need to run this pass if there are any FP registers used in this
321 // function. If it is all integer, there is nothing for us to do!
322 bool FPIsUsed
= false;
324 static_assert(X86::FP6
== X86::FP0
+6, "Register enums aren't sorted right!");
325 const MachineRegisterInfo
&MRI
= MF
.getRegInfo();
326 for (unsigned i
= 0; i
<= 6; ++i
)
327 if (!MRI
.reg_nodbg_empty(X86::FP0
+ i
)) {
333 if (!FPIsUsed
) return false;
335 Bundles
= &getAnalysis
<EdgeBundles
>();
336 TII
= MF
.getSubtarget().getInstrInfo();
338 // Prepare cross-MBB liveness.
339 bundleCFGRecomputeKillFlags(MF
);
343 // Process the function in depth first order so that we process at least one
344 // of the predecessors for every reachable block in the function.
345 df_iterator_default_set
<MachineBasicBlock
*> Processed
;
346 MachineBasicBlock
*Entry
= &MF
.front();
349 LiveBundles
[Bundles
->getBundle(Entry
->getNumber(), false)];
351 // In regcall convention, some FP registers may not be passed through
352 // the stack, so they will need to be assigned to the stack first
353 if ((Entry
->getParent()->getFunction().getCallingConv() ==
354 CallingConv::X86_RegCall
) && (Bundle
.Mask
&& !Bundle
.FixCount
)) {
355 // In the register calling convention, up to one FP argument could be
356 // saved in the first FP register.
357 // If bundle.mask is non-zero and Bundle.FixCount is zero, it means
358 // that the FP registers contain arguments.
359 // The actual value is passed in FP0.
360 // Here we fix the stack and mark FP0 as pre-assigned register.
361 assert((Bundle
.Mask
& 0xFE) == 0 &&
362 "Only FP0 could be passed as an argument");
364 Bundle
.FixStack
[0] = 0;
367 bool Changed
= false;
368 for (MachineBasicBlock
*BB
: depth_first_ext(Entry
, Processed
))
369 Changed
|= processBasicBlock(MF
, *BB
);
371 // Process any unreachable blocks in arbitrary order now.
372 if (MF
.size() != Processed
.size())
373 for (MachineBasicBlock
&BB
: MF
)
374 if (Processed
.insert(&BB
).second
)
375 Changed
|= processBasicBlock(MF
, BB
);
382 /// bundleCFG - Scan all the basic blocks to determine consistent live-in and
383 /// live-out sets for the FP registers. Consistent means that the set of
384 /// registers live-out from a block is identical to the live-in set of all
385 /// successors. This is not enforced by the normal live-in lists since
386 /// registers may be implicitly defined, or not used by all successors.
387 void FPS::bundleCFGRecomputeKillFlags(MachineFunction
&MF
) {
388 assert(LiveBundles
.empty() && "Stale data in LiveBundles");
// One LiveBundle per edge bundle; all start with an empty mask.
389 LiveBundles
.resize(Bundles
->getNumBundles());
391 // Gather the actual live-in masks for all MBBs.
392 for (MachineBasicBlock
&MBB
: MF
) {
// false: just read the live-in list, don't strip FP registers yet.
395 const unsigned Mask
= calcLiveInMask(&MBB
, false);
398 // Update MBB ingoing bundle mask.
// OR the block's live-ins into its ingoing bundle so every edge in the
// bundle agrees on the set of live FP registers.
399 LiveBundles
[Bundles
->getBundle(MBB
.getNumber(), false)].Mask
|= Mask
;
403 /// processBasicBlock - Loop over all of the instructions in the basic block,
404 /// transforming FP instructions into their stack form.
406 bool FPS::processBasicBlock(MachineFunction
&MF
, MachineBasicBlock
&BB
) {
407 bool Changed
= false;
412 for (MachineBasicBlock::iterator I
= BB
.begin(); I
!= BB
.end(); ++I
) {
413 MachineInstr
&MI
= *I
;
414 uint64_t Flags
= MI
.getDesc().TSFlags
;
416 unsigned FPInstClass
= Flags
& X86II::FPTypeMask
;
417 if (MI
.isInlineAsm())
418 FPInstClass
= X86II::SpecialFP
;
420 if (MI
.isCopy() && isFPCopy(MI
))
421 FPInstClass
= X86II::SpecialFP
;
423 if (MI
.isImplicitDef() &&
424 X86::RFP80RegClass
.contains(MI
.getOperand(0).getReg()))
425 FPInstClass
= X86II::SpecialFP
;
428 FPInstClass
= X86II::SpecialFP
;
430 if (FPInstClass
== X86II::NotFP
)
431 continue; // Efficiently ignore non-fp insts!
433 MachineInstr
*PrevMI
= nullptr;
435 PrevMI
= &*std::prev(I
);
437 ++NumFP
; // Keep track of # of pseudo instrs
438 LLVM_DEBUG(dbgs() << "\nFPInst:\t" << MI
);
440 // Get dead variables list now because the MI pointer may be deleted as part
442 SmallVector
<unsigned, 8> DeadRegs
;
443 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
444 const MachineOperand
&MO
= MI
.getOperand(i
);
445 if (MO
.isReg() && MO
.isDead())
446 DeadRegs
.push_back(MO
.getReg());
449 switch (FPInstClass
) {
450 case X86II::ZeroArgFP
: handleZeroArgFP(I
); break;
451 case X86II::OneArgFP
: handleOneArgFP(I
); break; // fstp ST(0)
452 case X86II::OneArgFPRW
: handleOneArgFPRW(I
); break; // ST(0) = fsqrt(ST(0))
453 case X86II::TwoArgFP
: handleTwoArgFP(I
); break;
454 case X86II::CompareFP
: handleCompareFP(I
); break;
455 case X86II::CondMovFP
: handleCondMovFP(I
); break;
456 case X86II::SpecialFP
: handleSpecialFP(I
); break;
457 default: llvm_unreachable("Unknown FP Type!");
460 // Check to see if any of the values defined by this instruction are dead
461 // after definition. If so, pop them.
462 for (unsigned i
= 0, e
= DeadRegs
.size(); i
!= e
; ++i
) {
463 unsigned Reg
= DeadRegs
[i
];
464 // Check if Reg is live on the stack. An inline-asm register operand that
465 // is in the clobber list and marked dead might not be live on the stack.
466 static_assert(X86::FP7
- X86::FP0
== 7, "sequential FP regnumbers");
467 if (Reg
>= X86::FP0
&& Reg
<= X86::FP6
&& isLive(Reg
-X86::FP0
)) {
468 LLVM_DEBUG(dbgs() << "Register FP#" << Reg
- X86::FP0
<< " is dead!\n");
469 freeStackSlotAfter(I
, Reg
-X86::FP0
);
473 // Print out all of the instructions expanded to if -debug
475 MachineBasicBlock::iterator PrevI
= PrevMI
;
477 dbgs() << "Just deleted pseudo instruction\n";
479 MachineBasicBlock::iterator Start
= I
;
480 // Rewind to first instruction newly inserted.
481 while (Start
!= BB
.begin() && std::prev(Start
) != PrevI
)
483 dbgs() << "Inserted instructions:\n\t";
484 Start
->print(dbgs());
485 while (++Start
!= std::next(I
)) {
500 /// setupBlockStack - Use the live bundles to set up our model of the stack
501 /// to match predecessors' live out stack.
502 void FPS::setupBlockStack() {
503 LLVM_DEBUG(dbgs() << "\nSetting up live-ins for " << printMBBReference(*MBB
)
504 << " derived from " << MBB
->getName() << ".\n");
506 // Get the live-in bundle for MBB.
507 const LiveBundle
&Bundle
=
508 LiveBundles
[Bundles
->getBundle(MBB
->getNumber(), false)];
511 LLVM_DEBUG(dbgs() << "Block has no FP live-ins.\n");
515 // Depth-first iteration should ensure that we always have an assigned stack.
516 assert(Bundle
.isFixed() && "Reached block before any predecessors");
518 // Push the fixed live-in registers.
// Iterate from FixCount down to 1 so that FixStack[0] — the register
// expected in st(0) — ends up pushed last, i.e. on top.
519 for (unsigned i
= Bundle
.FixCount
; i
> 0; --i
) {
520 LLVM_DEBUG(dbgs() << "Live-in st(" << (i
- 1) << "): %fp"
521 << unsigned(Bundle
.FixStack
[i
- 1]) << '\n');
522 pushReg(Bundle
.FixStack
[i
-1]);
525 // Kill off unwanted live-ins. This can happen with a critical edge.
526 // FIXME: We could keep these live registers around as zombies. They may need
527 // to be revived at the end of a short block. It might save a few instrs.
// RemoveFPs=true: the FP registers are removed from the block's live-in
// list here since the stackifier now models their liveness itself.
528 unsigned Mask
= calcLiveInMask(MBB
, /*RemoveFPs=*/true);
529 adjustLiveRegs(Mask
, MBB
->begin());
530 LLVM_DEBUG(MBB
->dump());
533 /// finishBlockStack - Revive live-outs that are implicitly defined out of
534 /// MBB. Shuffle live registers to match the expected fixed stack of any
535 /// predecessors, and ensure that all predecessors are expecting the same
/// stack layout (fixing the bundle's order on first arrival).
537 void FPS::finishBlockStack() {
538 // The RET handling below takes care of return blocks for us.
539 if (MBB
->succ_empty())
542 LLVM_DEBUG(dbgs() << "Setting up live-outs for " << printMBBReference(*MBB
)
543 << " derived from " << MBB
->getName() << ".\n");
545 // Get MBB's live-out bundle.
// true selects the outgoing bundle for this block.
546 unsigned BundleIdx
= Bundles
->getBundle(MBB
->getNumber(), true);
547 LiveBundle
&Bundle
= LiveBundles
[BundleIdx
];
549 // We may need to kill and define some registers to match successors.
550 // FIXME: This can probably be combined with the shuffle below.
// Insert fix-up code before the terminator so it executes on every
// outgoing edge.
551 MachineBasicBlock::iterator Term
= MBB
->getFirstTerminator();
552 adjustLiveRegs(Bundle
.Mask
, Term
);
555 LLVM_DEBUG(dbgs() << "No live-outs.\n");
559 // Has the stack order been fixed yet?
560 LLVM_DEBUG(dbgs() << "LB#" << BundleIdx
<< ": ");
561 if (Bundle
.isFixed()) {
// A predecessor (or sibling edge) already chose an order — match it.
562 LLVM_DEBUG(dbgs() << "Shuffling stack to match.\n");
563 shuffleStackTop(Bundle
.FixStack
, Bundle
.FixCount
, Term
);
565 // Not fixed yet, we get to choose.
// First block to reach this bundle: record our current stack order as
// the canonical one for every edge in the bundle.
566 LLVM_DEBUG(dbgs() << "Fixing stack order now.\n");
567 Bundle
.FixCount
= StackTop
;
568 for (unsigned i
= 0; i
< StackTop
; ++i
)
569 Bundle
.FixStack
[i
] = getStackEntry(i
);
574 //===----------------------------------------------------------------------===//
575 // Efficient Lookup Table Support
576 //===----------------------------------------------------------------------===//
582 bool operator<(const TableEntry
&TE
) const { return from
< TE
.from
; }
583 friend bool operator<(const TableEntry
&TE
, unsigned V
) {
586 friend bool LLVM_ATTRIBUTE_UNUSED
operator<(unsigned V
,
587 const TableEntry
&TE
) {
593 static int Lookup(ArrayRef
<TableEntry
> Table
, unsigned Opcode
) {
594 const TableEntry
*I
= std::lower_bound(Table
.begin(), Table
.end(), Opcode
);
595 if (I
!= Table
.end() && I
->from
== Opcode
)
601 #define ASSERT_SORTED(TABLE)
603 #define ASSERT_SORTED(TABLE) \
605 static std::atomic<bool> TABLE##Checked(false); \
606 if (!TABLE##Checked.load(std::memory_order_relaxed)) { \
607 assert(std::is_sorted(std::begin(TABLE), std::end(TABLE)) && \
608 "All lookup tables must be sorted for efficient access!"); \
609 TABLE##Checked.store(true, std::memory_order_relaxed); \
614 //===----------------------------------------------------------------------===//
615 // Register File -> Register Stack Mapping Methods
616 //===----------------------------------------------------------------------===//
618 // OpcodeTable - Sorted map of register instructions to their stack version.
619 // The first element is an register file pseudo instruction, the second is the
620 // concrete X86 instruction which uses the register stack.
622 static const TableEntry OpcodeTable
[] = {
623 { X86::ABS_Fp32
, X86::ABS_F
},
624 { X86::ABS_Fp64
, X86::ABS_F
},
625 { X86::ABS_Fp80
, X86::ABS_F
},
626 { X86::ADD_Fp32m
, X86::ADD_F32m
},
627 { X86::ADD_Fp64m
, X86::ADD_F64m
},
628 { X86::ADD_Fp64m32
, X86::ADD_F32m
},
629 { X86::ADD_Fp80m32
, X86::ADD_F32m
},
630 { X86::ADD_Fp80m64
, X86::ADD_F64m
},
631 { X86::ADD_FpI16m32
, X86::ADD_FI16m
},
632 { X86::ADD_FpI16m64
, X86::ADD_FI16m
},
633 { X86::ADD_FpI16m80
, X86::ADD_FI16m
},
634 { X86::ADD_FpI32m32
, X86::ADD_FI32m
},
635 { X86::ADD_FpI32m64
, X86::ADD_FI32m
},
636 { X86::ADD_FpI32m80
, X86::ADD_FI32m
},
637 { X86::CHS_Fp32
, X86::CHS_F
},
638 { X86::CHS_Fp64
, X86::CHS_F
},
639 { X86::CHS_Fp80
, X86::CHS_F
},
640 { X86::CMOVBE_Fp32
, X86::CMOVBE_F
},
641 { X86::CMOVBE_Fp64
, X86::CMOVBE_F
},
642 { X86::CMOVBE_Fp80
, X86::CMOVBE_F
},
643 { X86::CMOVB_Fp32
, X86::CMOVB_F
},
644 { X86::CMOVB_Fp64
, X86::CMOVB_F
},
645 { X86::CMOVB_Fp80
, X86::CMOVB_F
},
646 { X86::CMOVE_Fp32
, X86::CMOVE_F
},
647 { X86::CMOVE_Fp64
, X86::CMOVE_F
},
648 { X86::CMOVE_Fp80
, X86::CMOVE_F
},
649 { X86::CMOVNBE_Fp32
, X86::CMOVNBE_F
},
650 { X86::CMOVNBE_Fp64
, X86::CMOVNBE_F
},
651 { X86::CMOVNBE_Fp80
, X86::CMOVNBE_F
},
652 { X86::CMOVNB_Fp32
, X86::CMOVNB_F
},
653 { X86::CMOVNB_Fp64
, X86::CMOVNB_F
},
654 { X86::CMOVNB_Fp80
, X86::CMOVNB_F
},
655 { X86::CMOVNE_Fp32
, X86::CMOVNE_F
},
656 { X86::CMOVNE_Fp64
, X86::CMOVNE_F
},
657 { X86::CMOVNE_Fp80
, X86::CMOVNE_F
},
658 { X86::CMOVNP_Fp32
, X86::CMOVNP_F
},
659 { X86::CMOVNP_Fp64
, X86::CMOVNP_F
},
660 { X86::CMOVNP_Fp80
, X86::CMOVNP_F
},
661 { X86::CMOVP_Fp32
, X86::CMOVP_F
},
662 { X86::CMOVP_Fp64
, X86::CMOVP_F
},
663 { X86::CMOVP_Fp80
, X86::CMOVP_F
},
664 { X86::COS_Fp32
, X86::COS_F
},
665 { X86::COS_Fp64
, X86::COS_F
},
666 { X86::COS_Fp80
, X86::COS_F
},
667 { X86::DIVR_Fp32m
, X86::DIVR_F32m
},
668 { X86::DIVR_Fp64m
, X86::DIVR_F64m
},
669 { X86::DIVR_Fp64m32
, X86::DIVR_F32m
},
670 { X86::DIVR_Fp80m32
, X86::DIVR_F32m
},
671 { X86::DIVR_Fp80m64
, X86::DIVR_F64m
},
672 { X86::DIVR_FpI16m32
, X86::DIVR_FI16m
},
673 { X86::DIVR_FpI16m64
, X86::DIVR_FI16m
},
674 { X86::DIVR_FpI16m80
, X86::DIVR_FI16m
},
675 { X86::DIVR_FpI32m32
, X86::DIVR_FI32m
},
676 { X86::DIVR_FpI32m64
, X86::DIVR_FI32m
},
677 { X86::DIVR_FpI32m80
, X86::DIVR_FI32m
},
678 { X86::DIV_Fp32m
, X86::DIV_F32m
},
679 { X86::DIV_Fp64m
, X86::DIV_F64m
},
680 { X86::DIV_Fp64m32
, X86::DIV_F32m
},
681 { X86::DIV_Fp80m32
, X86::DIV_F32m
},
682 { X86::DIV_Fp80m64
, X86::DIV_F64m
},
683 { X86::DIV_FpI16m32
, X86::DIV_FI16m
},
684 { X86::DIV_FpI16m64
, X86::DIV_FI16m
},
685 { X86::DIV_FpI16m80
, X86::DIV_FI16m
},
686 { X86::DIV_FpI32m32
, X86::DIV_FI32m
},
687 { X86::DIV_FpI32m64
, X86::DIV_FI32m
},
688 { X86::DIV_FpI32m80
, X86::DIV_FI32m
},
689 { X86::ILD_Fp16m32
, X86::ILD_F16m
},
690 { X86::ILD_Fp16m64
, X86::ILD_F16m
},
691 { X86::ILD_Fp16m80
, X86::ILD_F16m
},
692 { X86::ILD_Fp32m32
, X86::ILD_F32m
},
693 { X86::ILD_Fp32m64
, X86::ILD_F32m
},
694 { X86::ILD_Fp32m80
, X86::ILD_F32m
},
695 { X86::ILD_Fp64m32
, X86::ILD_F64m
},
696 { X86::ILD_Fp64m64
, X86::ILD_F64m
},
697 { X86::ILD_Fp64m80
, X86::ILD_F64m
},
698 { X86::ISTT_Fp16m32
, X86::ISTT_FP16m
},
699 { X86::ISTT_Fp16m64
, X86::ISTT_FP16m
},
700 { X86::ISTT_Fp16m80
, X86::ISTT_FP16m
},
701 { X86::ISTT_Fp32m32
, X86::ISTT_FP32m
},
702 { X86::ISTT_Fp32m64
, X86::ISTT_FP32m
},
703 { X86::ISTT_Fp32m80
, X86::ISTT_FP32m
},
704 { X86::ISTT_Fp64m32
, X86::ISTT_FP64m
},
705 { X86::ISTT_Fp64m64
, X86::ISTT_FP64m
},
706 { X86::ISTT_Fp64m80
, X86::ISTT_FP64m
},
707 { X86::IST_Fp16m32
, X86::IST_F16m
},
708 { X86::IST_Fp16m64
, X86::IST_F16m
},
709 { X86::IST_Fp16m80
, X86::IST_F16m
},
710 { X86::IST_Fp32m32
, X86::IST_F32m
},
711 { X86::IST_Fp32m64
, X86::IST_F32m
},
712 { X86::IST_Fp32m80
, X86::IST_F32m
},
713 { X86::IST_Fp64m32
, X86::IST_FP64m
},
714 { X86::IST_Fp64m64
, X86::IST_FP64m
},
715 { X86::IST_Fp64m80
, X86::IST_FP64m
},
716 { X86::LD_Fp032
, X86::LD_F0
},
717 { X86::LD_Fp064
, X86::LD_F0
},
718 { X86::LD_Fp080
, X86::LD_F0
},
719 { X86::LD_Fp132
, X86::LD_F1
},
720 { X86::LD_Fp164
, X86::LD_F1
},
721 { X86::LD_Fp180
, X86::LD_F1
},
722 { X86::LD_Fp32m
, X86::LD_F32m
},
723 { X86::LD_Fp32m64
, X86::LD_F32m
},
724 { X86::LD_Fp32m80
, X86::LD_F32m
},
725 { X86::LD_Fp64m
, X86::LD_F64m
},
726 { X86::LD_Fp64m80
, X86::LD_F64m
},
727 { X86::LD_Fp80m
, X86::LD_F80m
},
728 { X86::MUL_Fp32m
, X86::MUL_F32m
},
729 { X86::MUL_Fp64m
, X86::MUL_F64m
},
730 { X86::MUL_Fp64m32
, X86::MUL_F32m
},
731 { X86::MUL_Fp80m32
, X86::MUL_F32m
},
732 { X86::MUL_Fp80m64
, X86::MUL_F64m
},
733 { X86::MUL_FpI16m32
, X86::MUL_FI16m
},
734 { X86::MUL_FpI16m64
, X86::MUL_FI16m
},
735 { X86::MUL_FpI16m80
, X86::MUL_FI16m
},
736 { X86::MUL_FpI32m32
, X86::MUL_FI32m
},
737 { X86::MUL_FpI32m64
, X86::MUL_FI32m
},
738 { X86::MUL_FpI32m80
, X86::MUL_FI32m
},
739 { X86::SIN_Fp32
, X86::SIN_F
},
740 { X86::SIN_Fp64
, X86::SIN_F
},
741 { X86::SIN_Fp80
, X86::SIN_F
},
742 { X86::SQRT_Fp32
, X86::SQRT_F
},
743 { X86::SQRT_Fp64
, X86::SQRT_F
},
744 { X86::SQRT_Fp80
, X86::SQRT_F
},
745 { X86::ST_Fp32m
, X86::ST_F32m
},
746 { X86::ST_Fp64m
, X86::ST_F64m
},
747 { X86::ST_Fp64m32
, X86::ST_F32m
},
748 { X86::ST_Fp80m32
, X86::ST_F32m
},
749 { X86::ST_Fp80m64
, X86::ST_F64m
},
750 { X86::ST_FpP80m
, X86::ST_FP80m
},
751 { X86::SUBR_Fp32m
, X86::SUBR_F32m
},
752 { X86::SUBR_Fp64m
, X86::SUBR_F64m
},
753 { X86::SUBR_Fp64m32
, X86::SUBR_F32m
},
754 { X86::SUBR_Fp80m32
, X86::SUBR_F32m
},
755 { X86::SUBR_Fp80m64
, X86::SUBR_F64m
},
756 { X86::SUBR_FpI16m32
, X86::SUBR_FI16m
},
757 { X86::SUBR_FpI16m64
, X86::SUBR_FI16m
},
758 { X86::SUBR_FpI16m80
, X86::SUBR_FI16m
},
759 { X86::SUBR_FpI32m32
, X86::SUBR_FI32m
},
760 { X86::SUBR_FpI32m64
, X86::SUBR_FI32m
},
761 { X86::SUBR_FpI32m80
, X86::SUBR_FI32m
},
762 { X86::SUB_Fp32m
, X86::SUB_F32m
},
763 { X86::SUB_Fp64m
, X86::SUB_F64m
},
764 { X86::SUB_Fp64m32
, X86::SUB_F32m
},
765 { X86::SUB_Fp80m32
, X86::SUB_F32m
},
766 { X86::SUB_Fp80m64
, X86::SUB_F64m
},
767 { X86::SUB_FpI16m32
, X86::SUB_FI16m
},
768 { X86::SUB_FpI16m64
, X86::SUB_FI16m
},
769 { X86::SUB_FpI16m80
, X86::SUB_FI16m
},
770 { X86::SUB_FpI32m32
, X86::SUB_FI32m
},
771 { X86::SUB_FpI32m64
, X86::SUB_FI32m
},
772 { X86::SUB_FpI32m80
, X86::SUB_FI32m
},
773 { X86::TST_Fp32
, X86::TST_F
},
774 { X86::TST_Fp64
, X86::TST_F
},
775 { X86::TST_Fp80
, X86::TST_F
},
776 { X86::UCOM_FpIr32
, X86::UCOM_FIr
},
777 { X86::UCOM_FpIr64
, X86::UCOM_FIr
},
778 { X86::UCOM_FpIr80
, X86::UCOM_FIr
},
779 { X86::UCOM_Fpr32
, X86::UCOM_Fr
},
780 { X86::UCOM_Fpr64
, X86::UCOM_Fr
},
781 { X86::UCOM_Fpr80
, X86::UCOM_Fr
},
784 static unsigned getConcreteOpcode(unsigned Opcode
) {
785 ASSERT_SORTED(OpcodeTable
);
786 int Opc
= Lookup(OpcodeTable
, Opcode
);
787 assert(Opc
!= -1 && "FP Stack instruction not in OpcodeTable!");
791 //===----------------------------------------------------------------------===//
793 //===----------------------------------------------------------------------===//
795 // PopTable - Sorted map of instructions to their popping version. The first
796 // element is an instruction, the second is the version which pops.
798 static const TableEntry PopTable
[] = {
799 { X86::ADD_FrST0
, X86::ADD_FPrST0
},
801 { X86::DIVR_FrST0
, X86::DIVR_FPrST0
},
802 { X86::DIV_FrST0
, X86::DIV_FPrST0
},
804 { X86::IST_F16m
, X86::IST_FP16m
},
805 { X86::IST_F32m
, X86::IST_FP32m
},
807 { X86::MUL_FrST0
, X86::MUL_FPrST0
},
809 { X86::ST_F32m
, X86::ST_FP32m
},
810 { X86::ST_F64m
, X86::ST_FP64m
},
811 { X86::ST_Frr
, X86::ST_FPrr
},
813 { X86::SUBR_FrST0
, X86::SUBR_FPrST0
},
814 { X86::SUB_FrST0
, X86::SUB_FPrST0
},
816 { X86::UCOM_FIr
, X86::UCOM_FIPr
},
818 { X86::UCOM_FPr
, X86::UCOM_FPPr
},
819 { X86::UCOM_Fr
, X86::UCOM_FPr
},
822 /// popStackAfter - Pop the current value off of the top of the FP stack after
823 /// the specified instruction. This attempts to be sneaky and combine the pop
824 /// into the instruction itself if possible. The iterator is left pointing to
825 /// the last instruction, be it a new pop instruction inserted, or the old
826 /// instruction if it was modified in place.
828 void FPS::popStackAfter(MachineBasicBlock::iterator
&I
) {
829 MachineInstr
&MI
= *I
;
830 const DebugLoc
&dl
= MI
.getDebugLoc();
831 ASSERT_SORTED(PopTable
);
835 // Check to see if there is a popping version of this instruction...
// If PopTable maps this opcode, rewrite the instruction in place to its
// popping form instead of inserting a separate pop.
836 int Opcode
= Lookup(PopTable
, I
->getOpcode());
838 I
->setDesc(TII
->get(Opcode
));
839 if (Opcode
== X86::UCOM_FPPr
)
841 } else { // Insert an explicit pop
// "fstp st(0)" pops the top of stack without storing anywhere useful.
842 I
= BuildMI(*MBB
, ++I
, dl
, TII
->get(X86::ST_FPrr
)).addReg(X86::ST0
);
846 /// freeStackSlotAfter - Free the specified register from the register stack, so
847 /// that it is no longer in a register. If the register is currently at the top
848 /// of the stack, we just pop the current instruction, otherwise we store the
849 /// current top-of-stack into the specified slot, then pop the top of stack.
850 void FPS::freeStackSlotAfter(MachineBasicBlock::iterator
&I
, unsigned FPRegNo
) {
851 if (getStackEntry(0) == FPRegNo
) { // already at the top of stack? easy.
856 // Otherwise, store the top of stack into the dead slot, killing the operand
857 // without having to add in an explicit xchg then pop.
// ++I: the freeing code must be inserted *after* the instruction I.
859 I
= freeStackSlotBefore(++I
, FPRegNo
);
862 /// freeStackSlotBefore - Free the specified register without trying any
/// folding: emit an "fstp st(i)" before I, which overwrites the dead slot
/// with the current top-of-stack and then pops. Returns the inserted
/// instruction. The model is updated to match: the old top register moves
/// into FPRegNo's slot, and both vacated entries are marked dead (~0).
864 MachineBasicBlock::iterator
865 FPS::freeStackSlotBefore(MachineBasicBlock::iterator I
, unsigned FPRegNo
) {
866 unsigned STReg
= getSTReg(FPRegNo
);
867 unsigned OldSlot
= getSlot(FPRegNo
);
868 unsigned TopReg
= Stack
[StackTop
-1];
869 Stack
[OldSlot
] = TopReg
;
870 RegMap
[TopReg
] = OldSlot
;
871 RegMap
[FPRegNo
] = ~0;
872 Stack
[--StackTop
] = ~0;
873 return BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::ST_FPrr
))
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
878 /// adjustLiveRegs - Kill and revive registers such that exactly the FP
879 /// registers with a bit in Mask are live.
880 void FPS::adjustLiveRegs(unsigned Mask
, MachineBasicBlock::iterator I
) {
881 unsigned Defs
= Mask
;
// NOTE(review): extraction gap — the declaration of Kills (original line
// ~882, presumably `unsigned Kills = 0;` — TODO confirm) is not visible.
// Walk the live stack: registers not requested in Mask must be killed;
// registers already live need no implicit def.
883 for (unsigned i
= 0; i
< StackTop
; ++i
) {
884 unsigned RegNo
= Stack
[i
];
885 if (!(Defs
& (1 << RegNo
)))
886 // This register is live, but we don't want it.
887 Kills
|= (1 << RegNo
);
// NOTE(review): extraction gap — original line 888 (presumably the `else`
// joining the two arms — TODO confirm) is not visible here.
889 // We don't need to imp-def this live register.
890 Defs
&= ~(1 << RegNo
);
892 assert((Kills
& Defs
) == 0 && "Register needs killing and def'ing?");
894 // Produce implicit-defs for free by using killed registers.
895 while (Kills
&& Defs
) {
896 unsigned KReg
= countTrailingZeros(Kills
);
897 unsigned DReg
= countTrailingZeros(Defs
);
898 LLVM_DEBUG(dbgs() << "Renaming %fp" << KReg
<< " as imp %fp" << DReg
// Rename KReg's stack slot to DReg by swapping both registers' Stack and
// RegMap entries — no machine instruction needed.
900 std::swap(Stack
[getSlot(KReg
)], Stack
[getSlot(DReg
)]);
901 std::swap(RegMap
[KReg
], RegMap
[DReg
]);
902 Kills
&= ~(1 << KReg
);
903 Defs
&= ~(1 << DReg
);
906 // Kill registers by popping.
907 if (Kills
&& I
!= MBB
->begin()) {
908 MachineBasicBlock::iterator I2
= std::prev(I
);
910 unsigned KReg
= getStackEntry(0);
911 if (!(Kills
& (1 << KReg
)))
913 LLVM_DEBUG(dbgs() << "Popping %fp" << KReg
<< "\n");
// NOTE(review): extraction gap — the loop structure and the actual pop call
// around original lines 909-918 are not visible in this view.
915 Kills
&= ~(1 << KReg
);
919 // Manually kill the rest.
921 unsigned KReg
= countTrailingZeros(Kills
);
922 LLVM_DEBUG(dbgs() << "Killing %fp" << KReg
<< "\n");
923 freeStackSlotBefore(I
, KReg
);
924 Kills
&= ~(1 << KReg
);
927 // Load zeros for all the imp-defs.
929 unsigned DReg
= countTrailingZeros(Defs
);
930 LLVM_DEBUG(dbgs() << "Defining %fp" << DReg
<< " as 0\n");
// LD_F0 materializes a 0 for the implicitly-defined register (see the debug
// message above).
931 BuildMI(*MBB
, I
, DebugLoc(), TII
->get(X86::LD_F0
));
933 Defs
&= ~(1 << DReg
);
936 // Now we should have the correct registers live.
937 LLVM_DEBUG(dumpStack());
938 assert(StackTop
== countPopulation(Mask
) && "Live count mismatch");
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
941 /// shuffleStackTop - emit fxch instructions before I to shuffle the top
942 /// FixCount entries into the order given by FixStack.
943 /// FIXME: Is there a better algorithm than insertion sort?
944 void FPS::shuffleStackTop(const unsigned char *FixStack
,
// NOTE(review): extraction gap — the second parameter (original line 945,
// the FixCount referenced below — TODO confirm) is not visible here.
946 MachineBasicBlock::iterator I
) {
947 // Move items into place, starting from the desired stack bottom.
// NOTE(review): extraction gap — the loop header (original line ~948) is not
// visible; the body below compares the actual vs. desired register at each
// position and emits fxch via moveToTop when they differ.
949 // Old register at position FixCount.
950 unsigned OldReg
= getStackEntry(FixCount
);
951 // Desired register at position FixCount.
952 unsigned Reg
= FixStack
[FixCount
];
955 // (Reg st0) (OldReg st0) = (Reg OldReg st0)
958 moveToTop(OldReg
, I
);
960 LLVM_DEBUG(dumpStack());
964 //===----------------------------------------------------------------------===//
965 // Instruction transformation implementation
966 //===----------------------------------------------------------------------===//
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
// Scans the call's FP operands, checks RegCall/non-RegCall operand invariants,
// and (in elided code) resets the FP stack around the call.
968 void FPS::handleCall(MachineBasicBlock::iterator
&I
) {
969 unsigned STReturns
= 0;
970 const MachineFunction
* MF
= I
->getParent()->getParent();
972 for (const auto &MO
: I
->operands()) {
// NOTE(review): extraction gap — the non-FP-operand filter (original lines
// ~973-975) is not visible here.
976 unsigned R
= MO
.getReg() - X86::FP0
;
979 if (MF
->getFunction().getCallingConv() != CallingConv::X86_RegCall
) {
980 assert(MO
.isDef() && MO
.isImplicit());
// NOTE(review): extraction gap — the accumulation into STReturns (original
// lines ~981-986) is not visible here.
987 unsigned N
= countTrailingOnes(STReturns
);
989 // FP registers used for function return must be consecutive starting at
991 assert(STReturns
== 0 || (isMask_32(STReturns
) && N
<= 2));
993 // Reset the FP Stack - It is required because of possible leftovers from
994 // passed arguments. The caller should assume that the FP stack is
995 // returned empty (unless the callee returns values on FP stack).
// NOTE(review): extraction gap — the stack reset (original lines ~996-998)
// and the body of the push loop below (original lines ~1000-1002) are not
// visible here.
999 for (unsigned I
= 0; I
< N
; ++I
)
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1003 /// If RET has an FP register use operand, pass the first one in ST(0) and
1004 /// the second one in ST(1).
1005 void FPS::handleReturn(MachineBasicBlock::iterator
&I
) {
1006 MachineInstr
&MI
= *I
;
1008 // Find the register operands.
1009 unsigned FirstFPRegOp
= ~0U, SecondFPRegOp
= ~0U;
1010 unsigned LiveMask
= 0;
1012 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1013 MachineOperand
&Op
= MI
.getOperand(i
);
1014 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
// NOTE(review): extraction gap — the `continue` for non-FP operands
// (original line ~1015) is not visible here.
1016 // FP Register uses must be kills unless there are two uses of the same
1017 // register, in which case only one will be a kill.
1018 assert(Op
.isUse() &&
1019 (Op
.isKill() || // Marked kill.
1020 getFPReg(Op
) == FirstFPRegOp
|| // Second instance.
1021 MI
.killsRegister(Op
.getReg())) && // Later use is marked kill.
1022 "Ret only defs operands, and values aren't live beyond it");
// Record up to two distinct FP return operands.
1024 if (FirstFPRegOp
== ~0U)
1025 FirstFPRegOp
= getFPReg(Op
);
// NOTE(review): extraction gap — the `else` joining these arms (original
// line ~1026) is not visible here.
1027 assert(SecondFPRegOp
== ~0U && "More than two fp operands!");
1028 SecondFPRegOp
= getFPReg(Op
);
1030 LiveMask
|= (1 << getFPReg(Op
));
1032 // Remove the operand so that later passes don't see it.
1033 MI
.RemoveOperand(i
);
// NOTE(review): extraction gap — index/bound adjustment after removal
// (original lines ~1034-1037) is not visible here.
1038 // We may have been carrying spurious live-ins, so make sure only the
1039 // returned registers are left live.
1040 adjustLiveRegs(LiveMask
, MI
);
1041 if (!LiveMask
) return; // Quick check to see if any are possible.
1043 // There are only four possibilities here:
1044 // 1) we are returning a single FP value. In this case, it has to be in
1045 // ST(0) already, so just declare success by removing the value from the
1047 if (SecondFPRegOp
== ~0U) {
1048 // Assert that the top of stack contains the right FP register.
1049 assert(StackTop
== 1 && FirstFPRegOp
== getStackEntry(0) &&
1050 "Top of stack not the right register for RET!");
1052 // Ok, everything is good, mark the value as not being on the stack
1053 // anymore so that our assertion about the stack being empty at end of
1054 // block doesn't fire.
// NOTE(review): extraction gap — the StackTop reset and return (original
// lines ~1055-1058) are not visible here.
1059 // Otherwise, we are returning two values:
1060 // 2) If returning the same value for both, we only have one thing in the FP
1061 // stack. Consider: RET FP1, FP1
1062 if (StackTop
== 1) {
1063 assert(FirstFPRegOp
== SecondFPRegOp
&& FirstFPRegOp
== getStackEntry(0)&&
1064 "Stack misconfiguration for RET!");
1066 // Duplicate the TOS so that we return it twice. Just pick some other FPx
1067 // register to hold it.
1068 unsigned NewReg
= ScratchFPReg
;
1069 duplicateToTop(FirstFPRegOp
, NewReg
, MI
);
1070 FirstFPRegOp
= NewReg
;
1073 /// Okay we know we have two different FPx operands now:
1074 assert(StackTop
== 2 && "Must have two values live!");
1076 /// 3) If SecondFPRegOp is currently in ST(0) and FirstFPRegOp is currently
1077 /// in ST(1). In this case, emit an fxch.
1078 if (getStackEntry(0) == SecondFPRegOp
) {
1079 assert(getStackEntry(1) == FirstFPRegOp
&& "Unknown regs live");
1080 moveToTop(FirstFPRegOp
, MI
);
1083 /// 4) Finally, FirstFPRegOp must be in ST(0) and SecondFPRegOp must be in
1084 /// ST(1). Just remove both from our understanding of the stack and return.
1085 assert(getStackEntry(0) == FirstFPRegOp
&& "Unknown regs live");
1086 assert(getStackEntry(1) == SecondFPRegOp
&& "Unknown regs live");
// NOTE(review): extraction gap — the final StackTop reset and closing brace
// (original lines ~1087-1089) are not visible here.
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1090 /// handleZeroArgFP - ST(0) = fld0 ST(0) = flds <mem>
1092 void FPS::handleZeroArgFP(MachineBasicBlock::iterator
&I
) {
1093 MachineInstr
&MI
= *I
;
1094 unsigned DestReg
= getFPReg(MI
.getOperand(0));
1096 // Change from the pseudo instruction to the concrete instruction.
1097 MI
.RemoveOperand(0); // Remove the explicit ST(0) operand
1098 MI
.setDesc(TII
->get(getConcreteOpcode(MI
.getOpcode())));
// NOTE(review): extraction gap — original line ~1099 (additional operand
// fix-up after the setDesc — TODO confirm) is not visible here.
1100 // Result gets pushed on the stack.
// NOTE(review): extraction gap — the push bookkeeping for DestReg and the
// closing brace (original lines ~1101-1103) are not visible here.
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1104 /// handleOneArgFP - fst <mem>, ST(0)
1106 void FPS::handleOneArgFP(MachineBasicBlock::iterator
&I
) {
1107 MachineInstr
&MI
= *I
;
1108 unsigned NumOps
= MI
.getDesc().getNumOperands();
1109 assert((NumOps
== X86::AddrNumOperands
+ 1 || NumOps
== 1) &&
1110 "Can only handle fst* & ftst instructions!");
1112 // Is this the last use of the source register?
1113 unsigned Reg
= getFPReg(MI
.getOperand(NumOps
- 1));
1114 bool KillsSrc
= MI
.killsRegister(X86::FP0
+ Reg
);
1116 // FISTP64m is strange because there isn't a non-popping versions.
1117 // If we have one _and_ we don't want to pop the operand, duplicate the value
1118 // on the stack instead of moving it. This ensure that popping the value is
1120 // Ditto FISTTP16m, FISTTP32m, FISTTP64m, ST_FpP80m.
// The _Fp*mNN suffix is the pseudo form: mNN is the source FP register size;
// the list enumerates every always-popping store pseudo.
1122 if (!KillsSrc
&& (MI
.getOpcode() == X86::IST_Fp64m32
||
1123 MI
.getOpcode() == X86::ISTT_Fp16m32
||
1124 MI
.getOpcode() == X86::ISTT_Fp32m32
||
1125 MI
.getOpcode() == X86::ISTT_Fp64m32
||
1126 MI
.getOpcode() == X86::IST_Fp64m64
||
1127 MI
.getOpcode() == X86::ISTT_Fp16m64
||
1128 MI
.getOpcode() == X86::ISTT_Fp32m64
||
1129 MI
.getOpcode() == X86::ISTT_Fp64m64
||
1130 MI
.getOpcode() == X86::IST_Fp64m80
||
1131 MI
.getOpcode() == X86::ISTT_Fp16m80
||
1132 MI
.getOpcode() == X86::ISTT_Fp32m80
||
1133 MI
.getOpcode() == X86::ISTT_Fp64m80
||
1134 MI
.getOpcode() == X86::ST_FpP80m
)) {
// Copy to a scratch slot so the implicit pop destroys the copy, not the
// still-live original.
1135 duplicateToTop(Reg
, ScratchFPReg
, I
);
// NOTE(review): extraction gap — the `} else {` joining these arms (original
// line ~1136) is not visible here.
1137 moveToTop(Reg
, I
); // Move to the top of the stack...
1140 // Convert from the pseudo instruction to the concrete instruction.
1141 MI
.RemoveOperand(NumOps
- 1); // Remove explicit ST(0) operand
1142 MI
.setDesc(TII
->get(getConcreteOpcode(MI
.getOpcode())));
// After conversion, check the concrete always-popping opcodes and update the
// simulated stack accordingly.
1144 if (MI
.getOpcode() == X86::IST_FP64m
|| MI
.getOpcode() == X86::ISTT_FP16m
||
1145 MI
.getOpcode() == X86::ISTT_FP32m
|| MI
.getOpcode() == X86::ISTT_FP64m
||
1146 MI
.getOpcode() == X86::ST_FP80m
) {
// NOTE(review): extraction gap — the StackTop check preceding this fatal
// error (original line ~1147) is not visible here.
1148 report_fatal_error("Stack empty??");
// NOTE(review): extraction gap — the pop bookkeeping (original line ~1149)
// is not visible here.
1150 } else if (KillsSrc
) { // Last use of operand?
// NOTE(review): extraction gap — the popStackAfter call and closing braces
// (original lines ~1151-1155) are not visible here.
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1156 /// handleOneArgFPRW: Handle instructions that read from the top of stack and
1157 /// replace the value with a newly computed value. These instructions may have
1158 /// non-fp operands after their FP operands.
1162 /// R1 = fadd R2, [mem]
1164 void FPS::handleOneArgFPRW(MachineBasicBlock::iterator
&I
) {
1165 MachineInstr
&MI
= *I
;
1167 unsigned NumOps
= MI
.getDesc().getNumOperands();
1168 assert(NumOps
>= 2 && "FPRW instructions must have 2 ops!!");
1171 // Is this the last use of the source register?
1172 unsigned Reg
= getFPReg(MI
.getOperand(1));
1173 bool KillsSrc
= MI
.killsRegister(X86::FP0
+ Reg
);
// NOTE(review): extraction gap — the `if (KillsSrc)` branching (original
// lines ~1174-1175) is not visible here.
1176 // If this is the last use of the source register, just make sure it's on
1177 // the top of the stack.
// NOTE(review): extraction gap — the moveToTop call and empty-stack check
// (original lines ~1178-1179) are not visible here.
1180 report_fatal_error("Stack cannot be empty!");
// Re-own the TOS slot as the destination register.
1182 pushReg(getFPReg(MI
.getOperand(0)));
1184 // If this is not the last use of the source register, _copy_ it to the top
// Source stays live, so duplicate it instead of moving.
1186 duplicateToTop(Reg
, getFPReg(MI
.getOperand(0)), I
);
1189 // Change from the pseudo instruction to the concrete instruction.
1190 MI
.RemoveOperand(1); // Drop the source operand.
1191 MI
.RemoveOperand(0); // Drop the destination operand.
1192 MI
.setDesc(TII
->get(getConcreteOpcode(MI
.getOpcode())));
// NOTE(review): extraction gap — the closing brace(s) (original lines
// ~1193-1195) are not visible here.
1196 //===----------------------------------------------------------------------===//
1197 // Define tables of various ways to map pseudo instructions
1200 // ForwardST0Table - Map: A = B op C into: ST(0) = ST(0) op ST(i)
1201 static const TableEntry ForwardST0Table
[] = {
1202 { X86::ADD_Fp32
, X86::ADD_FST0r
},
1203 { X86::ADD_Fp64
, X86::ADD_FST0r
},
1204 { X86::ADD_Fp80
, X86::ADD_FST0r
},
1205 { X86::DIV_Fp32
, X86::DIV_FST0r
},
1206 { X86::DIV_Fp64
, X86::DIV_FST0r
},
1207 { X86::DIV_Fp80
, X86::DIV_FST0r
},
1208 { X86::MUL_Fp32
, X86::MUL_FST0r
},
1209 { X86::MUL_Fp64
, X86::MUL_FST0r
},
1210 { X86::MUL_Fp80
, X86::MUL_FST0r
},
1211 { X86::SUB_Fp32
, X86::SUB_FST0r
},
1212 { X86::SUB_Fp64
, X86::SUB_FST0r
},
1213 { X86::SUB_Fp80
, X86::SUB_FST0r
},
1216 // ReverseST0Table - Map: A = B op C into: ST(0) = ST(i) op ST(0)
1217 static const TableEntry ReverseST0Table
[] = {
1218 { X86::ADD_Fp32
, X86::ADD_FST0r
}, // commutative
1219 { X86::ADD_Fp64
, X86::ADD_FST0r
}, // commutative
1220 { X86::ADD_Fp80
, X86::ADD_FST0r
}, // commutative
1221 { X86::DIV_Fp32
, X86::DIVR_FST0r
},
1222 { X86::DIV_Fp64
, X86::DIVR_FST0r
},
1223 { X86::DIV_Fp80
, X86::DIVR_FST0r
},
1224 { X86::MUL_Fp32
, X86::MUL_FST0r
}, // commutative
1225 { X86::MUL_Fp64
, X86::MUL_FST0r
}, // commutative
1226 { X86::MUL_Fp80
, X86::MUL_FST0r
}, // commutative
1227 { X86::SUB_Fp32
, X86::SUBR_FST0r
},
1228 { X86::SUB_Fp64
, X86::SUBR_FST0r
},
1229 { X86::SUB_Fp80
, X86::SUBR_FST0r
},
1232 // ForwardSTiTable - Map: A = B op C into: ST(i) = ST(0) op ST(i)
1233 static const TableEntry ForwardSTiTable
[] = {
1234 { X86::ADD_Fp32
, X86::ADD_FrST0
}, // commutative
1235 { X86::ADD_Fp64
, X86::ADD_FrST0
}, // commutative
1236 { X86::ADD_Fp80
, X86::ADD_FrST0
}, // commutative
1237 { X86::DIV_Fp32
, X86::DIVR_FrST0
},
1238 { X86::DIV_Fp64
, X86::DIVR_FrST0
},
1239 { X86::DIV_Fp80
, X86::DIVR_FrST0
},
1240 { X86::MUL_Fp32
, X86::MUL_FrST0
}, // commutative
1241 { X86::MUL_Fp64
, X86::MUL_FrST0
}, // commutative
1242 { X86::MUL_Fp80
, X86::MUL_FrST0
}, // commutative
1243 { X86::SUB_Fp32
, X86::SUBR_FrST0
},
1244 { X86::SUB_Fp64
, X86::SUBR_FrST0
},
1245 { X86::SUB_Fp80
, X86::SUBR_FrST0
},
1248 // ReverseSTiTable - Map: A = B op C into: ST(i) = ST(i) op ST(0)
1249 static const TableEntry ReverseSTiTable
[] = {
1250 { X86::ADD_Fp32
, X86::ADD_FrST0
},
1251 { X86::ADD_Fp64
, X86::ADD_FrST0
},
1252 { X86::ADD_Fp80
, X86::ADD_FrST0
},
1253 { X86::DIV_Fp32
, X86::DIV_FrST0
},
1254 { X86::DIV_Fp64
, X86::DIV_FrST0
},
1255 { X86::DIV_Fp80
, X86::DIV_FrST0
},
1256 { X86::MUL_Fp32
, X86::MUL_FrST0
},
1257 { X86::MUL_Fp64
, X86::MUL_FrST0
},
1258 { X86::MUL_Fp80
, X86::MUL_FrST0
},
1259 { X86::SUB_Fp32
, X86::SUB_FrST0
},
1260 { X86::SUB_Fp64
, X86::SUB_FrST0
},
1261 { X86::SUB_Fp80
, X86::SUB_FrST0
},
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1265 /// handleTwoArgFP - Handle instructions like FADD and friends which are virtual
1266 /// instructions which need to be simplified and possibly transformed.
1268 /// Result: ST(0) = fsub ST(0), ST(i)
1269 /// ST(i) = fsub ST(0), ST(i)
1270 /// ST(0) = fsubr ST(0), ST(i)
1271 /// ST(i) = fsubr ST(0), ST(i)
1273 void FPS::handleTwoArgFP(MachineBasicBlock::iterator
&I
) {
1274 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1275 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1276 MachineInstr
&MI
= *I
;
1278 unsigned NumOperands
= MI
.getDesc().getNumOperands();
1279 assert(NumOperands
== 3 && "Illegal TwoArgFP instruction!");
1280 unsigned Dest
= getFPReg(MI
.getOperand(0));
1281 unsigned Op0
= getFPReg(MI
.getOperand(NumOperands
- 2));
1282 unsigned Op1
= getFPReg(MI
.getOperand(NumOperands
- 1));
1283 bool KillsOp0
= MI
.killsRegister(X86::FP0
+ Op0
);
1284 bool KillsOp1
= MI
.killsRegister(X86::FP0
+ Op1
);
1285 DebugLoc dl
= MI
.getDebugLoc();
1287 unsigned TOS
= getStackEntry(0);
1289 // One of our operands must be on the top of the stack. If neither is yet, we
1290 // need to move one.
1291 if (Op0
!= TOS
&& Op1
!= TOS
) { // No operand at TOS?
1292 // We can choose to move either operand to the top of the stack. If one of
1293 // the operands is killed by this instruction, we want that one so that we
1294 // can update right on top of the old version.
// NOTE(review): extraction gap — the `if (KillsOp0)` guard (original line
// ~1295) is not visible here.
1296 moveToTop(Op0
, I
); // Move dead operand to TOS.
// NOTE(review): extraction gap — TOS update (original line ~1297) is not
// visible here.
1298 } else if (KillsOp1
) {
// NOTE(review): extraction gap — the body of this branch (original lines
// ~1299-1301) is not visible here.
1302 // All of the operands are live after this instruction executes, so we
1303 // cannot update on top of any operand. Because of this, we must
1304 // duplicate one of the stack elements to the top. It doesn't matter
1305 // which one we pick.
1307 duplicateToTop(Op0
, Dest
, I
);
// NOTE(review): extraction gap — operand/TOS rebinding after the duplicate
// (original lines ~1308-1310) is not visible here.
1311 } else if (!KillsOp0
&& !KillsOp1
) {
1312 // If we DO have one of our operands at the top of the stack, but we don't
1313 // have a dead operand, we must duplicate one of the operands to a new slot
1315 duplicateToTop(Op0
, Dest
, I
);
// NOTE(review): extraction gap — operand/kill rebinding and closing braces
// (original lines ~1316-1319) are not visible here.
1320 // Now we know that one of our operands is on the top of the stack, and at
1321 // least one of our operands is killed by this instruction.
1322 assert((TOS
== Op0
|| TOS
== Op1
) && (KillsOp0
|| KillsOp1
) &&
1323 "Stack conditions not set up right!");
1325 // We decide which form to use based on what is on the top of the stack, and
1326 // which operand is killed by this instruction.
1327 ArrayRef
<TableEntry
> InstTable
;
1328 bool isForward
= TOS
== Op0
;
1329 bool updateST0
= (TOS
== Op0
&& !KillsOp1
) || (TOS
== Op1
&& !KillsOp0
);
// NOTE(review): extraction gap — the if/else ladder selecting among the four
// tables (original lines ~1330-1341; visible assignments below are its arms)
// is partially elided.
1332 InstTable
= ForwardST0Table
;
1334 InstTable
= ReverseST0Table
;
1337 InstTable
= ForwardSTiTable
;
1339 InstTable
= ReverseSTiTable
;
1342 int Opcode
= Lookup(InstTable
, MI
.getOpcode());
1343 assert(Opcode
!= -1 && "Unknown TwoArgFP pseudo instruction!");
1345 // NotTOS - The register which is not on the top of stack...
1346 unsigned NotTOS
= (TOS
== Op0
) ? Op1
: Op0
;
1348 // Replace the old instruction with a new instruction
// NOTE(review): extraction gap — original line ~1349 (presumably advancing
// I past MI before the insert — TODO confirm) is not visible here.
1350 I
= BuildMI(*MBB
, I
, dl
, TII
->get(Opcode
)).addReg(getSTReg(NotTOS
));
1352 // If both operands are killed, pop one off of the stack in addition to
1353 // overwriting the other one.
1354 if (KillsOp0
&& KillsOp1
&& Op0
!= Op1
) {
1355 assert(!updateST0
&& "Should have updated other operand!");
1356 popStackAfter(I
); // Pop the top of stack
1359 // Update stack information so that we know the destination register is now on
// Record Dest as the owner of whichever slot the instruction wrote.
1361 unsigned UpdatedSlot
= getSlot(updateST0
? TOS
: NotTOS
);
1362 assert(UpdatedSlot
< StackTop
&& Dest
< 7);
1363 Stack
[UpdatedSlot
] = Dest
;
1364 RegMap
[Dest
] = UpdatedSlot
;
1365 MBB
->getParent()->DeleteMachineInstr(&MI
); // Remove the old instruction
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1368 /// handleCompareFP - Handle FUCOM and FUCOMI instructions, which have two FP
1369 /// register arguments and no explicit destinations.
1371 void FPS::handleCompareFP(MachineBasicBlock::iterator
&I
) {
1372 ASSERT_SORTED(ForwardST0Table
); ASSERT_SORTED(ReverseST0Table
);
1373 ASSERT_SORTED(ForwardSTiTable
); ASSERT_SORTED(ReverseSTiTable
);
1374 MachineInstr
&MI
= *I
;
1376 unsigned NumOperands
= MI
.getDesc().getNumOperands();
1377 assert(NumOperands
== 2 && "Illegal FUCOM* instruction!");
1378 unsigned Op0
= getFPReg(MI
.getOperand(NumOperands
- 2));
1379 unsigned Op1
= getFPReg(MI
.getOperand(NumOperands
- 1));
1380 bool KillsOp0
= MI
.killsRegister(X86::FP0
+ Op0
);
1381 bool KillsOp1
= MI
.killsRegister(X86::FP0
+ Op1
);
1383 // Make sure the first operand is on the top of stack, the other one can be
// NOTE(review): extraction gap — the moveToTop(Op0, I) call implied by the
// comment above (original lines ~1384-1386) is not visible here.
1387 // Change from the pseudo instruction to the concrete instruction.
1388 MI
.getOperand(0).setReg(getSTReg(Op1
));
1389 MI
.RemoveOperand(1);
1390 MI
.setDesc(TII
->get(getConcreteOpcode(MI
.getOpcode())));
1392 // If any of the operands are killed by this instruction, free them.
1393 if (KillsOp0
) freeStackSlotAfter(I
, Op0
);
// Guard Op0 != Op1 so a self-compare does not free the same slot twice.
1394 if (KillsOp1
&& Op0
!= Op1
) freeStackSlotAfter(I
, Op1
);
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and several elided lines. Tokens preserved verbatim;
// only comments added.
1397 /// handleCondMovFP - Handle two address conditional move instructions. These
1398 /// instructions move a st(i) register to st(0) iff a condition is true. These
1399 /// instructions require that the first operand is at the top of the stack, but
1400 /// otherwise don't modify the stack at all.
1401 void FPS::handleCondMovFP(MachineBasicBlock::iterator
&I
) {
1402 MachineInstr
&MI
= *I
;
1404 unsigned Op0
= getFPReg(MI
.getOperand(0));
1405 unsigned Op1
= getFPReg(MI
.getOperand(2));
1406 bool KillsOp1
= MI
.killsRegister(X86::FP0
+ Op1
);
1408 // The first operand *must* be on the top of the stack.
// NOTE(review): extraction gap — the moveToTop(Op0, I) call implied by the
// comment above (original lines ~1409-1410) is not visible here.
1411 // Change the second operand to the stack register that the operand is in.
1412 // Change from the pseudo instruction to the concrete instruction.
1413 MI
.RemoveOperand(0);
1414 MI
.RemoveOperand(1);
1415 MI
.getOperand(0).setReg(getSTReg(Op1
));
1416 MI
.setDesc(TII
->get(getConcreteOpcode(MI
.getOpcode())));
1418 // If we kill the second operand, make sure to pop it from the stack.
1419 if (Op0
!= Op1
&& KillsOp1
) {
1420 // Get this value off of the register stack.
1421 freeStackSlotAfter(I
, Op1
);
// NOTE(review): extraction gap — closing braces (original lines ~1422-1425)
// are not visible here.
// NOTE(review): extraction-mangled region — hard line-wrapping, embedded
// original line numbers, and many elided lines (case terminators, break
// statements, several branch bodies). Tokens preserved verbatim; only
// comments added.
1426 /// handleSpecialFP - Handle special instructions which behave unlike other
1427 /// floating point instructions. This is primarily intended for use by pseudo
1430 void FPS::handleSpecialFP(MachineBasicBlock::iterator
&Inst
) {
1431 MachineInstr
&MI
= *Inst
;
// Returns are routed to handleReturn (its call, original lines ~1439-1441,
// is not visible in this view).
1438 if (MI
.isReturn()) {
1443 switch (MI
.getOpcode()) {
1444 default: llvm_unreachable("Unknown SpecialFP instruction!");
1445 case TargetOpcode::COPY
: {
1446 // We handle three kinds of copies: FP <- FP, FP <- ST, and ST <- FP.
1447 const MachineOperand
&MO1
= MI
.getOperand(1);
1448 const MachineOperand
&MO0
= MI
.getOperand(0);
1449 bool KillsSrc
= MI
.killsRegister(MO1
.getReg());
// NOTE(review): extraction gap — the dispatch on operand kinds (original
// lines ~1450-1451) is not visible here.
1452 unsigned DstFP
= getFPReg(MO0
);
1453 unsigned SrcFP
= getFPReg(MO1
);
1454 assert(isLive(SrcFP
) && "Cannot copy dead register");
// NOTE(review): extraction gap — the `if (KillsSrc)` guard (original line
// ~1455) is not visible here.
1456 // If the input operand is killed, we can just change the owner of the
1457 // incoming stack slot into the result.
1458 unsigned Slot
= getSlot(SrcFP
);
1459 Stack
[Slot
] = DstFP
;
1460 RegMap
[DstFP
] = Slot
;
// NOTE(review): extraction gap — the else arm introduction (original line
// ~1461) is not visible here.
1462 // For COPY we just duplicate the specified value to a new stack slot.
1463 // This could be made better, but would require substantial changes.
1464 duplicateToTop(SrcFP
, DstFP
, Inst
);
// NOTE(review): extraction gap — break/closing braces (original lines
// ~1465-1468) are not visible here.
1469 case TargetOpcode::IMPLICIT_DEF
: {
1470 // All FP registers must be explicitly defined, so load a 0 instead.
1471 unsigned Reg
= MI
.getOperand(0).getReg() - X86::FP0
;
1472 LLVM_DEBUG(dbgs() << "Emitting LD_F0 for implicit FP" << Reg
<< '\n');
1473 BuildMI(*MBB
, Inst
, MI
.getDebugLoc(), TII
->get(X86::LD_F0
));
// NOTE(review): extraction gap — the pushReg(Reg) bookkeeping and break
// (original lines ~1474-1477 — TODO confirm) are not visible here.
1478 case TargetOpcode::INLINEASM
: {
1479 // The inline asm MachineInstr currently only *uses* FP registers for the
1480 // 'f' constraint. These should be turned into the current ST(x) register
1481 // in the machine instr.
1483 // There are special rules for x87 inline assembly. The compiler must know
1484 // exactly how many registers are popped and pushed implicitly by the asm.
1485 // Otherwise it is not possible to restore the stack state after the inline
1488 // There are 3 kinds of input operands:
1490 // 1. Popped inputs. These must appear at the stack top in ST0-STn. A
1491 // popped input operand must be in a fixed stack slot, and it is either
1492 // tied to an output operand, or in the clobber list. The MI has ST use
1493 // and def operands for these inputs.
1495 // 2. Fixed inputs. These inputs appear in fixed stack slots, but are
1496 // preserved by the inline asm. The fixed stack slots must be STn-STm
1497 // following the popped inputs. A fixed input operand cannot be tied to
1498 // an output or appear in the clobber list. The MI has ST use operands
1499 // and no defs for these inputs.
1501 // 3. Preserved inputs. These inputs use the "f" constraint which is
1502 // represented as an FP register. The inline asm won't change these
1505 // Outputs must be in ST registers, FP outputs are not allowed. Clobbered
1506 // registers do not count as output operands. The inline asm changes the
1507 // stack as if it popped all the popped inputs and then pushed all the
1510 // Scan the assembly for ST registers used, defined and clobbered. We can
1511 // only tell clobbers from defs by looking at the asm descriptor.
1512 unsigned STUses
= 0, STDefs
= 0, STClobbers
= 0, STDeadDefs
= 0;
1513 unsigned NumOps
= 0;
1514 SmallSet
<unsigned, 1> FRegIdx
;
// NOTE(review): extraction gap — the RCID declaration used below (original
// lines ~1515-1516 — TODO confirm) is not visible here.
1517 for (unsigned i
= InlineAsm::MIOp_FirstOperand
, e
= MI
.getNumOperands();
1518 i
!= e
&& MI
.getOperand(i
).isImm(); i
+= 1 + NumOps
) {
1519 unsigned Flags
= MI
.getOperand(i
).getImm();
1521 NumOps
= InlineAsm::getNumOperandRegisters(Flags
);
// NOTE(review): extraction gap — operand-count/kind filtering (original
// lines ~1522-1523) is not visible here.
1524 const MachineOperand
&MO
= MI
.getOperand(i
+ 1);
// NOTE(review): extraction gap — the FP-register filter (original lines
// ~1525-1526) is not visible here.
1527 unsigned STReg
= MO
.getReg() - X86::FP0
;
1531 // If the flag has a register class constraint, this must be an operand
1532 // with constraint "f". Record its index and continue.
1533 if (InlineAsm::hasRegClassConstraint(Flags
, RCID
)) {
1534 FRegIdx
.insert(i
+ 1);
// NOTE(review): extraction gap — continue/closing brace (original lines
// ~1535-1537) are not visible here.
1538 switch (InlineAsm::getKind(Flags
)) {
1539 case InlineAsm::Kind_RegUse
:
1540 STUses
|= (1u << STReg
);
// NOTE(review): extraction gap — break statements between the cases
// (original lines 1541, 1545, 1547, 1550+) are not visible here.
1542 case InlineAsm::Kind_RegDef
:
1543 case InlineAsm::Kind_RegDefEarlyClobber
:
1544 STDefs
|= (1u << STReg
);
// The dead-def accumulation below is guarded (guard at original line ~1545
// not visible; presumably `if (MO.isDead())` — TODO confirm).
1546 STDeadDefs
|= (1u << STReg
);
1548 case InlineAsm::Kind_Clobber
:
1549 STClobbers
|= (1u << STReg
);
// Fixed inputs must occupy ST0..STn with no holes.
1556 if (STUses
&& !isMask_32(STUses
))
1557 MI
.emitError("fixed input regs must be last on the x87 stack");
1558 unsigned NumSTUses
= countTrailingOnes(STUses
);
1560 // Defs must be contiguous from the stack top. ST0-STn.
1561 if (STDefs
&& !isMask_32(STDefs
)) {
1562 MI
.emitError("output regs must be last on the x87 stack");
// Recover by widening STDefs into a contiguous mask so later bookkeeping
// stays consistent after the error.
1563 STDefs
= NextPowerOf2(STDefs
) - 1;
1565 unsigned NumSTDefs
= countTrailingOnes(STDefs
);
1567 // So must the clobbered stack slots. ST0-STm, m >= n.
1568 if (STClobbers
&& !isMask_32(STDefs
| STClobbers
))
1569 MI
.emitError("clobbers must be last on the x87 stack");
1571 // Popped inputs are the ones that are also clobbered or defined.
1572 unsigned STPopped
= STUses
& (STDefs
| STClobbers
);
1573 if (STPopped
&& !isMask_32(STPopped
))
1574 MI
.emitError("implicitly popped regs must be last on the x87 stack");
1575 unsigned NumSTPopped
= countTrailingOnes(STPopped
);
1577 LLVM_DEBUG(dbgs() << "Asm uses " << NumSTUses
<< " fixed regs, pops "
1578 << NumSTPopped
<< ", and defines " << NumSTDefs
1582 // If any input operand uses constraint "f", all output register
1583 // constraints must be early-clobber defs.
1584 for (unsigned I
= 0, E
= MI
.getNumOperands(); I
< E
; ++I
)
1585 if (FRegIdx
.count(I
)) {
1586 assert((1 << getFPReg(MI
.getOperand(I
)) & STDefs
) == 0 &&
1587 "Operands with constraint \"f\" cannot overlap with defs");
1591 // Collect all FP registers (register operands with constraints "t", "u",
1592 // and "f") to kill afer the instruction.
1593 unsigned FPKills
= ((1u << NumFPRegs
) - 1) & ~0xff;
1594 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1595 MachineOperand
&Op
= MI
.getOperand(i
);
1596 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
// NOTE(review): extraction gap — the `continue` for non-FP operands
// (original line ~1597) is not visible here.
1598 unsigned FPReg
= getFPReg(Op
);
1600 // If we kill this operand, make sure to pop it from the stack after the
1601 // asm. We just remember it for now, and pop them all off at the end in
1603 if (Op
.isUse() && Op
.isKill())
1604 FPKills
|= 1U << FPReg
;
1607 // Do not include registers that are implicitly popped by defs/clobbers.
1608 FPKills
&= ~(STDefs
| STClobbers
);
1610 // Now we can rearrange the live registers to match what was requested.
1611 unsigned char STUsesArray
[8];
// NOTE(review): extraction gap — the body filling STUsesArray (original
// lines ~1612-1615) is only partially visible: the loop header below lacks
// its body statement.
1613 for (unsigned I
= 0; I
< NumSTUses
; ++I
)
1616 shuffleStackTop(STUsesArray
, NumSTUses
, Inst
);
1618 dbgs() << "Before asm: ";
1622 // With the stack layout fixed, rewrite the FP registers.
1623 for (unsigned i
= 0, e
= MI
.getNumOperands(); i
!= e
; ++i
) {
1624 MachineOperand
&Op
= MI
.getOperand(i
);
1625 if (!Op
.isReg() || Op
.getReg() < X86::FP0
|| Op
.getReg() > X86::FP6
)
// NOTE(review): extraction gap — the `continue` for non-FP operands
// (original lines ~1626-1627) is not visible here.
1628 unsigned FPReg
= getFPReg(Op
);
1630 if (FRegIdx
.count(i
))
1631 // Operand with constraint "f".
1632 Op
.setReg(getSTReg(FPReg
));
// NOTE(review): extraction gap — the `else` joining the two setReg arms
// (original line ~1633) is not visible here.
1634 // Operand with a single register class constraint ("t" or "u").
1635 Op
.setReg(X86::ST0
+ FPReg
);
1638 // Simulate the inline asm popping its inputs and pushing its outputs.
1639 StackTop
-= NumSTPopped
;
1641 for (unsigned i
= 0; i
< NumSTDefs
; ++i
)
1642 pushReg(NumSTDefs
- i
- 1);
1644 // If this asm kills any FP registers (is the last use of them) we must
1645 // explicitly emit pop instructions for them. Do this now after the asm has
1646 // executed so that the ST(x) numbers are not off (which would happen if we
1647 // did this inline with operand rewriting).
1649 // Note: this might be a non-optimal pop sequence. We might be able to do
1650 // better by trying to pop in stack order or something.
// NOTE(review): extraction gap — the `while (FPKills)` loop header (original
// line ~1651) is not visible; the three statements below are its body.
1652 unsigned FPReg
= countTrailingZeros(FPKills
);
1654 freeStackSlotAfter(Inst
, FPReg
);
1655 FPKills
&= ~(1U << FPReg
);
1658 // Don't delete the inline asm!
// NOTE(review): extraction gap — the return and end of the switch (original
// lines ~1659-1662) are not visible here.
1663 Inst
= MBB
->erase(Inst
); // Remove the pseudo instruction
1665 // We want to leave I pointing to the previous instruction, but what if we
1666 // just erased the first instruction?
1667 if (Inst
== MBB
->begin()) {
1668 LLVM_DEBUG(dbgs() << "Inserting dummy KILL\n");
1669 Inst
= BuildMI(*MBB
, Inst
, DebugLoc(), TII
->get(TargetOpcode::KILL
));
// NOTE(review): extraction gap — the else arm decrementing Inst and the
// function's closing brace (original lines ~1670-1672) are not visible here.
1674 void FPS::setKillFlags(MachineBasicBlock
&MBB
) const {
1675 const TargetRegisterInfo
&TRI
=
1676 *MBB
.getParent()->getSubtarget().getRegisterInfo();
1677 LivePhysRegs
LPR(TRI
);
1679 LPR
.addLiveOuts(MBB
);
1681 for (MachineBasicBlock::reverse_iterator I
= MBB
.rbegin(), E
= MBB
.rend();
1683 if (I
->isDebugInstr())
1686 std::bitset
<8> Defs
;
1687 SmallVector
<MachineOperand
*, 2> Uses
;
1688 MachineInstr
&MI
= *I
;
1690 for (auto &MO
: I
->operands()) {
1694 unsigned Reg
= MO
.getReg() - X86::FP0
;
1701 if (!LPR
.contains(MO
.getReg()))
1704 Uses
.push_back(&MO
);
1707 for (auto *MO
: Uses
)
1708 if (Defs
.test(getFPReg(*MO
)) || !LPR
.contains(MO
->getReg()))
1711 LPR
.stepBackward(MI
);