1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "x86-isel"
17 #include "X86InstrBuilder.h"
18 #include "X86ISelLowering.h"
19 #include "X86MachineFunctionInfo.h"
20 #include "X86RegisterInfo.h"
21 #include "X86Subtarget.h"
22 #include "X86TargetMachine.h"
23 #include "llvm/GlobalValue.h"
24 #include "llvm/Instructions.h"
25 #include "llvm/Intrinsics.h"
26 #include "llvm/Support/CFG.h"
27 #include "llvm/Type.h"
28 #include "llvm/CodeGen/MachineConstantPool.h"
29 #include "llvm/CodeGen/MachineFunction.h"
30 #include "llvm/CodeGen/MachineFrameInfo.h"
31 #include "llvm/CodeGen/MachineInstrBuilder.h"
32 #include "llvm/CodeGen/MachineRegisterInfo.h"
33 #include "llvm/CodeGen/SelectionDAGISel.h"
34 #include "llvm/Target/TargetMachine.h"
35 #include "llvm/Target/TargetOptions.h"
36 #include "llvm/Support/Compiler.h"
37 #include "llvm/Support/Debug.h"
38 #include "llvm/Support/MathExtras.h"
39 #include "llvm/Support/Streams.h"
40 #include "llvm/ADT/SmallPtrSet.h"
41 #include "llvm/ADT/Statistic.h"
44 #include "llvm/Support/CommandLine.h"
// Hidden command-line flag controlling duplicate address-computation
// avoidance, plus the statistic counter for loads moved below TokenFactor.
// NOTE(review): this extract is corrupted -- statements are split across
// lines and the original source line numbers (45, 47, ...) are fused into
// the text; the cl::desc() argument of the flag appears to be missing.
45 static cl::opt
<bool> AvoidDupAddrCompute("x86-avoid-dup-address", cl::Hidden
);
47 STATISTIC(NumLoadMoved
, "Number of loads moved below TokenFactor");
49 //===----------------------------------------------------------------------===//
50 // Pattern Matcher Implementation
51 //===----------------------------------------------------------------------===//
// NOTE(review): partial extract of struct X86ISelAddressMode. The embedded
// original line numbers skip (57 -> 63 -> 68 -> 77 ...), so most field
// declarations, the BaseType enum, the constructor tail, and the dump()
// method header are missing from this view -- verify against the full file
// before editing.
54 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
55 /// SDValue's instead of register numbers for the leaves of the matched
57 struct X86ISelAddressMode
{
63 struct { // This is really a union, discriminated by BaseType!
68 bool isRIPRel
; // RIP as base?
77 unsigned Align
; // CP alignment.
// Constructor initializer list (header line missing from this extract):
// initializes to an empty register-base address with scale 1.
80 : BaseType(RegBase
), isRIPRel(false), Scale(1), IndexReg(), Disp(0),
81 Segment(), GV(0), CP(0), ES(0), JT(-1), Align(0) {
// True if any symbolic component (global, constant pool, external symbol,
// or jump table) is present in the displacement.
84 bool hasSymbolicDisplacement() const {
85 return GV
!= 0 || CP
!= 0 || ES
!= 0 || JT
!= -1;
// The stream output below is presumably the body of a dump() debugging
// method whose declaration (around original line 88) is missing from this
// extract -- confirm against the full file.
89 cerr
<< "X86ISelAddressMode " << this << "\n";
91 if (Base
.Reg
.getNode() != 0) Base
.Reg
.getNode()->dump();
93 cerr
<< " Base.FrameIndex " << Base
.FrameIndex
<< "\n";
94 cerr
<< "isRIPRel " << isRIPRel
<< " Scale" << Scale
<< "\n";
96 if (IndexReg
.getNode() != 0) IndexReg
.getNode()->dump();
98 cerr
<< " Disp " << Disp
<< "\n";
99 cerr
<< "GV "; if (GV
) GV
->dump();
101 cerr
<< " CP "; if (CP
) CP
->dump();
104 cerr
<< "ES "; if (ES
) cerr
<< ES
; else cerr
<< "nul";
105 cerr
<< " JT" << JT
<< " Align" << Align
<< "\n";
// NOTE(review): partial extract of the X86DAGToDAGISel class declaration.
// Interior lines are missing throughout (original numbering skips), so
// several member declarations, access specifiers, and closing braces are
// absent from this view. Only comments are added below; all code lines are
// kept byte-for-byte.
111 //===--------------------------------------------------------------------===//
112 /// ISel - X86 specific code to select X86 machine instructions for
113 /// SelectionDAG operations.
115 class VISIBILITY_HIDDEN X86DAGToDAGISel
: public SelectionDAGISel
{
116 /// TM - Keep a reference to X86TargetMachine.
118 X86TargetMachine
&TM
;
120 /// X86Lowering - This object fully describes how to lower LLVM code to an
121 /// X86-specific SelectionDAG.
122 X86TargetLowering
&X86Lowering
;
124 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
125 /// make the right decision when generating code for different targets.
126 const X86Subtarget
*Subtarget
;
128 /// CurBB - Current BB being isel'd.
130 MachineBasicBlock
*CurBB
;
132 /// OptForSize - If true, selector should try to optimize for code size
133 /// instead of performance.
// Constructor: wires up the target machine, lowering object, and subtarget.
137 explicit X86DAGToDAGISel(X86TargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
138 : SelectionDAGISel(tm
, OptLevel
),
139 TM(tm
), X86Lowering(*TM
.getTargetLowering()),
140 Subtarget(&TM
.getSubtarget
<X86Subtarget
>()),
143 virtual const char *getPassName() const {
144 return "X86 DAG->DAG Instruction Selection";
147 /// InstructionSelect - This callback is invoked by
148 /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
149 virtual void InstructionSelect();
151 virtual void EmitFunctionEntryCode(Function
&Fn
, MachineFunction
&MF
);
// Returns true if folding node N into U (rooted at Root) is legal and
// profitable; defined out-of-line later in this file.
154 bool IsLegalAndProfitableToFold(SDNode
*N
, SDNode
*U
, SDNode
*Root
) const;
156 // Include the pieces autogenerated from the target description.
157 #include "X86GenDAGISel.inc"
// Main selection entry point and helpers for matching addressing modes.
// Several declarations below are truncated by the extraction (trailing
// parameters missing) -- verify signatures against the full file.
160 SDNode
*Select(SDValue N
);
161 SDNode
*SelectAtomic64(SDNode
*Node
, unsigned Opc
);
163 bool MatchSegmentBaseAddress(SDValue N
, X86ISelAddressMode
&AM
);
164 bool MatchLoad(SDValue N
, X86ISelAddressMode
&AM
);
165 bool MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
);
166 bool MatchAddress(SDValue N
, X86ISelAddressMode
&AM
,
168 bool MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
);
169 bool SelectAddr(SDValue Op
, SDValue N
, SDValue
&Base
,
170 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
172 bool SelectLEAAddr(SDValue Op
, SDValue N
, SDValue
&Base
,
173 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
);
174 bool SelectScalarSSELoad(SDValue Op
, SDValue Pred
,
175 SDValue N
, SDValue
&Base
, SDValue
&Scale
,
176 SDValue
&Index
, SDValue
&Disp
,
178 SDValue
&InChain
, SDValue
&OutChain
);
179 bool TryFoldLoad(SDValue P
, SDValue N
,
180 SDValue
&Base
, SDValue
&Scale
,
181 SDValue
&Index
, SDValue
&Disp
,
183 void PreprocessForRMW();
184 void PreprocessForFPConvert();
186 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
187 /// inline asm expressions.
188 virtual bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
190 std::vector
<SDValue
> &OutOps
);
192 void EmitSpecialCodeForMain(MachineBasicBlock
*BB
, MachineFrameInfo
*MFI
);
// Expand a matched X86ISelAddressMode into the five operand SDValues
// (Base, Scale, Index, Disp, Segment) expected by X86 memory instructions.
194 inline void getAddressOperands(X86ISelAddressMode
&AM
, SDValue
&Base
,
195 SDValue
&Scale
, SDValue
&Index
,
196 SDValue
&Disp
, SDValue
&Segment
) {
197 Base
= (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
) ?
198 CurDAG
->getTargetFrameIndex(AM
.Base
.FrameIndex
, TLI
.getPointerTy()) :
200 Scale
= getI8Imm(AM
.Scale
);
202 // These are 32-bit even in 64-bit mode since RIP relative offset
205 Disp
= CurDAG
->getTargetGlobalAddress(AM
.GV
, MVT::i32
, AM
.Disp
);
207 Disp
= CurDAG
->getTargetConstantPool(AM
.CP
, MVT::i32
,
210 Disp
= CurDAG
->getTargetExternalSymbol(AM
.ES
, MVT::i32
);
211 else if (AM
.JT
!= -1)
212 Disp
= CurDAG
->getTargetJumpTable(AM
.JT
, MVT::i32
);
214 Disp
= CurDAG
->getTargetConstant(AM
.Disp
, MVT::i32
);
216 if (AM
.Segment
.getNode())
217 Segment
= AM
.Segment
;
219 Segment
= CurDAG
->getRegister(0, MVT::i32
);
222 /// getI8Imm - Return a target constant with the specified value, of type
224 inline SDValue
getI8Imm(unsigned Imm
) {
225 return CurDAG
->getTargetConstant(Imm
, MVT::i8
);
228 /// getI16Imm - Return a target constant with the specified value, of type
230 inline SDValue
getI16Imm(unsigned Imm
) {
231 return CurDAG
->getTargetConstant(Imm
, MVT::i16
);
234 /// getI32Imm - Return a target constant with the specified value, of type
236 inline SDValue
getI32Imm(unsigned Imm
) {
237 return CurDAG
->getTargetConstant(Imm
, MVT::i32
);
240 /// getGlobalBaseReg - Return an SDNode that returns the value of
241 /// the global base register. Output instructions required to
242 /// initialize the global base register, if necessary.
244 SDNode
*getGlobalBaseReg();
// Scans the users of N for one that consumes N's last result value (the
// MVT::Flag result, by convention the final value of the node). Returns
// that user, or -- presumably -- null when none is found (the fallthrough
// return is missing from this extract; confirm against the full file).
252 /// findFlagUse - Return use of MVT::Flag value produced by the specified
255 static SDNode
*findFlagUse(SDNode
*N
) {
256 unsigned FlagResNo
= N
->getNumValues()-1;
257 for (SDNode::use_iterator I
= N
->use_begin(), E
= N
->use_end(); I
!= E
; ++I
) {
258 SDUse
&Use
= I
.getUse();
259 if (Use
.getResNo() == FlagResNo
)
260 return Use
.getUser();
// Recursive DAG walk used by isNonImmUse below. The node-id comparison is a
// pruning step (nodes numbered before Def cannot reach it), and Visited
// prevents exponential re-traversal. NOTE(review): several lines are
// missing from this extract (early returns, the Root parameter in the
// signature, loop closing) -- the fragment is incomplete.
265 /// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
266 /// This function recursively traverses up the operand chain, ignoring
268 static bool findNonImmUse(SDNode
*Use
, SDNode
* Def
, SDNode
*ImmedUse
,
270 SmallPtrSet
<SDNode
*, 16> &Visited
) {
271 if (Use
->getNodeId() < Def
->getNodeId() ||
272 !Visited
.insert(Use
))
275 for (unsigned i
= 0, e
= Use
->getNumOperands(); i
!= e
; ++i
) {
276 SDNode
*N
= Use
->getOperand(i
).getNode();
278 if (Use
== ImmedUse
|| Use
== Root
)
279 continue; // We are not looking for immediate use.
284 // Traverse up the operand chain.
285 if (findNonImmUse(N
, Def
, ImmedUse
, Root
, Visited
))
// Thin wrapper over findNonImmUse: allocates the visited set and starts the
// search at Root, passing Root as both the start node and the ignored root.
291 /// isNonImmUse - Start searching from Root up the DAG to check is Def can
292 /// be reached. Return true if that's the case. However, ignore direct uses
293 /// by ImmedUse (which would be U in the example illustrated in
294 /// IsLegalAndProfitableToFold) and by Root (which can happen in the store
296 /// FIXME: to be really generic, we should allow direct use by any node
297 /// that is being folded. But realisticly since we only fold loads which
298 /// have one non-chain use, we only need to watch out for load/op/store
299 /// and load/op/cmp case where the root (store / cmp) may reach the load via
300 /// its chain operand.
301 static inline bool isNonImmUse(SDNode
*Root
, SDNode
*Def
, SDNode
*ImmedUse
) {
302 SmallPtrSet
<SDNode
*, 16> Visited
;
303 return findNonImmUse(Root
, Def
, ImmedUse
, Root
, Visited
);
// Decides whether folding node N into its user U (with fold root Root) is
// both legal (no DAG cycle is created) and profitable (no code-size
// regression). Disabled entirely at -O0. NOTE(review): the switch cases and
// several early returns are missing from this extract; the final check is
// the cycle test via isNonImmUse.
307 bool X86DAGToDAGISel::IsLegalAndProfitableToFold(SDNode
*N
, SDNode
*U
,
308 SDNode
*Root
) const {
309 if (OptLevel
== CodeGenOpt::None
) return false;
312 switch (U
->getOpcode()) {
320 SDValue Op1
= U
->getOperand(1);
322 // If the other operand is a 8-bit immediate we should fold the immediate
323 // instead. This reduces code size.
325 // movl 4(%esp), %eax
329 // addl 4(%esp), %eax
330 // The former is 2 bytes shorter. In case where the increment is 1, then
331 // the saving can be 4 bytes (by using incl %eax).
332 if (ConstantSDNode
*Imm
= dyn_cast
<ConstantSDNode
>(Op1
))
333 if (Imm
->getAPIntValue().isSignedIntN(8))
336 // If the other operand is a TLS address, we should fold it instead.
339 // leal i@NTPOFF(%eax), %eax
341 // movl $i@NTPOFF, %eax
343 // if the block also has an access to a second TLS address this will save
345 // FIXME: This is probably also true for non TLS addresses.
346 if (Op1
.getOpcode() == X86ISD::Wrapper
) {
347 SDValue Val
= Op1
.getOperand(0);
348 if (Val
.getOpcode() == ISD::TargetGlobalTLSAddress
)
354 // If Root use can somehow reach N through a path that that doesn't contain
355 // U then folding N would create a cycle. e.g. In the following
356 // diagram, Root can reach N through X. If N is folded into into Root, then
357 // X is both a predecessor and a successor of U.
368 // * indicates nodes to be folded together.
370 // If Root produces a flag, then it gets (even more) interesting. Since it
371 // will be "glued" together with its flag use in the scheduler, we need to
372 // check if it might reach N.
391 // If FU (flag use) indirectly reaches N (the load), and Root folds N
392 // (call it Fold), then X is a predecessor of FU and a successor of
393 // Fold. But since Fold and FU are flagged together, this will create
394 // a cycle in the scheduling graph.
// Walk the flag-use chain: while the current Root produces a flag, advance
// Root to its flag user so the cycle check below covers the whole glued
// group. (The loop-body lines that advance Root are partly missing here.)
396 MVT VT
= Root
->getValueType(Root
->getNumValues()-1);
397 while (VT
== MVT::Flag
) {
398 SDNode
*FU
= findFlagUse(Root
);
402 VT
= Root
->getValueType(Root
->getNumValues()-1);
405 return !isNonImmUse(Root
, N
, U
);
// Rewrites the chain so a (load, op, store) triple becomes foldable into one
// RMW instruction: the TokenFactor takes the load's input chain in place of
// the load, the load is re-chained onto the TokenFactor, and the store is
// re-chained onto the load's output chain. NOTE(review): the `else` between
// original lines 416 and 418 is missing from this extract.
408 /// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
409 /// and move load below the TokenFactor. Replace store's chain operand with
410 /// load's chain result.
411 static void MoveBelowTokenFactor(SelectionDAG
*CurDAG
, SDValue Load
,
412 SDValue Store
, SDValue TF
) {
413 SmallVector
<SDValue
, 4> Ops
;
414 for (unsigned i
= 0, e
= TF
.getNode()->getNumOperands(); i
!= e
; ++i
)
415 if (Load
.getNode() == TF
.getOperand(i
).getNode())
416 Ops
.push_back(Load
.getOperand(0));
418 Ops
.push_back(TF
.getOperand(i
));
419 CurDAG
->UpdateNodeOperands(TF
, &Ops
[0], Ops
.size());
420 CurDAG
->UpdateNodeOperands(Load
, TF
, Load
.getOperand(1), Load
.getOperand(2));
421 CurDAG
->UpdateNodeOperands(Store
, Load
.getValue(1), Store
.getOperand(1),
422 Store
.getOperand(2), Store
.getOperand(3));
// Checks that N is a plain (unindexed, non-volatile, non-truncating) load of
// Address chained through Chain, i.e. a candidate for read-modify-write
// folding. NOTE(review): the signature's trailing parameter, several early
// `return false` lines, and the success epilogue are missing from this
// extract.
425 /// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
427 static bool isRMWLoad(SDValue N
, SDValue Chain
, SDValue Address
,
429 if (N
.getOpcode() == ISD::BIT_CONVERT
)
432 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
);
433 if (!LD
|| LD
->isVolatile())
435 if (LD
->getAddressingMode() != ISD::UNINDEXED
)
438 ISD::LoadExtType ExtType
= LD
->getExtensionType();
439 if (ExtType
!= ISD::NON_EXTLOAD
&& ExtType
!= ISD::EXTLOAD
)
443 N
.getOperand(1) == Address
&&
444 N
.getNode()->isOperandOf(Chain
.getNode())) {
// Re-chains a call-address load so it sits just above the call (below
// CALLSEQ_START), allowing it to be folded into the call instruction:
// builds a replacement TokenFactor without the load, splices it into
// CALLSEQ_START, re-chains the load, and finally rewrites the call's
// operand list to use the load's output chain. NOTE(review): the `else`
// branches and the NewChain declaration (original line ~467) are missing
// from this extract.
451 /// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
452 /// operand and move load below the call's chain operand.
453 static void MoveBelowCallSeqStart(SelectionDAG
*CurDAG
, SDValue Load
,
454 SDValue Call
, SDValue CallSeqStart
) {
455 SmallVector
<SDValue
, 8> Ops
;
456 SDValue Chain
= CallSeqStart
.getOperand(0);
457 if (Chain
.getNode() == Load
.getNode())
458 Ops
.push_back(Load
.getOperand(0));
460 assert(Chain
.getOpcode() == ISD::TokenFactor
&&
461 "Unexpected CallSeqStart chain operand");
462 for (unsigned i
= 0, e
= Chain
.getNumOperands(); i
!= e
; ++i
)
463 if (Chain
.getOperand(i
).getNode() == Load
.getNode())
464 Ops
.push_back(Load
.getOperand(0));
466 Ops
.push_back(Chain
.getOperand(i
));
468 CurDAG
->getNode(ISD::TokenFactor
, Load
.getDebugLoc(),
469 MVT::Other
, &Ops
[0], Ops
.size());
471 Ops
.push_back(NewChain
);
473 for (unsigned i
= 1, e
= CallSeqStart
.getNumOperands(); i
!= e
; ++i
)
474 Ops
.push_back(CallSeqStart
.getOperand(i
));
475 CurDAG
->UpdateNodeOperands(CallSeqStart
, &Ops
[0], Ops
.size());
476 CurDAG
->UpdateNodeOperands(Load
, Call
.getOperand(0),
477 Load
.getOperand(1), Load
.getOperand(2));
479 Ops
.push_back(SDValue(Load
.getNode(), 1));
480 for (unsigned i
= 1, e
= Call
.getNode()->getNumOperands(); i
!= e
; ++i
)
481 Ops
.push_back(Call
.getOperand(i
));
482 CurDAG
->UpdateNodeOperands(Call
, &Ops
[0], Ops
.size());
// Predicate used by PreprocessForRMW: the callee must be a single-use,
// unindexed, non-extending load, and the chain from the call must lead to
// a CALLSEQ_START whose chain reaches the load (directly or through a
// TokenFactor). NOTE(review): several early-return lines are missing from
// this extract.
485 /// isCalleeLoad - Return true if call address is a load and it can be
486 /// moved below CALLSEQ_START and the chains leading up to the call.
487 /// Return the CALLSEQ_START by reference as a second output.
488 static bool isCalleeLoad(SDValue Callee
, SDValue
&Chain
) {
489 if (Callee
.getNode() == Chain
.getNode() || !Callee
.hasOneUse())
491 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Callee
.getNode());
494 LD
->getAddressingMode() != ISD::UNINDEXED
||
495 LD
->getExtensionType() != ISD::NON_EXTLOAD
)
498 // Now let's find the callseq_start.
499 while (Chain
.getOpcode() != ISD::CALLSEQ_START
) {
500 if (!Chain
.hasOneUse())
502 Chain
= Chain
.getOperand(0);
505 if (Chain
.getOperand(0).getNode() == Callee
.getNode())
507 if (Chain
.getOperand(0).getOpcode() == ISD::TokenFactor
&&
508 Callee
.getValue(1).isOperandOf(Chain
.getOperand(0).getNode()))
// Walks every node in the DAG looking for two patterns: (1) calls whose
// address is a foldable load (moved below CALLSEQ_START via
// MoveBelowCallSeqStart), and (2) stores whose value operand is an op of a
// matching load (re-chained via MoveBelowTokenFactor so the RMW pattern can
// be selected). NOTE(review): the opcode switch is largely missing from
// this extract (only the VECTOR_SHUFFLE case and one fallthrough are
// visible), as are the `continue`s and the Load declaration used in the
// store path.
514 /// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
515 /// This is only run if not in -O0 mode.
516 /// This allows the instruction selector to pick more read-modify-write
517 /// instructions. This is a common case:
527 /// [TokenFactor] [Op]
534 /// The fact the store's chain operand != load's chain will prevent the
535 /// (store (op (load))) instruction from being selected. We can transform it to:
554 void X86DAGToDAGISel::PreprocessForRMW() {
555 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
556 E
= CurDAG
->allnodes_end(); I
!= E
; ++I
) {
557 if (I
->getOpcode() == X86ISD::CALL
) {
558 /// Also try moving call address load from outside callseq_start to just
559 /// before the call to allow it to be folded.
577 SDValue Chain
= I
->getOperand(0);
578 SDValue Load
= I
->getOperand(1);
579 if (!isCalleeLoad(Load
, Chain
))
581 MoveBelowCallSeqStart(CurDAG
, Load
, SDValue(I
, 0), Chain
);
586 if (!ISD::isNON_TRUNCStore(I
))
588 SDValue Chain
= I
->getOperand(0);
590 if (Chain
.getNode()->getOpcode() != ISD::TokenFactor
)
593 SDValue N1
= I
->getOperand(1);
594 SDValue N2
= I
->getOperand(2);
595 if ((N1
.getValueType().isFloatingPoint() &&
596 !N1
.getValueType().isVector()) ||
602 unsigned Opcode
= N1
.getNode()->getOpcode();
611 case ISD::VECTOR_SHUFFLE
: {
612 SDValue N10
= N1
.getOperand(0);
613 SDValue N11
= N1
.getOperand(1);
614 RModW
= isRMWLoad(N10
, Chain
, N2
, Load
);
616 RModW
= isRMWLoad(N11
, Chain
, N2
, Load
);
629 SDValue N10
= N1
.getOperand(0);
630 RModW
= isRMWLoad(N10
, Chain
, N2
, Load
);
636 MoveBelowTokenFactor(CurDAG
, Load
, SDValue(I
, 0), Chain
);
// Lowers FP_ROUND/FP_EXTEND nodes that involve the x87 FP stack into an
// explicit truncstore + extload through a stack temporary, because the FP
// stack cannot represent these conversions directly. SSE<->SSE conversions
// are left alone, and FPStack-only no-op cases are skipped. NOTE(review):
// several `continue`s, the MemVT declaration, the store/load value
// operands, and the iterator-backup lines are missing from this extract.
643 /// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
644 /// nodes that target the FP stack to be store and load to the stack. This is a
645 /// gross hack. We would like to simply mark these as being illegal, but when
646 /// we do that, legalize produces these when it expands calls, then expands
647 /// these in the same legalize pass. We would like dag combine to be able to
648 /// hack on these between the call expansion and the node legalization. As such
649 /// this pass basically does "really late" legalization of these inline with the
651 void X86DAGToDAGISel::PreprocessForFPConvert() {
652 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
653 E
= CurDAG
->allnodes_end(); I
!= E
; ) {
654 SDNode
*N
= I
++; // Preincrement iterator to avoid invalidation issues.
655 if (N
->getOpcode() != ISD::FP_ROUND
&& N
->getOpcode() != ISD::FP_EXTEND
)
658 // If the source and destination are SSE registers, then this is a legal
659 // conversion that should not be lowered.
660 MVT SrcVT
= N
->getOperand(0).getValueType();
661 MVT DstVT
= N
->getValueType(0);
662 bool SrcIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(SrcVT
);
663 bool DstIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(DstVT
);
664 if (SrcIsSSE
&& DstIsSSE
)
667 if (!SrcIsSSE
&& !DstIsSSE
) {
668 // If this is an FPStack extension, it is a noop.
669 if (N
->getOpcode() == ISD::FP_EXTEND
)
671 // If this is a value-preserving FPStack truncation, it is a noop.
672 if (N
->getConstantOperandVal(1))
676 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
677 // FPStack has extload and truncstore. SSE can fold direct loads into other
678 // operations. Based on this, decide what we want to do.
680 if (N
->getOpcode() == ISD::FP_ROUND
)
681 MemVT
= DstVT
; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
683 MemVT
= SrcIsSSE
? SrcVT
: DstVT
;
685 SDValue MemTmp
= CurDAG
->CreateStackTemporary(MemVT
);
686 DebugLoc dl
= N
->getDebugLoc();
688 // FIXME: optimize the case where the src/dest is a load or store?
689 SDValue Store
= CurDAG
->getTruncStore(CurDAG
->getEntryNode(), dl
,
691 MemTmp
, NULL
, 0, MemVT
);
692 SDValue Result
= CurDAG
->getExtLoad(ISD::EXTLOAD
, dl
, DstVT
, Store
, MemTmp
,
695 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
696 // extload we created. This will cause general havok on the dag because
697 // anything below the conversion could be folded into other existing nodes.
698 // To avoid invalidating 'I', back it up to the convert node.
700 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Result
);
702 // Now that we did that, the node is dead. Increment the iterator to the
703 // next node to process, then delete N.
705 CurDAG
->DeleteNode(N
);
// Top-level selection driver: caches the current basic block, reads the
// function's OptimizeForSize attribute into OptForSize, runs the two
// preprocessing passes (RMW only above -O0), selects the block, and prunes
// dead nodes. NOTE(review): the PreprocessForRMW() call and the SelectRoot
// invocation between the DOUT lines are missing from this extract.
709 /// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
710 /// when it has created a SelectionDAG for us to codegen.
711 void X86DAGToDAGISel::InstructionSelect() {
712 CurBB
= BB
; // BB can change as result of isel.
713 const Function
*F
= CurDAG
->getMachineFunction().getFunction();
714 OptForSize
= F
->hasFnAttr(Attribute::OptimizeForSize
);
717 if (OptLevel
!= CodeGenOpt::None
)
720 // FIXME: This should only happen when not compiled with -O0.
721 PreprocessForFPConvert();
723 // Codegen the basic block.
725 DOUT
<< "===== Instruction selection begins:\n";
730 DOUT
<< "===== Instruction selection ends:\n";
733 CurDAG
->RemoveDeadNodes();
// On Cygwin/MinGW targets, prepends a call to the runtime's __main hook at
// the start of main(). Other lines of the original body (e.g. any stack
// adjustment) are not visible in this extract.
736 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
737 /// the main function.
738 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock
*BB
,
739 MachineFrameInfo
*MFI
) {
740 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
741 if (Subtarget
->isTargetCygMing())
742 BuildMI(BB
, DebugLoc::getUnknownLoc(),
743 TII
->get(X86::CALLpcrel32
)).addExternalSymbol("__main");
// Entry-code hook: if the function is an externally-linked "main", emit the
// special prologue via EmitSpecialCodeForMain on the entry block.
746 void X86DAGToDAGISel::EmitFunctionEntryCode(Function
&Fn
, MachineFunction
&MF
) {
747 // If this is main, emit special code for main.
748 MachineBasicBlock
*BB
= MF
.begin();
749 if (Fn
.hasExternalLinkage() && Fn
.getName() == "main")
750 EmitSpecialCodeForMain(BB
, MF
.getFrameInfo());
// Folds an X86ISD::SegmentBaseAddress node into the addressing mode by
// recording its segment register operand in AM.Segment, provided no
// segment has been claimed yet. The return statements are missing from
// this extract.
754 bool X86DAGToDAGISel::MatchSegmentBaseAddress(SDValue N
,
755 X86ISelAddressMode
&AM
) {
756 assert(N
.getOpcode() == X86ISD::SegmentBaseAddress
);
757 SDValue Segment
= N
.getOperand(0);
759 if (AM
.Segment
.getNode() == 0) {
760 AM
.Segment
= Segment
;
// Matches a load whose address is a SegmentBaseAddress (the gs:0 / fs:0 TLS
// self-address idiom) by delegating to MatchSegmentBaseAddress. The return
// statements are missing from this extract.
767 bool X86DAGToDAGISel::MatchLoad(SDValue N
, X86ISelAddressMode
&AM
) {
768 // This optimization is valid because the GNU TLS model defines that
769 // gs:0 (or fs:0 on X86-64) contains its own address.
770 // For more information see http://people.redhat.com/drepper/tls.pdf
772 SDValue Address
= N
.getOperand(1);
773 if (Address
.getOpcode() == X86ISD::SegmentBaseAddress
&&
774 !MatchSegmentBaseAddress (Address
, AM
))
// Folds the operand of an X86ISD::Wrapper (global address, constant pool,
// external symbol, or jump table) into the addressing-mode displacement,
// subject to the 64-bit small-code-model and 32-bit-offset constraints, and
// records whether the reference is RIP-relative. NOTE(review): many interior
// lines (returns, AM.GV/AM.Disp assignments, closing braces) are missing
// from this extract.
780 bool X86DAGToDAGISel::MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
) {
781 bool is64Bit
= Subtarget
->is64Bit();
782 DOUT
<< "Wrapper: 64bit " << is64Bit
;
783 DOUT
<< " AM "; DEBUG(AM
.dump()); DOUT
<< "\n";
785 // Under X86-64 non-small code model, GV (and friends) are 64-bits.
786 if (is64Bit
&& (TM
.getCodeModel() != CodeModel::Small
))
789 // Base and index reg must be 0 in order to use rip as base.
790 bool canUsePICRel
= !AM
.Base
.Reg
.getNode() && !AM
.IndexReg
.getNode();
791 if (is64Bit
&& !canUsePICRel
&& TM
.symbolicAddressesAreRIPRel())
794 if (AM
.hasSymbolicDisplacement())
796 // If value is available in a register both base and index components have
797 // been picked, we can't fit the result available in the register in the
798 // addressing mode. Duplicate GlobalAddress or ConstantPool as displacement.
800 SDValue N0
= N
.getOperand(0);
801 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(N0
)) {
802 uint64_t Offset
= G
->getOffset();
803 if (!is64Bit
|| isInt32(AM
.Disp
+ Offset
)) {
804 GlobalValue
*GV
= G
->getGlobal();
805 bool isRIPRel
= TM
.symbolicAddressesAreRIPRel();
806 if (N0
.getOpcode() == llvm::ISD::TargetGlobalTLSAddress
) {
807 TLSModel::Model model
=
808 getTLSModel (GV
, TM
.getRelocationModel());
809 if (is64Bit
&& model
== TLSModel::InitialExec
)
814 AM
.isRIPRel
= isRIPRel
;
817 } else if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(N0
)) {
818 uint64_t Offset
= CP
->getOffset();
819 if (!is64Bit
|| isInt32(AM
.Disp
+ Offset
)) {
820 AM
.CP
= CP
->getConstVal();
821 AM
.Align
= CP
->getAlignment();
823 AM
.isRIPRel
= TM
.symbolicAddressesAreRIPRel();
826 } else if (ExternalSymbolSDNode
*S
=dyn_cast
<ExternalSymbolSDNode
>(N0
)) {
827 AM
.ES
= S
->getSymbol();
828 AM
.isRIPRel
= TM
.symbolicAddressesAreRIPRel();
830 } else if (JumpTableSDNode
*J
= dyn_cast
<JumpTableSDNode
>(N0
)) {
831 AM
.JT
= J
->getIndex();
832 AM
.isRIPRel
= TM
.symbolicAddressesAreRIPRel();
// Recursive addressing-mode matcher: pattern-matches N (constants, frame
// indices, wrappers, shifts, multiplies, adds, ors, ands) into the
// X86ISelAddressMode AM, returning true on failure. Falls back to
// MatchAddressBase when nothing matches. NOTE(review): this extract is
// heavily truncated -- the Depth-limit guard, many case labels, returns,
// Backup restores, and closing braces are missing, and the embedded
// original numbering skips throughout. Only comments are added; all code
// lines are byte-identical.
839 /// MatchAddress - Add the specified node to the specified addressing mode,
840 /// returning true if it cannot be done. This just pattern matches for the
842 bool X86DAGToDAGISel::MatchAddress(SDValue N
, X86ISelAddressMode
&AM
,
844 bool is64Bit
= Subtarget
->is64Bit();
845 DebugLoc dl
= N
.getDebugLoc();
846 DOUT
<< "MatchAddress: "; DEBUG(AM
.dump());
849 return MatchAddressBase(N
, AM
);
851 // RIP relative addressing: %rip + 32-bit displacement!
853 if (!AM
.ES
&& AM
.JT
!= -1 && N
.getOpcode() == ISD::Constant
) {
854 uint64_t Val
= cast
<ConstantSDNode
>(N
)->getSExtValue();
855 if (!is64Bit
|| isInt32(AM
.Disp
+ Val
)) {
863 switch (N
.getOpcode()) {
865 case ISD::Constant
: {
866 uint64_t Val
= cast
<ConstantSDNode
>(N
)->getSExtValue();
867 if (!is64Bit
|| isInt32(AM
.Disp
+ Val
)) {
874 case X86ISD::SegmentBaseAddress
:
875 if (!MatchSegmentBaseAddress(N
, AM
))
879 case X86ISD::Wrapper
:
880 if (!MatchWrapper(N
, AM
))
885 if (!MatchLoad(N
, AM
))
889 case ISD::FrameIndex
:
890 if (AM
.BaseType
== X86ISelAddressMode::RegBase
891 && AM
.Base
.Reg
.getNode() == 0) {
892 AM
.BaseType
= X86ISelAddressMode::FrameIndexBase
;
893 AM
.Base
.FrameIndex
= cast
<FrameIndexSDNode
>(N
)->getIndex();
// Shift-by-constant case: a shift of 1..3 becomes the scale field
// (scale 2/4/8); the case label itself is missing from this extract.
899 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1 || AM
.isRIPRel
)
903 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1))) {
904 unsigned Val
= CN
->getZExtValue();
905 if (Val
== 1 || Val
== 2 || Val
== 3) {
907 SDValue ShVal
= N
.getNode()->getOperand(0);
909 // Okay, we know that we have a scale by now. However, if the scaled
910 // value is an add of something and a constant, we can fold the
911 // constant into the disp field here.
912 if (ShVal
.getNode()->getOpcode() == ISD::ADD
&& ShVal
.hasOneUse() &&
913 isa
<ConstantSDNode
>(ShVal
.getNode()->getOperand(1))) {
914 AM
.IndexReg
= ShVal
.getNode()->getOperand(0);
915 ConstantSDNode
*AddVal
=
916 cast
<ConstantSDNode
>(ShVal
.getNode()->getOperand(1));
917 uint64_t Disp
= AM
.Disp
+ (AddVal
->getSExtValue() << Val
);
918 if (!is64Bit
|| isInt32(Disp
))
932 // A mul_lohi where we need the low part can be folded as a plain multiply.
933 if (N
.getResNo() != 0) break;
936 case X86ISD::MUL_IMM
:
937 // X*[3,5,9] -> X+X*[2,4,8]
938 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
939 AM
.Base
.Reg
.getNode() == 0 &&
940 AM
.IndexReg
.getNode() == 0 &&
943 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1)))
944 if (CN
->getZExtValue() == 3 || CN
->getZExtValue() == 5 ||
945 CN
->getZExtValue() == 9) {
946 AM
.Scale
= unsigned(CN
->getZExtValue())-1;
948 SDValue MulVal
= N
.getNode()->getOperand(0);
951 // Okay, we know that we have a scale by now. However, if the scaled
952 // value is an add of something and a constant, we can fold the
953 // constant into the disp field here.
954 if (MulVal
.getNode()->getOpcode() == ISD::ADD
&& MulVal
.hasOneUse() &&
955 isa
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1))) {
956 Reg
= MulVal
.getNode()->getOperand(0);
957 ConstantSDNode
*AddVal
=
958 cast
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1));
959 uint64_t Disp
= AM
.Disp
+ AddVal
->getSExtValue() *
961 if (!is64Bit
|| isInt32(Disp
))
964 Reg
= N
.getNode()->getOperand(0);
966 Reg
= N
.getNode()->getOperand(0);
969 AM
.IndexReg
= AM
.Base
.Reg
= Reg
;
// ISD::ADD case (label missing): try folding both operands in either
// order, restoring from Backup between attempts (restore lines missing).
976 X86ISelAddressMode Backup
= AM
;
977 if (!MatchAddress(N
.getNode()->getOperand(0), AM
, Depth
+1) &&
978 !MatchAddress(N
.getNode()->getOperand(1), AM
, Depth
+1))
981 if (!MatchAddress(N
.getNode()->getOperand(1), AM
, Depth
+1) &&
982 !MatchAddress(N
.getNode()->getOperand(0), AM
, Depth
+1))
986 // If we couldn't fold both operands into the address at the same time,
987 // see if we can just put each operand into a register and fold at least
989 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
990 !AM
.Base
.Reg
.getNode() &&
991 !AM
.IndexReg
.getNode() &&
993 AM
.Base
.Reg
= N
.getNode()->getOperand(0);
994 AM
.IndexReg
= N
.getNode()->getOperand(1);
1002 // Handle "X | C" as "X + C" iff X is known to have C bits clear.
1003 if (ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1))) {
1004 X86ISelAddressMode Backup
= AM
;
1005 uint64_t Offset
= CN
->getSExtValue();
1006 // Start with the LHS as an addr mode.
1007 if (!MatchAddress(N
.getOperand(0), AM
, Depth
+1) &&
1008 // Address could not have picked a GV address for the displacement.
1010 // On x86-64, the resultant disp must fit in 32-bits.
1011 (!is64Bit
|| isInt32(AM
.Disp
+ Offset
)) &&
1012 // Check to see if the LHS & C is zero.
1013 CurDAG
->MaskedValueIsZero(N
.getOperand(0), CN
->getAPIntValue())) {
1022 // Perform some heroic transforms on an and of a constant-count shift
1023 // with a constant to enable use of the scaled offset field.
1025 SDValue Shift
= N
.getOperand(0);
1026 if (Shift
.getNumOperands() != 2) break;
1028 // Scale must not be used already.
1029 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1) break;
1031 // Not when RIP is used as the base.
1032 if (AM
.isRIPRel
) break;
1034 SDValue X
= Shift
.getOperand(0);
1035 ConstantSDNode
*C2
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
1036 ConstantSDNode
*C1
= dyn_cast
<ConstantSDNode
>(Shift
.getOperand(1));
1037 if (!C1
|| !C2
) break;
1039 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
1040 // allows us to convert the shift and and into an h-register extract and
1042 if (Shift
.getOpcode() == ISD::SRL
&& Shift
.hasOneUse()) {
1043 unsigned ScaleLog
= 8 - C1
->getZExtValue();
1044 if (ScaleLog
> 0 && ScaleLog
< 4 &&
1045 C2
->getZExtValue() == (UINT64_C(0xff) << ScaleLog
)) {
1046 SDValue Eight
= CurDAG
->getConstant(8, MVT::i8
);
1047 SDValue Mask
= CurDAG
->getConstant(0xff, N
.getValueType());
1048 SDValue Srl
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
1050 SDValue And
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(),
1052 SDValue ShlCount
= CurDAG
->getConstant(ScaleLog
, MVT::i8
);
1053 SDValue Shl
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1056 // Insert the new nodes into the topological ordering.
1057 if (Eight
.getNode()->getNodeId() == -1 ||
1058 Eight
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1059 CurDAG
->RepositionNode(X
.getNode(), Eight
.getNode());
1060 Eight
.getNode()->setNodeId(X
.getNode()->getNodeId());
1062 if (Mask
.getNode()->getNodeId() == -1 ||
1063 Mask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1064 CurDAG
->RepositionNode(X
.getNode(), Mask
.getNode());
1065 Mask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1067 if (Srl
.getNode()->getNodeId() == -1 ||
1068 Srl
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1069 CurDAG
->RepositionNode(Shift
.getNode(), Srl
.getNode());
1070 Srl
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1072 if (And
.getNode()->getNodeId() == -1 ||
1073 And
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1074 CurDAG
->RepositionNode(N
.getNode(), And
.getNode());
1075 And
.getNode()->setNodeId(N
.getNode()->getNodeId());
1077 if (ShlCount
.getNode()->getNodeId() == -1 ||
1078 ShlCount
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1079 CurDAG
->RepositionNode(X
.getNode(), ShlCount
.getNode());
1080 ShlCount
.getNode()->setNodeId(N
.getNode()->getNodeId());
1082 if (Shl
.getNode()->getNodeId() == -1 ||
1083 Shl
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1084 CurDAG
->RepositionNode(N
.getNode(), Shl
.getNode());
1085 Shl
.getNode()->setNodeId(N
.getNode()->getNodeId());
1087 CurDAG
->ReplaceAllUsesWith(N
, Shl
);
1089 AM
.Scale
= (1 << ScaleLog
);
1094 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1095 // allows us to fold the shift into this addressing mode.
1096 if (Shift
.getOpcode() != ISD::SHL
) break;
1098 // Not likely to be profitable if either the AND or SHIFT node has more
1099 // than one use (unless all uses are for address computation). Besides,
1100 // isel mechanism requires their node ids to be reused.
1101 if (!N
.hasOneUse() || !Shift
.hasOneUse())
1104 // Verify that the shift amount is something we can fold.
1105 unsigned ShiftCst
= C1
->getZExtValue();
1106 if (ShiftCst
!= 1 && ShiftCst
!= 2 && ShiftCst
!= 3)
1109 // Get the new AND mask, this folds to a constant.
1110 SDValue NewANDMask
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
1111 SDValue(C2
, 0), SDValue(C1
, 0));
1112 SDValue NewAND
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(), X
,
1114 SDValue NewSHIFT
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1115 NewAND
, SDValue(C1
, 0));
1117 // Insert the new nodes into the topological ordering.
1118 if (C1
->getNodeId() > X
.getNode()->getNodeId()) {
1119 CurDAG
->RepositionNode(X
.getNode(), C1
);
1120 C1
->setNodeId(X
.getNode()->getNodeId());
1122 if (NewANDMask
.getNode()->getNodeId() == -1 ||
1123 NewANDMask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1124 CurDAG
->RepositionNode(X
.getNode(), NewANDMask
.getNode());
1125 NewANDMask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1127 if (NewAND
.getNode()->getNodeId() == -1 ||
1128 NewAND
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1129 CurDAG
->RepositionNode(Shift
.getNode(), NewAND
.getNode());
1130 NewAND
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1132 if (NewSHIFT
.getNode()->getNodeId() == -1 ||
1133 NewSHIFT
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1134 CurDAG
->RepositionNode(N
.getNode(), NewSHIFT
.getNode());
1135 NewSHIFT
.getNode()->setNodeId(N
.getNode()->getNodeId());
1138 CurDAG
->ReplaceAllUsesWith(N
, NewSHIFT
);
1140 AM
.Scale
= 1 << ShiftCst
;
1141 AM
.IndexReg
= NewAND
;
1146 return MatchAddressBase(N
, AM
);
1149 /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1150 /// specified addressing mode without any further recursion.
1151 bool X86DAGToDAGISel::MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
) {
1152 // Is the base register already occupied?
1153 if (AM
.BaseType
!= X86ISelAddressMode::RegBase
|| AM
.Base
.Reg
.getNode()) {
1154 // If so, check to see if the scale index register is set.
1155 if (AM
.IndexReg
.getNode() == 0 && !AM
.isRIPRel
) {
1161 // Otherwise, we cannot select it.
1165 // Default, generate it as a register.
1166 AM
.BaseType
= X86ISelAddressMode::RegBase
;
1171 /// SelectAddr - returns true if it is able pattern match an addressing mode.
1172 /// It returns the operands which make up the maximal addressing mode it can
1173 /// match by reference.
1174 bool X86DAGToDAGISel::SelectAddr(SDValue Op
, SDValue N
, SDValue
&Base
,
1175 SDValue
&Scale
, SDValue
&Index
,
1176 SDValue
&Disp
, SDValue
&Segment
) {
1177 X86ISelAddressMode AM
;
1179 if (AvoidDupAddrCompute
&& !N
.hasOneUse()) {
1180 unsigned Opcode
= N
.getOpcode();
1181 if (Opcode
!= ISD::Constant
&& Opcode
!= ISD::FrameIndex
&&
1182 Opcode
!= X86ISD::Wrapper
) {
1183 // If we are able to fold N into addressing mode, then we'll allow it even
1184 // if N has multiple uses. In general, addressing computation is used as
1185 // addresses by all of its uses. But watch out for CopyToReg uses, that
1186 // means the address computation is liveout. It will be computed by a LEA
1187 // so we want to avoid computing the address twice.
1188 for (SDNode::use_iterator UI
= N
.getNode()->use_begin(),
1189 UE
= N
.getNode()->use_end(); UI
!= UE
; ++UI
) {
1190 if (UI
->getOpcode() == ISD::CopyToReg
) {
1191 MatchAddressBase(N
, AM
);
1199 if (!Done
&& MatchAddress(N
, AM
))
1202 MVT VT
= N
.getValueType();
1203 if (AM
.BaseType
== X86ISelAddressMode::RegBase
) {
1204 if (!AM
.Base
.Reg
.getNode())
1205 AM
.Base
.Reg
= CurDAG
->getRegister(0, VT
);
1208 if (!AM
.IndexReg
.getNode())
1209 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1211 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1215 /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
1216 /// match a load whose top elements are either undef or zeros. The load flavor
1217 /// is derived from the type of N, which is either v4f32 or v2f64.
1218 bool X86DAGToDAGISel::SelectScalarSSELoad(SDValue Op
, SDValue Pred
,
1219 SDValue N
, SDValue
&Base
,
1220 SDValue
&Scale
, SDValue
&Index
,
1221 SDValue
&Disp
, SDValue
&Segment
,
1223 SDValue
&OutChain
) {
1224 if (N
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
1225 InChain
= N
.getOperand(0).getValue(1);
1226 if (ISD::isNON_EXTLoad(InChain
.getNode()) &&
1227 InChain
.getValue(0).hasOneUse() &&
1229 IsLegalAndProfitableToFold(N
.getNode(), Pred
.getNode(), Op
.getNode())) {
1230 LoadSDNode
*LD
= cast
<LoadSDNode
>(InChain
);
1231 if (!SelectAddr(Op
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1233 OutChain
= LD
->getChain();
1238 // Also handle the case where we explicitly require zeros in the top
1239 // elements. This is a vector shuffle from the zero vector.
1240 if (N
.getOpcode() == X86ISD::VZEXT_MOVL
&& N
.getNode()->hasOneUse() &&
1241 // Check to see if the top elements are all zeros (or bitcast of zeros).
1242 N
.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR
&&
1243 N
.getOperand(0).getNode()->hasOneUse() &&
1244 ISD::isNON_EXTLoad(N
.getOperand(0).getOperand(0).getNode()) &&
1245 N
.getOperand(0).getOperand(0).hasOneUse()) {
1246 // Okay, this is a zero extending load. Fold it.
1247 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
.getOperand(0).getOperand(0));
1248 if (!SelectAddr(Op
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1250 OutChain
= LD
->getChain();
1251 InChain
= SDValue(LD
, 1);
1258 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1259 /// mode it matches can be cost effectively emitted as an LEA instruction.
1260 bool X86DAGToDAGISel::SelectLEAAddr(SDValue Op
, SDValue N
,
1261 SDValue
&Base
, SDValue
&Scale
,
1262 SDValue
&Index
, SDValue
&Disp
) {
1263 X86ISelAddressMode AM
;
1265 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1267 SDValue Copy
= AM
.Segment
;
1268 SDValue T
= CurDAG
->getRegister(0, MVT::i32
);
1270 if (MatchAddress(N
, AM
))
1272 assert (T
== AM
.Segment
);
1275 MVT VT
= N
.getValueType();
1276 unsigned Complexity
= 0;
1277 if (AM
.BaseType
== X86ISelAddressMode::RegBase
)
1278 if (AM
.Base
.Reg
.getNode())
1281 AM
.Base
.Reg
= CurDAG
->getRegister(0, VT
);
1282 else if (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
)
1285 if (AM
.IndexReg
.getNode())
1288 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1290 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1295 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1296 // to a LEA. This is determined with some expermentation but is by no means
1297 // optimal (especially for code size consideration). LEA is nice because of
1298 // its three-address nature. Tweak the cost function again when we can run
1299 // convertToThreeAddress() at register allocation time.
1300 if (AM
.hasSymbolicDisplacement()) {
1301 // For X86-64, we should always use lea to materialize RIP relative
1303 if (Subtarget
->is64Bit())
1309 if (AM
.Disp
&& (AM
.Base
.Reg
.getNode() || AM
.IndexReg
.getNode()))
1312 if (Complexity
> 2) {
1314 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1320 bool X86DAGToDAGISel::TryFoldLoad(SDValue P
, SDValue N
,
1321 SDValue
&Base
, SDValue
&Scale
,
1322 SDValue
&Index
, SDValue
&Disp
,
1324 if (ISD::isNON_EXTLoad(N
.getNode()) &&
1326 IsLegalAndProfitableToFold(N
.getNode(), P
.getNode(), P
.getNode()))
1327 return SelectAddr(P
, N
.getOperand(1), Base
, Scale
, Index
, Disp
, Segment
);
1331 /// getGlobalBaseReg - Return an SDNode that returns the value of
1332 /// the global base register. Output instructions required to
1333 /// initialize the global base register, if necessary.
1335 SDNode
*X86DAGToDAGISel::getGlobalBaseReg() {
1336 MachineFunction
*MF
= CurBB
->getParent();
1337 unsigned GlobalBaseReg
= TM
.getInstrInfo()->getGlobalBaseReg(MF
);
1338 return CurDAG
->getRegister(GlobalBaseReg
, TLI
.getPointerTy()).getNode();
1341 static SDNode
*FindCallStartFromCall(SDNode
*Node
) {
1342 if (Node
->getOpcode() == ISD::CALLSEQ_START
) return Node
;
1343 assert(Node
->getOperand(0).getValueType() == MVT::Other
&&
1344 "Node doesn't have a token chain argument!");
1345 return FindCallStartFromCall(Node
->getOperand(0).getNode());
1348 SDNode
*X86DAGToDAGISel::SelectAtomic64(SDNode
*Node
, unsigned Opc
) {
1349 SDValue Chain
= Node
->getOperand(0);
1350 SDValue In1
= Node
->getOperand(1);
1351 SDValue In2L
= Node
->getOperand(2);
1352 SDValue In2H
= Node
->getOperand(3);
1353 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1354 if (!SelectAddr(In1
, In1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1356 SDValue LSI
= Node
->getOperand(4); // MemOperand
1357 const SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, In2L
, In2H
, LSI
, Chain
};
1358 return CurDAG
->getTargetNode(Opc
, Node
->getDebugLoc(),
1359 MVT::i32
, MVT::i32
, MVT::Other
, Ops
,
1360 array_lengthof(Ops
));
1363 SDNode
*X86DAGToDAGISel::Select(SDValue N
) {
1364 SDNode
*Node
= N
.getNode();
1365 MVT NVT
= Node
->getValueType(0);
1367 unsigned Opcode
= Node
->getOpcode();
1368 DebugLoc dl
= Node
->getDebugLoc();
1371 DOUT
<< std::string(Indent
, ' ') << "Selecting: ";
1372 DEBUG(Node
->dump(CurDAG
));
1377 if (Node
->isMachineOpcode()) {
1379 DOUT
<< std::string(Indent
-2, ' ') << "== ";
1380 DEBUG(Node
->dump(CurDAG
));
1384 return NULL
; // Already selected.
1389 case X86ISD::GlobalBaseReg
:
1390 return getGlobalBaseReg();
1392 case X86ISD::ATOMOR64_DAG
:
1393 return SelectAtomic64(Node
, X86::ATOMOR6432
);
1394 case X86ISD::ATOMXOR64_DAG
:
1395 return SelectAtomic64(Node
, X86::ATOMXOR6432
);
1396 case X86ISD::ATOMADD64_DAG
:
1397 return SelectAtomic64(Node
, X86::ATOMADD6432
);
1398 case X86ISD::ATOMSUB64_DAG
:
1399 return SelectAtomic64(Node
, X86::ATOMSUB6432
);
1400 case X86ISD::ATOMNAND64_DAG
:
1401 return SelectAtomic64(Node
, X86::ATOMNAND6432
);
1402 case X86ISD::ATOMAND64_DAG
:
1403 return SelectAtomic64(Node
, X86::ATOMAND6432
);
1404 case X86ISD::ATOMSWAP64_DAG
:
1405 return SelectAtomic64(Node
, X86::ATOMSWAP6432
);
1407 case ISD::SMUL_LOHI
:
1408 case ISD::UMUL_LOHI
: {
1409 SDValue N0
= Node
->getOperand(0);
1410 SDValue N1
= Node
->getOperand(1);
1412 bool isSigned
= Opcode
== ISD::SMUL_LOHI
;
1414 switch (NVT
.getSimpleVT()) {
1415 default: assert(0 && "Unsupported VT!");
1416 case MVT::i8
: Opc
= X86::MUL8r
; MOpc
= X86::MUL8m
; break;
1417 case MVT::i16
: Opc
= X86::MUL16r
; MOpc
= X86::MUL16m
; break;
1418 case MVT::i32
: Opc
= X86::MUL32r
; MOpc
= X86::MUL32m
; break;
1419 case MVT::i64
: Opc
= X86::MUL64r
; MOpc
= X86::MUL64m
; break;
1422 switch (NVT
.getSimpleVT()) {
1423 default: assert(0 && "Unsupported VT!");
1424 case MVT::i8
: Opc
= X86::IMUL8r
; MOpc
= X86::IMUL8m
; break;
1425 case MVT::i16
: Opc
= X86::IMUL16r
; MOpc
= X86::IMUL16m
; break;
1426 case MVT::i32
: Opc
= X86::IMUL32r
; MOpc
= X86::IMUL32m
; break;
1427 case MVT::i64
: Opc
= X86::IMUL64r
; MOpc
= X86::IMUL64m
; break;
1430 unsigned LoReg
, HiReg
;
1431 switch (NVT
.getSimpleVT()) {
1432 default: assert(0 && "Unsupported VT!");
1433 case MVT::i8
: LoReg
= X86::AL
; HiReg
= X86::AH
; break;
1434 case MVT::i16
: LoReg
= X86::AX
; HiReg
= X86::DX
; break;
1435 case MVT::i32
: LoReg
= X86::EAX
; HiReg
= X86::EDX
; break;
1436 case MVT::i64
: LoReg
= X86::RAX
; HiReg
= X86::RDX
; break;
1439 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1440 bool foldedLoad
= TryFoldLoad(N
, N1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1441 // multiplty is commmutative
1443 foldedLoad
= TryFoldLoad(N
, N0
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1448 SDValue InFlag
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, LoReg
,
1449 N0
, SDValue()).getValue(1);
1452 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, N1
.getOperand(0),
1455 CurDAG
->getTargetNode(MOpc
, dl
, MVT::Other
, MVT::Flag
, Ops
,
1456 array_lengthof(Ops
));
1457 InFlag
= SDValue(CNode
, 1);
1458 // Update the chain.
1459 ReplaceUses(N1
.getValue(1), SDValue(CNode
, 0));
1462 SDValue(CurDAG
->getTargetNode(Opc
, dl
, MVT::Flag
, N1
, InFlag
), 0);
1465 // Copy the low half of the result, if it is needed.
1466 if (!N
.getValue(0).use_empty()) {
1467 SDValue Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1468 LoReg
, NVT
, InFlag
);
1469 InFlag
= Result
.getValue(2);
1470 ReplaceUses(N
.getValue(0), Result
);
1472 DOUT
<< std::string(Indent
-2, ' ') << "=> ";
1473 DEBUG(Result
.getNode()->dump(CurDAG
));
1477 // Copy the high half of the result, if it is needed.
1478 if (!N
.getValue(1).use_empty()) {
1480 if (HiReg
== X86::AH
&& Subtarget
->is64Bit()) {
1481 // Prevent use of AH in a REX instruction by referencing AX instead.
1482 // Shift it down 8 bits.
1483 Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1484 X86::AX
, MVT::i16
, InFlag
);
1485 InFlag
= Result
.getValue(2);
1486 Result
= SDValue(CurDAG
->getTargetNode(X86::SHR16ri
, dl
, MVT::i16
,
1488 CurDAG
->getTargetConstant(8, MVT::i8
)), 0);
1489 // Then truncate it down to i8.
1490 SDValue SRIdx
= CurDAG
->getTargetConstant(X86::SUBREG_8BIT
, MVT::i32
);
1491 Result
= SDValue(CurDAG
->getTargetNode(X86::EXTRACT_SUBREG
, dl
,
1492 MVT::i8
, Result
, SRIdx
), 0);
1494 Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1495 HiReg
, NVT
, InFlag
);
1496 InFlag
= Result
.getValue(2);
1498 ReplaceUses(N
.getValue(1), Result
);
1500 DOUT
<< std::string(Indent
-2, ' ') << "=> ";
1501 DEBUG(Result
.getNode()->dump(CurDAG
));
1514 case ISD::UDIVREM
: {
1515 SDValue N0
= Node
->getOperand(0);
1516 SDValue N1
= Node
->getOperand(1);
1518 bool isSigned
= Opcode
== ISD::SDIVREM
;
1520 switch (NVT
.getSimpleVT()) {
1521 default: assert(0 && "Unsupported VT!");
1522 case MVT::i8
: Opc
= X86::DIV8r
; MOpc
= X86::DIV8m
; break;
1523 case MVT::i16
: Opc
= X86::DIV16r
; MOpc
= X86::DIV16m
; break;
1524 case MVT::i32
: Opc
= X86::DIV32r
; MOpc
= X86::DIV32m
; break;
1525 case MVT::i64
: Opc
= X86::DIV64r
; MOpc
= X86::DIV64m
; break;
1528 switch (NVT
.getSimpleVT()) {
1529 default: assert(0 && "Unsupported VT!");
1530 case MVT::i8
: Opc
= X86::IDIV8r
; MOpc
= X86::IDIV8m
; break;
1531 case MVT::i16
: Opc
= X86::IDIV16r
; MOpc
= X86::IDIV16m
; break;
1532 case MVT::i32
: Opc
= X86::IDIV32r
; MOpc
= X86::IDIV32m
; break;
1533 case MVT::i64
: Opc
= X86::IDIV64r
; MOpc
= X86::IDIV64m
; break;
1536 unsigned LoReg
, HiReg
;
1537 unsigned ClrOpcode
, SExtOpcode
;
1538 switch (NVT
.getSimpleVT()) {
1539 default: assert(0 && "Unsupported VT!");
1541 LoReg
= X86::AL
; HiReg
= X86::AH
;
1543 SExtOpcode
= X86::CBW
;
1546 LoReg
= X86::AX
; HiReg
= X86::DX
;
1547 ClrOpcode
= X86::MOV16r0
;
1548 SExtOpcode
= X86::CWD
;
1551 LoReg
= X86::EAX
; HiReg
= X86::EDX
;
1552 ClrOpcode
= X86::MOV32r0
;
1553 SExtOpcode
= X86::CDQ
;
1556 LoReg
= X86::RAX
; HiReg
= X86::RDX
;
1557 ClrOpcode
= X86::MOV64r0
;
1558 SExtOpcode
= X86::CQO
;
1562 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1563 bool foldedLoad
= TryFoldLoad(N
, N1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1564 bool signBitIsZero
= CurDAG
->SignBitIsZero(N0
);
1567 if (NVT
== MVT::i8
&& (!isSigned
|| signBitIsZero
)) {
1568 // Special case for div8, just use a move with zero extension to AX to
1569 // clear the upper 8 bits (AH).
1570 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Move
, Chain
;
1571 if (TryFoldLoad(N
, N0
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
)) {
1572 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, N0
.getOperand(0) };
1574 SDValue(CurDAG
->getTargetNode(X86::MOVZX16rm8
, dl
, MVT::i16
,
1576 array_lengthof(Ops
)), 0);
1577 Chain
= Move
.getValue(1);
1578 ReplaceUses(N0
.getValue(1), Chain
);
1581 SDValue(CurDAG
->getTargetNode(X86::MOVZX16rr8
, dl
, MVT::i16
, N0
),0);
1582 Chain
= CurDAG
->getEntryNode();
1584 Chain
= CurDAG
->getCopyToReg(Chain
, dl
, X86::AX
, Move
, SDValue());
1585 InFlag
= Chain
.getValue(1);
1588 CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
,
1589 LoReg
, N0
, SDValue()).getValue(1);
1590 if (isSigned
&& !signBitIsZero
) {
1591 // Sign extend the low part into the high part.
1593 SDValue(CurDAG
->getTargetNode(SExtOpcode
, dl
, MVT::Flag
, InFlag
),0);
1595 // Zero out the high part, effectively zero extending the input.
1596 SDValue ClrNode
= SDValue(CurDAG
->getTargetNode(ClrOpcode
, dl
, NVT
),
1598 InFlag
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, HiReg
,
1599 ClrNode
, InFlag
).getValue(1);
1604 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, N1
.getOperand(0),
1607 CurDAG
->getTargetNode(MOpc
, dl
, MVT::Other
, MVT::Flag
, Ops
,
1608 array_lengthof(Ops
));
1609 InFlag
= SDValue(CNode
, 1);
1610 // Update the chain.
1611 ReplaceUses(N1
.getValue(1), SDValue(CNode
, 0));
1614 SDValue(CurDAG
->getTargetNode(Opc
, dl
, MVT::Flag
, N1
, InFlag
), 0);
1617 // Copy the division (low) result, if it is needed.
1618 if (!N
.getValue(0).use_empty()) {
1619 SDValue Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1620 LoReg
, NVT
, InFlag
);
1621 InFlag
= Result
.getValue(2);
1622 ReplaceUses(N
.getValue(0), Result
);
1624 DOUT
<< std::string(Indent
-2, ' ') << "=> ";
1625 DEBUG(Result
.getNode()->dump(CurDAG
));
1629 // Copy the remainder (high) result, if it is needed.
1630 if (!N
.getValue(1).use_empty()) {
1632 if (HiReg
== X86::AH
&& Subtarget
->is64Bit()) {
1633 // Prevent use of AH in a REX instruction by referencing AX instead.
1634 // Shift it down 8 bits.
1635 Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1636 X86::AX
, MVT::i16
, InFlag
);
1637 InFlag
= Result
.getValue(2);
1638 Result
= SDValue(CurDAG
->getTargetNode(X86::SHR16ri
, dl
, MVT::i16
,
1640 CurDAG
->getTargetConstant(8, MVT::i8
)),
1642 // Then truncate it down to i8.
1643 SDValue SRIdx
= CurDAG
->getTargetConstant(X86::SUBREG_8BIT
, MVT::i32
);
1644 Result
= SDValue(CurDAG
->getTargetNode(X86::EXTRACT_SUBREG
, dl
,
1645 MVT::i8
, Result
, SRIdx
), 0);
1647 Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1648 HiReg
, NVT
, InFlag
);
1649 InFlag
= Result
.getValue(2);
1651 ReplaceUses(N
.getValue(1), Result
);
1653 DOUT
<< std::string(Indent
-2, ' ') << "=> ";
1654 DEBUG(Result
.getNode()->dump(CurDAG
));
1666 case ISD::DECLARE
: {
1667 // Handle DECLARE nodes here because the second operand may have been
1668 // wrapped in X86ISD::Wrapper.
1669 SDValue Chain
= Node
->getOperand(0);
1670 SDValue N1
= Node
->getOperand(1);
1671 SDValue N2
= Node
->getOperand(2);
1672 FrameIndexSDNode
*FINode
= dyn_cast
<FrameIndexSDNode
>(N1
);
1674 // FIXME: We need to handle this for VLAs.
1676 ReplaceUses(N
.getValue(0), Chain
);
1680 if (N2
.getOpcode() == ISD::ADD
&&
1681 N2
.getOperand(0).getOpcode() == X86ISD::GlobalBaseReg
)
1682 N2
= N2
.getOperand(1);
1684 // If N2 is not Wrapper(decriptor) then the llvm.declare is mangled
1685 // somehow, just ignore it.
1686 if (N2
.getOpcode() != X86ISD::Wrapper
) {
1687 ReplaceUses(N
.getValue(0), Chain
);
1690 GlobalAddressSDNode
*GVNode
=
1691 dyn_cast
<GlobalAddressSDNode
>(N2
.getOperand(0));
1693 ReplaceUses(N
.getValue(0), Chain
);
1696 SDValue Tmp1
= CurDAG
->getTargetFrameIndex(FINode
->getIndex(),
1697 TLI
.getPointerTy());
1698 SDValue Tmp2
= CurDAG
->getTargetGlobalAddress(GVNode
->getGlobal(),
1699 TLI
.getPointerTy());
1700 SDValue Ops
[] = { Tmp1
, Tmp2
, Chain
};
1701 return CurDAG
->getTargetNode(TargetInstrInfo::DECLARE
, dl
,
1703 array_lengthof(Ops
));
1707 SDNode
*ResNode
= SelectCode(N
);
1710 DOUT
<< std::string(Indent
-2, ' ') << "=> ";
1711 if (ResNode
== NULL
|| ResNode
== N
.getNode())
1712 DEBUG(N
.getNode()->dump(CurDAG
));
1714 DEBUG(ResNode
->dump(CurDAG
));
1722 bool X86DAGToDAGISel::
1723 SelectInlineAsmMemoryOperand(const SDValue
&Op
, char ConstraintCode
,
1724 std::vector
<SDValue
> &OutOps
) {
1725 SDValue Op0
, Op1
, Op2
, Op3
, Op4
;
1726 switch (ConstraintCode
) {
1727 case 'o': // offsetable ??
1728 case 'v': // not offsetable ??
1729 default: return true;
1731 if (!SelectAddr(Op
, Op
, Op0
, Op1
, Op2
, Op3
, Op4
))
1736 OutOps
.push_back(Op0
);
1737 OutOps
.push_back(Op1
);
1738 OutOps
.push_back(Op2
);
1739 OutOps
.push_back(Op3
);
1740 OutOps
.push_back(Op4
);
1744 /// createX86ISelDag - This pass converts a legalized DAG into a
1745 /// X86-specific DAG, ready for instruction scheduling.
1747 FunctionPass
*llvm::createX86ISelDag(X86TargetMachine
&TM
,
1748 llvm::CodeGenOpt::Level OptLevel
) {
1749 return new X86DAGToDAGISel(TM
, OptLevel
);