//===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a DAG pattern matching instruction selector for X86,
// converting from a legalized dag to a X86 dag.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/CFG.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
//===----------------------------------------------------------------------===//
//                      Pattern Matcher Implementation
//===----------------------------------------------------------------------===//

namespace {
  /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
  /// SDValue's instead of register numbers for the leaves of the matched
  /// tree.
  struct X86ISelAddressMode {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    // This is really a union, discriminated by BaseType!
    SDValue Base_Reg;
    int Base_FrameIndex;

    unsigned Scale;
    SDValue IndexReg;
    int32_t Disp;
    SDValue Segment;
    const GlobalValue *GV;
    const Constant *CP;
    const BlockAddress *BlockAddr;
    const char *ES;
    int JT;
    unsigned Align;    // CP alignment.
    unsigned char SymbolFlags;  // X86II::MO_*

    X86ISelAddressMode()
      : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
        Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
        SymbolFlags(X86II::MO_NO_FLAG) {
    }

    bool hasSymbolicDisplacement() const {
      return GV != 0 || CP != 0 || ES != 0 || JT != -1 || BlockAddr != 0;
    }

    bool hasBaseOrIndexReg() const {
      return IndexReg.getNode() != 0 || Base_Reg.getNode() != 0;
    }

    /// isRIPRelative - Return true if this addressing mode is already RIP
    /// relative.
    bool isRIPRelative() const {
      if (BaseType != RegBase) return false;
      if (RegisterSDNode *RegNode =
            dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode()))
        return RegNode->getReg() == X86::RIP;
      return false;
    }

    void setBaseReg(SDValue Reg) {
      BaseType = RegBase;
      Base_Reg = Reg;
    }

    void dump() {
      dbgs() << "X86ISelAddressMode " << this << '\n';
      dbgs() << "Base_Reg ";
      if (Base_Reg.getNode() != 0)
        Base_Reg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'
             << " Scale" << Scale << '\n'
             << "IndexReg ";
      if (IndexReg.getNode() != 0)
        IndexReg.getNode()->dump();
      else
        dbgs() << "nul";
      dbgs() << " Disp " << Disp << '\n'
             << "GV ";
      if (GV)
        GV->dump();
      else
        dbgs() << "nul";
      dbgs() << " CP ";
      if (CP)
        CP->dump();
      else
        dbgs() << "nul";
      dbgs() << '\n'
             << "ES ";
      if (ES)
        dbgs() << ES;
      else
        dbgs() << "nul";
      dbgs() << " JT" << JT << " Align" << Align << '\n';
    }
  };
}
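
// For reference, an x86 memory operand has the general form
//   Segment:[Base + Scale*Index + Disp]
// so, for example, "movl 8(%ebx,%ecx,4), %eax" decomposes into Base = EBX,
// Scale = 4, Index = ECX, Disp = 8, with no segment override. The matching
// code below tries to fill in as many of these fields as possible from a
// SelectionDAG expression tree.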

namespace {
  //===--------------------------------------------------------------------===//
  /// ISel - X86 specific code to select X86 machine instructions for
  /// SelectionDAG operations.
  ///
  class X86DAGToDAGISel : public SelectionDAGISel {
    /// X86Lowering - This object fully describes how to lower LLVM code to an
    /// X86-specific SelectionDAG.
    const X86TargetLowering &X86Lowering;

    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget *Subtarget;

    /// OptForSize - If true, selector should try to optimize for code size
    /// instead of performance.
    bool OptForSize;

  public:
    explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel),
        X86Lowering(*tm.getTargetLowering()),
        Subtarget(&tm.getSubtarget<X86Subtarget>()),
        OptForSize(false) {}

    virtual const char *getPassName() const {
      return "X86 DAG->DAG Instruction Selection";
    }

    virtual void EmitFunctionEntryCode();

    virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;

    virtual void PreprocessISelDAG();

    inline bool immSext8(SDNode *N) const {
      return isInt<8>(cast<ConstantSDNode>(N)->getSExtValue());
    }

    // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
    // sign extended field.
    inline bool i64immSExt32(SDNode *N) const {
      uint64_t v = cast<ConstantSDNode>(N)->getZExtValue();
      return (int64_t)v == (int32_t)v;
    }

    // Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"

  private:
    SDNode *Select(SDNode *N);
    SDNode *SelectAtomic64(SDNode *Node, unsigned Opc);
    SDNode *SelectAtomicLoadAdd(SDNode *Node, EVT NVT);

    bool MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM);
    bool MatchWrapper(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddress(SDValue N, X86ISelAddressMode &AM);
    bool MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                 unsigned Depth);
    bool MatchAddressBase(SDValue N, X86ISelAddressMode &AM);
    bool SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                    SDValue &Scale, SDValue &Index, SDValue &Disp,
                    SDValue &Segment);
    bool SelectLEAAddr(SDValue N, SDValue &Base,
                       SDValue &Scale, SDValue &Index, SDValue &Disp,
                       SDValue &Segment);
    bool SelectTLSADDRAddr(SDValue N, SDValue &Base,
                           SDValue &Scale, SDValue &Index, SDValue &Disp,
                           SDValue &Segment);
    bool SelectScalarSSELoad(SDNode *Root, SDValue N,
                             SDValue &Base, SDValue &Scale,
                             SDValue &Index, SDValue &Disp,
                             SDValue &Segment,
                             SDValue &NodeWithChain);

    bool TryFoldLoad(SDNode *P, SDValue N,
                     SDValue &Base, SDValue &Scale,
                     SDValue &Index, SDValue &Disp,
                     SDValue &Segment);

    /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
    /// inline asm expressions.
    virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                              char ConstraintCode,
                                              std::vector<SDValue> &OutOps);

    void EmitSpecialCodeForMain(MachineBasicBlock *BB, MachineFrameInfo *MFI);

    inline void getAddressOperands(X86ISelAddressMode &AM, SDValue &Base,
                                   SDValue &Scale, SDValue &Index,
                                   SDValue &Disp, SDValue &Segment) {
      Base  = (AM.BaseType == X86ISelAddressMode::FrameIndexBase) ?
        CurDAG->getTargetFrameIndex(AM.Base_FrameIndex, TLI.getPointerTy()) :
        AM.Base_Reg;
      Scale = getI8Imm(AM.Scale);
      Index = AM.IndexReg;
      // These are 32-bit even in 64-bit mode since RIP relative offset
      // is 32-bit.
      if (AM.GV)
        Disp = CurDAG->getTargetGlobalAddress(AM.GV, DebugLoc(),
                                              MVT::i32, AM.Disp,
                                              AM.SymbolFlags);
      else if (AM.CP)
        Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32,
                                             AM.Align, AM.Disp, AM.SymbolFlags);
      else if (AM.ES)
        Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags);
      else if (AM.JT != -1)
        Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags);
      else if (AM.BlockAddr)
        Disp = CurDAG->getBlockAddress(AM.BlockAddr, MVT::i32,
                                       true, AM.SymbolFlags);
      else
        Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i32);

      if (AM.Segment.getNode())
        Segment = AM.Segment;
      else
        Segment = CurDAG->getRegister(0, MVT::i32);
    }

    /// getI8Imm - Return a target constant with the specified value, of type
    /// i8.
    inline SDValue getI8Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i8);
    }

    /// getI32Imm - Return a target constant with the specified value, of type
    /// i32.
    inline SDValue getI32Imm(unsigned Imm) {
      return CurDAG->getTargetConstant(Imm, MVT::i32);
    }

    /// getGlobalBaseReg - Return an SDNode that returns the value of
    /// the global base register. Output instructions required to
    /// initialize the global base register, if necessary.
    ///
    SDNode *getGlobalBaseReg();

    /// getTargetMachine - Return a reference to the TargetMachine, casted
    /// to the target-specific type.
    const X86TargetMachine &getTargetMachine() {
      return static_cast<const X86TargetMachine &>(TM);
    }

    /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
    /// to the target-specific type.
    const X86InstrInfo *getInstrInfo() {
      return getTargetMachine().getInstrInfo();
    }
  };
}

bool
X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
  if (OptLevel == CodeGenOpt::None) return false;

  if (!N.hasOneUse())
    return false;

  if (N.getOpcode() != ISD::LOAD)
    return true;

  // If N is a load, do additional profitability checks.
  if (U == Root) {
    switch (U->getOpcode()) {
    default: break;
    case X86ISD::ADD:
    case X86ISD::SUB:
    case X86ISD::AND:
    case X86ISD::XOR:
    case X86ISD::OR:
    case ISD::ADD:
    case ISD::ADDC:
    case ISD::ADDE:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR: {
      SDValue Op1 = U->getOperand(1);

      // If the other operand is a 8-bit immediate we should fold the immediate
      // instead. This reduces code size.
      // e.g.
      // movl 4(%esp), %eax
      // addl $4, %eax
      // vs.
      // movl $4, %eax
      // addl 4(%esp), %eax
      // The former is 2 bytes shorter. In case where the increment is 1, then
      // the saving can be 4 bytes (by using incl %eax).
      if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1))
        if (Imm->getAPIntValue().isSignedIntN(8))
          return false;

      // If the other operand is a TLS address, we should fold it instead.
      // This produces
      // movl    %gs:0, %eax
      // leal    i@NTPOFF(%eax), %eax
      // instead of
      // movl    $i@NTPOFF, %eax
      // addl    %gs:0, %eax
      // if the block also has an access to a second TLS address this will save
      // a load.
      // FIXME: This is probably also true for non TLS addresses.
      if (Op1.getOpcode() == X86ISD::Wrapper) {
        SDValue Val = Op1.getOperand(0);
        if (Val.getOpcode() == ISD::TargetGlobalTLSAddress)
          return false;
      }
    }
    }
  }

  return true;
}

/// MoveBelowOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
                               SDValue Call, SDValue OrigChain) {
  SmallVector<SDValue, 8> Ops;
  SDValue Chain = OrigChain.getOperand(0);
  if (Chain.getNode() == Load.getNode())
    Ops.push_back(Load.getOperand(0));
  else {
    assert(Chain.getOpcode() == ISD::TokenFactor &&
           "Unexpected chain operand");
    for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
      if (Chain.getOperand(i).getNode() == Load.getNode())
        Ops.push_back(Load.getOperand(0));
      else
        Ops.push_back(Chain.getOperand(i));
    SDValue NewChain =
      CurDAG->getNode(ISD::TokenFactor, Load.getDebugLoc(),
                      MVT::Other, &Ops[0], Ops.size());
    Ops.clear();
    Ops.push_back(NewChain);
  }
  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
    Ops.push_back(OrigChain.getOperand(i));
  CurDAG->UpdateNodeOperands(OrigChain.getNode(), &Ops[0], Ops.size());
  CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                             Load.getOperand(1), Load.getOperand(2));
  Ops.clear();
  Ops.push_back(SDValue(Load.getNode(), 1));
  for (unsigned i = 1, e = Call.getNode()->getNumOperands(); i != e; ++i)
    Ops.push_back(Call.getOperand(i));
  CurDAG->UpdateNodeOperands(Call.getNode(), &Ops[0], Ops.size());
}

/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
  if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
    return false;
  LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
  if (!LD ||
      LD->isVolatile() ||
      LD->getAddressingMode() != ISD::UNINDEXED ||
      LD->getExtensionType() != ISD::NON_EXTLOAD)
    return false;

  // Now let's find the callseq_start.
  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
    if (!Chain.hasOneUse())
      return false;
    Chain = Chain.getOperand(0);
  }

  if (!Chain.getNumOperands())
    return false;
  if (Chain.getOperand(0).getNode() == Callee.getNode())
    return true;
  if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
      Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) &&
      Callee.getValue(1).hasOneUse())
    return true;
  return false;
}

void X86DAGToDAGISel::PreprocessISelDAG() {
  // OptForSize is used in pattern predicates that isel is matching.
  OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);

  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
       E = CurDAG->allnodes_end(); I != E; ) {
    SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.

    if (OptLevel != CodeGenOpt::None &&
        (N->getOpcode() == X86ISD::CALL ||
         N->getOpcode() == X86ISD::TC_RETURN)) {
      /// Also try moving call address load from outside callseq_start to just
      /// before the call to allow it to be folded.
      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
      SDValue Chain = N->getOperand(0);
      SDValue Load  = N->getOperand(1);
      if (!isCalleeLoad(Load, Chain, HasCallSeq))
        continue;
      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
      ++NumLoadMoved;
      continue;
    }

    // Lower fpround and fpextend nodes that target the FP stack to be store and
    // load to the stack.  This is a gross hack.  We would like to simply mark
    // these as being illegal, but when we do that, legalize produces these when
    // it expands calls, then expands these in the same legalize pass.  We would
    // like dag combine to be able to hack on these between the call expansion
    // and the node legalization.  As such this pass basically does "really
    // late" legalization of these inline with the X86 isel pass.
    // FIXME: This should only happen when not compiled with -O0.
    if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
      continue;

    // If the source and destination are SSE registers, then this is a legal
    // conversion that should not be lowered.
    EVT SrcVT = N->getOperand(0).getValueType();
    EVT DstVT = N->getValueType(0);
    bool SrcIsSSE = X86Lowering.isScalarFPTypeInSSEReg(SrcVT);
    bool DstIsSSE = X86Lowering.isScalarFPTypeInSSEReg(DstVT);
    if (SrcIsSSE && DstIsSSE)
      continue;

    if (!SrcIsSSE && !DstIsSSE) {
      // If this is an FPStack extension, it is a noop.
      if (N->getOpcode() == ISD::FP_EXTEND)
        continue;
      // If this is a value-preserving FPStack truncation, it is a noop.
      if (N->getConstantOperandVal(1))
        continue;
    }

    // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
    // FPStack has extload and truncstore.  SSE can fold direct loads into other
    // operations.  Based on this, decide what we want to do.
    EVT MemVT;
    if (N->getOpcode() == ISD::FP_ROUND)
      MemVT = DstVT;  // FP_ROUND must use DstVT, we can't do a 'trunc load'.
    else
      MemVT = SrcIsSSE ? SrcVT : DstVT;

    SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT);
    DebugLoc dl = N->getDebugLoc();

    // FIXME: optimize the case where the src/dest is a load or store?
    SDValue Store = CurDAG->getTruncStore(CurDAG->getEntryNode(), dl,
                                          N->getOperand(0),
                                          MemTmp, MachinePointerInfo(), MemVT,
                                          false, false, 0);
    SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, DstVT, dl, Store, MemTmp,
                                        MachinePointerInfo(),
                                        MemVT, false, false, 0);

    // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
    // extload we created.  This will cause general havok on the dag because
    // anything below the conversion could be folded into other existing nodes.
    // To avoid invalidating 'I', back it up to the convert node.
    --I;
    CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);

    // Now that we did that, the node is dead.  Increment the iterator to the
    // next node to process, then delete N.
    ++I;
    CurDAG->DeleteNode(N);
  }
}

/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock *BB,
                                             MachineFrameInfo *MFI) {
  const TargetInstrInfo *TII = TM.getInstrInfo();
  if (Subtarget->isTargetCygMing())
    BuildMI(BB, DebugLoc(),
            TII->get(X86::CALLpcrel32)).addExternalSymbol("__main");
}

void X86DAGToDAGISel::EmitFunctionEntryCode() {
  // If this is main, emit special code for main.
  if (const Function *Fn = MF->getFunction())
    if (Fn->hasExternalLinkage() && Fn->getName() == "main")
      EmitSpecialCodeForMain(MF->begin(), MF->getFrameInfo());
}

bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM){
  SDValue Address = N->getOperand(1);

  // load gs:0 -> GS segment register.
  // load fs:0 -> FS segment register.
  //
  // This optimization is valid because the GNU TLS model defines that
  // gs:0 (or fs:0 on X86-64) contains its own address.
  // For more information see http://people.redhat.com/drepper/tls.pdf
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address))
    if (C->getSExtValue() == 0 && AM.Segment.getNode() == 0 &&
        Subtarget->isTargetELF())
      switch (N->getPointerInfo().getAddrSpace()) {
      case 256:
        AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
        return false;
      case 257:
        AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
        return false;
      }

  return true;
}

/// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
/// into an addressing mode.  These wrap things that will resolve down into a
/// symbol reference.  If no match is possible, this returns true, otherwise it
/// returns false.
bool X86DAGToDAGISel::MatchWrapper(SDValue N, X86ISelAddressMode &AM) {
  // If the addressing mode already has a symbol as the displacement, we can
  // never match another symbol.
  if (AM.hasSymbolicDisplacement())
    return true;

  SDValue N0 = N.getOperand(0);
  CodeModel::Model M = TM.getCodeModel();

  // Handle X86-64 rip-relative addresses.  We check this before checking direct
  // folding because RIP is preferable to non-RIP accesses.
  if (Subtarget->is64Bit() &&
      // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
      // they cannot be folded into immediate fields.
      // FIXME: This can be improved for kernel and other models?
      (M == CodeModel::Small || M == CodeModel::Kernel) &&
      // Base and index reg must be 0 in order to use %rip as base and lowering
      // must allow RIP.
      !AM.hasBaseOrIndexReg() && N.getOpcode() == X86ISD::WrapperRIP) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      int64_t Offset = AM.Disp + G->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.GV = G->getGlobal();
      AM.Disp = Offset;
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      int64_t Offset = AM.Disp + CP->getOffset();
      if (!X86::isOffsetSuitableForCodeModel(Offset, M)) return true;
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp = Offset;
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }

    if (N.getOpcode() == X86ISD::WrapperRIP)
      AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64));
    return false;
  }

  // Handle the case when globals fit in our immediate field: This is true for
  // X86-32 always and X86-64 when in -static -mcmodel=small mode.  In 64-bit
  // mode, this results in a non-RIP-relative computation.
  if (!Subtarget->is64Bit() ||
      ((M == CodeModel::Small || M == CodeModel::Kernel) &&
       TM.getRelocationModel() == Reloc::Static)) {
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
      AM.GV = G->getGlobal();
      AM.Disp += G->getOffset();
      AM.SymbolFlags = G->getTargetFlags();
    } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
      AM.CP = CP->getConstVal();
      AM.Align = CP->getAlignment();
      AM.Disp += CP->getOffset();
      AM.SymbolFlags = CP->getTargetFlags();
    } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
      AM.ES = S->getSymbol();
      AM.SymbolFlags = S->getTargetFlags();
    } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
      AM.JT = J->getIndex();
      AM.SymbolFlags = J->getTargetFlags();
    } else {
      AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
      AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
    }
    return false;
  }

  return true;
}

/// MatchAddress - Add the specified node to the specified addressing mode,
/// returning true if it cannot be done.  This just pattern matches for the
/// addressing mode.
bool X86DAGToDAGISel::MatchAddress(SDValue N, X86ISelAddressMode &AM) {
  if (MatchAddressRecursively(N, AM, 0))
    return true;

  // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
  // a smaller encoding and avoids a scaled-index.
  if (AM.Scale == 2 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == 0) {
    AM.Base_Reg = AM.IndexReg;
    AM.Scale = 1;
  }

  // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
  // because it has a smaller encoding.
  // TODO: Which other code models can use this?
  if (TM.getCodeModel() == CodeModel::Small &&
      Subtarget->is64Bit() &&
      AM.Scale == 1 &&
      AM.BaseType == X86ISelAddressMode::RegBase &&
      AM.Base_Reg.getNode() == 0 &&
      AM.IndexReg.getNode() == 0 &&
      AM.SymbolFlags == X86II::MO_NO_FLAG &&
      AM.hasSymbolicDisplacement())
    AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64);

  return false;
}

/// isLogicallyAddWithConstant - Return true if this node is semantically an
/// add of a value with a constantint.
static bool isLogicallyAddWithConstant(SDValue V, SelectionDAG *CurDAG) {
  // Check for (add x, Cst)
  if (V->getOpcode() == ISD::ADD)
    return isa<ConstantSDNode>(V->getOperand(1));

  // Check for (or x, Cst), where Cst & x == 0.
  if (V->getOpcode() != ISD::OR ||
      !isa<ConstantSDNode>(V->getOperand(1)))
    return false;

  // Handle "X | C" as "X + C" iff X is known to have C bits clear.
  ConstantSDNode *CN = cast<ConstantSDNode>(V->getOperand(1));

  // Check to see if the LHS & C is zero.
  return CurDAG->MaskedValueIsZero(V->getOperand(0), CN->getAPIntValue());
}
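
// For example, in (or (shl %x, 3), 5) the shift leaves the low three bits
// zero, so the OR behaves exactly like an ADD and the constant can be
// folded into the displacement, yielding an operand like 5(,%x,8).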

bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
                                              unsigned Depth) {
  bool is64Bit = Subtarget->is64Bit();
  DebugLoc dl = N.getDebugLoc();
  DEBUG({
      dbgs() << "MatchAddress: ";
      AM.dump();
    });
  // Limit recursion.
  if (Depth > 5)
    return MatchAddressBase(N, AM);

  CodeModel::Model M = TM.getCodeModel();

  // If this is already a %rip relative address, we can only merge immediates
  // into it.  Instead of handling this in every case, we handle it here.
  // RIP relative addressing: %rip + 32-bit displacement!
  if (AM.isRIPRelative()) {
    // FIXME: JumpTable and ExternalSymbol address currently don't like
    // displacements.  It isn't very important, but this should be fixed for
    // consistency.
    if (!AM.ES && AM.JT != -1) return true;

    if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) {
      int64_t Val = AM.Disp + Cst->getSExtValue();
      if (X86::isOffsetSuitableForCodeModel(Val, M,
                                            AM.hasSymbolicDisplacement())) {
        AM.Disp = Val;
        return false;
      }
    }
    return true;
  }

  switch (N.getOpcode()) {
  default: break;
  case ISD::Constant: {
    uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
    if (!is64Bit ||
        X86::isOffsetSuitableForCodeModel(AM.Disp + Val, M,
                                          AM.hasSymbolicDisplacement())) {
      AM.Disp += Val;
      return false;
    }
    break;
  }

  case X86ISD::Wrapper:
  case X86ISD::WrapperRIP:
    if (!MatchWrapper(N, AM))
      return false;
    break;

  case ISD::LOAD:
    if (!MatchLoadInAddress(cast<LoadSDNode>(N), AM))
      return false;
    break;

  case ISD::FrameIndex:
    if (AM.BaseType == X86ISelAddressMode::RegBase
        && AM.Base_Reg.getNode() == 0) {
      AM.BaseType = X86ISelAddressMode::FrameIndexBase;
      AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
      return false;
    }
    break;

  case ISD::SHL:
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1)
      break;

    if (ConstantSDNode
          *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1))) {
      unsigned Val = CN->getZExtValue();
      // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
      // that the base operand remains free for further matching. If
      // the base doesn't end up getting used, a post-processing step
      // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
      if (Val == 1 || Val == 2 || Val == 3) {
        AM.Scale = 1 << Val;
        SDValue ShVal = N.getNode()->getOperand(0);

        // Okay, we know that we have a scale by now.  However, if the scaled
        // value is an add of something and a constant, we can fold the
        // constant into the disp field here.
        if (isLogicallyAddWithConstant(ShVal, CurDAG)) {
          AM.IndexReg = ShVal.getNode()->getOperand(0);
          ConstantSDNode *AddVal =
            cast<ConstantSDNode>(ShVal.getNode()->getOperand(1));
          uint64_t Disp = AM.Disp + (AddVal->getSExtValue() << Val);
          if (!is64Bit ||
              X86::isOffsetSuitableForCodeModel(Disp, M,
                                                AM.hasSymbolicDisplacement()))
            AM.Disp = Disp;
          else
            AM.IndexReg = ShVal;
        } else {
          AM.IndexReg = ShVal;
        }
        return false;
      }
    }
    break;

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:
    // A mul_lohi where we need the low part can be folded as a plain multiply.
    if (N.getResNo() != 0) break;
    // FALL THROUGH
  case ISD::MUL:
  case X86ISD::MUL_IMM:
    // X*[3,5,9] -> X+X*[2,4,8]
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        AM.Base_Reg.getNode() == 0 &&
        AM.IndexReg.getNode() == 0) {
      if (ConstantSDNode
            *CN = dyn_cast<ConstantSDNode>(N.getNode()->getOperand(1)))
        if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 ||
            CN->getZExtValue() == 9) {
          AM.Scale = unsigned(CN->getZExtValue())-1;

          SDValue MulVal = N.getNode()->getOperand(0);
          SDValue Reg;

          // Okay, we know that we have a scale by now.  However, if the scaled
          // value is an add of something and a constant, we can fold the
          // constant into the disp field here.
          if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() &&
              isa<ConstantSDNode>(MulVal.getNode()->getOperand(1))) {
            Reg = MulVal.getNode()->getOperand(0);
            ConstantSDNode *AddVal =
              cast<ConstantSDNode>(MulVal.getNode()->getOperand(1));
            uint64_t Disp = AM.Disp + AddVal->getSExtValue() *
                                      CN->getZExtValue();
            if (!is64Bit ||
                X86::isOffsetSuitableForCodeModel(Disp, M,
                                                  AM.hasSymbolicDisplacement()))
              AM.Disp = Disp;
            else
              Reg = N.getNode()->getOperand(0);
          } else {
            Reg = N.getNode()->getOperand(0);
          }

          AM.IndexReg = AM.Base_Reg = Reg;
          return false;
        }
    }
    break;
  case ISD::SUB: {
    // Given A-B, if A can be completely folded into the address and
    // the index field with the index field unused, use -B as the index.
    // This is a win if A has multiple parts that can be folded into
    // the address. Also, this saves a mov if the base register has
    // other uses, since it avoids a two-address sub instruction, however
    // it costs an additional mov if the index register has other uses.

    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);

    // Test if the LHS of the sub can be folded.
    X86ISelAddressMode Backup = AM;
    if (MatchAddressRecursively(N.getNode()->getOperand(0), AM, Depth+1)) {
      AM = Backup;
      break;
    }
    // Test if the index field is free for use.
    if (AM.IndexReg.getNode() || AM.isRIPRelative()) {
      AM = Backup;
      break;
    }

    int Cost = 0;
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);
    // If the RHS involves a register with multiple uses, this
    // transformation incurs an extra mov, due to the neg instruction
    // clobbering its operand.
    if (!RHS.getNode()->hasOneUse() ||
        RHS.getNode()->getOpcode() == ISD::CopyFromReg ||
        RHS.getNode()->getOpcode() == ISD::TRUNCATE ||
        RHS.getNode()->getOpcode() == ISD::ANY_EXTEND ||
        (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND &&
         RHS.getNode()->getOperand(0).getValueType() == MVT::i32))
      ++Cost;
    // If the base is a register with multiple uses, this
    // transformation may save a mov.
    if ((AM.BaseType == X86ISelAddressMode::RegBase &&
         AM.Base_Reg.getNode() &&
         !AM.Base_Reg.getNode()->hasOneUse()) ||
        AM.BaseType == X86ISelAddressMode::FrameIndexBase)
      --Cost;
    // If the folded LHS was interesting, this transformation saves
    // address arithmetic.
    if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) +
        ((AM.Disp != 0) && (Backup.Disp == 0)) +
        (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2)
      --Cost;
    // If it doesn't look like it may be an overall win, don't do it.
    if (Cost >= 0) {
      AM = Backup;
      break;
    }

    // Ok, the transformation is legal and appears profitable. Go for it.
    SDValue Zero = CurDAG->getConstant(0, N.getValueType());
    SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS);
    AM.IndexReg = Neg;
    AM.Scale = 1;

    // Insert the new nodes into the topological ordering.
    if (Zero.getNode()->getNodeId() == -1 ||
        Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Zero.getNode());
      Zero.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    if (Neg.getNode()->getNodeId() == -1 ||
        Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), Neg.getNode());
      Neg.getNode()->setNodeId(N.getNode()->getNodeId());
    }
    return false;
  }
  case ISD::ADD: {
    // Add an artificial use to this node so that we can keep track of
    // it if it gets CSE'd with a different node.
    HandleSDNode Handle(N);
    SDValue LHS = Handle.getValue().getNode()->getOperand(0);
    SDValue RHS = Handle.getValue().getNode()->getOperand(1);

    X86ISelAddressMode Backup = AM;
    if (!MatchAddressRecursively(LHS, AM, Depth+1) &&
        !MatchAddressRecursively(RHS, AM, Depth+1))
      return false;
    AM = Backup;
    LHS = Handle.getValue().getNode()->getOperand(0);
    RHS = Handle.getValue().getNode()->getOperand(1);

    // Try again after commuting the operands.
    if (!MatchAddressRecursively(RHS, AM, Depth+1) &&
        !MatchAddressRecursively(LHS, AM, Depth+1))
      return false;
    AM = Backup;
    LHS = Handle.getValue().getNode()->getOperand(0);
    RHS = Handle.getValue().getNode()->getOperand(1);

    // If we couldn't fold both operands into the address at the same time,
    // see if we can just put each operand into a register and fold at least
    // the add.
    if (AM.BaseType == X86ISelAddressMode::RegBase &&
        !AM.Base_Reg.getNode() &&
        !AM.IndexReg.getNode()) {
      AM.Base_Reg = LHS;
      AM.IndexReg = RHS;
      AM.Scale = 1;
      return false;
    }
    break;
  }
  case ISD::OR:
    // Handle "X | C" as "X + C" iff X is known to have C bits clear.
    if (isLogicallyAddWithConstant(N, CurDAG)) {
      X86ISelAddressMode Backup = AM;
      ConstantSDNode *CN = cast<ConstantSDNode>(N.getOperand(1));
      uint64_t Offset = CN->getSExtValue();

      // Start with the LHS as an addr mode.
      if (!MatchAddressRecursively(N.getOperand(0), AM, Depth+1) &&
          // Address could not have picked a GV address for the displacement.
          AM.GV == NULL &&
          // On x86-64, the resultant disp must fit in 32-bits.
          (!is64Bit ||
           X86::isOffsetSuitableForCodeModel(AM.Disp + Offset, M,
                                             AM.hasSymbolicDisplacement()))) {
        AM.Disp += Offset;
        return false;
      }
      AM = Backup;
    }
    break;
  case ISD::AND: {
    // Perform some heroic transforms on an and of a constant-count shift
    // with a constant to enable use of the scaled offset field.

    SDValue Shift = N.getOperand(0);
    if (Shift.getNumOperands() != 2) break;

    // Scale must not be used already.
    if (AM.IndexReg.getNode() != 0 || AM.Scale != 1) break;

    SDValue X = Shift.getOperand(0);
    ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N.getOperand(1));
    ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Shift.getOperand(1));
    if (!C1 || !C2) break;

    // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff" if safe. This
    // allows us to convert the shift and and into an h-register extract and
    // a scaled index.
    if (Shift.getOpcode() == ISD::SRL && Shift.hasOneUse()) {
      unsigned ScaleLog = 8 - C1->getZExtValue();
      if (ScaleLog > 0 && ScaleLog < 4 &&
          C2->getZExtValue() == (UINT64_C(0xff) << ScaleLog)) {
        SDValue Eight = CurDAG->getConstant(8, MVT::i8);
        SDValue Mask = CurDAG->getConstant(0xff, N.getValueType());
        SDValue Srl = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                      X, Eight);
        SDValue And = CurDAG->getNode(ISD::AND, dl, N.getValueType(),
                                      Srl, Mask);
        SDValue ShlCount = CurDAG->getConstant(ScaleLog, MVT::i8);
        SDValue Shl = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                      And, ShlCount);

        // Insert the new nodes into the topological ordering.
        if (Eight.getNode()->getNodeId() == -1 ||
            Eight.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Eight.getNode());
          Eight.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Mask.getNode()->getNodeId() == -1 ||
            Mask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), Mask.getNode());
          Mask.getNode()->setNodeId(X.getNode()->getNodeId());
        }
        if (Srl.getNode()->getNodeId() == -1 ||
            Srl.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
          CurDAG->RepositionNode(Shift.getNode(), Srl.getNode());
          Srl.getNode()->setNodeId(Shift.getNode()->getNodeId());
        }
        if (And.getNode()->getNodeId() == -1 ||
            And.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), And.getNode());
          And.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (ShlCount.getNode()->getNodeId() == -1 ||
            ShlCount.getNode()->getNodeId() > X.getNode()->getNodeId()) {
          CurDAG->RepositionNode(X.getNode(), ShlCount.getNode());
          ShlCount.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        if (Shl.getNode()->getNodeId() == -1 ||
            Shl.getNode()->getNodeId() > N.getNode()->getNodeId()) {
          CurDAG->RepositionNode(N.getNode(), Shl.getNode());
          Shl.getNode()->setNodeId(N.getNode()->getNodeId());
        }
        CurDAG->ReplaceAllUsesWith(N, Shl);
        AM.IndexReg = And;
        AM.Scale = (1 << ScaleLog);
        return false;
      }
    }

    // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
    // allows us to fold the shift into this addressing mode.
    if (Shift.getOpcode() != ISD::SHL) break;

    // Not likely to be profitable if either the AND or SHIFT node has more
    // than one use (unless all uses are for address computation). Besides,
    // isel mechanism requires their node ids to be reused.
    if (!N.hasOneUse() || !Shift.hasOneUse())
      break;

    // Verify that the shift amount is something we can fold.
    unsigned ShiftCst = C1->getZExtValue();
    if (ShiftCst != 1 && ShiftCst != 2 && ShiftCst != 3)
      break;

    // Get the new AND mask, this folds to a constant.
    SDValue NewANDMask = CurDAG->getNode(ISD::SRL, dl, N.getValueType(),
                                         SDValue(C2, 0), SDValue(C1, 0));
    SDValue NewAND = CurDAG->getNode(ISD::AND, dl, N.getValueType(), X,
                                     NewANDMask);
    SDValue NewSHIFT = CurDAG->getNode(ISD::SHL, dl, N.getValueType(),
                                       NewAND, SDValue(C1, 0));

    // Insert the new nodes into the topological ordering.
    if (C1->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), C1);
      C1->setNodeId(X.getNode()->getNodeId());
    }
    if (NewANDMask.getNode()->getNodeId() == -1 ||
        NewANDMask.getNode()->getNodeId() > X.getNode()->getNodeId()) {
      CurDAG->RepositionNode(X.getNode(), NewANDMask.getNode());
      NewANDMask.getNode()->setNodeId(X.getNode()->getNodeId());
    }
    if (NewAND.getNode()->getNodeId() == -1 ||
        NewAND.getNode()->getNodeId() > Shift.getNode()->getNodeId()) {
      CurDAG->RepositionNode(Shift.getNode(), NewAND.getNode());
      NewAND.getNode()->setNodeId(Shift.getNode()->getNodeId());
    }
    if (NewSHIFT.getNode()->getNodeId() == -1 ||
        NewSHIFT.getNode()->getNodeId() > N.getNode()->getNodeId()) {
      CurDAG->RepositionNode(N.getNode(), NewSHIFT.getNode());
      NewSHIFT.getNode()->setNodeId(N.getNode()->getNodeId());
    }

    CurDAG->ReplaceAllUsesWith(N, NewSHIFT);

    AM.Scale = 1 << ShiftCst;
    AM.IndexReg = NewAND;
    return false;
  }
  }

  return MatchAddressBase(N, AM);
}
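
// To illustrate the SRL transform in the AND case above: with C1 == 6 the
// mask C2 must equal 0xff << 2 == 0x3fc, and "(x >> 6) & 0x3fc" is rewritten
// as "((x >> 8) & 0xff) << 2", i.e. a byte extract (an h-register on x86)
// usable as an index register with scale 4.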

/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
/// specified addressing mode without any further recursion.
bool X86DAGToDAGISel::MatchAddressBase(SDValue N, X86ISelAddressMode &AM) {
  // Is the base register already occupied?
  if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) {
    // If so, check to see if the scale index register is set.
    if (AM.IndexReg.getNode() == 0) {
      AM.IndexReg = N;
      AM.Scale = 1;
      return false;
    }

    // Otherwise, we cannot select it.
    return true;
  }

  // Default, generate it as a register.
  AM.BaseType = X86ISelAddressMode::RegBase;
  AM.Base_Reg = N;
  return false;
}

/// SelectAddr - returns true if it is able to pattern match an addressing
/// mode. It returns the operands which make up the maximal addressing mode it
/// can match by reference.
///
/// Parent is the parent node of the addr operand that is being matched.  It
/// is always a load, store, atomic node, or null.  It is only null when
/// checking memory operands for inline asm nodes.
bool X86DAGToDAGISel::SelectAddr(SDNode *Parent, SDValue N, SDValue &Base,
                                 SDValue &Scale, SDValue &Index,
                                 SDValue &Disp, SDValue &Segment) {
  X86ISelAddressMode AM;

  if (Parent &&
      // This list of opcodes are all the nodes that have an "addr:$ptr" operand
      // that are not a MemSDNode, and thus don't have proper addrspace info.
      Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && // unaligned loads, fixme
      Parent->getOpcode() != ISD::INTRINSIC_VOID && // nontemporal stores
      Parent->getOpcode() != X86ISD::TLSCALL) { // Fixme
    unsigned AddrSpace =
      cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace();
    // AddrSpace 256 -> GS, 257 -> FS.
    if (AddrSpace == 256)
      AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16);
    if (AddrSpace == 257)
      AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16);
  }

  if (MatchAddress(N, AM))
    return false;

  EVT VT = N.getValueType();
  if (AM.BaseType == X86ISelAddressMode::RegBase) {
    if (!AM.Base_Reg.getNode())
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  }

  if (!AM.IndexReg.getNode())
    AM.IndexReg = CurDAG->getRegister(0, VT);

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectScalarSSELoad - Match a scalar SSE load.  In particular, we want to
/// match a load whose top elements are either undef or zeros.  The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
///
/// We also return:
///   PatternChainNode: this is the matched node that has a chain input and
///   chain output.
bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
                                          SDValue N, SDValue &Base,
                                          SDValue &Scale, SDValue &Index,
                                          SDValue &Disp, SDValue &Segment,
                                          SDValue &PatternNodeWithChain) {
  if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    PatternNodeWithChain = N.getOperand(0);
    if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
        PatternNodeWithChain.hasOneUse() &&
        IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
        IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
      LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
      if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
        return false;
      return true;
    }
  }

  // Also handle the case where we explicitly require zeros in the top
  // elements.  This is a vector shuffle from the zero vector.
  if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
      // Check to see if the top elements are all zeros (or bitcast of zeros).
      N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      N.getOperand(0).getNode()->hasOneUse() &&
      ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
      N.getOperand(0).getOperand(0).hasOneUse() &&
      IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
      IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
    // Okay, this is a zero extending load.  Fold it.
    LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
    if (!SelectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
      return false;
    PatternNodeWithChain = SDValue(LD, 0);
    return true;
  }
  return false;
}

/// SelectLEAAddr - It calls SelectAddr and determines if the maximal
/// addressing mode it matches can be cost effectively emitted as an LEA
/// instruction.
bool X86DAGToDAGISel::SelectLEAAddr(SDValue N,
                                    SDValue &Base, SDValue &Scale,
                                    SDValue &Index, SDValue &Disp,
                                    SDValue &Segment) {
  X86ISelAddressMode AM;

  // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
  // segments.
  SDValue Copy = AM.Segment;
  SDValue T = CurDAG->getRegister(0, MVT::i32);
  AM.Segment = T;
  if (MatchAddress(N, AM))
    return false;
  assert (T == AM.Segment);
  AM.Segment = Copy;

  EVT VT = N.getValueType();
  unsigned Complexity = 0;
  if (AM.BaseType == X86ISelAddressMode::RegBase)
    if (AM.Base_Reg.getNode())
      Complexity = 1;
    else
      AM.Base_Reg = CurDAG->getRegister(0, VT);
  else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase)
    Complexity = 4;

  if (AM.IndexReg.getNode())
    Complexity++;
  else
    AM.IndexReg = CurDAG->getRegister(0, VT);

  // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
  // a simple shift.
  if (AM.Scale > 1)
    Complexity++;

  // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
  // to a LEA. This is determined with some experimentation but is by no means
  // optimal (especially for code size consideration). LEA is nice because of
  // its three-address nature. Tweak the cost function again when we can run
  // convertToThreeAddress() at register allocation time.
  if (AM.hasSymbolicDisplacement()) {
    // For X86-64, we should always use lea to materialize RIP relative
    // addresses.
    if (Subtarget->is64Bit())
      Complexity = 4;
    else
      Complexity += 2;
  }

  if (AM.Disp && (AM.Base_Reg.getNode() || AM.IndexReg.getNode()))
    Complexity++;

  // If it isn't worth using an LEA, reject it.
  if (Complexity <= 2)
    return false;

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

/// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N, SDValue &Base,
                                        SDValue &Scale, SDValue &Index,
                                        SDValue &Disp, SDValue &Segment) {
  assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
  const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);

  X86ISelAddressMode AM;
  AM.GV = GA->getGlobal();
  AM.Disp += GA->getOffset();
  AM.Base_Reg = CurDAG->getRegister(0, N.getValueType());
  AM.SymbolFlags = GA->getTargetFlags();

  if (N.getValueType() == MVT::i32) {
    AM.Scale = 1;
    AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32);
  } else {
    AM.IndexReg = CurDAG->getRegister(0, MVT::i64);
  }

  getAddressOperands(AM, Base, Scale, Index, Disp, Segment);
  return true;
}

bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
                                  SDValue &Base, SDValue &Scale,
                                  SDValue &Index, SDValue &Disp,
                                  SDValue &Segment) {
  if (!ISD::isNON_EXTLoad(N.getNode()) ||
      !IsProfitableToFold(N, P, P) ||
      !IsLegalToFold(N, P, P, OptLevel))
    return false;

  return SelectAddr(N.getNode(),
                    N.getOperand(1), Base, Scale, Index, Disp, Segment);
}

/// getGlobalBaseReg - Return an SDNode that returns the value of
/// the global base register. Output instructions required to
/// initialize the global base register, if necessary.
///
SDNode *X86DAGToDAGISel::getGlobalBaseReg() {
  unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
  return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}

SDNode *X86DAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) {
  SDValue Chain = Node->getOperand(0);
  SDValue In1 = Node->getOperand(1);
  SDValue In2L = Node->getOperand(2);
  SDValue In2H = Node->getOperand(3);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, In1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return NULL;
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  const SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, In2L, In2H, Chain };
  SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(),
                                           MVT::i32, MVT::i32, MVT::Other, Ops,
                                           array_lengthof(Ops));
  cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
  return ResNode;
}

SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
  if (Node->hasAnyUseOfValue(0))
    return 0;

  // Optimize common patterns for __sync_add_and_fetch and
  // __sync_sub_and_fetch where the result is not used. This allows us
  // to use "lock" version of add, sub, inc, dec instructions.
  // FIXME: Do not use special instructions but instead add the "lock"
  // prefix to the target node somehow. The extra information will then be
  // transferred to machine instruction and it denotes the prefix.
  SDValue Chain = Node->getOperand(0);
  SDValue Ptr = Node->getOperand(1);
  SDValue Val = Node->getOperand(2);
  SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
  if (!SelectAddr(Node, Ptr, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4))
    return 0;

  bool isInc = false, isDec = false, isSub = false, isCN = false;
  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val);
  if (CN) {
    isCN = true;
    int64_t CNVal = CN->getSExtValue();
    if (CNVal == 1)
      isInc = true;
    else if (CNVal == -1)
      isDec = true;
    else if (CNVal >= 0)
      Val = CurDAG->getTargetConstant(CNVal, NVT);
    else {
      isSub = true;
      Val = CurDAG->getTargetConstant(-CNVal, NVT);
    }
  } else if (Val.hasOneUse() &&
             Val.getOpcode() == ISD::SUB &&
             X86::isZeroNode(Val.getOperand(0))) {
    isSub = true;
    Val = Val.getOperand(1);
  }

  unsigned Opc = 0;
  switch (NVT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:
    if (isInc)
      Opc = X86::LOCK_INC8m;
    else if (isDec)
      Opc = X86::LOCK_DEC8m;
    else if (isSub) {
      if (isCN)
        Opc = X86::LOCK_SUB8mi;
      else
        Opc = X86::LOCK_SUB8mr;
    } else {
      if (isCN)
        Opc = X86::LOCK_ADD8mi;
      else
        Opc = X86::LOCK_ADD8mr;
    }
    break;
  case MVT::i16:
    if (isInc)
      Opc = X86::LOCK_INC16m;
    else if (isDec)
      Opc = X86::LOCK_DEC16m;
    else if (isSub) {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB16mi8;
        else
          Opc = X86::LOCK_SUB16mi;
      } else
        Opc = X86::LOCK_SUB16mr;
    } else {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD16mi8;
        else
          Opc = X86::LOCK_ADD16mi;
      } else
        Opc = X86::LOCK_ADD16mr;
    }
    break;
  case MVT::i32:
    if (isInc)
      Opc = X86::LOCK_INC32m;
    else if (isDec)
      Opc = X86::LOCK_DEC32m;
    else if (isSub) {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB32mi8;
        else
          Opc = X86::LOCK_SUB32mi;
      } else
        Opc = X86::LOCK_SUB32mr;
    } else {
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD32mi8;
        else
          Opc = X86::LOCK_ADD32mi;
      } else
        Opc = X86::LOCK_ADD32mr;
    }
    break;
  case MVT::i64:
    if (isInc)
      Opc = X86::LOCK_INC64m;
    else if (isDec)
      Opc = X86::LOCK_DEC64m;
    else if (isSub) {
      Opc = X86::LOCK_SUB64mr;
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_SUB64mi8;
        else if (i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_SUB64mi32;
      }
    } else {
      Opc = X86::LOCK_ADD64mr;
      if (isCN) {
        if (immSext8(Val.getNode()))
          Opc = X86::LOCK_ADD64mi8;
        else if (i64immSExt32(Val.getNode()))
          Opc = X86::LOCK_ADD64mi32;
      }
    }
    break;
  }

  DebugLoc dl = Node->getDebugLoc();
  SDValue Undef = SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                 dl, NVT), 0);
  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
  MemOp[0] = cast<MemSDNode>(Node)->getMemOperand();
  if (isInc || isDec) {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 6), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  } else {
    SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Val, Chain };
    SDValue Ret = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops, 7), 0);
    cast<MachineSDNode>(Ret)->setMemRefs(MemOp, MemOp + 1);
    SDValue RetVals[] = { Undef, Ret };
    return CurDAG->getMergeValues(RetVals, 2, dl).getNode();
  }
}

/// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
/// any uses which require the SF or OF bits to be accurate.
static bool HasNoSignedComparisonUses(SDNode *N) {
  // Examine each user of the node.
  for (SDNode::use_iterator UI = N->use_begin(),
         UE = N->use_end(); UI != UE; ++UI) {
    // Only examine CopyToReg uses.
    if (UI->getOpcode() != ISD::CopyToReg)
      return false;
    // Only examine CopyToReg uses that copy to EFLAGS.
    if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() !=
        X86::EFLAGS)
      return false;
    // Examine each user of the CopyToReg use.
    for (SDNode::use_iterator FlagUI = UI->use_begin(),
           FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) {
      // Only examine the Flag result.
      if (FlagUI.getUse().getResNo() != 1) continue;
      // Anything unusual: assume conservatively.
      if (!FlagUI->isMachineOpcode()) return false;
      // Examine the opcode of the user.
      switch (FlagUI->getMachineOpcode()) {
      // These comparisons don't treat the most significant bit specially.
      case X86::SETAr: case X86::SETAEr: case X86::SETBr: case X86::SETBEr:
      case X86::SETEr: case X86::SETNEr: case X86::SETPr: case X86::SETNPr:
      case X86::SETAm: case X86::SETAEm: case X86::SETBm: case X86::SETBEm:
      case X86::SETEm: case X86::SETNEm: case X86::SETPm: case X86::SETNPm:
      case X86::JA_4: case X86::JAE_4: case X86::JB_4: case X86::JBE_4:
      case X86::JE_4: case X86::JNE_4: case X86::JP_4: case X86::JNP_4:
      case X86::CMOVA16rr: case X86::CMOVA16rm:
      case X86::CMOVA32rr: case X86::CMOVA32rm:
      case X86::CMOVA64rr: case X86::CMOVA64rm:
      case X86::CMOVAE16rr: case X86::CMOVAE16rm:
      case X86::CMOVAE32rr: case X86::CMOVAE32rm:
      case X86::CMOVAE64rr: case X86::CMOVAE64rm:
      case X86::CMOVB16rr: case X86::CMOVB16rm:
      case X86::CMOVB32rr: case X86::CMOVB32rm:
      case X86::CMOVB64rr: case X86::CMOVB64rm:
      case X86::CMOVBE16rr: case X86::CMOVBE16rm:
      case X86::CMOVBE32rr: case X86::CMOVBE32rm:
      case X86::CMOVBE64rr: case X86::CMOVBE64rm:
      case X86::CMOVE16rr: case X86::CMOVE16rm:
      case X86::CMOVE32rr: case X86::CMOVE32rm:
      case X86::CMOVE64rr: case X86::CMOVE64rm:
      case X86::CMOVNE16rr: case X86::CMOVNE16rm:
      case X86::CMOVNE32rr: case X86::CMOVNE32rm:
      case X86::CMOVNE64rr: case X86::CMOVNE64rm:
      case X86::CMOVNP16rr: case X86::CMOVNP16rm:
      case X86::CMOVNP32rr: case X86::CMOVNP32rm:
      case X86::CMOVNP64rr: case X86::CMOVNP64rm:
      case X86::CMOVP16rr: case X86::CMOVP16rm:
      case X86::CMOVP32rr: case X86::CMOVP32rm:
      case X86::CMOVP64rr: case X86::CMOVP64rm:
        continue;
      // Anything else: assume conservatively.
      default: return false;
      }
    }
  }
  return true;
}
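
// Knowing that SF and OF are dead is what makes the narrowing in Select()
// below safe: the unsigned, equality and parity users listed above read only
// CF, ZF and PF, so a one-byte "testb" can stand in for a wider "testl" even
// though the narrow test computes different sign and overflow bits.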

SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
  EVT NVT = Node->getValueType(0);
  unsigned Opc, MOpc;
  unsigned Opcode = Node->getOpcode();
  DebugLoc dl = Node->getDebugLoc();

  DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');

  if (Node->isMachineOpcode()) {
    DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
    return NULL;   // Already selected.
  }

  switch (Opcode) {
  default: break;
  case X86ISD::GlobalBaseReg:
    return getGlobalBaseReg();

  case X86ISD::ATOMOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMOR6432);
  case X86ISD::ATOMXOR64_DAG:
    return SelectAtomic64(Node, X86::ATOMXOR6432);
  case X86ISD::ATOMADD64_DAG:
    return SelectAtomic64(Node, X86::ATOMADD6432);
  case X86ISD::ATOMSUB64_DAG:
    return SelectAtomic64(Node, X86::ATOMSUB6432);
  case X86ISD::ATOMNAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMNAND6432);
  case X86ISD::ATOMAND64_DAG:
    return SelectAtomic64(Node, X86::ATOMAND6432);
  case X86ISD::ATOMSWAP64_DAG:
    return SelectAtomic64(Node, X86::ATOMSWAP6432);

  case ISD::ATOMIC_LOAD_ADD: {
    SDNode *RetVal = SelectAtomicLoadAdd(Node, NVT);
    if (RetVal)
      return RetVal;
    break;
  }

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }
    }

    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }

    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                          N0, SDValue()).getValue(1);

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);
      // Get the low part if needed. Don't use getCopyFromReg for aliasing
      // registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 1),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX down 8 bits.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                       CurDAG->getTargetConstant(8, MVT::i8)), 0);
      // Then truncate it down to i8.
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the low half of the result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the high half of the result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }

    return NULL;
  }
1704 case ISD::UDIVREM
: {
1705 SDValue N0
= Node
->getOperand(0);
1706 SDValue N1
= Node
->getOperand(1);
1708 bool isSigned
= Opcode
== ISD::SDIVREM
;
1710 switch (NVT
.getSimpleVT().SimpleTy
) {
1711 default: llvm_unreachable("Unsupported VT!");
1712 case MVT::i8
: Opc
= X86::DIV8r
; MOpc
= X86::DIV8m
; break;
1713 case MVT::i16
: Opc
= X86::DIV16r
; MOpc
= X86::DIV16m
; break;
1714 case MVT::i32
: Opc
= X86::DIV32r
; MOpc
= X86::DIV32m
; break;
1715 case MVT::i64
: Opc
= X86::DIV64r
; MOpc
= X86::DIV64m
; break;
1718 switch (NVT
.getSimpleVT().SimpleTy
) {
1719 default: llvm_unreachable("Unsupported VT!");
1720 case MVT::i8
: Opc
= X86::IDIV8r
; MOpc
= X86::IDIV8m
; break;
1721 case MVT::i16
: Opc
= X86::IDIV16r
; MOpc
= X86::IDIV16m
; break;
1722 case MVT::i32
: Opc
= X86::IDIV32r
; MOpc
= X86::IDIV32m
; break;
1723 case MVT::i64
: Opc
= X86::IDIV64r
; MOpc
= X86::IDIV64m
; break;
1727 unsigned LoReg
, HiReg
, ClrReg
;
1728 unsigned ClrOpcode
, SExtOpcode
;
1729 switch (NVT
.getSimpleVT().SimpleTy
) {
1730 default: llvm_unreachable("Unsupported VT!");
1732 LoReg
= X86::AL
; ClrReg
= HiReg
= X86::AH
;
1734 SExtOpcode
= X86::CBW
;
1737 LoReg
= X86::AX
; HiReg
= X86::DX
;
1738 ClrOpcode
= X86::MOV16r0
; ClrReg
= X86::DX
;
1739 SExtOpcode
= X86::CWD
;
1742 LoReg
= X86::EAX
; ClrReg
= HiReg
= X86::EDX
;
1743 ClrOpcode
= X86::MOV32r0
;
1744 SExtOpcode
= X86::CDQ
;
1747 LoReg
= X86::RAX
; ClrReg
= HiReg
= X86::RDX
;
1748 ClrOpcode
= X86::MOV64r0
;
1749 SExtOpcode
= X86::CQO
;
1753 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1754 bool foldedLoad
= TryFoldLoad(Node
, N1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1755 bool signBitIsZero
= CurDAG
->SignBitIsZero(N0
);
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
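      // General case: place the dividend in LoReg and extend it into HiReg,
      // either by sign extension (CBW/CWD/CDQ/CQO) or by zeroing.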
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Flag, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode =
          SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }
    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Flag, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Flag, N1, InFlag), 0);
    }
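    // The MVT::Flag result glues the divide to the copies that set up its
    // fixed input registers, so the scheduler cannot pull them apart.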
    // Prevent use of AH in a REX instruction by referencing AX instead.
    // Shift it down 8 bits.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);

      // If we also need AL (the quotient), get it by extracting a subreg from
      // Result. The fast register allocator does not like multiple CopyFromReg
      // nodes using aliasing registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX right by 8 bits instead of using AH.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                    CurDAG->getTargetConstant(8, MVT::i8)),
                       0);
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the division (low) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }

    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        HasNoSignedComparisonUses(Node))
      // Look past the truncate if CMP is the only use of it.
      N0 = N0.getOperand(0);
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;
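      // Each case below narrows the TEST to the smallest operand width whose
      // immediate still covers the constant's set bits, shrinking the encoded
      // immediate from four bytes to one or two.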
      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }
1894 // For example, "testl %eax, $2048" to "testb %ah, $8".
1895 if ((C
->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
1896 (!(C
->getZExtValue() & 0x8000) ||
1897 HasNoSignedComparisonUses(Node
))) {
1898 // Shift the immediate right by 8 bits.
1899 SDValue ShiftedImm
= CurDAG
->getTargetConstant(C
->getZExtValue() >> 8,
1901 SDValue Reg
= N0
.getNode()->getOperand(0);
1903 // Put the value in an ABCD register.
1904 TargetRegisterClass
*TRC
= 0;
1905 switch (N0
.getValueType().getSimpleVT().SimpleTy
) {
1906 case MVT::i64
: TRC
= &X86::GR64_ABCDRegClass
; break;
1907 case MVT::i32
: TRC
= &X86::GR32_ABCDRegClass
; break;
1908 case MVT::i16
: TRC
= &X86::GR16_ABCDRegClass
; break;
1909 default: llvm_unreachable("Unsupported TEST operand type!");
1911 SDValue RC
= CurDAG
->getTargetConstant(TRC
->getID(), MVT::i32
);
1912 Reg
= SDValue(CurDAG
->getMachineNode(X86::COPY_TO_REGCLASS
, dl
,
1913 Reg
.getValueType(), Reg
, RC
), 0);
1915 // Extract the h-register.
1916 SDValue Subreg
= CurDAG
->getTargetExtractSubreg(X86::sub_8bit_hi
, dl
,
1919 // Emit a testb. No special NOREX tricks are needed since there's
1920 // only one GPR operand!
1921 return CurDAG
->getMachineNode(X86::TEST8ri
, dl
, MVT::i32
,
1922 Subreg
, ShiftedImm
);
1925 // For example, "testl %eax, $32776" to "testw %ax, $32776".
1926 if ((C
->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
1927 N0
.getValueType() != MVT::i16
&&
1928 (!(C
->getZExtValue() & 0x8000) ||
1929 HasNoSignedComparisonUses(Node
))) {
1930 SDValue Imm
= CurDAG
->getTargetConstant(C
->getZExtValue(), MVT::i16
);
1931 SDValue Reg
= N0
.getNode()->getOperand(0);
1933 // Extract the 16-bit subregister.
1934 SDValue Subreg
= CurDAG
->getTargetExtractSubreg(X86::sub_16bit
, dl
,
1938 return CurDAG
->getMachineNode(X86::TEST16ri
, dl
, MVT::i32
, Subreg
, Imm
);
1941 // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
1942 if ((C
->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
1943 N0
.getValueType() == MVT::i64
&&
1944 (!(C
->getZExtValue() & 0x80000000) ||
1945 HasNoSignedComparisonUses(Node
))) {
1946 SDValue Imm
= CurDAG
->getTargetConstant(C
->getZExtValue(), MVT::i32
);
1947 SDValue Reg
= N0
.getNode()->getOperand(0);
1949 // Extract the 32-bit subregister.
1950 SDValue Subreg
= CurDAG
->getTargetExtractSubreg(X86::sub_32bit
, dl
,
1954 return CurDAG
->getMachineNode(X86::TEST32ri
, dl
, MVT::i32
, Subreg
, Imm
);
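
  // No custom selection matched; fall back to the TableGen'erated matcher.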
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }
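
  // A successfully matched memory operand decomposes into the standard five
  // X86 address components: base, scale, index, displacement, and segment.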
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}
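// (This factory is what the target registers during instruction-selector
// setup; see X86TargetMachine::addInstSelector.)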