1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "x86-isel"
17 #include "X86InstrBuilder.h"
18 #include "X86MachineFunctionInfo.h"
19 #include "X86RegisterInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/Instructions.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/Support/CFG.h"
25 #include "llvm/Type.h"
26 #include "llvm/CodeGen/FunctionLoweringInfo.h"
27 #include "llvm/CodeGen/MachineConstantPool.h"
28 #include "llvm/CodeGen/MachineFunction.h"
29 #include "llvm/CodeGen/MachineFrameInfo.h"
30 #include "llvm/CodeGen/MachineInstrBuilder.h"
31 #include "llvm/CodeGen/MachineRegisterInfo.h"
32 #include "llvm/CodeGen/SelectionDAGISel.h"
33 #include "llvm/Target/TargetMachine.h"
34 #include "llvm/Target/TargetOptions.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/ErrorHandling.h"
37 #include "llvm/Support/MathExtras.h"
38 #include "llvm/Support/raw_ostream.h"
39 #include "llvm/ADT/SmallPtrSet.h"
40 #include "llvm/ADT/Statistic.h"
43 STATISTIC(NumLoadMoved
, "Number of loads moved below TokenFactor");
45 //===----------------------------------------------------------------------===//
46 // Pattern Matcher Implementation
47 //===----------------------------------------------------------------------===//
50 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
51 /// SDValue's instead of register numbers for the leaves of the matched
53 struct X86ISelAddressMode
{
59 // This is really a union, discriminated by BaseType!
67 const GlobalValue
*GV
;
69 const BlockAddress
*BlockAddr
;
72 unsigned Align
; // CP alignment.
73 unsigned char SymbolFlags
; // X86II::MO_*
76 : BaseType(RegBase
), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
77 Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
78 SymbolFlags(X86II::MO_NO_FLAG
) {
81 bool hasSymbolicDisplacement() const {
82 return GV
!= 0 || CP
!= 0 || ES
!= 0 || JT
!= -1 || BlockAddr
!= 0;
85 bool hasBaseOrIndexReg() const {
86 return IndexReg
.getNode() != 0 || Base_Reg
.getNode() != 0;
89 /// isRIPRelative - Return true if this addressing mode is already RIP
91 bool isRIPRelative() const {
92 if (BaseType
!= RegBase
) return false;
93 if (RegisterSDNode
*RegNode
=
94 dyn_cast_or_null
<RegisterSDNode
>(Base_Reg
.getNode()))
95 return RegNode
->getReg() == X86::RIP
;
99 void setBaseReg(SDValue Reg
) {
105 dbgs() << "X86ISelAddressMode " << this << '\n';
106 dbgs() << "Base_Reg ";
107 if (Base_Reg
.getNode() != 0)
108 Base_Reg
.getNode()->dump();
111 dbgs() << " Base.FrameIndex " << Base_FrameIndex
<< '\n'
112 << " Scale" << Scale
<< '\n'
114 if (IndexReg
.getNode() != 0)
115 IndexReg
.getNode()->dump();
118 dbgs() << " Disp " << Disp
<< '\n'
135 dbgs() << " JT" << JT
<< " Align" << Align
<< '\n';
141 //===--------------------------------------------------------------------===//
142 /// ISel - X86 specific code to select X86 machine instructions for
143 /// SelectionDAG operations.
145 class X86DAGToDAGISel
: public SelectionDAGISel
{
146 /// X86Lowering - This object fully describes how to lower LLVM code to an
147 /// X86-specific SelectionDAG.
148 const X86TargetLowering
&X86Lowering
;
150 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
151 /// make the right decision when generating code for different targets.
152 const X86Subtarget
*Subtarget
;
154 /// OptForSize - If true, selector should try to optimize for code size
155 /// instead of performance.
159 explicit X86DAGToDAGISel(X86TargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
160 : SelectionDAGISel(tm
, OptLevel
),
161 X86Lowering(*tm
.getTargetLowering()),
162 Subtarget(&tm
.getSubtarget
<X86Subtarget
>()),
165 virtual const char *getPassName() const {
166 return "X86 DAG->DAG Instruction Selection";
169 virtual void EmitFunctionEntryCode();
171 virtual bool IsProfitableToFold(SDValue N
, SDNode
*U
, SDNode
*Root
) const;
173 virtual void PreprocessISelDAG();
175 inline bool immSext8(SDNode
*N
) const {
176 return isInt
<8>(cast
<ConstantSDNode
>(N
)->getSExtValue());
179 // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
180 // sign extended field.
181 inline bool i64immSExt32(SDNode
*N
) const {
182 uint64_t v
= cast
<ConstantSDNode
>(N
)->getZExtValue();
183 return (int64_t)v
== (int32_t)v
;
186 // Include the pieces autogenerated from the target description.
187 #include "X86GenDAGISel.inc"
190 SDNode
*Select(SDNode
*N
);
191 SDNode
*SelectAtomic64(SDNode
*Node
, unsigned Opc
);
192 SDNode
*SelectAtomicLoadAdd(SDNode
*Node
, EVT NVT
);
193 SDNode
*SelectAtomicLoadArith(SDNode
*Node
, EVT NVT
);
195 bool MatchLoadInAddress(LoadSDNode
*N
, X86ISelAddressMode
&AM
);
196 bool MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
);
197 bool MatchAddress(SDValue N
, X86ISelAddressMode
&AM
);
198 bool MatchAddressRecursively(SDValue N
, X86ISelAddressMode
&AM
,
200 bool MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
);
201 bool SelectAddr(SDNode
*Parent
, SDValue N
, SDValue
&Base
,
202 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
204 bool SelectLEAAddr(SDValue N
, SDValue
&Base
,
205 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
207 bool SelectTLSADDRAddr(SDValue N
, SDValue
&Base
,
208 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
210 bool SelectScalarSSELoad(SDNode
*Root
, SDValue N
,
211 SDValue
&Base
, SDValue
&Scale
,
212 SDValue
&Index
, SDValue
&Disp
,
214 SDValue
&NodeWithChain
);
216 bool TryFoldLoad(SDNode
*P
, SDValue N
,
217 SDValue
&Base
, SDValue
&Scale
,
218 SDValue
&Index
, SDValue
&Disp
,
221 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
222 /// inline asm expressions.
223 virtual bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
225 std::vector
<SDValue
> &OutOps
);
227 void EmitSpecialCodeForMain(MachineBasicBlock
*BB
, MachineFrameInfo
*MFI
);
229 inline void getAddressOperands(X86ISelAddressMode
&AM
, SDValue
&Base
,
230 SDValue
&Scale
, SDValue
&Index
,
231 SDValue
&Disp
, SDValue
&Segment
) {
232 Base
= (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
) ?
233 CurDAG
->getTargetFrameIndex(AM
.Base_FrameIndex
, TLI
.getPointerTy()) :
235 Scale
= getI8Imm(AM
.Scale
);
237 // These are 32-bit even in 64-bit mode since RIP relative offset
240 Disp
= CurDAG
->getTargetGlobalAddress(AM
.GV
, DebugLoc(),
244 Disp
= CurDAG
->getTargetConstantPool(AM
.CP
, MVT::i32
,
245 AM
.Align
, AM
.Disp
, AM
.SymbolFlags
);
247 Disp
= CurDAG
->getTargetExternalSymbol(AM
.ES
, MVT::i32
, AM
.SymbolFlags
);
248 else if (AM
.JT
!= -1)
249 Disp
= CurDAG
->getTargetJumpTable(AM
.JT
, MVT::i32
, AM
.SymbolFlags
);
250 else if (AM
.BlockAddr
)
251 Disp
= CurDAG
->getBlockAddress(AM
.BlockAddr
, MVT::i32
,
252 true, AM
.SymbolFlags
);
254 Disp
= CurDAG
->getTargetConstant(AM
.Disp
, MVT::i32
);
256 if (AM
.Segment
.getNode())
257 Segment
= AM
.Segment
;
259 Segment
= CurDAG
->getRegister(0, MVT::i32
);
262 /// getI8Imm - Return a target constant with the specified value, of type
264 inline SDValue
getI8Imm(unsigned Imm
) {
265 return CurDAG
->getTargetConstant(Imm
, MVT::i8
);
268 /// getI32Imm - Return a target constant with the specified value, of type
270 inline SDValue
getI32Imm(unsigned Imm
) {
271 return CurDAG
->getTargetConstant(Imm
, MVT::i32
);
274 /// getGlobalBaseReg - Return an SDNode that returns the value of
275 /// the global base register. Output instructions required to
276 /// initialize the global base register, if necessary.
278 SDNode
*getGlobalBaseReg();
280 /// getTargetMachine - Return a reference to the TargetMachine, casted
281 /// to the target-specific type.
282 const X86TargetMachine
&getTargetMachine() {
283 return static_cast<const X86TargetMachine
&>(TM
);
286 /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
287 /// to the target-specific type.
288 const X86InstrInfo
*getInstrInfo() {
289 return getTargetMachine().getInstrInfo();
296 X86DAGToDAGISel::IsProfitableToFold(SDValue N
, SDNode
*U
, SDNode
*Root
) const {
297 if (OptLevel
== CodeGenOpt::None
) return false;
302 if (N
.getOpcode() != ISD::LOAD
)
305 // If N is a load, do additional profitability checks.
307 switch (U
->getOpcode()) {
320 SDValue Op1
= U
->getOperand(1);
322 // If the other operand is a 8-bit immediate we should fold the immediate
323 // instead. This reduces code size.
325 // movl 4(%esp), %eax
329 // addl 4(%esp), %eax
330 // The former is 2 bytes shorter. In case where the increment is 1, then
331 // the saving can be 4 bytes (by using incl %eax).
332 if (ConstantSDNode
*Imm
= dyn_cast
<ConstantSDNode
>(Op1
))
333 if (Imm
->getAPIntValue().isSignedIntN(8))
336 // If the other operand is a TLS address, we should fold it instead.
339 // leal i@NTPOFF(%eax), %eax
341 // movl $i@NTPOFF, %eax
343 // if the block also has an access to a second TLS address this will save
345 // FIXME: This is probably also true for non TLS addresses.
346 if (Op1
.getOpcode() == X86ISD::Wrapper
) {
347 SDValue Val
= Op1
.getOperand(0);
348 if (Val
.getOpcode() == ISD::TargetGlobalTLSAddress
)
358 /// MoveBelowCallOrigChain - Replace the original chain operand of the call with
359 /// load's chain operand and move load below the call's chain operand.
360 static void MoveBelowOrigChain(SelectionDAG
*CurDAG
, SDValue Load
,
361 SDValue Call
, SDValue OrigChain
) {
362 SmallVector
<SDValue
, 8> Ops
;
363 SDValue Chain
= OrigChain
.getOperand(0);
364 if (Chain
.getNode() == Load
.getNode())
365 Ops
.push_back(Load
.getOperand(0));
367 assert(Chain
.getOpcode() == ISD::TokenFactor
&&
368 "Unexpected chain operand");
369 for (unsigned i
= 0, e
= Chain
.getNumOperands(); i
!= e
; ++i
)
370 if (Chain
.getOperand(i
).getNode() == Load
.getNode())
371 Ops
.push_back(Load
.getOperand(0));
373 Ops
.push_back(Chain
.getOperand(i
));
375 CurDAG
->getNode(ISD::TokenFactor
, Load
.getDebugLoc(),
376 MVT::Other
, &Ops
[0], Ops
.size());
378 Ops
.push_back(NewChain
);
380 for (unsigned i
= 1, e
= OrigChain
.getNumOperands(); i
!= e
; ++i
)
381 Ops
.push_back(OrigChain
.getOperand(i
));
382 CurDAG
->UpdateNodeOperands(OrigChain
.getNode(), &Ops
[0], Ops
.size());
383 CurDAG
->UpdateNodeOperands(Load
.getNode(), Call
.getOperand(0),
384 Load
.getOperand(1), Load
.getOperand(2));
386 Ops
.push_back(SDValue(Load
.getNode(), 1));
387 for (unsigned i
= 1, e
= Call
.getNode()->getNumOperands(); i
!= e
; ++i
)
388 Ops
.push_back(Call
.getOperand(i
));
389 CurDAG
->UpdateNodeOperands(Call
.getNode(), &Ops
[0], Ops
.size());
392 /// isCalleeLoad - Return true if call address is a load and it can be
393 /// moved below CALLSEQ_START and the chains leading up to the call.
394 /// Return the CALLSEQ_START by reference as a second output.
395 /// In the case of a tail call, there isn't a callseq node between the call
396 /// chain and the load.
397 static bool isCalleeLoad(SDValue Callee
, SDValue
&Chain
, bool HasCallSeq
) {
398 if (Callee
.getNode() == Chain
.getNode() || !Callee
.hasOneUse())
400 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Callee
.getNode());
403 LD
->getAddressingMode() != ISD::UNINDEXED
||
404 LD
->getExtensionType() != ISD::NON_EXTLOAD
)
407 // Now let's find the callseq_start.
408 while (HasCallSeq
&& Chain
.getOpcode() != ISD::CALLSEQ_START
) {
409 if (!Chain
.hasOneUse())
411 Chain
= Chain
.getOperand(0);
414 if (!Chain
.getNumOperands())
416 if (Chain
.getOperand(0).getNode() == Callee
.getNode())
418 if (Chain
.getOperand(0).getOpcode() == ISD::TokenFactor
&&
419 Callee
.getValue(1).isOperandOf(Chain
.getOperand(0).getNode()) &&
420 Callee
.getValue(1).hasOneUse())
425 void X86DAGToDAGISel::PreprocessISelDAG() {
426 // OptForSize is used in pattern predicates that isel is matching.
427 OptForSize
= MF
->getFunction()->hasFnAttr(Attribute::OptimizeForSize
);
429 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
430 E
= CurDAG
->allnodes_end(); I
!= E
; ) {
431 SDNode
*N
= I
++; // Preincrement iterator to avoid invalidation issues.
433 if (OptLevel
!= CodeGenOpt::None
&&
434 (N
->getOpcode() == X86ISD::CALL
||
435 N
->getOpcode() == X86ISD::TC_RETURN
)) {
436 /// Also try moving call address load from outside callseq_start to just
437 /// before the call to allow it to be folded.
455 bool HasCallSeq
= N
->getOpcode() == X86ISD::CALL
;
456 SDValue Chain
= N
->getOperand(0);
457 SDValue Load
= N
->getOperand(1);
458 if (!isCalleeLoad(Load
, Chain
, HasCallSeq
))
460 MoveBelowOrigChain(CurDAG
, Load
, SDValue(N
, 0), Chain
);
465 // Lower fpround and fpextend nodes that target the FP stack to be store and
466 // load to the stack. This is a gross hack. We would like to simply mark
467 // these as being illegal, but when we do that, legalize produces these when
468 // it expands calls, then expands these in the same legalize pass. We would
469 // like dag combine to be able to hack on these between the call expansion
470 // and the node legalization. As such this pass basically does "really
471 // late" legalization of these inline with the X86 isel pass.
472 // FIXME: This should only happen when not compiled with -O0.
473 if (N
->getOpcode() != ISD::FP_ROUND
&& N
->getOpcode() != ISD::FP_EXTEND
)
476 // If the source and destination are SSE registers, then this is a legal
477 // conversion that should not be lowered.
478 EVT SrcVT
= N
->getOperand(0).getValueType();
479 EVT DstVT
= N
->getValueType(0);
480 bool SrcIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(SrcVT
);
481 bool DstIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(DstVT
);
482 if (SrcIsSSE
&& DstIsSSE
)
485 if (!SrcIsSSE
&& !DstIsSSE
) {
486 // If this is an FPStack extension, it is a noop.
487 if (N
->getOpcode() == ISD::FP_EXTEND
)
489 // If this is a value-preserving FPStack truncation, it is a noop.
490 if (N
->getConstantOperandVal(1))
494 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
495 // FPStack has extload and truncstore. SSE can fold direct loads into other
496 // operations. Based on this, decide what we want to do.
498 if (N
->getOpcode() == ISD::FP_ROUND
)
499 MemVT
= DstVT
; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
501 MemVT
= SrcIsSSE
? SrcVT
: DstVT
;
503 SDValue MemTmp
= CurDAG
->CreateStackTemporary(MemVT
);
504 DebugLoc dl
= N
->getDebugLoc();
506 // FIXME: optimize the case where the src/dest is a load or store?
507 SDValue Store
= CurDAG
->getTruncStore(CurDAG
->getEntryNode(), dl
,
509 MemTmp
, MachinePointerInfo(), MemVT
,
511 SDValue Result
= CurDAG
->getExtLoad(ISD::EXTLOAD
, dl
, DstVT
, Store
, MemTmp
,
512 MachinePointerInfo(),
513 MemVT
, false, false, 0);
515 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
516 // extload we created. This will cause general havok on the dag because
517 // anything below the conversion could be folded into other existing nodes.
518 // To avoid invalidating 'I', back it up to the convert node.
520 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Result
);
522 // Now that we did that, the node is dead. Increment the iterator to the
523 // next node to process, then delete N.
525 CurDAG
->DeleteNode(N
);
530 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
531 /// the main function.
532 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock
*BB
,
533 MachineFrameInfo
*MFI
) {
534 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
535 if (Subtarget
->isTargetCygMing()) {
537 Subtarget
->is64Bit() ? X86::WINCALL64pcrel32
: X86::CALLpcrel32
;
538 BuildMI(BB
, DebugLoc(),
539 TII
->get(CallOp
)).addExternalSymbol("__main");
543 void X86DAGToDAGISel::EmitFunctionEntryCode() {
544 // If this is main, emit special code for main.
545 if (const Function
*Fn
= MF
->getFunction())
546 if (Fn
->hasExternalLinkage() && Fn
->getName() == "main")
547 EmitSpecialCodeForMain(MF
->begin(), MF
->getFrameInfo());
551 bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode
*N
, X86ISelAddressMode
&AM
){
552 SDValue Address
= N
->getOperand(1);
554 // load gs:0 -> GS segment register.
555 // load fs:0 -> FS segment register.
557 // This optimization is valid because the GNU TLS model defines that
558 // gs:0 (or fs:0 on X86-64) contains its own address.
559 // For more information see http://people.redhat.com/drepper/tls.pdf
560 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Address
))
561 if (C
->getSExtValue() == 0 && AM
.Segment
.getNode() == 0 &&
562 Subtarget
->isTargetELF())
563 switch (N
->getPointerInfo().getAddrSpace()) {
565 AM
.Segment
= CurDAG
->getRegister(X86::GS
, MVT::i16
);
568 AM
.Segment
= CurDAG
->getRegister(X86::FS
, MVT::i16
);
575 /// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
576 /// into an addressing mode. These wrap things that will resolve down into a
577 /// symbol reference. If no match is possible, this returns true, otherwise it
579 bool X86DAGToDAGISel::MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
) {
580 // If the addressing mode already has a symbol as the displacement, we can
581 // never match another symbol.
582 if (AM
.hasSymbolicDisplacement())
585 SDValue N0
= N
.getOperand(0);
586 CodeModel::Model M
= TM
.getCodeModel();
588 // Handle X86-64 rip-relative addresses. We check this before checking direct
589 // folding because RIP is preferable to non-RIP accesses.
590 if (Subtarget
->is64Bit() &&
591 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
592 // they cannot be folded into immediate fields.
593 // FIXME: This can be improved for kernel and other models?
594 (M
== CodeModel::Small
|| M
== CodeModel::Kernel
) &&
595 // Base and index reg must be 0 in order to use %rip as base and lowering
597 !AM
.hasBaseOrIndexReg() && N
.getOpcode() == X86ISD::WrapperRIP
) {
598 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(N0
)) {
599 int64_t Offset
= AM
.Disp
+ G
->getOffset();
600 if (!X86::isOffsetSuitableForCodeModel(Offset
, M
)) return true;
601 AM
.GV
= G
->getGlobal();
603 AM
.SymbolFlags
= G
->getTargetFlags();
604 } else if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(N0
)) {
605 int64_t Offset
= AM
.Disp
+ CP
->getOffset();
606 if (!X86::isOffsetSuitableForCodeModel(Offset
, M
)) return true;
607 AM
.CP
= CP
->getConstVal();
608 AM
.Align
= CP
->getAlignment();
610 AM
.SymbolFlags
= CP
->getTargetFlags();
611 } else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(N0
)) {
612 AM
.ES
= S
->getSymbol();
613 AM
.SymbolFlags
= S
->getTargetFlags();
614 } else if (JumpTableSDNode
*J
= dyn_cast
<JumpTableSDNode
>(N0
)) {
615 AM
.JT
= J
->getIndex();
616 AM
.SymbolFlags
= J
->getTargetFlags();
618 AM
.BlockAddr
= cast
<BlockAddressSDNode
>(N0
)->getBlockAddress();
619 AM
.SymbolFlags
= cast
<BlockAddressSDNode
>(N0
)->getTargetFlags();
622 if (N
.getOpcode() == X86ISD::WrapperRIP
)
623 AM
.setBaseReg(CurDAG
->getRegister(X86::RIP
, MVT::i64
));
627 // Handle the case when globals fit in our immediate field: This is true for
628 // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
629 // mode, this results in a non-RIP-relative computation.
630 if (!Subtarget
->is64Bit() ||
631 ((M
== CodeModel::Small
|| M
== CodeModel::Kernel
) &&
632 TM
.getRelocationModel() == Reloc::Static
)) {
633 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(N0
)) {
634 AM
.GV
= G
->getGlobal();
635 AM
.Disp
+= G
->getOffset();
636 AM
.SymbolFlags
= G
->getTargetFlags();
637 } else if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(N0
)) {
638 AM
.CP
= CP
->getConstVal();
639 AM
.Align
= CP
->getAlignment();
640 AM
.Disp
+= CP
->getOffset();
641 AM
.SymbolFlags
= CP
->getTargetFlags();
642 } else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(N0
)) {
643 AM
.ES
= S
->getSymbol();
644 AM
.SymbolFlags
= S
->getTargetFlags();
645 } else if (JumpTableSDNode
*J
= dyn_cast
<JumpTableSDNode
>(N0
)) {
646 AM
.JT
= J
->getIndex();
647 AM
.SymbolFlags
= J
->getTargetFlags();
649 AM
.BlockAddr
= cast
<BlockAddressSDNode
>(N0
)->getBlockAddress();
650 AM
.SymbolFlags
= cast
<BlockAddressSDNode
>(N0
)->getTargetFlags();
658 /// MatchAddress - Add the specified node to the specified addressing mode,
659 /// returning true if it cannot be done. This just pattern matches for the
661 bool X86DAGToDAGISel::MatchAddress(SDValue N
, X86ISelAddressMode
&AM
) {
662 if (MatchAddressRecursively(N
, AM
, 0))
665 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
666 // a smaller encoding and avoids a scaled-index.
668 AM
.BaseType
== X86ISelAddressMode::RegBase
&&
669 AM
.Base_Reg
.getNode() == 0) {
670 AM
.Base_Reg
= AM
.IndexReg
;
674 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
675 // because it has a smaller encoding.
676 // TODO: Which other code models can use this?
677 if (TM
.getCodeModel() == CodeModel::Small
&&
678 Subtarget
->is64Bit() &&
680 AM
.BaseType
== X86ISelAddressMode::RegBase
&&
681 AM
.Base_Reg
.getNode() == 0 &&
682 AM
.IndexReg
.getNode() == 0 &&
683 AM
.SymbolFlags
== X86II::MO_NO_FLAG
&&
684 AM
.hasSymbolicDisplacement())
685 AM
.Base_Reg
= CurDAG
->getRegister(X86::RIP
, MVT::i64
);
690 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N
, X86ISelAddressMode
&AM
,
692 bool is64Bit
= Subtarget
->is64Bit();
693 DebugLoc dl
= N
.getDebugLoc();
695 dbgs() << "MatchAddress: ";
700 return MatchAddressBase(N
, AM
);
702 CodeModel::Model M
= TM
.getCodeModel();
704 // If this is already a %rip relative address, we can only merge immediates
705 // into it. Instead of handling this in every case, we handle it here.
706 // RIP relative addressing: %rip + 32-bit displacement!
707 if (AM
.isRIPRelative()) {
708 // FIXME: JumpTable and ExternalSymbol address currently don't like
709 // displacements. It isn't very important, but this should be fixed for
711 if (!AM
.ES
&& AM
.JT
!= -1) return true;
713 if (ConstantSDNode
*Cst
= dyn_cast
<ConstantSDNode
>(N
)) {
714 int64_t Val
= AM
.Disp
+ Cst
->getSExtValue();
715 if (X86::isOffsetSuitableForCodeModel(Val
, M
,
716 AM
.hasSymbolicDisplacement())) {
724 switch (N
.getOpcode()) {
726 case ISD::Constant
: {
727 uint64_t Val
= cast
<ConstantSDNode
>(N
)->getSExtValue();
729 X86::isOffsetSuitableForCodeModel(AM
.Disp
+ Val
, M
,
730 AM
.hasSymbolicDisplacement())) {
737 case X86ISD::Wrapper
:
738 case X86ISD::WrapperRIP
:
739 if (!MatchWrapper(N
, AM
))
744 if (!MatchLoadInAddress(cast
<LoadSDNode
>(N
), AM
))
748 case ISD::FrameIndex
:
749 if (AM
.BaseType
== X86ISelAddressMode::RegBase
750 && AM
.Base_Reg
.getNode() == 0) {
751 AM
.BaseType
= X86ISelAddressMode::FrameIndexBase
;
752 AM
.Base_FrameIndex
= cast
<FrameIndexSDNode
>(N
)->getIndex();
758 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1)
762 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1))) {
763 unsigned Val
= CN
->getZExtValue();
764 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
765 // that the base operand remains free for further matching. If
766 // the base doesn't end up getting used, a post-processing step
767 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
768 if (Val
== 1 || Val
== 2 || Val
== 3) {
770 SDValue ShVal
= N
.getNode()->getOperand(0);
772 // Okay, we know that we have a scale by now. However, if the scaled
773 // value is an add of something and a constant, we can fold the
774 // constant into the disp field here.
775 if (CurDAG
->isBaseWithConstantOffset(ShVal
)) {
776 AM
.IndexReg
= ShVal
.getNode()->getOperand(0);
777 ConstantSDNode
*AddVal
=
778 cast
<ConstantSDNode
>(ShVal
.getNode()->getOperand(1));
779 uint64_t Disp
= AM
.Disp
+ (AddVal
->getSExtValue() << Val
);
781 X86::isOffsetSuitableForCodeModel(Disp
, M
,
782 AM
.hasSymbolicDisplacement()))
796 // A mul_lohi where we need the low part can be folded as a plain multiply.
797 if (N
.getResNo() != 0) break;
800 case X86ISD::MUL_IMM
:
801 // X*[3,5,9] -> X+X*[2,4,8]
802 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
803 AM
.Base_Reg
.getNode() == 0 &&
804 AM
.IndexReg
.getNode() == 0) {
806 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1)))
807 if (CN
->getZExtValue() == 3 || CN
->getZExtValue() == 5 ||
808 CN
->getZExtValue() == 9) {
809 AM
.Scale
= unsigned(CN
->getZExtValue())-1;
811 SDValue MulVal
= N
.getNode()->getOperand(0);
814 // Okay, we know that we have a scale by now. However, if the scaled
815 // value is an add of something and a constant, we can fold the
816 // constant into the disp field here.
817 if (MulVal
.getNode()->getOpcode() == ISD::ADD
&& MulVal
.hasOneUse() &&
818 isa
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1))) {
819 Reg
= MulVal
.getNode()->getOperand(0);
820 ConstantSDNode
*AddVal
=
821 cast
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1));
822 uint64_t Disp
= AM
.Disp
+ AddVal
->getSExtValue() *
825 X86::isOffsetSuitableForCodeModel(Disp
, M
,
826 AM
.hasSymbolicDisplacement()))
829 Reg
= N
.getNode()->getOperand(0);
831 Reg
= N
.getNode()->getOperand(0);
834 AM
.IndexReg
= AM
.Base_Reg
= Reg
;
841 // Given A-B, if A can be completely folded into the address and
842 // the index field with the index field unused, use -B as the index.
843 // This is a win if a has multiple parts that can be folded into
844 // the address. Also, this saves a mov if the base register has
845 // other uses, since it avoids a two-address sub instruction, however
846 // it costs an additional mov if the index register has other uses.
848 // Add an artificial use to this node so that we can keep track of
849 // it if it gets CSE'd with a different node.
850 HandleSDNode
Handle(N
);
852 // Test if the LHS of the sub can be folded.
853 X86ISelAddressMode Backup
= AM
;
854 if (MatchAddressRecursively(N
.getNode()->getOperand(0), AM
, Depth
+1)) {
858 // Test if the index field is free for use.
859 if (AM
.IndexReg
.getNode() || AM
.isRIPRelative()) {
865 SDValue RHS
= Handle
.getValue().getNode()->getOperand(1);
866 // If the RHS involves a register with multiple uses, this
867 // transformation incurs an extra mov, due to the neg instruction
868 // clobbering its operand.
869 if (!RHS
.getNode()->hasOneUse() ||
870 RHS
.getNode()->getOpcode() == ISD::CopyFromReg
||
871 RHS
.getNode()->getOpcode() == ISD::TRUNCATE
||
872 RHS
.getNode()->getOpcode() == ISD::ANY_EXTEND
||
873 (RHS
.getNode()->getOpcode() == ISD::ZERO_EXTEND
&&
874 RHS
.getNode()->getOperand(0).getValueType() == MVT::i32
))
876 // If the base is a register with multiple uses, this
877 // transformation may save a mov.
878 if ((AM
.BaseType
== X86ISelAddressMode::RegBase
&&
879 AM
.Base_Reg
.getNode() &&
880 !AM
.Base_Reg
.getNode()->hasOneUse()) ||
881 AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
)
883 // If the folded LHS was interesting, this transformation saves
884 // address arithmetic.
885 if ((AM
.hasSymbolicDisplacement() && !Backup
.hasSymbolicDisplacement()) +
886 ((AM
.Disp
!= 0) && (Backup
.Disp
== 0)) +
887 (AM
.Segment
.getNode() && !Backup
.Segment
.getNode()) >= 2)
889 // If it doesn't look like it may be an overall win, don't do it.
895 // Ok, the transformation is legal and appears profitable. Go for it.
896 SDValue Zero
= CurDAG
->getConstant(0, N
.getValueType());
897 SDValue Neg
= CurDAG
->getNode(ISD::SUB
, dl
, N
.getValueType(), Zero
, RHS
);
901 // Insert the new nodes into the topological ordering.
902 if (Zero
.getNode()->getNodeId() == -1 ||
903 Zero
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
904 CurDAG
->RepositionNode(N
.getNode(), Zero
.getNode());
905 Zero
.getNode()->setNodeId(N
.getNode()->getNodeId());
907 if (Neg
.getNode()->getNodeId() == -1 ||
908 Neg
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
909 CurDAG
->RepositionNode(N
.getNode(), Neg
.getNode());
910 Neg
.getNode()->setNodeId(N
.getNode()->getNodeId());
916 // Add an artificial use to this node so that we can keep track of
917 // it if it gets CSE'd with a different node.
918 HandleSDNode
Handle(N
);
920 X86ISelAddressMode Backup
= AM
;
921 if (!MatchAddressRecursively(N
.getOperand(0), AM
, Depth
+1) &&
922 !MatchAddressRecursively(Handle
.getValue().getOperand(1), AM
, Depth
+1))
926 // Try again after commuting the operands.
927 if (!MatchAddressRecursively(Handle
.getValue().getOperand(1), AM
, Depth
+1)&&
928 !MatchAddressRecursively(Handle
.getValue().getOperand(0), AM
, Depth
+1))
932 // If we couldn't fold both operands into the address at the same time,
933 // see if we can just put each operand into a register and fold at least
935 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
936 !AM
.Base_Reg
.getNode() &&
937 !AM
.IndexReg
.getNode()) {
938 N
= Handle
.getValue();
939 AM
.Base_Reg
= N
.getOperand(0);
940 AM
.IndexReg
= N
.getOperand(1);
944 N
= Handle
.getValue();
949 // Handle "X | C" as "X + C" iff X is known to have C bits clear.
950 if (CurDAG
->isBaseWithConstantOffset(N
)) {
951 X86ISelAddressMode Backup
= AM
;
952 ConstantSDNode
*CN
= cast
<ConstantSDNode
>(N
.getOperand(1));
953 uint64_t Offset
= CN
->getSExtValue();
955 // Start with the LHS as an addr mode.
956 if (!MatchAddressRecursively(N
.getOperand(0), AM
, Depth
+1) &&
957 // Address could not have picked a GV address for the displacement.
959 // On x86-64, the resultant disp must fit in 32-bits.
961 X86::isOffsetSuitableForCodeModel(AM
.Disp
+ Offset
, M
,
962 AM
.hasSymbolicDisplacement()))) {
971 // Perform some heroic transforms on an and of a constant-count shift
972 // with a constant to enable use of the scaled offset field.
974 SDValue Shift
= N
.getOperand(0);
975 if (Shift
.getNumOperands() != 2) break;
977 // Scale must not be used already.
978 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1) break;
980 SDValue X
= Shift
.getOperand(0);
981 ConstantSDNode
*C2
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
982 ConstantSDNode
*C1
= dyn_cast
<ConstantSDNode
>(Shift
.getOperand(1));
983 if (!C1
|| !C2
) break;
985 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
986 // allows us to convert the shift and and into an h-register extract and
988 if (Shift
.getOpcode() == ISD::SRL
&& Shift
.hasOneUse()) {
989 unsigned ScaleLog
= 8 - C1
->getZExtValue();
990 if (ScaleLog
> 0 && ScaleLog
< 4 &&
991 C2
->getZExtValue() == (UINT64_C(0xff) << ScaleLog
)) {
992 SDValue Eight
= CurDAG
->getConstant(8, MVT::i8
);
993 SDValue Mask
= CurDAG
->getConstant(0xff, N
.getValueType());
994 SDValue Srl
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
996 SDValue And
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(),
998 SDValue ShlCount
= CurDAG
->getConstant(ScaleLog
, MVT::i8
);
999 SDValue Shl
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1002 // Insert the new nodes into the topological ordering.
1003 if (Eight
.getNode()->getNodeId() == -1 ||
1004 Eight
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1005 CurDAG
->RepositionNode(X
.getNode(), Eight
.getNode());
1006 Eight
.getNode()->setNodeId(X
.getNode()->getNodeId());
1008 if (Mask
.getNode()->getNodeId() == -1 ||
1009 Mask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1010 CurDAG
->RepositionNode(X
.getNode(), Mask
.getNode());
1011 Mask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1013 if (Srl
.getNode()->getNodeId() == -1 ||
1014 Srl
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1015 CurDAG
->RepositionNode(Shift
.getNode(), Srl
.getNode());
1016 Srl
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1018 if (And
.getNode()->getNodeId() == -1 ||
1019 And
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1020 CurDAG
->RepositionNode(N
.getNode(), And
.getNode());
1021 And
.getNode()->setNodeId(N
.getNode()->getNodeId());
1023 if (ShlCount
.getNode()->getNodeId() == -1 ||
1024 ShlCount
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1025 CurDAG
->RepositionNode(X
.getNode(), ShlCount
.getNode());
1026 ShlCount
.getNode()->setNodeId(N
.getNode()->getNodeId());
1028 if (Shl
.getNode()->getNodeId() == -1 ||
1029 Shl
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1030 CurDAG
->RepositionNode(N
.getNode(), Shl
.getNode());
1031 Shl
.getNode()->setNodeId(N
.getNode()->getNodeId());
1033 CurDAG
->ReplaceAllUsesWith(N
, Shl
);
1035 AM
.Scale
= (1 << ScaleLog
);
1040 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1041 // allows us to fold the shift into this addressing mode.
1042 if (Shift
.getOpcode() != ISD::SHL
) break;
1044 // Not likely to be profitable if either the AND or SHIFT node has more
1045 // than one use (unless all uses are for address computation). Besides,
1046 // isel mechanism requires their node ids to be reused.
1047 if (!N
.hasOneUse() || !Shift
.hasOneUse())
1050 // Verify that the shift amount is something we can fold.
1051 unsigned ShiftCst
= C1
->getZExtValue();
1052 if (ShiftCst
!= 1 && ShiftCst
!= 2 && ShiftCst
!= 3)
1055 // Get the new AND mask, this folds to a constant.
1056 SDValue NewANDMask
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
1057 SDValue(C2
, 0), SDValue(C1
, 0));
1058 SDValue NewAND
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(), X
,
1060 SDValue NewSHIFT
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1061 NewAND
, SDValue(C1
, 0));
1063 // Insert the new nodes into the topological ordering.
1064 if (C1
->getNodeId() > X
.getNode()->getNodeId()) {
1065 CurDAG
->RepositionNode(X
.getNode(), C1
);
1066 C1
->setNodeId(X
.getNode()->getNodeId());
1068 if (NewANDMask
.getNode()->getNodeId() == -1 ||
1069 NewANDMask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1070 CurDAG
->RepositionNode(X
.getNode(), NewANDMask
.getNode());
1071 NewANDMask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1073 if (NewAND
.getNode()->getNodeId() == -1 ||
1074 NewAND
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1075 CurDAG
->RepositionNode(Shift
.getNode(), NewAND
.getNode());
1076 NewAND
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1078 if (NewSHIFT
.getNode()->getNodeId() == -1 ||
1079 NewSHIFT
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1080 CurDAG
->RepositionNode(N
.getNode(), NewSHIFT
.getNode());
1081 NewSHIFT
.getNode()->setNodeId(N
.getNode()->getNodeId());
1084 CurDAG
->ReplaceAllUsesWith(N
, NewSHIFT
);
1086 AM
.Scale
= 1 << ShiftCst
;
1087 AM
.IndexReg
= NewAND
;
1092 return MatchAddressBase(N
, AM
);
1095 /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1096 /// specified addressing mode without any further recursion.
1097 bool X86DAGToDAGISel::MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
) {
1098 // Is the base register already occupied?
1099 if (AM
.BaseType
!= X86ISelAddressMode::RegBase
|| AM
.Base_Reg
.getNode()) {
1100 // If so, check to see if the scale index register is set.
1101 if (AM
.IndexReg
.getNode() == 0) {
1107 // Otherwise, we cannot select it.
1111 // Default, generate it as a register.
1112 AM
.BaseType
= X86ISelAddressMode::RegBase
;
1117 /// SelectAddr - returns true if it is able pattern match an addressing mode.
1118 /// It returns the operands which make up the maximal addressing mode it can
1119 /// match by reference.
1121 /// Parent is the parent node of the addr operand that is being matched. It
1122 /// is always a load, store, atomic node, or null. It is only null when
1123 /// checking memory operands for inline asm nodes.
1124 bool X86DAGToDAGISel::SelectAddr(SDNode
*Parent
, SDValue N
, SDValue
&Base
,
1125 SDValue
&Scale
, SDValue
&Index
,
1126 SDValue
&Disp
, SDValue
&Segment
) {
1127 X86ISelAddressMode AM
;
1130 // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1131 // that are not a MemSDNode, and thus don't have proper addrspace info.
1132 Parent
->getOpcode() != ISD::INTRINSIC_W_CHAIN
&& // unaligned loads, fixme
1133 Parent
->getOpcode() != ISD::INTRINSIC_VOID
&& // nontemporal stores
1134 Parent
->getOpcode() != X86ISD::TLSCALL
) { // Fixme
1135 unsigned AddrSpace
=
1136 cast
<MemSDNode
>(Parent
)->getPointerInfo().getAddrSpace();
1137 // AddrSpace 256 -> GS, 257 -> FS.
1138 if (AddrSpace
== 256)
1139 AM
.Segment
= CurDAG
->getRegister(X86::GS
, MVT::i16
);
1140 if (AddrSpace
== 257)
1141 AM
.Segment
= CurDAG
->getRegister(X86::FS
, MVT::i16
);
1144 if (MatchAddress(N
, AM
))
1147 EVT VT
= N
.getValueType();
1148 if (AM
.BaseType
== X86ISelAddressMode::RegBase
) {
1149 if (!AM
.Base_Reg
.getNode())
1150 AM
.Base_Reg
= CurDAG
->getRegister(0, VT
);
1153 if (!AM
.IndexReg
.getNode())
1154 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1156 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1160 /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
1161 /// match a load whose top elements are either undef or zeros. The load flavor
1162 /// is derived from the type of N, which is either v4f32 or v2f64.
1165 /// PatternChainNode: this is the matched node that has a chain input and
1167 bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode
*Root
,
1168 SDValue N
, SDValue
&Base
,
1169 SDValue
&Scale
, SDValue
&Index
,
1170 SDValue
&Disp
, SDValue
&Segment
,
1171 SDValue
&PatternNodeWithChain
) {
1172 if (N
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
1173 PatternNodeWithChain
= N
.getOperand(0);
1174 if (ISD::isNON_EXTLoad(PatternNodeWithChain
.getNode()) &&
1175 PatternNodeWithChain
.hasOneUse() &&
1176 IsProfitableToFold(N
.getOperand(0), N
.getNode(), Root
) &&
1177 IsLegalToFold(N
.getOperand(0), N
.getNode(), Root
, OptLevel
)) {
1178 LoadSDNode
*LD
= cast
<LoadSDNode
>(PatternNodeWithChain
);
1179 if (!SelectAddr(LD
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1185 // Also handle the case where we explicitly require zeros in the top
1186 // elements. This is a vector shuffle from the zero vector.
1187 if (N
.getOpcode() == X86ISD::VZEXT_MOVL
&& N
.getNode()->hasOneUse() &&
1188 // Check to see if the top elements are all zeros (or bitcast of zeros).
1189 N
.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR
&&
1190 N
.getOperand(0).getNode()->hasOneUse() &&
1191 ISD::isNON_EXTLoad(N
.getOperand(0).getOperand(0).getNode()) &&
1192 N
.getOperand(0).getOperand(0).hasOneUse() &&
1193 IsProfitableToFold(N
.getOperand(0), N
.getNode(), Root
) &&
1194 IsLegalToFold(N
.getOperand(0), N
.getNode(), Root
, OptLevel
)) {
1195 // Okay, this is a zero extending load. Fold it.
1196 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
.getOperand(0).getOperand(0));
1197 if (!SelectAddr(LD
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1199 PatternNodeWithChain
= SDValue(LD
, 0);
1206 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1207 /// mode it matches can be cost effectively emitted as an LEA instruction.
1208 bool X86DAGToDAGISel::SelectLEAAddr(SDValue N
,
1209 SDValue
&Base
, SDValue
&Scale
,
1210 SDValue
&Index
, SDValue
&Disp
,
1212 X86ISelAddressMode AM
;
1214 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1216 SDValue Copy
= AM
.Segment
;
1217 SDValue T
= CurDAG
->getRegister(0, MVT::i32
);
1219 if (MatchAddress(N
, AM
))
1221 assert (T
== AM
.Segment
);
1224 EVT VT
= N
.getValueType();
1225 unsigned Complexity
= 0;
1226 if (AM
.BaseType
== X86ISelAddressMode::RegBase
)
1227 if (AM
.Base_Reg
.getNode())
1230 AM
.Base_Reg
= CurDAG
->getRegister(0, VT
);
1231 else if (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
)
1234 if (AM
.IndexReg
.getNode())
1237 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1239 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1244 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1245 // to a LEA. This is determined with some expermentation but is by no means
1246 // optimal (especially for code size consideration). LEA is nice because of
1247 // its three-address nature. Tweak the cost function again when we can run
1248 // convertToThreeAddress() at register allocation time.
1249 if (AM
.hasSymbolicDisplacement()) {
1250 // For X86-64, we should always use lea to materialize RIP relative
1252 if (Subtarget
->is64Bit())
1258 if (AM
.Disp
&& (AM
.Base_Reg
.getNode() || AM
.IndexReg
.getNode()))
1261 // If it isn't worth using an LEA, reject it.
1262 if (Complexity
<= 2)
1265 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1269 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1270 bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N
, SDValue
&Base
,
1271 SDValue
&Scale
, SDValue
&Index
,
1272 SDValue
&Disp
, SDValue
&Segment
) {
1273 assert(N
.getOpcode() == ISD::TargetGlobalTLSAddress
);
1274 const GlobalAddressSDNode
*GA
= cast
<GlobalAddressSDNode
>(N
);
1276 X86ISelAddressMode AM
;
1277 AM
.GV
= GA
->getGlobal();
1278 AM
.Disp
+= GA
->getOffset();
1279 AM
.Base_Reg
= CurDAG
->getRegister(0, N
.getValueType());
1280 AM
.SymbolFlags
= GA
->getTargetFlags();
1282 if (N
.getValueType() == MVT::i32
) {
1284 AM
.IndexReg
= CurDAG
->getRegister(X86::EBX
, MVT::i32
);
1286 AM
.IndexReg
= CurDAG
->getRegister(0, MVT::i64
);
1289 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1294 bool X86DAGToDAGISel::TryFoldLoad(SDNode
*P
, SDValue N
,
1295 SDValue
&Base
, SDValue
&Scale
,
1296 SDValue
&Index
, SDValue
&Disp
,
1298 if (!ISD::isNON_EXTLoad(N
.getNode()) ||
1299 !IsProfitableToFold(N
, P
, P
) ||
1300 !IsLegalToFold(N
, P
, P
, OptLevel
))
1303 return SelectAddr(N
.getNode(),
1304 N
.getOperand(1), Base
, Scale
, Index
, Disp
, Segment
);
1307 /// getGlobalBaseReg - Return an SDNode that returns the value of
1308 /// the global base register. Output instructions required to
1309 /// initialize the global base register, if necessary.
1311 SDNode
*X86DAGToDAGISel::getGlobalBaseReg() {
1312 unsigned GlobalBaseReg
= getInstrInfo()->getGlobalBaseReg(MF
);
1313 return CurDAG
->getRegister(GlobalBaseReg
, TLI
.getPointerTy()).getNode();
1316 SDNode
*X86DAGToDAGISel::SelectAtomic64(SDNode
*Node
, unsigned Opc
) {
1317 SDValue Chain
= Node
->getOperand(0);
1318 SDValue In1
= Node
->getOperand(1);
1319 SDValue In2L
= Node
->getOperand(2);
1320 SDValue In2H
= Node
->getOperand(3);
1321 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1322 if (!SelectAddr(Node
, In1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1324 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
1325 MemOp
[0] = cast
<MemSDNode
>(Node
)->getMemOperand();
1326 const SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, In2L
, In2H
, Chain
};
1327 SDNode
*ResNode
= CurDAG
->getMachineNode(Opc
, Node
->getDebugLoc(),
1328 MVT::i32
, MVT::i32
, MVT::Other
, Ops
,
1329 array_lengthof(Ops
));
1330 cast
<MachineSDNode
>(ResNode
)->setMemRefs(MemOp
, MemOp
+ 1);
1334 // FIXME: Figure out some way to unify this with the 'or' and other code
1336 SDNode
*X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode
*Node
, EVT NVT
) {
1337 if (Node
->hasAnyUseOfValue(0))
1340 // Optimize common patterns for __sync_add_and_fetch and
1341 // __sync_sub_and_fetch where the result is not used. This allows us
1342 // to use "lock" version of add, sub, inc, dec instructions.
1343 // FIXME: Do not use special instructions but instead add the "lock"
1344 // prefix to the target node somehow. The extra information will then be
1345 // transferred to machine instruction and it denotes the prefix.
1346 SDValue Chain
= Node
->getOperand(0);
1347 SDValue Ptr
= Node
->getOperand(1);
1348 SDValue Val
= Node
->getOperand(2);
1349 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1350 if (!SelectAddr(Node
, Ptr
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1353 bool isInc
= false, isDec
= false, isSub
= false, isCN
= false;
1354 ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(Val
);
1355 if (CN
&& CN
->getSExtValue() == (int32_t)CN
->getSExtValue()) {
1357 int64_t CNVal
= CN
->getSExtValue();
1360 else if (CNVal
== -1)
1362 else if (CNVal
>= 0)
1363 Val
= CurDAG
->getTargetConstant(CNVal
, NVT
);
1366 Val
= CurDAG
->getTargetConstant(-CNVal
, NVT
);
1368 } else if (Val
.hasOneUse() &&
1369 Val
.getOpcode() == ISD::SUB
&&
1370 X86::isZeroNode(Val
.getOperand(0))) {
1372 Val
= Val
.getOperand(1);
1375 DebugLoc dl
= Node
->getDebugLoc();
1377 switch (NVT
.getSimpleVT().SimpleTy
) {
1381 Opc
= X86::LOCK_INC8m
;
1383 Opc
= X86::LOCK_DEC8m
;
1386 Opc
= X86::LOCK_SUB8mi
;
1388 Opc
= X86::LOCK_SUB8mr
;
1391 Opc
= X86::LOCK_ADD8mi
;
1393 Opc
= X86::LOCK_ADD8mr
;
1398 Opc
= X86::LOCK_INC16m
;
1400 Opc
= X86::LOCK_DEC16m
;
1403 if (immSext8(Val
.getNode()))
1404 Opc
= X86::LOCK_SUB16mi8
;
1406 Opc
= X86::LOCK_SUB16mi
;
1408 Opc
= X86::LOCK_SUB16mr
;
1411 if (immSext8(Val
.getNode()))
1412 Opc
= X86::LOCK_ADD16mi8
;
1414 Opc
= X86::LOCK_ADD16mi
;
1416 Opc
= X86::LOCK_ADD16mr
;
1421 Opc
= X86::LOCK_INC32m
;
1423 Opc
= X86::LOCK_DEC32m
;
1426 if (immSext8(Val
.getNode()))
1427 Opc
= X86::LOCK_SUB32mi8
;
1429 Opc
= X86::LOCK_SUB32mi
;
1431 Opc
= X86::LOCK_SUB32mr
;
1434 if (immSext8(Val
.getNode()))
1435 Opc
= X86::LOCK_ADD32mi8
;
1437 Opc
= X86::LOCK_ADD32mi
;
1439 Opc
= X86::LOCK_ADD32mr
;
1444 Opc
= X86::LOCK_INC64m
;
1446 Opc
= X86::LOCK_DEC64m
;
1448 Opc
= X86::LOCK_SUB64mr
;
1450 if (immSext8(Val
.getNode()))
1451 Opc
= X86::LOCK_SUB64mi8
;
1452 else if (i64immSExt32(Val
.getNode()))
1453 Opc
= X86::LOCK_SUB64mi32
;
1456 Opc
= X86::LOCK_ADD64mr
;
1458 if (immSext8(Val
.getNode()))
1459 Opc
= X86::LOCK_ADD64mi8
;
1460 else if (i64immSExt32(Val
.getNode()))
1461 Opc
= X86::LOCK_ADD64mi32
;
1467 SDValue Undef
= SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,
1469 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
1470 MemOp
[0] = cast
<MemSDNode
>(Node
)->getMemOperand();
1471 if (isInc
|| isDec
) {
1472 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Chain
};
1473 SDValue Ret
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
, 6), 0);
1474 cast
<MachineSDNode
>(Ret
)->setMemRefs(MemOp
, MemOp
+ 1);
1475 SDValue RetVals
[] = { Undef
, Ret
};
1476 return CurDAG
->getMergeValues(RetVals
, 2, dl
).getNode();
1478 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Val
, Chain
};
1479 SDValue Ret
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
, 7), 0);
1480 cast
<MachineSDNode
>(Ret
)->setMemRefs(MemOp
, MemOp
+ 1);
1481 SDValue RetVals
[] = { Undef
, Ret
};
1482 return CurDAG
->getMergeValues(RetVals
, 2, dl
).getNode();
1508 static const unsigned int AtomicOpcTbl
[AtomicOpcEnd
][AtomicSzEnd
] = {
1532 X86::LOCK_AND64mi32
,
1545 X86::LOCK_XOR64mi32
,
1550 SDNode
*X86DAGToDAGISel::SelectAtomicLoadArith(SDNode
*Node
, EVT NVT
) {
1551 if (Node
->hasAnyUseOfValue(0))
1554 // Optimize common patterns for __sync_or_and_fetch and similar arith
1555 // operations where the result is not used. This allows us to use the "lock"
1556 // version of the arithmetic instruction.
1557 // FIXME: Same as for 'add' and 'sub', try to merge those down here.
1558 SDValue Chain
= Node
->getOperand(0);
1559 SDValue Ptr
= Node
->getOperand(1);
1560 SDValue Val
= Node
->getOperand(2);
1561 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1562 if (!SelectAddr(Node
, Ptr
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1565 // Which index into the table.
1567 switch (Node
->getOpcode()) {
1568 case ISD::ATOMIC_LOAD_OR
:
1571 case ISD::ATOMIC_LOAD_AND
:
1574 case ISD::ATOMIC_LOAD_XOR
:
1582 ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(Val
);
1583 if (CN
&& (int32_t)CN
->getSExtValue() == CN
->getSExtValue()) {
1585 Val
= CurDAG
->getTargetConstant(CN
->getSExtValue(), NVT
);
1589 switch (NVT
.getSimpleVT().SimpleTy
) {
1593 Opc
= AtomicOpcTbl
[Op
][ConstantI8
];
1595 Opc
= AtomicOpcTbl
[Op
][I8
];
1599 if (immSext8(Val
.getNode()))
1600 Opc
= AtomicOpcTbl
[Op
][SextConstantI16
];
1602 Opc
= AtomicOpcTbl
[Op
][ConstantI16
];
1604 Opc
= AtomicOpcTbl
[Op
][I16
];
1608 if (immSext8(Val
.getNode()))
1609 Opc
= AtomicOpcTbl
[Op
][SextConstantI32
];
1611 Opc
= AtomicOpcTbl
[Op
][ConstantI32
];
1613 Opc
= AtomicOpcTbl
[Op
][I32
];
1616 Opc
= AtomicOpcTbl
[Op
][I64
];
1618 if (immSext8(Val
.getNode()))
1619 Opc
= AtomicOpcTbl
[Op
][SextConstantI64
];
1620 else if (i64immSExt32(Val
.getNode()))
1621 Opc
= AtomicOpcTbl
[Op
][ConstantI64
];
1626 assert(Opc
!= 0 && "Invalid arith lock transform!");
1628 DebugLoc dl
= Node
->getDebugLoc();
1629 SDValue Undef
= SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,
1631 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
1632 MemOp
[0] = cast
<MemSDNode
>(Node
)->getMemOperand();
1633 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Val
, Chain
};
1634 SDValue Ret
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
, 7), 0);
1635 cast
<MachineSDNode
>(Ret
)->setMemRefs(MemOp
, MemOp
+ 1);
1636 SDValue RetVals
[] = { Undef
, Ret
};
1637 return CurDAG
->getMergeValues(RetVals
, 2, dl
).getNode();
1640 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
1641 /// any uses which require the SF or OF bits to be accurate.
1642 static bool HasNoSignedComparisonUses(SDNode
*N
) {
1643 // Examine each user of the node.
1644 for (SDNode::use_iterator UI
= N
->use_begin(),
1645 UE
= N
->use_end(); UI
!= UE
; ++UI
) {
1646 // Only examine CopyToReg uses.
1647 if (UI
->getOpcode() != ISD::CopyToReg
)
1649 // Only examine CopyToReg uses that copy to EFLAGS.
1650 if (cast
<RegisterSDNode
>(UI
->getOperand(1))->getReg() !=
1653 // Examine each user of the CopyToReg use.
1654 for (SDNode::use_iterator FlagUI
= UI
->use_begin(),
1655 FlagUE
= UI
->use_end(); FlagUI
!= FlagUE
; ++FlagUI
) {
1656 // Only examine the Flag result.
1657 if (FlagUI
.getUse().getResNo() != 1) continue;
1658 // Anything unusual: assume conservatively.
1659 if (!FlagUI
->isMachineOpcode()) return false;
1660 // Examine the opcode of the user.
1661 switch (FlagUI
->getMachineOpcode()) {
1662 // These comparisons don't treat the most significant bit specially.
1663 case X86::SETAr
: case X86::SETAEr
: case X86::SETBr
: case X86::SETBEr
:
1664 case X86::SETEr
: case X86::SETNEr
: case X86::SETPr
: case X86::SETNPr
:
1665 case X86::SETAm
: case X86::SETAEm
: case X86::SETBm
: case X86::SETBEm
:
1666 case X86::SETEm
: case X86::SETNEm
: case X86::SETPm
: case X86::SETNPm
:
1667 case X86::JA_4
: case X86::JAE_4
: case X86::JB_4
: case X86::JBE_4
:
1668 case X86::JE_4
: case X86::JNE_4
: case X86::JP_4
: case X86::JNP_4
:
1669 case X86::CMOVA16rr
: case X86::CMOVA16rm
:
1670 case X86::CMOVA32rr
: case X86::CMOVA32rm
:
1671 case X86::CMOVA64rr
: case X86::CMOVA64rm
:
1672 case X86::CMOVAE16rr
: case X86::CMOVAE16rm
:
1673 case X86::CMOVAE32rr
: case X86::CMOVAE32rm
:
1674 case X86::CMOVAE64rr
: case X86::CMOVAE64rm
:
1675 case X86::CMOVB16rr
: case X86::CMOVB16rm
:
1676 case X86::CMOVB32rr
: case X86::CMOVB32rm
:
1677 case X86::CMOVB64rr
: case X86::CMOVB64rm
:
1678 case X86::CMOVBE16rr
: case X86::CMOVBE16rm
:
1679 case X86::CMOVBE32rr
: case X86::CMOVBE32rm
:
1680 case X86::CMOVBE64rr
: case X86::CMOVBE64rm
:
1681 case X86::CMOVE16rr
: case X86::CMOVE16rm
:
1682 case X86::CMOVE32rr
: case X86::CMOVE32rm
:
1683 case X86::CMOVE64rr
: case X86::CMOVE64rm
:
1684 case X86::CMOVNE16rr
: case X86::CMOVNE16rm
:
1685 case X86::CMOVNE32rr
: case X86::CMOVNE32rm
:
1686 case X86::CMOVNE64rr
: case X86::CMOVNE64rm
:
1687 case X86::CMOVNP16rr
: case X86::CMOVNP16rm
:
1688 case X86::CMOVNP32rr
: case X86::CMOVNP32rm
:
1689 case X86::CMOVNP64rr
: case X86::CMOVNP64rm
:
1690 case X86::CMOVP16rr
: case X86::CMOVP16rm
:
1691 case X86::CMOVP32rr
: case X86::CMOVP32rm
:
1692 case X86::CMOVP64rr
: case X86::CMOVP64rm
:
1694 // Anything else: assume conservatively.
1695 default: return false;
1702 SDNode
*X86DAGToDAGISel::Select(SDNode
*Node
) {
1703 EVT NVT
= Node
->getValueType(0);
1705 unsigned Opcode
= Node
->getOpcode();
1706 DebugLoc dl
= Node
->getDebugLoc();
1708 DEBUG(dbgs() << "Selecting: "; Node
->dump(CurDAG
); dbgs() << '\n');
1710 if (Node
->isMachineOpcode()) {
1711 DEBUG(dbgs() << "== "; Node
->dump(CurDAG
); dbgs() << '\n');
1712 return NULL
; // Already selected.
1717 case X86ISD::GlobalBaseReg
:
1718 return getGlobalBaseReg();
1720 case X86ISD::ATOMOR64_DAG
:
1721 return SelectAtomic64(Node
, X86::ATOMOR6432
);
1722 case X86ISD::ATOMXOR64_DAG
:
1723 return SelectAtomic64(Node
, X86::ATOMXOR6432
);
1724 case X86ISD::ATOMADD64_DAG
:
1725 return SelectAtomic64(Node
, X86::ATOMADD6432
);
1726 case X86ISD::ATOMSUB64_DAG
:
1727 return SelectAtomic64(Node
, X86::ATOMSUB6432
);
1728 case X86ISD::ATOMNAND64_DAG
:
1729 return SelectAtomic64(Node
, X86::ATOMNAND6432
);
1730 case X86ISD::ATOMAND64_DAG
:
1731 return SelectAtomic64(Node
, X86::ATOMAND6432
);
1732 case X86ISD::ATOMSWAP64_DAG
:
1733 return SelectAtomic64(Node
, X86::ATOMSWAP6432
);
1735 case ISD::ATOMIC_LOAD_ADD
: {
1736 SDNode
*RetVal
= SelectAtomicLoadAdd(Node
, NVT
);
1741 case ISD::ATOMIC_LOAD_XOR
:
1742 case ISD::ATOMIC_LOAD_AND
:
1743 case ISD::ATOMIC_LOAD_OR
: {
1744 SDNode
*RetVal
= SelectAtomicLoadArith(Node
, NVT
);
1752 // For operations of the form (x << C1) op C2, check if we can use a smaller
1753 // encoding for C2 by transforming it into (x op (C2>>C1)) << C1.
1754 SDValue N0
= Node
->getOperand(0);
1755 SDValue N1
= Node
->getOperand(1);
1757 if (N0
->getOpcode() != ISD::SHL
|| !N0
->hasOneUse())
1760 // i8 is unshrinkable, i16 should be promoted to i32.
1761 if (NVT
!= MVT::i32
&& NVT
!= MVT::i64
)
1764 ConstantSDNode
*Cst
= dyn_cast
<ConstantSDNode
>(N1
);
1765 ConstantSDNode
*ShlCst
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1));
1766 if (!Cst
|| !ShlCst
)
1769 int64_t Val
= Cst
->getSExtValue();
1770 uint64_t ShlVal
= ShlCst
->getZExtValue();
1772 // Make sure that we don't change the operation by removing bits.
1773 // This only matters for OR and XOR, AND is unaffected.
1774 if (Opcode
!= ISD::AND
&& ((Val
>> ShlVal
) << ShlVal
) != Val
)
1777 unsigned ShlOp
, Op
= 0;
1780 // Check the minimum bitwidth for the new constant.
1781 // TODO: AND32ri is the same as AND64ri32 with zext imm.
1782 // TODO: MOV32ri+OR64r is cheaper than MOV64ri64+OR64rr
1783 // TODO: Using 16 and 8 bit operations is also possible for or32 & xor32.
1784 if (!isInt
<8>(Val
) && isInt
<8>(Val
>> ShlVal
))
1786 else if (!isInt
<32>(Val
) && isInt
<32>(Val
>> ShlVal
))
1789 // Bail if there is no smaller encoding.
1793 switch (NVT
.getSimpleVT().SimpleTy
) {
1794 default: llvm_unreachable("Unsupported VT!");
1796 assert(CstVT
== MVT::i8
);
1797 ShlOp
= X86::SHL32ri
;
1800 case ISD::AND
: Op
= X86::AND32ri8
; break;
1801 case ISD::OR
: Op
= X86::OR32ri8
; break;
1802 case ISD::XOR
: Op
= X86::XOR32ri8
; break;
1806 assert(CstVT
== MVT::i8
|| CstVT
== MVT::i32
);
1807 ShlOp
= X86::SHL64ri
;
1810 case ISD::AND
: Op
= CstVT
==MVT::i8
? X86::AND64ri8
: X86::AND64ri32
; break;
1811 case ISD::OR
: Op
= CstVT
==MVT::i8
? X86::OR64ri8
: X86::OR64ri32
; break;
1812 case ISD::XOR
: Op
= CstVT
==MVT::i8
? X86::XOR64ri8
: X86::XOR64ri32
; break;
1817 // Emit the smaller op and the shift.
1818 SDValue NewCst
= CurDAG
->getTargetConstant(Val
>> ShlVal
, CstVT
);
1819 SDNode
*New
= CurDAG
->getMachineNode(Op
, dl
, NVT
, N0
->getOperand(0),NewCst
);
1820 return CurDAG
->SelectNodeTo(Node
, ShlOp
, NVT
, SDValue(New
, 0),
1824 case X86ISD::UMUL
: {
1825 SDValue N0
= Node
->getOperand(0);
1826 SDValue N1
= Node
->getOperand(1);
1829 switch (NVT
.getSimpleVT().SimpleTy
) {
1830 default: llvm_unreachable("Unsupported VT!");
1831 case MVT::i8
: LoReg
= X86::AL
; Opc
= X86::MUL8r
; break;
1832 case MVT::i16
: LoReg
= X86::AX
; Opc
= X86::MUL16r
; break;
1833 case MVT::i32
: LoReg
= X86::EAX
; Opc
= X86::MUL32r
; break;
1834 case MVT::i64
: LoReg
= X86::RAX
; Opc
= X86::MUL64r
; break;
1837 SDValue InFlag
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, LoReg
,
1838 N0
, SDValue()).getValue(1);
1840 SDVTList VTs
= CurDAG
->getVTList(NVT
, NVT
, MVT::i32
);
1841 SDValue Ops
[] = {N1
, InFlag
};
1842 SDNode
*CNode
= CurDAG
->getMachineNode(Opc
, dl
, VTs
, Ops
, 2);
1844 ReplaceUses(SDValue(Node
, 0), SDValue(CNode
, 0));
1845 ReplaceUses(SDValue(Node
, 1), SDValue(CNode
, 1));
1846 ReplaceUses(SDValue(Node
, 2), SDValue(CNode
, 2));

  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SMUL_LOHI;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::MUL8r;  MOpc = X86::MUL8m;  break;
      case MVT::i16: Opc = X86::MUL16r; MOpc = X86::MUL16m; break;
      case MVT::i32: Opc = X86::MUL32r; MOpc = X86::MUL32m; break;
      case MVT::i64: Opc = X86::MUL64r; MOpc = X86::MUL64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IMUL8r;  MOpc = X86::IMUL8m;  break;
      case MVT::i16: Opc = X86::IMUL16r; MOpc = X86::IMUL16m; break;
      case MVT::i32: Opc = X86::IMUL32r; MOpc = X86::IMUL32m; break;
      case MVT::i64: Opc = X86::IMUL64r; MOpc = X86::IMUL64m; break;
      }
    }

    unsigned LoReg, HiReg;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:  LoReg = X86::AL;  HiReg = X86::AH;  break;
    case MVT::i16: LoReg = X86::AX;  HiReg = X86::DX;  break;
    case MVT::i32: LoReg = X86::EAX; HiReg = X86::EDX; break;
    case MVT::i64: LoReg = X86::RAX; HiReg = X86::RDX; break;
    }
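
    // The widening multiply leaves its double-width product in the HiReg:LoReg
    // pair (e.g. EDX:EAX for i32, AH:AL for i8).  TryFoldLoad below checks
    // whether an operand is a single-use load that can be folded into the
    // memory form of the instruction (MOpc) instead of the register form (Opc).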
    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    // Multiply is commutative.
    if (!foldedLoad) {
      foldedLoad = TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
      if (foldedLoad)
        std::swap(N0, N1);
    }

    SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg,
                                          N0, SDValue()).getValue(1);

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);

      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      SDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag);
      InFlag = SDValue(CNode, 0);
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
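    // With a REX prefix the encodings that would name AH/BH/CH/DH instead
    // refer to SPL/BPL/SIL/DIL, so AH cannot be encoded there; the code below
    // reads the whole 16-bit AX and shifts/extracts the halves it needs.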
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);
      // Get the low part if needed. Don't use getCopyFromReg for aliasing
      // registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX down 8 bits.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                              CurDAG->getTargetConstant(8, MVT::i8)), 0);
      // Then truncate it down to i8.
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the low half of the result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the high half of the result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }

    return NULL;
  }

  case ISD::SDIVREM:
  case ISD::UDIVREM: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    bool isSigned = Opcode == ISD::SDIVREM;
    if (!isSigned) {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::DIV8r;  MOpc = X86::DIV8m;  break;
      case MVT::i16: Opc = X86::DIV16r; MOpc = X86::DIV16m; break;
      case MVT::i32: Opc = X86::DIV32r; MOpc = X86::DIV32m; break;
      case MVT::i64: Opc = X86::DIV64r; MOpc = X86::DIV64m; break;
      }
    } else {
      switch (NVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unsupported VT!");
      case MVT::i8:  Opc = X86::IDIV8r;  MOpc = X86::IDIV8m;  break;
      case MVT::i16: Opc = X86::IDIV16r; MOpc = X86::IDIV16m; break;
      case MVT::i32: Opc = X86::IDIV32r; MOpc = X86::IDIV32m; break;
      case MVT::i64: Opc = X86::IDIV64r; MOpc = X86::IDIV64m; break;
      }
    }

    unsigned LoReg, HiReg, ClrReg;
    unsigned ClrOpcode, SExtOpcode;
    switch (NVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unsupported VT!");
    case MVT::i8:
      LoReg = X86::AL;  ClrReg = HiReg = X86::AH;
      ClrOpcode  = 0;
      SExtOpcode = X86::CBW;
      break;
    case MVT::i16:
      LoReg = X86::AX;  HiReg = X86::DX;
      ClrOpcode  = X86::MOV16r0; ClrReg = X86::DX;
      SExtOpcode = X86::CWD;
      break;
    case MVT::i32:
      LoReg = X86::EAX; ClrReg = HiReg = X86::EDX;
      ClrOpcode  = X86::MOV32r0;
      SExtOpcode = X86::CDQ;
      break;
    case MVT::i64:
      LoReg = X86::RAX; ClrReg = HiReg = X86::RDX;
      ClrOpcode  = X86::MOV64r0;
      SExtOpcode = X86::CQO;
      break;
    }
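
    // Hardware convention: the dividend is taken from HiReg:LoReg (AH:AL for
    // i8, otherwise DX:AX, EDX:EAX or RDX:RAX); DIV/IDIV then leave the
    // quotient in LoReg and the remainder in HiReg.  For signed division the
    // high half is produced by sign-extending LoReg (CBW/CWD/CDQ/CQO); for
    // unsigned division it is simply zeroed.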
    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
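      // (MOVZX into EAX writes zeros to the upper bits, so AH is cleared as a
      // side effect and no separate clearing instruction is needed here.)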
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rm8, dl, MVT::i32,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX32rr8, dl, MVT::i32, N0), 0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::EAX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag), 0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode =
          SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }
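
    // The dividend is now in place (HiReg:LoReg, or zero-extended into EAX for
    // the 8-bit case), so emit the divide itself, folding the divisor from
    // memory when TryFoldLoad succeeded.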
    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
    }

    // Prevent use of AH in a REX instruction by referencing AX instead.
    // Shift it down 8 bits.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);

      // If we also need AL (the quotient), get it by extracting a subreg from
      // Result. The fast register allocator does not like multiple CopyFromReg
      // nodes using aliasing registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX right by 8 bits instead of using AH.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                              CurDAG->getTargetConstant(8, MVT::i8)),
                       0);
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }

    // Copy the division (low) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        HasNoSignedComparisonUses(Node))
      // Look past the truncate if CMP is the only use of it.
      N0 = N0.getOperand(0);
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
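      // Narrowing the TEST changes which bit ends up in SF, so when the
      // immediate's top bit within the narrow width is set (0x80 here) the
      // transform is only done if no user of the flags looks at the sign bit
      // (HasNoSignedComparisonUses).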
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
        TargetRegisterClass *TRC = 0;
        switch (N0.getValueType().getSimpleVT().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                        MVT::i8, Reg);

        // Emit a testb. No special NOREX tricks are needed since there's
        // only one GPR operand!
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                      Subreg, ShiftedImm);
      }

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }

  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o':   // offsetable        ??
  case 'v':   // not offsetable    ??
  default: return true;
  case 'm':   // memory
    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }
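
  // SelectAddr fills in the five standard X86 memory operands (base register,
  // scale, index register, displacement, segment), which are pushed below in
  // that order.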
  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}