1 //===- X86ISelDAGToDAG.cpp - A DAG pattern matching inst selector for X86 -===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file defines a DAG pattern matching instruction selector for X86,
11 // converting from a legalized dag to a X86 dag.
13 //===----------------------------------------------------------------------===//
15 #define DEBUG_TYPE "x86-isel"
17 #include "X86InstrBuilder.h"
18 #include "X86MachineFunctionInfo.h"
19 #include "X86RegisterInfo.h"
20 #include "X86Subtarget.h"
21 #include "X86TargetMachine.h"
22 #include "llvm/Instructions.h"
23 #include "llvm/Intrinsics.h"
24 #include "llvm/Support/CFG.h"
25 #include "llvm/Type.h"
26 #include "llvm/CodeGen/MachineConstantPool.h"
27 #include "llvm/CodeGen/MachineFunction.h"
28 #include "llvm/CodeGen/MachineFrameInfo.h"
29 #include "llvm/CodeGen/MachineInstrBuilder.h"
30 #include "llvm/CodeGen/MachineRegisterInfo.h"
31 #include "llvm/CodeGen/SelectionDAGISel.h"
32 #include "llvm/Target/TargetMachine.h"
33 #include "llvm/Target/TargetOptions.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/ErrorHandling.h"
36 #include "llvm/Support/MathExtras.h"
37 #include "llvm/Support/raw_ostream.h"
38 #include "llvm/ADT/SmallPtrSet.h"
39 #include "llvm/ADT/Statistic.h"
42 STATISTIC(NumLoadMoved
, "Number of loads moved below TokenFactor");
44 //===----------------------------------------------------------------------===//
45 // Pattern Matcher Implementation
46 //===----------------------------------------------------------------------===//
49 /// X86ISelAddressMode - This corresponds to X86AddressMode, but uses
50 /// SDValue's instead of register numbers for the leaves of the matched
52 struct X86ISelAddressMode
{
58 // This is really a union, discriminated by BaseType!
66 const GlobalValue
*GV
;
68 const BlockAddress
*BlockAddr
;
71 unsigned Align
; // CP alignment.
72 unsigned char SymbolFlags
; // X86II::MO_*
75 : BaseType(RegBase
), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0),
76 Segment(), GV(0), CP(0), BlockAddr(0), ES(0), JT(-1), Align(0),
77 SymbolFlags(X86II::MO_NO_FLAG
) {
80 bool hasSymbolicDisplacement() const {
81 return GV
!= 0 || CP
!= 0 || ES
!= 0 || JT
!= -1 || BlockAddr
!= 0;
84 bool hasBaseOrIndexReg() const {
85 return IndexReg
.getNode() != 0 || Base_Reg
.getNode() != 0;
88 /// isRIPRelative - Return true if this addressing mode is already RIP
90 bool isRIPRelative() const {
91 if (BaseType
!= RegBase
) return false;
92 if (RegisterSDNode
*RegNode
=
93 dyn_cast_or_null
<RegisterSDNode
>(Base_Reg
.getNode()))
94 return RegNode
->getReg() == X86::RIP
;
98 void setBaseReg(SDValue Reg
) {
104 dbgs() << "X86ISelAddressMode " << this << '\n';
105 dbgs() << "Base_Reg ";
106 if (Base_Reg
.getNode() != 0)
107 Base_Reg
.getNode()->dump();
110 dbgs() << " Base.FrameIndex " << Base_FrameIndex
<< '\n'
111 << " Scale" << Scale
<< '\n'
113 if (IndexReg
.getNode() != 0)
114 IndexReg
.getNode()->dump();
117 dbgs() << " Disp " << Disp
<< '\n'
134 dbgs() << " JT" << JT
<< " Align" << Align
<< '\n';
140 //===--------------------------------------------------------------------===//
141 /// ISel - X86 specific code to select X86 machine instructions for
142 /// SelectionDAG operations.
144 class X86DAGToDAGISel
: public SelectionDAGISel
{
145 /// X86Lowering - This object fully describes how to lower LLVM code to an
146 /// X86-specific SelectionDAG.
147 const X86TargetLowering
&X86Lowering
;
149 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
150 /// make the right decision when generating code for different targets.
151 const X86Subtarget
*Subtarget
;
153 /// OptForSize - If true, selector should try to optimize for code size
154 /// instead of performance.
158 explicit X86DAGToDAGISel(X86TargetMachine
&tm
, CodeGenOpt::Level OptLevel
)
159 : SelectionDAGISel(tm
, OptLevel
),
160 X86Lowering(*tm
.getTargetLowering()),
161 Subtarget(&tm
.getSubtarget
<X86Subtarget
>()),
164 virtual const char *getPassName() const {
165 return "X86 DAG->DAG Instruction Selection";
168 virtual void EmitFunctionEntryCode();
170 virtual bool IsProfitableToFold(SDValue N
, SDNode
*U
, SDNode
*Root
) const;
172 virtual void PreprocessISelDAG();
174 inline bool immSext8(SDNode
*N
) const {
175 return isInt
<8>(cast
<ConstantSDNode
>(N
)->getSExtValue());
178 // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit
179 // sign extended field.
180 inline bool i64immSExt32(SDNode
*N
) const {
181 uint64_t v
= cast
<ConstantSDNode
>(N
)->getZExtValue();
182 return (int64_t)v
== (int32_t)v
;
185 // Include the pieces autogenerated from the target description.
186 #include "X86GenDAGISel.inc"
189 SDNode
*Select(SDNode
*N
);
190 SDNode
*SelectAtomic64(SDNode
*Node
, unsigned Opc
);
191 SDNode
*SelectAtomicLoadAdd(SDNode
*Node
, EVT NVT
);
193 bool MatchLoadInAddress(LoadSDNode
*N
, X86ISelAddressMode
&AM
);
194 bool MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
);
195 bool MatchAddress(SDValue N
, X86ISelAddressMode
&AM
);
196 bool MatchAddressRecursively(SDValue N
, X86ISelAddressMode
&AM
,
198 bool MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
);
199 bool SelectAddr(SDNode
*Parent
, SDValue N
, SDValue
&Base
,
200 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
202 bool SelectLEAAddr(SDValue N
, SDValue
&Base
,
203 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
205 bool SelectTLSADDRAddr(SDValue N
, SDValue
&Base
,
206 SDValue
&Scale
, SDValue
&Index
, SDValue
&Disp
,
208 bool SelectScalarSSELoad(SDNode
*Root
, SDValue N
,
209 SDValue
&Base
, SDValue
&Scale
,
210 SDValue
&Index
, SDValue
&Disp
,
212 SDValue
&NodeWithChain
);
214 bool TryFoldLoad(SDNode
*P
, SDValue N
,
215 SDValue
&Base
, SDValue
&Scale
,
216 SDValue
&Index
, SDValue
&Disp
,
219 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
220 /// inline asm expressions.
221 virtual bool SelectInlineAsmMemoryOperand(const SDValue
&Op
,
223 std::vector
<SDValue
> &OutOps
);
225 void EmitSpecialCodeForMain(MachineBasicBlock
*BB
, MachineFrameInfo
*MFI
);
227 inline void getAddressOperands(X86ISelAddressMode
&AM
, SDValue
&Base
,
228 SDValue
&Scale
, SDValue
&Index
,
229 SDValue
&Disp
, SDValue
&Segment
) {
230 Base
= (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
) ?
231 CurDAG
->getTargetFrameIndex(AM
.Base_FrameIndex
, TLI
.getPointerTy()) :
233 Scale
= getI8Imm(AM
.Scale
);
235 // These are 32-bit even in 64-bit mode since RIP relative offset
238 Disp
= CurDAG
->getTargetGlobalAddress(AM
.GV
, DebugLoc(),
242 Disp
= CurDAG
->getTargetConstantPool(AM
.CP
, MVT::i32
,
243 AM
.Align
, AM
.Disp
, AM
.SymbolFlags
);
245 Disp
= CurDAG
->getTargetExternalSymbol(AM
.ES
, MVT::i32
, AM
.SymbolFlags
);
246 else if (AM
.JT
!= -1)
247 Disp
= CurDAG
->getTargetJumpTable(AM
.JT
, MVT::i32
, AM
.SymbolFlags
);
248 else if (AM
.BlockAddr
)
249 Disp
= CurDAG
->getBlockAddress(AM
.BlockAddr
, MVT::i32
,
250 true, AM
.SymbolFlags
);
252 Disp
= CurDAG
->getTargetConstant(AM
.Disp
, MVT::i32
);
254 if (AM
.Segment
.getNode())
255 Segment
= AM
.Segment
;
257 Segment
= CurDAG
->getRegister(0, MVT::i32
);
260 /// getI8Imm - Return a target constant with the specified value, of type
262 inline SDValue
getI8Imm(unsigned Imm
) {
263 return CurDAG
->getTargetConstant(Imm
, MVT::i8
);
266 /// getI32Imm - Return a target constant with the specified value, of type
268 inline SDValue
getI32Imm(unsigned Imm
) {
269 return CurDAG
->getTargetConstant(Imm
, MVT::i32
);
272 /// getGlobalBaseReg - Return an SDNode that returns the value of
273 /// the global base register. Output instructions required to
274 /// initialize the global base register, if necessary.
276 SDNode
*getGlobalBaseReg();
278 /// getTargetMachine - Return a reference to the TargetMachine, casted
279 /// to the target-specific type.
280 const X86TargetMachine
&getTargetMachine() {
281 return static_cast<const X86TargetMachine
&>(TM
);
284 /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
285 /// to the target-specific type.
286 const X86InstrInfo
*getInstrInfo() {
287 return getTargetMachine().getInstrInfo();
294 X86DAGToDAGISel::IsProfitableToFold(SDValue N
, SDNode
*U
, SDNode
*Root
) const {
295 if (OptLevel
== CodeGenOpt::None
) return false;
300 if (N
.getOpcode() != ISD::LOAD
)
303 // If N is a load, do additional profitability checks.
305 switch (U
->getOpcode()) {
318 SDValue Op1
= U
->getOperand(1);
320 // If the other operand is a 8-bit immediate we should fold the immediate
321 // instead. This reduces code size.
323 // movl 4(%esp), %eax
327 // addl 4(%esp), %eax
328 // The former is 2 bytes shorter. In case where the increment is 1, then
329 // the saving can be 4 bytes (by using incl %eax).
330 if (ConstantSDNode
*Imm
= dyn_cast
<ConstantSDNode
>(Op1
))
331 if (Imm
->getAPIntValue().isSignedIntN(8))
334 // If the other operand is a TLS address, we should fold it instead.
337 // leal i@NTPOFF(%eax), %eax
339 // movl $i@NTPOFF, %eax
341 // if the block also has an access to a second TLS address this will save
343 // FIXME: This is probably also true for non TLS addresses.
344 if (Op1
.getOpcode() == X86ISD::Wrapper
) {
345 SDValue Val
= Op1
.getOperand(0);
346 if (Val
.getOpcode() == ISD::TargetGlobalTLSAddress
)
356 /// MoveBelowCallOrigChain - Replace the original chain operand of the call with
357 /// load's chain operand and move load below the call's chain operand.
358 static void MoveBelowOrigChain(SelectionDAG
*CurDAG
, SDValue Load
,
359 SDValue Call
, SDValue OrigChain
) {
360 SmallVector
<SDValue
, 8> Ops
;
361 SDValue Chain
= OrigChain
.getOperand(0);
362 if (Chain
.getNode() == Load
.getNode())
363 Ops
.push_back(Load
.getOperand(0));
365 assert(Chain
.getOpcode() == ISD::TokenFactor
&&
366 "Unexpected chain operand");
367 for (unsigned i
= 0, e
= Chain
.getNumOperands(); i
!= e
; ++i
)
368 if (Chain
.getOperand(i
).getNode() == Load
.getNode())
369 Ops
.push_back(Load
.getOperand(0));
371 Ops
.push_back(Chain
.getOperand(i
));
373 CurDAG
->getNode(ISD::TokenFactor
, Load
.getDebugLoc(),
374 MVT::Other
, &Ops
[0], Ops
.size());
376 Ops
.push_back(NewChain
);
378 for (unsigned i
= 1, e
= OrigChain
.getNumOperands(); i
!= e
; ++i
)
379 Ops
.push_back(OrigChain
.getOperand(i
));
380 CurDAG
->UpdateNodeOperands(OrigChain
.getNode(), &Ops
[0], Ops
.size());
381 CurDAG
->UpdateNodeOperands(Load
.getNode(), Call
.getOperand(0),
382 Load
.getOperand(1), Load
.getOperand(2));
384 Ops
.push_back(SDValue(Load
.getNode(), 1));
385 for (unsigned i
= 1, e
= Call
.getNode()->getNumOperands(); i
!= e
; ++i
)
386 Ops
.push_back(Call
.getOperand(i
));
387 CurDAG
->UpdateNodeOperands(Call
.getNode(), &Ops
[0], Ops
.size());
390 /// isCalleeLoad - Return true if call address is a load and it can be
391 /// moved below CALLSEQ_START and the chains leading up to the call.
392 /// Return the CALLSEQ_START by reference as a second output.
393 /// In the case of a tail call, there isn't a callseq node between the call
394 /// chain and the load.
395 static bool isCalleeLoad(SDValue Callee
, SDValue
&Chain
, bool HasCallSeq
) {
396 if (Callee
.getNode() == Chain
.getNode() || !Callee
.hasOneUse())
398 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Callee
.getNode());
401 LD
->getAddressingMode() != ISD::UNINDEXED
||
402 LD
->getExtensionType() != ISD::NON_EXTLOAD
)
405 // Now let's find the callseq_start.
406 while (HasCallSeq
&& Chain
.getOpcode() != ISD::CALLSEQ_START
) {
407 if (!Chain
.hasOneUse())
409 Chain
= Chain
.getOperand(0);
412 if (!Chain
.getNumOperands())
414 if (Chain
.getOperand(0).getNode() == Callee
.getNode())
416 if (Chain
.getOperand(0).getOpcode() == ISD::TokenFactor
&&
417 Callee
.getValue(1).isOperandOf(Chain
.getOperand(0).getNode()) &&
418 Callee
.getValue(1).hasOneUse())
423 void X86DAGToDAGISel::PreprocessISelDAG() {
424 // OptForSize is used in pattern predicates that isel is matching.
425 OptForSize
= MF
->getFunction()->hasFnAttr(Attribute::OptimizeForSize
);
427 for (SelectionDAG::allnodes_iterator I
= CurDAG
->allnodes_begin(),
428 E
= CurDAG
->allnodes_end(); I
!= E
; ) {
429 SDNode
*N
= I
++; // Preincrement iterator to avoid invalidation issues.
431 if (OptLevel
!= CodeGenOpt::None
&&
432 (N
->getOpcode() == X86ISD::CALL
||
433 N
->getOpcode() == X86ISD::TC_RETURN
)) {
434 /// Also try moving call address load from outside callseq_start to just
435 /// before the call to allow it to be folded.
453 bool HasCallSeq
= N
->getOpcode() == X86ISD::CALL
;
454 SDValue Chain
= N
->getOperand(0);
455 SDValue Load
= N
->getOperand(1);
456 if (!isCalleeLoad(Load
, Chain
, HasCallSeq
))
458 MoveBelowOrigChain(CurDAG
, Load
, SDValue(N
, 0), Chain
);
463 // Lower fpround and fpextend nodes that target the FP stack to be store and
464 // load to the stack. This is a gross hack. We would like to simply mark
465 // these as being illegal, but when we do that, legalize produces these when
466 // it expands calls, then expands these in the same legalize pass. We would
467 // like dag combine to be able to hack on these between the call expansion
468 // and the node legalization. As such this pass basically does "really
469 // late" legalization of these inline with the X86 isel pass.
470 // FIXME: This should only happen when not compiled with -O0.
471 if (N
->getOpcode() != ISD::FP_ROUND
&& N
->getOpcode() != ISD::FP_EXTEND
)
474 // If the source and destination are SSE registers, then this is a legal
475 // conversion that should not be lowered.
476 EVT SrcVT
= N
->getOperand(0).getValueType();
477 EVT DstVT
= N
->getValueType(0);
478 bool SrcIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(SrcVT
);
479 bool DstIsSSE
= X86Lowering
.isScalarFPTypeInSSEReg(DstVT
);
480 if (SrcIsSSE
&& DstIsSSE
)
483 if (!SrcIsSSE
&& !DstIsSSE
) {
484 // If this is an FPStack extension, it is a noop.
485 if (N
->getOpcode() == ISD::FP_EXTEND
)
487 // If this is a value-preserving FPStack truncation, it is a noop.
488 if (N
->getConstantOperandVal(1))
492 // Here we could have an FP stack truncation or an FPStack <-> SSE convert.
493 // FPStack has extload and truncstore. SSE can fold direct loads into other
494 // operations. Based on this, decide what we want to do.
496 if (N
->getOpcode() == ISD::FP_ROUND
)
497 MemVT
= DstVT
; // FP_ROUND must use DstVT, we can't do a 'trunc load'.
499 MemVT
= SrcIsSSE
? SrcVT
: DstVT
;
501 SDValue MemTmp
= CurDAG
->CreateStackTemporary(MemVT
);
502 DebugLoc dl
= N
->getDebugLoc();
504 // FIXME: optimize the case where the src/dest is a load or store?
505 SDValue Store
= CurDAG
->getTruncStore(CurDAG
->getEntryNode(), dl
,
507 MemTmp
, MachinePointerInfo(), MemVT
,
509 SDValue Result
= CurDAG
->getExtLoad(ISD::EXTLOAD
, dl
, DstVT
, Store
, MemTmp
,
510 MachinePointerInfo(),
511 MemVT
, false, false, 0);
513 // We're about to replace all uses of the FP_ROUND/FP_EXTEND with the
514 // extload we created. This will cause general havok on the dag because
515 // anything below the conversion could be folded into other existing nodes.
516 // To avoid invalidating 'I', back it up to the convert node.
518 CurDAG
->ReplaceAllUsesOfValueWith(SDValue(N
, 0), Result
);
520 // Now that we did that, the node is dead. Increment the iterator to the
521 // next node to process, then delete N.
523 CurDAG
->DeleteNode(N
);
528 /// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
529 /// the main function.
530 void X86DAGToDAGISel::EmitSpecialCodeForMain(MachineBasicBlock
*BB
,
531 MachineFrameInfo
*MFI
) {
532 const TargetInstrInfo
*TII
= TM
.getInstrInfo();
533 if (Subtarget
->isTargetCygMing()) {
535 Subtarget
->is64Bit() ? X86::WINCALL64pcrel32
: X86::CALLpcrel32
;
536 BuildMI(BB
, DebugLoc(),
537 TII
->get(CallOp
)).addExternalSymbol("__main");
541 void X86DAGToDAGISel::EmitFunctionEntryCode() {
542 // If this is main, emit special code for main.
543 if (const Function
*Fn
= MF
->getFunction())
544 if (Fn
->hasExternalLinkage() && Fn
->getName() == "main")
545 EmitSpecialCodeForMain(MF
->begin(), MF
->getFrameInfo());
549 bool X86DAGToDAGISel::MatchLoadInAddress(LoadSDNode
*N
, X86ISelAddressMode
&AM
){
550 SDValue Address
= N
->getOperand(1);
552 // load gs:0 -> GS segment register.
553 // load fs:0 -> FS segment register.
555 // This optimization is valid because the GNU TLS model defines that
556 // gs:0 (or fs:0 on X86-64) contains its own address.
557 // For more information see http://people.redhat.com/drepper/tls.pdf
558 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Address
))
559 if (C
->getSExtValue() == 0 && AM
.Segment
.getNode() == 0 &&
560 Subtarget
->isTargetELF())
561 switch (N
->getPointerInfo().getAddrSpace()) {
563 AM
.Segment
= CurDAG
->getRegister(X86::GS
, MVT::i16
);
566 AM
.Segment
= CurDAG
->getRegister(X86::FS
, MVT::i16
);
573 /// MatchWrapper - Try to match X86ISD::Wrapper and X86ISD::WrapperRIP nodes
574 /// into an addressing mode. These wrap things that will resolve down into a
575 /// symbol reference. If no match is possible, this returns true, otherwise it
577 bool X86DAGToDAGISel::MatchWrapper(SDValue N
, X86ISelAddressMode
&AM
) {
578 // If the addressing mode already has a symbol as the displacement, we can
579 // never match another symbol.
580 if (AM
.hasSymbolicDisplacement())
583 SDValue N0
= N
.getOperand(0);
584 CodeModel::Model M
= TM
.getCodeModel();
586 // Handle X86-64 rip-relative addresses. We check this before checking direct
587 // folding because RIP is preferable to non-RIP accesses.
588 if (Subtarget
->is64Bit() &&
589 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
590 // they cannot be folded into immediate fields.
591 // FIXME: This can be improved for kernel and other models?
592 (M
== CodeModel::Small
|| M
== CodeModel::Kernel
) &&
593 // Base and index reg must be 0 in order to use %rip as base and lowering
595 !AM
.hasBaseOrIndexReg() && N
.getOpcode() == X86ISD::WrapperRIP
) {
596 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(N0
)) {
597 int64_t Offset
= AM
.Disp
+ G
->getOffset();
598 if (!X86::isOffsetSuitableForCodeModel(Offset
, M
)) return true;
599 AM
.GV
= G
->getGlobal();
601 AM
.SymbolFlags
= G
->getTargetFlags();
602 } else if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(N0
)) {
603 int64_t Offset
= AM
.Disp
+ CP
->getOffset();
604 if (!X86::isOffsetSuitableForCodeModel(Offset
, M
)) return true;
605 AM
.CP
= CP
->getConstVal();
606 AM
.Align
= CP
->getAlignment();
608 AM
.SymbolFlags
= CP
->getTargetFlags();
609 } else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(N0
)) {
610 AM
.ES
= S
->getSymbol();
611 AM
.SymbolFlags
= S
->getTargetFlags();
612 } else if (JumpTableSDNode
*J
= dyn_cast
<JumpTableSDNode
>(N0
)) {
613 AM
.JT
= J
->getIndex();
614 AM
.SymbolFlags
= J
->getTargetFlags();
616 AM
.BlockAddr
= cast
<BlockAddressSDNode
>(N0
)->getBlockAddress();
617 AM
.SymbolFlags
= cast
<BlockAddressSDNode
>(N0
)->getTargetFlags();
620 if (N
.getOpcode() == X86ISD::WrapperRIP
)
621 AM
.setBaseReg(CurDAG
->getRegister(X86::RIP
, MVT::i64
));
625 // Handle the case when globals fit in our immediate field: This is true for
626 // X86-32 always and X86-64 when in -static -mcmodel=small mode. In 64-bit
627 // mode, this results in a non-RIP-relative computation.
628 if (!Subtarget
->is64Bit() ||
629 ((M
== CodeModel::Small
|| M
== CodeModel::Kernel
) &&
630 TM
.getRelocationModel() == Reloc::Static
)) {
631 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(N0
)) {
632 AM
.GV
= G
->getGlobal();
633 AM
.Disp
+= G
->getOffset();
634 AM
.SymbolFlags
= G
->getTargetFlags();
635 } else if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(N0
)) {
636 AM
.CP
= CP
->getConstVal();
637 AM
.Align
= CP
->getAlignment();
638 AM
.Disp
+= CP
->getOffset();
639 AM
.SymbolFlags
= CP
->getTargetFlags();
640 } else if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(N0
)) {
641 AM
.ES
= S
->getSymbol();
642 AM
.SymbolFlags
= S
->getTargetFlags();
643 } else if (JumpTableSDNode
*J
= dyn_cast
<JumpTableSDNode
>(N0
)) {
644 AM
.JT
= J
->getIndex();
645 AM
.SymbolFlags
= J
->getTargetFlags();
647 AM
.BlockAddr
= cast
<BlockAddressSDNode
>(N0
)->getBlockAddress();
648 AM
.SymbolFlags
= cast
<BlockAddressSDNode
>(N0
)->getTargetFlags();
656 /// MatchAddress - Add the specified node to the specified addressing mode,
657 /// returning true if it cannot be done. This just pattern matches for the
659 bool X86DAGToDAGISel::MatchAddress(SDValue N
, X86ISelAddressMode
&AM
) {
660 if (MatchAddressRecursively(N
, AM
, 0))
663 // Post-processing: Convert lea(,%reg,2) to lea(%reg,%reg), which has
664 // a smaller encoding and avoids a scaled-index.
666 AM
.BaseType
== X86ISelAddressMode::RegBase
&&
667 AM
.Base_Reg
.getNode() == 0) {
668 AM
.Base_Reg
= AM
.IndexReg
;
672 // Post-processing: Convert foo to foo(%rip), even in non-PIC mode,
673 // because it has a smaller encoding.
674 // TODO: Which other code models can use this?
675 if (TM
.getCodeModel() == CodeModel::Small
&&
676 Subtarget
->is64Bit() &&
678 AM
.BaseType
== X86ISelAddressMode::RegBase
&&
679 AM
.Base_Reg
.getNode() == 0 &&
680 AM
.IndexReg
.getNode() == 0 &&
681 AM
.SymbolFlags
== X86II::MO_NO_FLAG
&&
682 AM
.hasSymbolicDisplacement())
683 AM
.Base_Reg
= CurDAG
->getRegister(X86::RIP
, MVT::i64
);
688 bool X86DAGToDAGISel::MatchAddressRecursively(SDValue N
, X86ISelAddressMode
&AM
,
690 bool is64Bit
= Subtarget
->is64Bit();
691 DebugLoc dl
= N
.getDebugLoc();
693 dbgs() << "MatchAddress: ";
698 return MatchAddressBase(N
, AM
);
700 CodeModel::Model M
= TM
.getCodeModel();
702 // If this is already a %rip relative address, we can only merge immediates
703 // into it. Instead of handling this in every case, we handle it here.
704 // RIP relative addressing: %rip + 32-bit displacement!
705 if (AM
.isRIPRelative()) {
706 // FIXME: JumpTable and ExternalSymbol address currently don't like
707 // displacements. It isn't very important, but this should be fixed for
709 if (!AM
.ES
&& AM
.JT
!= -1) return true;
711 if (ConstantSDNode
*Cst
= dyn_cast
<ConstantSDNode
>(N
)) {
712 int64_t Val
= AM
.Disp
+ Cst
->getSExtValue();
713 if (X86::isOffsetSuitableForCodeModel(Val
, M
,
714 AM
.hasSymbolicDisplacement())) {
722 switch (N
.getOpcode()) {
724 case ISD::Constant
: {
725 uint64_t Val
= cast
<ConstantSDNode
>(N
)->getSExtValue();
727 X86::isOffsetSuitableForCodeModel(AM
.Disp
+ Val
, M
,
728 AM
.hasSymbolicDisplacement())) {
735 case X86ISD::Wrapper
:
736 case X86ISD::WrapperRIP
:
737 if (!MatchWrapper(N
, AM
))
742 if (!MatchLoadInAddress(cast
<LoadSDNode
>(N
), AM
))
746 case ISD::FrameIndex
:
747 if (AM
.BaseType
== X86ISelAddressMode::RegBase
748 && AM
.Base_Reg
.getNode() == 0) {
749 AM
.BaseType
= X86ISelAddressMode::FrameIndexBase
;
750 AM
.Base_FrameIndex
= cast
<FrameIndexSDNode
>(N
)->getIndex();
756 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1)
760 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1))) {
761 unsigned Val
= CN
->getZExtValue();
762 // Note that we handle x<<1 as (,x,2) rather than (x,x) here so
763 // that the base operand remains free for further matching. If
764 // the base doesn't end up getting used, a post-processing step
765 // in MatchAddress turns (,x,2) into (x,x), which is cheaper.
766 if (Val
== 1 || Val
== 2 || Val
== 3) {
768 SDValue ShVal
= N
.getNode()->getOperand(0);
770 // Okay, we know that we have a scale by now. However, if the scaled
771 // value is an add of something and a constant, we can fold the
772 // constant into the disp field here.
773 if (CurDAG
->isBaseWithConstantOffset(ShVal
)) {
774 AM
.IndexReg
= ShVal
.getNode()->getOperand(0);
775 ConstantSDNode
*AddVal
=
776 cast
<ConstantSDNode
>(ShVal
.getNode()->getOperand(1));
777 uint64_t Disp
= AM
.Disp
+ (AddVal
->getSExtValue() << Val
);
779 X86::isOffsetSuitableForCodeModel(Disp
, M
,
780 AM
.hasSymbolicDisplacement()))
794 // A mul_lohi where we need the low part can be folded as a plain multiply.
795 if (N
.getResNo() != 0) break;
798 case X86ISD::MUL_IMM
:
799 // X*[3,5,9] -> X+X*[2,4,8]
800 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
801 AM
.Base_Reg
.getNode() == 0 &&
802 AM
.IndexReg
.getNode() == 0) {
804 *CN
= dyn_cast
<ConstantSDNode
>(N
.getNode()->getOperand(1)))
805 if (CN
->getZExtValue() == 3 || CN
->getZExtValue() == 5 ||
806 CN
->getZExtValue() == 9) {
807 AM
.Scale
= unsigned(CN
->getZExtValue())-1;
809 SDValue MulVal
= N
.getNode()->getOperand(0);
812 // Okay, we know that we have a scale by now. However, if the scaled
813 // value is an add of something and a constant, we can fold the
814 // constant into the disp field here.
815 if (MulVal
.getNode()->getOpcode() == ISD::ADD
&& MulVal
.hasOneUse() &&
816 isa
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1))) {
817 Reg
= MulVal
.getNode()->getOperand(0);
818 ConstantSDNode
*AddVal
=
819 cast
<ConstantSDNode
>(MulVal
.getNode()->getOperand(1));
820 uint64_t Disp
= AM
.Disp
+ AddVal
->getSExtValue() *
823 X86::isOffsetSuitableForCodeModel(Disp
, M
,
824 AM
.hasSymbolicDisplacement()))
827 Reg
= N
.getNode()->getOperand(0);
829 Reg
= N
.getNode()->getOperand(0);
832 AM
.IndexReg
= AM
.Base_Reg
= Reg
;
839 // Given A-B, if A can be completely folded into the address and
840 // the index field with the index field unused, use -B as the index.
841 // This is a win if a has multiple parts that can be folded into
842 // the address. Also, this saves a mov if the base register has
843 // other uses, since it avoids a two-address sub instruction, however
844 // it costs an additional mov if the index register has other uses.
846 // Add an artificial use to this node so that we can keep track of
847 // it if it gets CSE'd with a different node.
848 HandleSDNode
Handle(N
);
850 // Test if the LHS of the sub can be folded.
851 X86ISelAddressMode Backup
= AM
;
852 if (MatchAddressRecursively(N
.getNode()->getOperand(0), AM
, Depth
+1)) {
856 // Test if the index field is free for use.
857 if (AM
.IndexReg
.getNode() || AM
.isRIPRelative()) {
863 SDValue RHS
= Handle
.getValue().getNode()->getOperand(1);
864 // If the RHS involves a register with multiple uses, this
865 // transformation incurs an extra mov, due to the neg instruction
866 // clobbering its operand.
867 if (!RHS
.getNode()->hasOneUse() ||
868 RHS
.getNode()->getOpcode() == ISD::CopyFromReg
||
869 RHS
.getNode()->getOpcode() == ISD::TRUNCATE
||
870 RHS
.getNode()->getOpcode() == ISD::ANY_EXTEND
||
871 (RHS
.getNode()->getOpcode() == ISD::ZERO_EXTEND
&&
872 RHS
.getNode()->getOperand(0).getValueType() == MVT::i32
))
874 // If the base is a register with multiple uses, this
875 // transformation may save a mov.
876 if ((AM
.BaseType
== X86ISelAddressMode::RegBase
&&
877 AM
.Base_Reg
.getNode() &&
878 !AM
.Base_Reg
.getNode()->hasOneUse()) ||
879 AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
)
881 // If the folded LHS was interesting, this transformation saves
882 // address arithmetic.
883 if ((AM
.hasSymbolicDisplacement() && !Backup
.hasSymbolicDisplacement()) +
884 ((AM
.Disp
!= 0) && (Backup
.Disp
== 0)) +
885 (AM
.Segment
.getNode() && !Backup
.Segment
.getNode()) >= 2)
887 // If it doesn't look like it may be an overall win, don't do it.
893 // Ok, the transformation is legal and appears profitable. Go for it.
894 SDValue Zero
= CurDAG
->getConstant(0, N
.getValueType());
895 SDValue Neg
= CurDAG
->getNode(ISD::SUB
, dl
, N
.getValueType(), Zero
, RHS
);
899 // Insert the new nodes into the topological ordering.
900 if (Zero
.getNode()->getNodeId() == -1 ||
901 Zero
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
902 CurDAG
->RepositionNode(N
.getNode(), Zero
.getNode());
903 Zero
.getNode()->setNodeId(N
.getNode()->getNodeId());
905 if (Neg
.getNode()->getNodeId() == -1 ||
906 Neg
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
907 CurDAG
->RepositionNode(N
.getNode(), Neg
.getNode());
908 Neg
.getNode()->setNodeId(N
.getNode()->getNodeId());
914 // Add an artificial use to this node so that we can keep track of
915 // it if it gets CSE'd with a different node.
916 HandleSDNode
Handle(N
);
918 X86ISelAddressMode Backup
= AM
;
919 if (!MatchAddressRecursively(N
.getOperand(0), AM
, Depth
+1) &&
920 !MatchAddressRecursively(Handle
.getValue().getOperand(1), AM
, Depth
+1))
924 // Try again after commuting the operands.
925 if (!MatchAddressRecursively(Handle
.getValue().getOperand(1), AM
, Depth
+1)&&
926 !MatchAddressRecursively(Handle
.getValue().getOperand(0), AM
, Depth
+1))
930 // If we couldn't fold both operands into the address at the same time,
931 // see if we can just put each operand into a register and fold at least
933 if (AM
.BaseType
== X86ISelAddressMode::RegBase
&&
934 !AM
.Base_Reg
.getNode() &&
935 !AM
.IndexReg
.getNode()) {
936 N
= Handle
.getValue();
937 AM
.Base_Reg
= N
.getOperand(0);
938 AM
.IndexReg
= N
.getOperand(1);
942 N
= Handle
.getValue();
947 // Handle "X | C" as "X + C" iff X is known to have C bits clear.
948 if (CurDAG
->isBaseWithConstantOffset(N
)) {
949 X86ISelAddressMode Backup
= AM
;
950 ConstantSDNode
*CN
= cast
<ConstantSDNode
>(N
.getOperand(1));
951 uint64_t Offset
= CN
->getSExtValue();
953 // Start with the LHS as an addr mode.
954 if (!MatchAddressRecursively(N
.getOperand(0), AM
, Depth
+1) &&
955 // Address could not have picked a GV address for the displacement.
957 // On x86-64, the resultant disp must fit in 32-bits.
959 X86::isOffsetSuitableForCodeModel(AM
.Disp
+ Offset
, M
,
960 AM
.hasSymbolicDisplacement()))) {
969 // Perform some heroic transforms on an and of a constant-count shift
970 // with a constant to enable use of the scaled offset field.
972 SDValue Shift
= N
.getOperand(0);
973 if (Shift
.getNumOperands() != 2) break;
975 // Scale must not be used already.
976 if (AM
.IndexReg
.getNode() != 0 || AM
.Scale
!= 1) break;
978 SDValue X
= Shift
.getOperand(0);
979 ConstantSDNode
*C2
= dyn_cast
<ConstantSDNode
>(N
.getOperand(1));
980 ConstantSDNode
*C1
= dyn_cast
<ConstantSDNode
>(Shift
.getOperand(1));
981 if (!C1
|| !C2
) break;
983 // Handle "(X >> (8-C1)) & C2" as "(X >> 8) & 0xff)" if safe. This
984 // allows us to convert the shift and and into an h-register extract and
986 if (Shift
.getOpcode() == ISD::SRL
&& Shift
.hasOneUse()) {
987 unsigned ScaleLog
= 8 - C1
->getZExtValue();
988 if (ScaleLog
> 0 && ScaleLog
< 4 &&
989 C2
->getZExtValue() == (UINT64_C(0xff) << ScaleLog
)) {
990 SDValue Eight
= CurDAG
->getConstant(8, MVT::i8
);
991 SDValue Mask
= CurDAG
->getConstant(0xff, N
.getValueType());
992 SDValue Srl
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
994 SDValue And
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(),
996 SDValue ShlCount
= CurDAG
->getConstant(ScaleLog
, MVT::i8
);
997 SDValue Shl
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1000 // Insert the new nodes into the topological ordering.
1001 if (Eight
.getNode()->getNodeId() == -1 ||
1002 Eight
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1003 CurDAG
->RepositionNode(X
.getNode(), Eight
.getNode());
1004 Eight
.getNode()->setNodeId(X
.getNode()->getNodeId());
1006 if (Mask
.getNode()->getNodeId() == -1 ||
1007 Mask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1008 CurDAG
->RepositionNode(X
.getNode(), Mask
.getNode());
1009 Mask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1011 if (Srl
.getNode()->getNodeId() == -1 ||
1012 Srl
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1013 CurDAG
->RepositionNode(Shift
.getNode(), Srl
.getNode());
1014 Srl
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1016 if (And
.getNode()->getNodeId() == -1 ||
1017 And
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1018 CurDAG
->RepositionNode(N
.getNode(), And
.getNode());
1019 And
.getNode()->setNodeId(N
.getNode()->getNodeId());
1021 if (ShlCount
.getNode()->getNodeId() == -1 ||
1022 ShlCount
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1023 CurDAG
->RepositionNode(X
.getNode(), ShlCount
.getNode());
1024 ShlCount
.getNode()->setNodeId(N
.getNode()->getNodeId());
1026 if (Shl
.getNode()->getNodeId() == -1 ||
1027 Shl
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1028 CurDAG
->RepositionNode(N
.getNode(), Shl
.getNode());
1029 Shl
.getNode()->setNodeId(N
.getNode()->getNodeId());
1031 CurDAG
->ReplaceAllUsesWith(N
, Shl
);
1033 AM
.Scale
= (1 << ScaleLog
);
1038 // Handle "(X << C1) & C2" as "(X & (C2>>C1)) << C1" if safe and if this
1039 // allows us to fold the shift into this addressing mode.
1040 if (Shift
.getOpcode() != ISD::SHL
) break;
1042 // Not likely to be profitable if either the AND or SHIFT node has more
1043 // than one use (unless all uses are for address computation). Besides,
1044 // isel mechanism requires their node ids to be reused.
1045 if (!N
.hasOneUse() || !Shift
.hasOneUse())
1048 // Verify that the shift amount is something we can fold.
1049 unsigned ShiftCst
= C1
->getZExtValue();
1050 if (ShiftCst
!= 1 && ShiftCst
!= 2 && ShiftCst
!= 3)
1053 // Get the new AND mask, this folds to a constant.
1054 SDValue NewANDMask
= CurDAG
->getNode(ISD::SRL
, dl
, N
.getValueType(),
1055 SDValue(C2
, 0), SDValue(C1
, 0));
1056 SDValue NewAND
= CurDAG
->getNode(ISD::AND
, dl
, N
.getValueType(), X
,
1058 SDValue NewSHIFT
= CurDAG
->getNode(ISD::SHL
, dl
, N
.getValueType(),
1059 NewAND
, SDValue(C1
, 0));
1061 // Insert the new nodes into the topological ordering.
1062 if (C1
->getNodeId() > X
.getNode()->getNodeId()) {
1063 CurDAG
->RepositionNode(X
.getNode(), C1
);
1064 C1
->setNodeId(X
.getNode()->getNodeId());
1066 if (NewANDMask
.getNode()->getNodeId() == -1 ||
1067 NewANDMask
.getNode()->getNodeId() > X
.getNode()->getNodeId()) {
1068 CurDAG
->RepositionNode(X
.getNode(), NewANDMask
.getNode());
1069 NewANDMask
.getNode()->setNodeId(X
.getNode()->getNodeId());
1071 if (NewAND
.getNode()->getNodeId() == -1 ||
1072 NewAND
.getNode()->getNodeId() > Shift
.getNode()->getNodeId()) {
1073 CurDAG
->RepositionNode(Shift
.getNode(), NewAND
.getNode());
1074 NewAND
.getNode()->setNodeId(Shift
.getNode()->getNodeId());
1076 if (NewSHIFT
.getNode()->getNodeId() == -1 ||
1077 NewSHIFT
.getNode()->getNodeId() > N
.getNode()->getNodeId()) {
1078 CurDAG
->RepositionNode(N
.getNode(), NewSHIFT
.getNode());
1079 NewSHIFT
.getNode()->setNodeId(N
.getNode()->getNodeId());
1082 CurDAG
->ReplaceAllUsesWith(N
, NewSHIFT
);
1084 AM
.Scale
= 1 << ShiftCst
;
1085 AM
.IndexReg
= NewAND
;
1090 return MatchAddressBase(N
, AM
);
1093 /// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
1094 /// specified addressing mode without any further recursion.
1095 bool X86DAGToDAGISel::MatchAddressBase(SDValue N
, X86ISelAddressMode
&AM
) {
1096 // Is the base register already occupied?
1097 if (AM
.BaseType
!= X86ISelAddressMode::RegBase
|| AM
.Base_Reg
.getNode()) {
1098 // If so, check to see if the scale index register is set.
1099 if (AM
.IndexReg
.getNode() == 0) {
1105 // Otherwise, we cannot select it.
1109 // Default, generate it as a register.
1110 AM
.BaseType
= X86ISelAddressMode::RegBase
;
1115 /// SelectAddr - returns true if it is able pattern match an addressing mode.
1116 /// It returns the operands which make up the maximal addressing mode it can
1117 /// match by reference.
1119 /// Parent is the parent node of the addr operand that is being matched. It
1120 /// is always a load, store, atomic node, or null. It is only null when
1121 /// checking memory operands for inline asm nodes.
1122 bool X86DAGToDAGISel::SelectAddr(SDNode
*Parent
, SDValue N
, SDValue
&Base
,
1123 SDValue
&Scale
, SDValue
&Index
,
1124 SDValue
&Disp
, SDValue
&Segment
) {
1125 X86ISelAddressMode AM
;
1128 // This list of opcodes are all the nodes that have an "addr:$ptr" operand
1129 // that are not a MemSDNode, and thus don't have proper addrspace info.
1130 Parent
->getOpcode() != ISD::INTRINSIC_W_CHAIN
&& // unaligned loads, fixme
1131 Parent
->getOpcode() != ISD::INTRINSIC_VOID
&& // nontemporal stores
1132 Parent
->getOpcode() != X86ISD::TLSCALL
) { // Fixme
1133 unsigned AddrSpace
=
1134 cast
<MemSDNode
>(Parent
)->getPointerInfo().getAddrSpace();
1135 // AddrSpace 256 -> GS, 257 -> FS.
1136 if (AddrSpace
== 256)
1137 AM
.Segment
= CurDAG
->getRegister(X86::GS
, MVT::i16
);
1138 if (AddrSpace
== 257)
1139 AM
.Segment
= CurDAG
->getRegister(X86::FS
, MVT::i16
);
1142 if (MatchAddress(N
, AM
))
1145 EVT VT
= N
.getValueType();
1146 if (AM
.BaseType
== X86ISelAddressMode::RegBase
) {
1147 if (!AM
.Base_Reg
.getNode())
1148 AM
.Base_Reg
= CurDAG
->getRegister(0, VT
);
1151 if (!AM
.IndexReg
.getNode())
1152 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1154 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1158 /// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
1159 /// match a load whose top elements are either undef or zeros. The load flavor
1160 /// is derived from the type of N, which is either v4f32 or v2f64.
1163 /// PatternChainNode: this is the matched node that has a chain input and
1165 bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode
*Root
,
1166 SDValue N
, SDValue
&Base
,
1167 SDValue
&Scale
, SDValue
&Index
,
1168 SDValue
&Disp
, SDValue
&Segment
,
1169 SDValue
&PatternNodeWithChain
) {
1170 if (N
.getOpcode() == ISD::SCALAR_TO_VECTOR
) {
1171 PatternNodeWithChain
= N
.getOperand(0);
1172 if (ISD::isNON_EXTLoad(PatternNodeWithChain
.getNode()) &&
1173 PatternNodeWithChain
.hasOneUse() &&
1174 IsProfitableToFold(N
.getOperand(0), N
.getNode(), Root
) &&
1175 IsLegalToFold(N
.getOperand(0), N
.getNode(), Root
, OptLevel
)) {
1176 LoadSDNode
*LD
= cast
<LoadSDNode
>(PatternNodeWithChain
);
1177 if (!SelectAddr(LD
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1183 // Also handle the case where we explicitly require zeros in the top
1184 // elements. This is a vector shuffle from the zero vector.
1185 if (N
.getOpcode() == X86ISD::VZEXT_MOVL
&& N
.getNode()->hasOneUse() &&
1186 // Check to see if the top elements are all zeros (or bitcast of zeros).
1187 N
.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR
&&
1188 N
.getOperand(0).getNode()->hasOneUse() &&
1189 ISD::isNON_EXTLoad(N
.getOperand(0).getOperand(0).getNode()) &&
1190 N
.getOperand(0).getOperand(0).hasOneUse() &&
1191 IsProfitableToFold(N
.getOperand(0), N
.getNode(), Root
) &&
1192 IsLegalToFold(N
.getOperand(0), N
.getNode(), Root
, OptLevel
)) {
1193 // Okay, this is a zero extending load. Fold it.
1194 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
.getOperand(0).getOperand(0));
1195 if (!SelectAddr(LD
, LD
->getBasePtr(), Base
, Scale
, Index
, Disp
, Segment
))
1197 PatternNodeWithChain
= SDValue(LD
, 0);
1204 /// SelectLEAAddr - it calls SelectAddr and determines if the maximal addressing
1205 /// mode it matches can be cost effectively emitted as an LEA instruction.
1206 bool X86DAGToDAGISel::SelectLEAAddr(SDValue N
,
1207 SDValue
&Base
, SDValue
&Scale
,
1208 SDValue
&Index
, SDValue
&Disp
,
1210 X86ISelAddressMode AM
;
1212 // Set AM.Segment to prevent MatchAddress from using one. LEA doesn't support
1214 SDValue Copy
= AM
.Segment
;
1215 SDValue T
= CurDAG
->getRegister(0, MVT::i32
);
1217 if (MatchAddress(N
, AM
))
1219 assert (T
== AM
.Segment
);
1222 EVT VT
= N
.getValueType();
1223 unsigned Complexity
= 0;
1224 if (AM
.BaseType
== X86ISelAddressMode::RegBase
)
1225 if (AM
.Base_Reg
.getNode())
1228 AM
.Base_Reg
= CurDAG
->getRegister(0, VT
);
1229 else if (AM
.BaseType
== X86ISelAddressMode::FrameIndexBase
)
1232 if (AM
.IndexReg
.getNode())
1235 AM
.IndexReg
= CurDAG
->getRegister(0, VT
);
1237 // Don't match just leal(,%reg,2). It's cheaper to do addl %reg, %reg, or with
1242 // FIXME: We are artificially lowering the criteria to turn ADD %reg, $GA
1243 // to a LEA. This is determined with some expermentation but is by no means
1244 // optimal (especially for code size consideration). LEA is nice because of
1245 // its three-address nature. Tweak the cost function again when we can run
1246 // convertToThreeAddress() at register allocation time.
1247 if (AM
.hasSymbolicDisplacement()) {
1248 // For X86-64, we should always use lea to materialize RIP relative
1250 if (Subtarget
->is64Bit())
1256 if (AM
.Disp
&& (AM
.Base_Reg
.getNode() || AM
.IndexReg
.getNode()))
1259 // If it isn't worth using an LEA, reject it.
1260 if (Complexity
<= 2)
1263 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1267 /// SelectTLSADDRAddr - This is only run on TargetGlobalTLSAddress nodes.
1268 bool X86DAGToDAGISel::SelectTLSADDRAddr(SDValue N
, SDValue
&Base
,
1269 SDValue
&Scale
, SDValue
&Index
,
1270 SDValue
&Disp
, SDValue
&Segment
) {
1271 assert(N
.getOpcode() == ISD::TargetGlobalTLSAddress
);
1272 const GlobalAddressSDNode
*GA
= cast
<GlobalAddressSDNode
>(N
);
1274 X86ISelAddressMode AM
;
1275 AM
.GV
= GA
->getGlobal();
1276 AM
.Disp
+= GA
->getOffset();
1277 AM
.Base_Reg
= CurDAG
->getRegister(0, N
.getValueType());
1278 AM
.SymbolFlags
= GA
->getTargetFlags();
1280 if (N
.getValueType() == MVT::i32
) {
1282 AM
.IndexReg
= CurDAG
->getRegister(X86::EBX
, MVT::i32
);
1284 AM
.IndexReg
= CurDAG
->getRegister(0, MVT::i64
);
1287 getAddressOperands(AM
, Base
, Scale
, Index
, Disp
, Segment
);
1292 bool X86DAGToDAGISel::TryFoldLoad(SDNode
*P
, SDValue N
,
1293 SDValue
&Base
, SDValue
&Scale
,
1294 SDValue
&Index
, SDValue
&Disp
,
1296 if (!ISD::isNON_EXTLoad(N
.getNode()) ||
1297 !IsProfitableToFold(N
, P
, P
) ||
1298 !IsLegalToFold(N
, P
, P
, OptLevel
))
1301 return SelectAddr(N
.getNode(),
1302 N
.getOperand(1), Base
, Scale
, Index
, Disp
, Segment
);
1305 /// getGlobalBaseReg - Return an SDNode that returns the value of
1306 /// the global base register. Output instructions required to
1307 /// initialize the global base register, if necessary.
1309 SDNode
*X86DAGToDAGISel::getGlobalBaseReg() {
1310 unsigned GlobalBaseReg
= getInstrInfo()->getGlobalBaseReg(MF
);
1311 return CurDAG
->getRegister(GlobalBaseReg
, TLI
.getPointerTy()).getNode();
1314 SDNode
*X86DAGToDAGISel::SelectAtomic64(SDNode
*Node
, unsigned Opc
) {
1315 SDValue Chain
= Node
->getOperand(0);
1316 SDValue In1
= Node
->getOperand(1);
1317 SDValue In2L
= Node
->getOperand(2);
1318 SDValue In2H
= Node
->getOperand(3);
1319 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1320 if (!SelectAddr(Node
, In1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1322 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
1323 MemOp
[0] = cast
<MemSDNode
>(Node
)->getMemOperand();
1324 const SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, In2L
, In2H
, Chain
};
1325 SDNode
*ResNode
= CurDAG
->getMachineNode(Opc
, Node
->getDebugLoc(),
1326 MVT::i32
, MVT::i32
, MVT::Other
, Ops
,
1327 array_lengthof(Ops
));
1328 cast
<MachineSDNode
>(ResNode
)->setMemRefs(MemOp
, MemOp
+ 1);
1332 SDNode
*X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode
*Node
, EVT NVT
) {
1333 if (Node
->hasAnyUseOfValue(0))
1336 // Optimize common patterns for __sync_add_and_fetch and
1337 // __sync_sub_and_fetch where the result is not used. This allows us
1338 // to use "lock" version of add, sub, inc, dec instructions.
1339 // FIXME: Do not use special instructions but instead add the "lock"
1340 // prefix to the target node somehow. The extra information will then be
1341 // transferred to machine instruction and it denotes the prefix.
1342 SDValue Chain
= Node
->getOperand(0);
1343 SDValue Ptr
= Node
->getOperand(1);
1344 SDValue Val
= Node
->getOperand(2);
1345 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1346 if (!SelectAddr(Node
, Ptr
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
))
1349 bool isInc
= false, isDec
= false, isSub
= false, isCN
= false;
1350 ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(Val
);
1353 int64_t CNVal
= CN
->getSExtValue();
1356 else if (CNVal
== -1)
1358 else if (CNVal
>= 0)
1359 Val
= CurDAG
->getTargetConstant(CNVal
, NVT
);
1362 Val
= CurDAG
->getTargetConstant(-CNVal
, NVT
);
1364 } else if (Val
.hasOneUse() &&
1365 Val
.getOpcode() == ISD::SUB
&&
1366 X86::isZeroNode(Val
.getOperand(0))) {
1368 Val
= Val
.getOperand(1);
1372 switch (NVT
.getSimpleVT().SimpleTy
) {
1376 Opc
= X86::LOCK_INC8m
;
1378 Opc
= X86::LOCK_DEC8m
;
1381 Opc
= X86::LOCK_SUB8mi
;
1383 Opc
= X86::LOCK_SUB8mr
;
1386 Opc
= X86::LOCK_ADD8mi
;
1388 Opc
= X86::LOCK_ADD8mr
;
1393 Opc
= X86::LOCK_INC16m
;
1395 Opc
= X86::LOCK_DEC16m
;
1398 if (immSext8(Val
.getNode()))
1399 Opc
= X86::LOCK_SUB16mi8
;
1401 Opc
= X86::LOCK_SUB16mi
;
1403 Opc
= X86::LOCK_SUB16mr
;
1406 if (immSext8(Val
.getNode()))
1407 Opc
= X86::LOCK_ADD16mi8
;
1409 Opc
= X86::LOCK_ADD16mi
;
1411 Opc
= X86::LOCK_ADD16mr
;
1416 Opc
= X86::LOCK_INC32m
;
1418 Opc
= X86::LOCK_DEC32m
;
1421 if (immSext8(Val
.getNode()))
1422 Opc
= X86::LOCK_SUB32mi8
;
1424 Opc
= X86::LOCK_SUB32mi
;
1426 Opc
= X86::LOCK_SUB32mr
;
1429 if (immSext8(Val
.getNode()))
1430 Opc
= X86::LOCK_ADD32mi8
;
1432 Opc
= X86::LOCK_ADD32mi
;
1434 Opc
= X86::LOCK_ADD32mr
;
1439 Opc
= X86::LOCK_INC64m
;
1441 Opc
= X86::LOCK_DEC64m
;
1443 Opc
= X86::LOCK_SUB64mr
;
1445 if (immSext8(Val
.getNode()))
1446 Opc
= X86::LOCK_SUB64mi8
;
1447 else if (i64immSExt32(Val
.getNode()))
1448 Opc
= X86::LOCK_SUB64mi32
;
1451 Opc
= X86::LOCK_ADD64mr
;
1453 if (immSext8(Val
.getNode()))
1454 Opc
= X86::LOCK_ADD64mi8
;
1455 else if (i64immSExt32(Val
.getNode()))
1456 Opc
= X86::LOCK_ADD64mi32
;
1462 DebugLoc dl
= Node
->getDebugLoc();
1463 SDValue Undef
= SDValue(CurDAG
->getMachineNode(TargetOpcode::IMPLICIT_DEF
,
1465 MachineSDNode::mmo_iterator MemOp
= MF
->allocateMemRefsArray(1);
1466 MemOp
[0] = cast
<MemSDNode
>(Node
)->getMemOperand();
1467 if (isInc
|| isDec
) {
1468 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Chain
};
1469 SDValue Ret
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
, 6), 0);
1470 cast
<MachineSDNode
>(Ret
)->setMemRefs(MemOp
, MemOp
+ 1);
1471 SDValue RetVals
[] = { Undef
, Ret
};
1472 return CurDAG
->getMergeValues(RetVals
, 2, dl
).getNode();
1474 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, Val
, Chain
};
1475 SDValue Ret
= SDValue(CurDAG
->getMachineNode(Opc
, dl
, MVT::Other
, Ops
, 7), 0);
1476 cast
<MachineSDNode
>(Ret
)->setMemRefs(MemOp
, MemOp
+ 1);
1477 SDValue RetVals
[] = { Undef
, Ret
};
1478 return CurDAG
->getMergeValues(RetVals
, 2, dl
).getNode();
1482 /// HasNoSignedComparisonUses - Test whether the given X86ISD::CMP node has
1483 /// any uses which require the SF or OF bits to be accurate.
1484 static bool HasNoSignedComparisonUses(SDNode
*N
) {
1485 // Examine each user of the node.
1486 for (SDNode::use_iterator UI
= N
->use_begin(),
1487 UE
= N
->use_end(); UI
!= UE
; ++UI
) {
1488 // Only examine CopyToReg uses.
1489 if (UI
->getOpcode() != ISD::CopyToReg
)
1491 // Only examine CopyToReg uses that copy to EFLAGS.
1492 if (cast
<RegisterSDNode
>(UI
->getOperand(1))->getReg() !=
1495 // Examine each user of the CopyToReg use.
1496 for (SDNode::use_iterator FlagUI
= UI
->use_begin(),
1497 FlagUE
= UI
->use_end(); FlagUI
!= FlagUE
; ++FlagUI
) {
1498 // Only examine the Flag result.
1499 if (FlagUI
.getUse().getResNo() != 1) continue;
1500 // Anything unusual: assume conservatively.
1501 if (!FlagUI
->isMachineOpcode()) return false;
1502 // Examine the opcode of the user.
1503 switch (FlagUI
->getMachineOpcode()) {
1504 // These comparisons don't treat the most significant bit specially.
1505 case X86::SETAr
: case X86::SETAEr
: case X86::SETBr
: case X86::SETBEr
:
1506 case X86::SETEr
: case X86::SETNEr
: case X86::SETPr
: case X86::SETNPr
:
1507 case X86::SETAm
: case X86::SETAEm
: case X86::SETBm
: case X86::SETBEm
:
1508 case X86::SETEm
: case X86::SETNEm
: case X86::SETPm
: case X86::SETNPm
:
1509 case X86::JA_4
: case X86::JAE_4
: case X86::JB_4
: case X86::JBE_4
:
1510 case X86::JE_4
: case X86::JNE_4
: case X86::JP_4
: case X86::JNP_4
:
1511 case X86::CMOVA16rr
: case X86::CMOVA16rm
:
1512 case X86::CMOVA32rr
: case X86::CMOVA32rm
:
1513 case X86::CMOVA64rr
: case X86::CMOVA64rm
:
1514 case X86::CMOVAE16rr
: case X86::CMOVAE16rm
:
1515 case X86::CMOVAE32rr
: case X86::CMOVAE32rm
:
1516 case X86::CMOVAE64rr
: case X86::CMOVAE64rm
:
1517 case X86::CMOVB16rr
: case X86::CMOVB16rm
:
1518 case X86::CMOVB32rr
: case X86::CMOVB32rm
:
1519 case X86::CMOVB64rr
: case X86::CMOVB64rm
:
1520 case X86::CMOVBE16rr
: case X86::CMOVBE16rm
:
1521 case X86::CMOVBE32rr
: case X86::CMOVBE32rm
:
1522 case X86::CMOVBE64rr
: case X86::CMOVBE64rm
:
1523 case X86::CMOVE16rr
: case X86::CMOVE16rm
:
1524 case X86::CMOVE32rr
: case X86::CMOVE32rm
:
1525 case X86::CMOVE64rr
: case X86::CMOVE64rm
:
1526 case X86::CMOVNE16rr
: case X86::CMOVNE16rm
:
1527 case X86::CMOVNE32rr
: case X86::CMOVNE32rm
:
1528 case X86::CMOVNE64rr
: case X86::CMOVNE64rm
:
1529 case X86::CMOVNP16rr
: case X86::CMOVNP16rm
:
1530 case X86::CMOVNP32rr
: case X86::CMOVNP32rm
:
1531 case X86::CMOVNP64rr
: case X86::CMOVNP64rm
:
1532 case X86::CMOVP16rr
: case X86::CMOVP16rm
:
1533 case X86::CMOVP32rr
: case X86::CMOVP32rm
:
1534 case X86::CMOVP64rr
: case X86::CMOVP64rm
:
1536 // Anything else: assume conservatively.
1537 default: return false;
1544 SDNode
*X86DAGToDAGISel::Select(SDNode
*Node
) {
1545 EVT NVT
= Node
->getValueType(0);
1547 unsigned Opcode
= Node
->getOpcode();
1548 DebugLoc dl
= Node
->getDebugLoc();
1550 DEBUG(dbgs() << "Selecting: "; Node
->dump(CurDAG
); dbgs() << '\n');
1552 if (Node
->isMachineOpcode()) {
1553 DEBUG(dbgs() << "== "; Node
->dump(CurDAG
); dbgs() << '\n');
1554 return NULL
; // Already selected.
1559 case X86ISD::GlobalBaseReg
:
1560 return getGlobalBaseReg();
1562 case X86ISD::ATOMOR64_DAG
:
1563 return SelectAtomic64(Node
, X86::ATOMOR6432
);
1564 case X86ISD::ATOMXOR64_DAG
:
1565 return SelectAtomic64(Node
, X86::ATOMXOR6432
);
1566 case X86ISD::ATOMADD64_DAG
:
1567 return SelectAtomic64(Node
, X86::ATOMADD6432
);
1568 case X86ISD::ATOMSUB64_DAG
:
1569 return SelectAtomic64(Node
, X86::ATOMSUB6432
);
1570 case X86ISD::ATOMNAND64_DAG
:
1571 return SelectAtomic64(Node
, X86::ATOMNAND6432
);
1572 case X86ISD::ATOMAND64_DAG
:
1573 return SelectAtomic64(Node
, X86::ATOMAND6432
);
1574 case X86ISD::ATOMSWAP64_DAG
:
1575 return SelectAtomic64(Node
, X86::ATOMSWAP6432
);
1577 case ISD::ATOMIC_LOAD_ADD
: {
1578 SDNode
*RetVal
= SelectAtomicLoadAdd(Node
, NVT
);
1583 case X86ISD::UMUL
: {
1584 SDValue N0
= Node
->getOperand(0);
1585 SDValue N1
= Node
->getOperand(1);
1588 switch (NVT
.getSimpleVT().SimpleTy
) {
1589 default: llvm_unreachable("Unsupported VT!");
1590 case MVT::i8
: LoReg
= X86::AL
; Opc
= X86::MUL8r
; break;
1591 case MVT::i16
: LoReg
= X86::AX
; Opc
= X86::MUL16r
; break;
1592 case MVT::i32
: LoReg
= X86::EAX
; Opc
= X86::MUL32r
; break;
1593 case MVT::i64
: LoReg
= X86::RAX
; Opc
= X86::MUL64r
; break;
1596 SDValue InFlag
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, LoReg
,
1597 N0
, SDValue()).getValue(1);
1599 SDVTList VTs
= CurDAG
->getVTList(NVT
, NVT
, MVT::i32
);
1600 SDValue Ops
[] = {N1
, InFlag
};
1601 SDNode
*CNode
= CurDAG
->getMachineNode(Opc
, dl
, VTs
, Ops
, 2);
1603 ReplaceUses(SDValue(Node
, 0), SDValue(CNode
, 0));
1604 ReplaceUses(SDValue(Node
, 1), SDValue(CNode
, 1));
1605 ReplaceUses(SDValue(Node
, 2), SDValue(CNode
, 2));
1609 case ISD::SMUL_LOHI
:
1610 case ISD::UMUL_LOHI
: {
1611 SDValue N0
= Node
->getOperand(0);
1612 SDValue N1
= Node
->getOperand(1);
1614 bool isSigned
= Opcode
== ISD::SMUL_LOHI
;
1616 switch (NVT
.getSimpleVT().SimpleTy
) {
1617 default: llvm_unreachable("Unsupported VT!");
1618 case MVT::i8
: Opc
= X86::MUL8r
; MOpc
= X86::MUL8m
; break;
1619 case MVT::i16
: Opc
= X86::MUL16r
; MOpc
= X86::MUL16m
; break;
1620 case MVT::i32
: Opc
= X86::MUL32r
; MOpc
= X86::MUL32m
; break;
1621 case MVT::i64
: Opc
= X86::MUL64r
; MOpc
= X86::MUL64m
; break;
1624 switch (NVT
.getSimpleVT().SimpleTy
) {
1625 default: llvm_unreachable("Unsupported VT!");
1626 case MVT::i8
: Opc
= X86::IMUL8r
; MOpc
= X86::IMUL8m
; break;
1627 case MVT::i16
: Opc
= X86::IMUL16r
; MOpc
= X86::IMUL16m
; break;
1628 case MVT::i32
: Opc
= X86::IMUL32r
; MOpc
= X86::IMUL32m
; break;
1629 case MVT::i64
: Opc
= X86::IMUL64r
; MOpc
= X86::IMUL64m
; break;
1633 unsigned LoReg
, HiReg
;
1634 switch (NVT
.getSimpleVT().SimpleTy
) {
1635 default: llvm_unreachable("Unsupported VT!");
1636 case MVT::i8
: LoReg
= X86::AL
; HiReg
= X86::AH
; break;
1637 case MVT::i16
: LoReg
= X86::AX
; HiReg
= X86::DX
; break;
1638 case MVT::i32
: LoReg
= X86::EAX
; HiReg
= X86::EDX
; break;
1639 case MVT::i64
: LoReg
= X86::RAX
; HiReg
= X86::RDX
; break;
1642 SDValue Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
;
1643 bool foldedLoad
= TryFoldLoad(Node
, N1
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1644 // Multiply is commmutative.
1646 foldedLoad
= TryFoldLoad(Node
, N0
, Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
);
1651 SDValue InFlag
= CurDAG
->getCopyToReg(CurDAG
->getEntryNode(), dl
, LoReg
,
1652 N0
, SDValue()).getValue(1);
1655 SDValue Ops
[] = { Tmp0
, Tmp1
, Tmp2
, Tmp3
, Tmp4
, N1
.getOperand(0),
1658 CurDAG
->getMachineNode(MOpc
, dl
, MVT::Other
, MVT::Glue
, Ops
,
1659 array_lengthof(Ops
));
1660 InFlag
= SDValue(CNode
, 1);
1662 // Update the chain.
1663 ReplaceUses(N1
.getValue(1), SDValue(CNode
, 0));
1665 SDNode
*CNode
= CurDAG
->getMachineNode(Opc
, dl
, MVT::Glue
, N1
, InFlag
);
1666 InFlag
= SDValue(CNode
, 0);
1669 // Prevent use of AH in a REX instruction by referencing AX instead.
1670 if (HiReg
== X86::AH
&& Subtarget
->is64Bit() &&
1671 !SDValue(Node
, 1).use_empty()) {
1672 SDValue Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1673 X86::AX
, MVT::i16
, InFlag
);
1674 InFlag
= Result
.getValue(2);
1675 // Get the low part if needed. Don't use getCopyFromReg for aliasing
1677 if (!SDValue(Node
, 0).use_empty())
1678 ReplaceUses(SDValue(Node
, 1),
1679 CurDAG
->getTargetExtractSubreg(X86::sub_8bit
, dl
, MVT::i8
, Result
));
1681 // Shift AX down 8 bits.
1682 Result
= SDValue(CurDAG
->getMachineNode(X86::SHR16ri
, dl
, MVT::i16
,
1684 CurDAG
->getTargetConstant(8, MVT::i8
)), 0);
1685 // Then truncate it down to i8.
1686 ReplaceUses(SDValue(Node
, 1),
1687 CurDAG
->getTargetExtractSubreg(X86::sub_8bit
, dl
, MVT::i8
, Result
));
1689 // Copy the low half of the result, if it is needed.
1690 if (!SDValue(Node
, 0).use_empty()) {
1691 SDValue Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1692 LoReg
, NVT
, InFlag
);
1693 InFlag
= Result
.getValue(2);
1694 ReplaceUses(SDValue(Node
, 0), Result
);
1695 DEBUG(dbgs() << "=> "; Result
.getNode()->dump(CurDAG
); dbgs() << '\n');
1697 // Copy the high half of the result, if it is needed.
1698 if (!SDValue(Node
, 1).use_empty()) {
1699 SDValue Result
= CurDAG
->getCopyFromReg(CurDAG
->getEntryNode(), dl
,
1700 HiReg
, NVT
, InFlag
);
1701 InFlag
= Result
.getValue(2);
1702 ReplaceUses(SDValue(Node
, 1), Result
);
1703 DEBUG(dbgs() << "=> "; Result
.getNode()->dump(CurDAG
); dbgs() << '\n');
1710 case ISD::UDIVREM
: {
1711 SDValue N0
= Node
->getOperand(0);
1712 SDValue N1
= Node
->getOperand(1);
1714 bool isSigned
= Opcode
== ISD::SDIVREM
;
1716 switch (NVT
.getSimpleVT().SimpleTy
) {
1717 default: llvm_unreachable("Unsupported VT!");
1718 case MVT::i8
: Opc
= X86::DIV8r
; MOpc
= X86::DIV8m
; break;
1719 case MVT::i16
: Opc
= X86::DIV16r
; MOpc
= X86::DIV16m
; break;
1720 case MVT::i32
: Opc
= X86::DIV32r
; MOpc
= X86::DIV32m
; break;
1721 case MVT::i64
: Opc
= X86::DIV64r
; MOpc
= X86::DIV64m
; break;
1724 switch (NVT
.getSimpleVT().SimpleTy
) {
1725 default: llvm_unreachable("Unsupported VT!");
1726 case MVT::i8
: Opc
= X86::IDIV8r
; MOpc
= X86::IDIV8m
; break;
1727 case MVT::i16
: Opc
= X86::IDIV16r
; MOpc
= X86::IDIV16m
; break;
1728 case MVT::i32
: Opc
= X86::IDIV32r
; MOpc
= X86::IDIV32m
; break;
1729 case MVT::i64
: Opc
= X86::IDIV64r
; MOpc
= X86::IDIV64m
; break;
1733 unsigned LoReg
, HiReg
, ClrReg
;
1734 unsigned ClrOpcode
, SExtOpcode
;
1735 switch (NVT
.getSimpleVT().SimpleTy
) {
1736 default: llvm_unreachable("Unsupported VT!");
1738 LoReg
= X86::AL
; ClrReg
= HiReg
= X86::AH
;
1740 SExtOpcode
= X86::CBW
;
1743 LoReg
= X86::AX
; HiReg
= X86::DX
;
1744 ClrOpcode
= X86::MOV16r0
; ClrReg
= X86::DX
;
1745 SExtOpcode
= X86::CWD
;
1748 LoReg
= X86::EAX
; ClrReg
= HiReg
= X86::EDX
;
1749 ClrOpcode
= X86::MOV32r0
;
1750 SExtOpcode
= X86::CDQ
;
1753 LoReg
= X86::RAX
; ClrReg
= HiReg
= X86::RDX
;
1754 ClrOpcode
= X86::MOV64r0
;
1755 SExtOpcode
= X86::CQO
;
    SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4;
    bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4);
    bool signBitIsZero = CurDAG->SignBitIsZero(N0);

    SDValue InFlag;
    if (NVT == MVT::i8 && (!isSigned || signBitIsZero)) {
      // Special case for div8, just use a move with zero extension to AX to
      // clear the upper 8 bits (AH).
      SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Move, Chain;
      if (TryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) {
        SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) };
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rm8, dl, MVT::i16,
                                         MVT::Other, Ops,
                                         array_lengthof(Ops)), 0);
        Chain = Move.getValue(1);
        ReplaceUses(N0.getValue(1), Chain);
      } else {
        Move =
          SDValue(CurDAG->getMachineNode(X86::MOVZX16rr8, dl, MVT::i16, N0),0);
        Chain = CurDAG->getEntryNode();
      }
      Chain  = CurDAG->getCopyToReg(Chain, dl, X86::AX, Move, SDValue());
      InFlag = Chain.getValue(1);
    } else {
      InFlag =
        CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl,
                             LoReg, N0, SDValue()).getValue(1);
      if (isSigned && !signBitIsZero) {
        // Sign extend the low part into the high part.
        InFlag =
          SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0);
      } else {
        // Zero out the high part, effectively zero extending the input.
        SDValue ClrNode =
          SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
        InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg,
                                      ClrNode, InFlag).getValue(1);
      }
    }

    if (foldedLoad) {
      SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0),
                        InFlag };
      SDNode *CNode =
        CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops,
                               array_lengthof(Ops));
      InFlag = SDValue(CNode, 1);
      // Update the chain.
      ReplaceUses(N1.getValue(1), SDValue(CNode, 0));
    } else {
      InFlag =
        SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N1, InFlag), 0);
    }
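
    // For reference, a 32-bit unsigned divrem selected through this path ends
    // up roughly as (an illustrative sketch with pre-allocation registers,
    // not the exact node sequence):
    //   copy N0 -> EAX
    //   MOV32r0 -> EDX       ; zero the high half of the dividend
    //   DIV32r  N1           ; quotient -> EAX, remainder -> EDX
    // The CopyFromReg nodes below then read EAX and/or EDX, depending on
    // which results are actually used.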

    // Prevent use of AH in a REX instruction by referencing AX instead.
    // Shift it down 8 bits.
    if (HiReg == X86::AH && Subtarget->is64Bit() &&
        !SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              X86::AX, MVT::i16, InFlag);
      InFlag = Result.getValue(2);

      // If we also need AL (the quotient), get it by extracting a subreg from
      // Result. The fast register allocator does not like multiple CopyFromReg
      // nodes using aliasing registers.
      if (!SDValue(Node, 0).use_empty())
        ReplaceUses(SDValue(Node, 0),
          CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));

      // Shift AX right by 8 bits instead of using AH.
      Result = SDValue(CurDAG->getMachineNode(X86::SHR16ri, dl, MVT::i16,
                                              Result,
                                   CurDAG->getTargetConstant(8, MVT::i8)),
                       0);
      ReplaceUses(SDValue(Node, 1),
        CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result));
    }
    // Copy the division (low) result, if it is needed.
    if (!SDValue(Node, 0).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              LoReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 0), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    // Copy the remainder (high) result, if it is needed.
    if (!SDValue(Node, 1).use_empty()) {
      SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl,
                                              HiReg, NVT, InFlag);
      InFlag = Result.getValue(2);
      ReplaceUses(SDValue(Node, 1), Result);
      DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
    }
    return NULL;
  }

  case X86ISD::CMP: {
    SDValue N0 = Node->getOperand(0);
    SDValue N1 = Node->getOperand(1);

    // Look for (X86cmp (and $op, $imm), 0) and see if we can convert it to
    // use a smaller encoding.
    if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
        HasNoSignedComparisonUses(Node))
      // Look past the truncate if CMP is the only use of it.
      N0 = N0.getOperand(0);
    if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
        N0.getValueType() != MVT::i8 &&
        X86::isZeroNode(N1)) {
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getNode()->getOperand(1));
      if (!C) break;

      // For example, convert "testl %eax, $8" to "testb %al, $8"
      if ((C->getZExtValue() & ~UINT64_C(0xff)) == 0 &&
          (!(C->getZExtValue() & 0x80) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // On x86-32, only the ABCD registers have 8-bit subregisters.
        if (!Subtarget->is64Bit()) {
          TargetRegisterClass *TRC = 0;
          switch (N0.getValueType().getSimpleVT().SimpleTy) {
          case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
          case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
          default: llvm_unreachable("Unsupported TEST operand type!");
          }
          SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
          Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                               Reg.getValueType(), Reg, RC), 0);
        }

        // Extract the l-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl,
                                                        MVT::i8, Reg);

        // Emit a testb.
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32, Subreg, Imm);
      }
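
      // The COPY_TO_REGCLASS above matters on 32-bit targets: without a REX
      // prefix only AL/BL/CL/DL are addressable as 8-bit subregisters, so a
      // value living in, say, %esi cannot simply be tested as %sil. It is
      // first constrained to an ABCD register and the test uses that
      // register's low byte instead.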

      // For example, "testl %eax, $2048" to "testb %ah, $8".
      if ((C->getZExtValue() & ~UINT64_C(0xff00)) == 0 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        // Shift the immediate right by 8 bits.
        SDValue ShiftedImm = CurDAG->getTargetConstant(C->getZExtValue() >> 8,
                                                       MVT::i8);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Put the value in an ABCD register.
        TargetRegisterClass *TRC = 0;
        switch (N0.getValueType().getSimpleVT().SimpleTy) {
        case MVT::i64: TRC = &X86::GR64_ABCDRegClass; break;
        case MVT::i32: TRC = &X86::GR32_ABCDRegClass; break;
        case MVT::i16: TRC = &X86::GR16_ABCDRegClass; break;
        default: llvm_unreachable("Unsupported TEST operand type!");
        }
        SDValue RC = CurDAG->getTargetConstant(TRC->getID(), MVT::i32);
        Reg = SDValue(CurDAG->getMachineNode(X86::COPY_TO_REGCLASS, dl,
                                             Reg.getValueType(), Reg, RC), 0);

        // Extract the h-register.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl,
                                                        MVT::i8, Reg);

        // Emit a testb. No special NOREX tricks are needed since there's
        // only one GPR operand!
        return CurDAG->getMachineNode(X86::TEST8ri, dl, MVT::i32,
                                      Subreg, ShiftedImm);
      }
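
      // Worked example of the transform above: for "testl %eax, $2048" the
      // AND mask is 0x800, which lies entirely in bits 8-15, so the immediate
      // is shifted down to 0x800 >> 8 == 0x8 and the test is done on the
      // h-register, giving "testb %ah, $8".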

      // For example, "testl %eax, $32776" to "testw %ax, $32776".
      if ((C->getZExtValue() & ~UINT64_C(0xffff)) == 0 &&
          N0.getValueType() != MVT::i16 &&
          (!(C->getZExtValue() & 0x8000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i16);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 16-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_16bit, dl,
                                                        MVT::i16, Reg);

        // Emit a testw.
        return CurDAG->getMachineNode(X86::TEST16ri, dl, MVT::i32, Subreg, Imm);
      }

      // For example, "testq %rax, $268468232" to "testl %eax, $268468232".
      if ((C->getZExtValue() & ~UINT64_C(0xffffffff)) == 0 &&
          N0.getValueType() == MVT::i64 &&
          (!(C->getZExtValue() & 0x80000000) ||
           HasNoSignedComparisonUses(Node))) {
        SDValue Imm = CurDAG->getTargetConstant(C->getZExtValue(), MVT::i32);
        SDValue Reg = N0.getNode()->getOperand(0);

        // Extract the 32-bit subregister.
        SDValue Subreg = CurDAG->getTargetExtractSubreg(X86::sub_32bit, dl,
                                                        MVT::i32, Reg);

        // Emit a testl.
        return CurDAG->getMachineNode(X86::TEST32ri, dl, MVT::i32, Subreg, Imm);
      }
    }
    break;
  }
  }
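
  // Everything not handled by the custom cases above falls through to
  // SelectCode, the matcher TableGen generates from the X86 .td patterns.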
  SDNode *ResNode = SelectCode(Node);

  DEBUG(dbgs() << "=> ";
        if (ResNode == NULL || ResNode == Node)
          Node->dump(CurDAG);
        else
          ResNode->dump(CurDAG);
        dbgs() << '\n');

  return ResNode;
}

bool X86DAGToDAGISel::
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
                             std::vector<SDValue> &OutOps) {
  SDValue Op0, Op1, Op2, Op3, Op4;
  switch (ConstraintCode) {
  case 'o': // offsetable        ??
  case 'v': // not offsetable    ??
  default: return true;
  case 'm': // memory
    if (!SelectAddr(0, Op, Op0, Op1, Op2, Op3, Op4))
      return true;
    break;
  }

  OutOps.push_back(Op0);
  OutOps.push_back(Op1);
  OutOps.push_back(Op2);
  OutOps.push_back(Op3);
  OutOps.push_back(Op4);
  return false;
}
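
// The five values pushed back above form the canonical X86 memory-operand
// tuple used throughout this file: base register, scale immediate, index
// register, displacement and segment register, in that order (matching the
// operands that SelectAddr fills in).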

/// createX86ISelDag - This pass converts a legalized DAG into a
/// X86-specific DAG, ready for instruction scheduling.
///
FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM,
                                     llvm::CodeGenOpt::Level OptLevel) {
  return new X86DAGToDAGISel(TM, OptLevel);
}