//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
  // blocks. Legalization may introduce new stack temporaries with large
  // alignment requirements. Fall back to generic code if there are any
  // dynamic stack adjustments (hopefully rare) and the base pointer would
  // conflict if we had to use it.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
    return false;

  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getSubtarget().getRegisterInfo());
  unsigned BaseReg = TRI->getBaseRegister();
  for (unsigned R : ClobberSet)
    if (BaseReg == R)
      return true;

  return false;
}

// Represents a cover of a buffer of Size bytes with Count() blocks of type AVT
// (of size UBytes() bytes), as well as how many bytes remain (BytesLeft() is
// always smaller than the block size).
struct RepMovsRepeats {
  RepMovsRepeats(uint64_t Size) : Size(Size) {}

  uint64_t Count() const { return Size / UBytes(); }
  uint64_t BytesLeft() const { return Size % UBytes(); }
  uint64_t UBytes() const { return AVT.getSizeInBits() / 8; }

  const uint64_t Size;
  MVT AVT = MVT::i8;
};
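
// For example, RepMovsRepeats(17) with AVT = MVT::i32 covers the buffer with
// Count() = 4 four-byte blocks and leaves BytesLeft() = 1 trailing byte to be
// handled separately.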

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName = (ValC && ValC->isNullValue())
            ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
            : nullptr) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    switch (Align & 3) {
    case 2:   // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0:   // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default:  // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
      break;
    }
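
    // E.g. for a DWORD-aligned store of the constant byte 0xAB, Val becomes
    // 0xABABABAB (and 0xABABABABABABABAB when QWORD aligned on 64-bit), so
    // each stored element writes several copies of the byte at once.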

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
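
  // At this point AL/AX/EAX/RAX holds the (possibly widened) value, RCX/ECX
  // the element count and RDI/EDI the destination, i.e. the operand
  // convention of the REP STOS instruction this node is lowered to.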

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, dl, AddrVT)),
                          Val,
                          DAG.getConstant(BytesLeft, dl, SizeVT),
                          Align, isVolatile, false,
                          DstPtrInfo.getWithOffset(Offset));
  }
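
  // E.g. a QWORD-aligned, 17-byte memset performs two 8-byte REP_STOS stores
  // and then a 1-byte memset of the remaining byte at offset 16.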

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
  if (!ConstantSize)
    return SDValue();
  RepMovsRepeats Repeats(ConstantSize->getZExtValue());
  if (!AlwaysInline && Repeats.Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  /// If not DWORD aligned, it is more efficient to call the library. However
  /// if calling the library is not allowed (AlwaysInline), then soldier on as
  /// the code generated here is better than the long load-store sequence we
  /// would otherwise get.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 ||
      SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  // If the target has enhanced REPMOVSB, then it's at least as fast to use
  // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
  // BytesLeft.
  if (!Subtarget.hasERMSB() && !(Align & 1)) {
    if (Align & 2)
      // WORD aligned
      Repeats.AVT = MVT::i16;
    else if (Align & 4)
      // DWORD aligned
      Repeats.AVT = MVT::i32;
    else
      // QWORD aligned
      Repeats.AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32;

    if (Repeats.BytesLeft() > 0 &&
        DAG.getMachineFunction().getFunction().optForMinSize()) {
      // When aggressively optimizing for size, avoid generating the code to
      // handle BytesLeft.
      Repeats.AVT = MVT::i8;
    }
  }
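
  // E.g. a DWORD-aligned, 17-byte copy selects Repeats.AVT = MVT::i32, giving
  // four 4-byte REP_MOVS elements plus a 1-byte tail copy; under minsize the
  // AVT is dropped back to i8 so no tail copy is needed.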

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           DAG.getIntPtrConstant(Repeats.Count(), dl), InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RSI : X86::ESI,
                           Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(Repeats.AVT), InFlag };
  SDValue RepMovs = DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
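
  // RCX/ECX now holds the element count, RDI/EDI the destination and RSI/ESI
  // the source, i.e. the operand convention of the REP MOVS instruction this
  // node is lowered to.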

  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  if (Repeats.BytesLeft()) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = Repeats.Size - Repeats.BytesLeft();
    EVT DstVT = Dst.getValueType();
    EVT SrcVT = Src.getValueType();
    EVT SizeVT = Size.getValueType();
    Results.push_back(DAG.getMemcpy(Chain, dl,
                                    DAG.getNode(ISD::ADD, dl, DstVT, Dst,
                                                DAG.getConstant(Offset, dl,
                                                                DstVT)),
                                    DAG.getNode(ISD::ADD, dl, SrcVT, Src,
                                                DAG.getConstant(Offset, dl,
                                                                SrcVT)),
                                    DAG.getConstant(Repeats.BytesLeft(), dl,
                                                    SizeVT),
                                    Align, isVolatile, AlwaysInline, false,
                                    DstPtrInfo.getWithOffset(Offset),
                                    SrcPtrInfo.getWithOffset(Offset)));
  }

  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}