//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

static cl::opt<bool>
    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
                     cl::desc("Use fast short rep mov in memcpy lowering"));

bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
  // blocks. Legalization may introduce new stack temporaries with large
  // alignment requirements. Fall back to generic code if there are any
  // dynamic stack adjustments (hopefully rare) and the base pointer would
  // conflict if we had to use it.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
    return false;

  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getSubtarget().getRegisterInfo());
  return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if (Alignment < Align(4) || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InGlue;
  EVT AVT;
  SDValue Count;
  unsigned BytesLeft = 0;
  if (auto *ValC = dyn_cast<ConstantSDNode>(Val)) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    if (Alignment > Align(2)) {
      // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
    } else if (Alignment == Align(2)) {
      // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
    } else { // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
    }
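
    // With a constant fill byte, the byte is splatted across the store unit:
    // e.g. memset(p, 0xAB, n) with DWORD stores writes the 32-bit pattern
    // 0xABABABAB on every REP STOSD iteration.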
    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }
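
    // Count is now in AVT-sized units; any SizeVal % UBytes remainder is
    // recorded in BytesLeft and stored by the trailing memset further down.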
    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InGlue);
    InGlue = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InGlue);
    InGlue = Chain.getValue(1);
  }
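
  // REP STOS expects the store value in AL/AX/EAX/RAX (set on both paths
  // above), the element count in (R)CX, and the destination in (R)DI; the
  // glue keeps these register copies pinned to the REP_STOS node.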
  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InGlue);
  InGlue = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
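
  // E.g. a 13-byte DWORD-aligned memset becomes REP STOSD with Count = 3
  // plus a 1-byte tail handled by the regular memset below.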
  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain =
        DAG.getMemset(Chain, dl,
                      DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                  DAG.getConstant(Offset, dl, AddrVT)),
                      Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
                      isVolatile, AlwaysInline,
                      /* isTailCall */ false, DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a TokenFactor, as in memcpy, instead of a single chain.
  return Chain;
}

/// Emit a single REP MOVS{B,W,D,Q} instruction.
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl, SDValue Chain, SDValue Dst,
                           SDValue Src, SDValue Size, MVT AVT) {
  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
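
  // REP MOVS register convention: element count in (R)CX, destination in
  // (R)DI, source in (R)SI.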
  SDValue InGlue;
  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InGlue);
  InGlue = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InGlue);
  InGlue = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InGlue};
  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
}

/// Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Src, uint64_t Size) {
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}

/// Returns the best type to use with repmovs depending on alignment.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
                                 Align Alignment) {
  uint64_t Align = Alignment.value();
  assert((Align != 0) && "Align is normalized");
  assert(isPowerOf2_64(Align) && "Align is a power of 2");
  switch (Align) {
  case 1:
    return MVT::i8;
  case 2:
    return MVT::i16;
  case 4:
    return MVT::i32;
  default:
    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
  }
}
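
// E.g. with 8-byte or stronger alignment on x86-64 this picks MVT::i64
// (REP MOVSQ); with 2-byte alignment it picks MVT::i16 (REP MOVSW).
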
/// Returns a REP MOVS instruction, possibly with a few load/stores to
/// implement a constant size memory copy. In some cases where we know REP
/// MOVS is inefficient we return an empty SDValue so the calling code can
/// either generate a load/store sequence or call the runtime memcpy function.
static SDValue emitConstantSizeRepmov(
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
    Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {

  /// TODO: Revisit next line: big copies with ERMSB on march >= haswell are
  /// very efficient.
  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();
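
  // ERMSB (Enhanced REP MOVSB/STOSB) parts run byte-granular REP MOVSB as
  // fast as the wider element forms, so the exact byte count can be copied
  // with no block-size computation and no leftover handling.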
  /// If we have enhanced repmovs we use it.
  if (Subtarget.hasERMSB())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
  /// We assume runtime memcpy will do a better job for unaligned copies when
  /// ERMS is not present.
  if (!AlwaysInline && (Alignment.value() & 3) != 0)
    return SDValue();
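
  // Copy in the widest unit the alignment allows: e.g. a 30-byte copy with
  // 8-byte alignment on x86-64 yields BlockCount = 3 REP MOVSQ iterations
  // with BytesLeft = 6 finished by the inline memcpy below.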
  const MVT BlockType = getOptimalRepmovsType(Subtarget, Alignment);
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
  const uint64_t BlockCount = Size / BlockBytes;
  const uint64_t BytesLeft = Size % BlockBytes;
  SDValue RepMovs =
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);

  /// RepMovs can process the whole length.
  if (BytesLeft == 0)
    return RepMovs;

  assert(BytesLeft && "We have leftover at this point");

  /// In case we optimize for size, we use repmovsb even if it's less efficient
  /// so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  // Handle the last 1 - 7 bytes.
  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  unsigned Offset = Size - BytesLeft;
  EVT DstVT = Dst.getValueType();
  EVT SrcVT = Src.getValueType();
  Results.push_back(DAG.getMemcpy(
      Chain, dl,
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
      DAG.getConstant(BytesLeft, dl, SizeVT), Alignment, isVolatile,
      /*AlwaysInline*/ true, /*isTailCall*/ false,
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base registers conflict with our physical registers, use the
  // default lowering.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();
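
  // FSRM ("fast short REP MOV") means REP MOVSB is efficient even for small
  // counts, so a single byte-wise REP MOVS can lower memcpy without a
  // compile-time-known size.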
  // If enabled and available, use fast short rep mov.
  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);

  /// Handle constant sizes.
  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
                                  ConstantSize->getZExtValue(),
                                  Size.getValueType(), Alignment, isVolatile,
                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);

  return SDValue();
}