//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

static cl::opt<bool>
    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
                     cl::desc("Use fast short rep mov in memcpy lowering"));
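
// FSRM ("fast short rep mov", present on Ice Lake and newer cores) makes REP
// MOVSB fast even for small copies; when the flag above is enabled and the
// subtarget has FSRM, EmitTargetCodeForMemcpy below lowers every memcpy to a
// byte-wise REP MOVS.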

bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
  // blocks.  Legalization may introduce new stack temporaries with large
  // alignment requirements.  Fall back to generic code if there are any
  // dynamic stack adjustments (hopefully rare) and the base pointer would
  // conflict if we had to use it.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
    return false;

  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getSubtarget().getRegisterInfo());
  return llvm::is_contained(ClobberSet, TRI->getBaseRegister());
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, Align Alignment, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif

  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();
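
  // Address spaces 256 and up denote segment-relative memory on x86 (256 =
  // GS, 257 = FS, 258 = SS), and string instructions cannot apply a segment
  // override to their (R|E)DI destination, hence the generic fallback.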

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if (Alignment < Align(4) || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName = (ValC && ValC->isNullValue())
            ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
            : nullptr) {
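      // Most targets leave RTLIB::BZERO unset, in which case bzeroName is
      // null and this block is skipped; Darwin, for example, registers a
      // "__bzero" entry point.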
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;
  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
    if (Alignment > Align(2)) {
      // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && Alignment > Align(8)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
    } else if (Alignment == Align(2)) {
      // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
    } else {
      // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
    }
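
    // Illustrative example: a DWORD-aligned memset of the byte 0xAB widens
    // Val to 0xABAB and then 0xABABABAB, so each REP STOSD iteration stores
    // four copies of the byte.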

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }
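
    // E.g. SizeVal = 30 with AVT = MVT::i64 gives Count = 3 (REP STOSQ
    // iterations) and BytesLeft = 6 for the tail memset emitted below.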

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }

  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);
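
  // REP STOS takes its operands implicitly: the store value in AL/AX/EAX/RAX,
  // the iteration count in (R|E)CX and the destination in (R|E)DI; the glued
  // CopyToReg nodes above are what pin those values to the fixed registers.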

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain =
        DAG.getMemset(Chain, dl,
                      DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                  DAG.getConstant(Offset, dl, AddrVT)),
                      Val, DAG.getConstant(BytesLeft, dl, SizeVT), Alignment,
                      isVolatile, false, DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}

/// Emit a single REP MOVS{B,W,D,Q} instruction.
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl, SDValue Chain, SDValue Dst,
                           SDValue Src, SDValue Size, MVT AVT) {
  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
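
  // AVT selects the operand size (i8 -> REP MOVSB, i16 -> REP MOVSW, i32 ->
  // REP MOVSD, i64 -> REP MOVSQ), so Size must be the element count, not the
  // byte count, for any AVT wider than i8.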

  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
}

/// Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Src, uint64_t Size) {
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}

/// Returns the best type to use with repmovs depending on alignment.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
                                 uint64_t Align) {
  assert((Align != 0) && "Align is normalized");
  assert(isPowerOf2_64(Align) && "Align is a power of 2");
  switch (Align) {
  case 1:
    return MVT::i8;
  case 2:
    return MVT::i16;
  case 4:
    return MVT::i32;
  default:
    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
  }
}

/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
/// a constant size memory copy. In some cases where we know REP MOVS is
/// inefficient we return an empty SDValue so the calling code can either
/// generate a load/store sequence or call the runtime memcpy function.
static SDValue emitConstantSizeRepmov(
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
    unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {

  /// TODO: Revisit next line: big copies with ERMSB on march >= haswell are
  /// very efficient.
  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();

  /// If we have enhanced repmovs we use it.
  if (Subtarget.hasERMSB())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
  /// We assume runtime memcpy will do a better job for unaligned copies when
  /// ERMS is not present.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
  const uint64_t BlockCount = Size / BlockBytes;
  const uint64_t BytesLeft = Size % BlockBytes;
  SDValue RepMovs =
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);
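
  // Illustrative example: a 25-byte copy with 8-byte alignment on x86-64 uses
  // BlockType = MVT::i64, giving BlockCount = 3 and BytesLeft = 1.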

  /// RepMov can process the whole length.
  if (BytesLeft == 0)
    return RepMovs;

  assert(BytesLeft && "We have leftover at this point");

  /// In case we optimize for size we use repmovsb even if it's less efficient
  /// so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);
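
  // The leftover is copied with a separate always-inline memcpy; the
  // TokenFactor below joins its chain with the REP MOVS chain, telling the
  // scheduler the two operations are independent.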

  // Handle the last 1 - 7 bytes.
  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  unsigned Offset = Size - BytesLeft;
  EVT DstVT = Dst.getValueType();
  EVT SrcVT = Src.getValueType();
  Results.push_back(DAG.getMemcpy(
      Chain, dl,
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
      DAG.getConstant(BytesLeft, dl, SizeVT), llvm::Align(Align), isVolatile,
      /*AlwaysInline*/ true, /*isTailCall*/ false,
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, Align Alignment, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base registers conflict with our physical registers, use the
  // default lowering.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

  // If enabled and available, use fast short rep mov.
  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
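
  // REP MOVSB takes a plain byte count in (R|E)CX, so the FSRM path above
  // also handles non-constant sizes, unlike the constant-size expansion below.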

  /// Handle constant sizes.
  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
    return emitConstantSizeRepmov(
        DAG, Subtarget, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
        Size.getValueType(), Alignment.value(), isVolatile, AlwaysInline,
        DstPtrInfo, SrcPtrInfo);

  return SDValue();
}