//===-- X86SelectionDAGInfo.cpp - X86 SelectionDAG Info -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the X86SelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86SelectionDAGInfo.h"
#include "X86ISelLowering.h"
#include "X86InstrInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DerivedTypes.h"

using namespace llvm;

#define DEBUG_TYPE "x86-selectiondag-info"

bool X86SelectionDAGInfo::isBaseRegConflictPossible(
    SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
  // We cannot use TRI->hasBasePointer() until *after* we select all basic
  // blocks. Legalization may introduce new stack temporaries with large
  // alignment requirements. Fall back to generic code if there are any
  // dynamic stack adjustments (hopefully rare) and the base pointer would
  // conflict if we had to use it.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
    return false;

  const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
      DAG.getSubtarget().getRegisterInfo());
  unsigned BaseReg = TRI->getBaseRegister();
  for (unsigned R : ClobberSet)
    if (BaseReg == R)
      return true;

  return false;
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Val,
    SDValue Size, unsigned Align, bool isVolatile,
    MachinePointerInfo DstPtrInfo) const {
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

#ifndef NDEBUG
  // If the base register might conflict with our physical registers, bail out.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RAX, X86::RDI,
                                  X86::ECX, X86::EAX, X86::EDI};
  assert(!isBaseRegConflictPossible(DAG, ClobberSet));
#endif
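
  // Address spaces 256 and above are used for segment-relative accesses
  // (e.g. GS/FS) on x86.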
  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If not DWORD aligned or size is more than the threshold, call the library.
  // The libc version is likely to be faster for these cases. It can use the
  // address value and run time information about the CPU.
  if ((Align & 3) != 0 || !ConstantSize ||
      ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) {
    // Check to see if there is a specialized entry-point for memory zeroing.
    ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);

    if (const char *bzeroName =
            (ValC && ValC->isNullValue())
                ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO)
                : nullptr) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
      Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
      TargetLowering::ArgListTy Args;
      TargetLowering::ArgListEntry Entry;
      Entry.Node = Dst;
      Entry.Ty = IntPtrTy;
      Args.push_back(Entry);
      Entry.Node = Size;
      Args.push_back(Entry);

      TargetLowering::CallLoweringInfo CLI(DAG);
      CLI.setDebugLoc(dl)
          .setChain(Chain)
          .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()),
                        DAG.getExternalSymbol(bzeroName, IntPtr),
                        std::move(Args))
          .setDiscardResult();

      std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
      return CallResult.second;
    }

    // Otherwise have the target-independent code call memset.
    return SDValue();
  }

  uint64_t SizeVal = ConstantSize->getZExtValue();
  SDValue InFlag;
  EVT AVT;
  SDValue Count;
  ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val);
  unsigned BytesLeft = 0;

  if (ValC) {
    unsigned ValReg;
    uint64_t Val = ValC->getZExtValue() & 255;

    // If the value is a constant, then we can potentially use larger sets.
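    // Replicate the byte so the wider REP STOS forms can store it, e.g.
    // 0xAB -> 0xABAB (i16) -> 0xABABABAB (i32) -> 0xABABABABABABABAB (i64).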
    switch (Align & 3) {
    case 2: // WORD aligned
      AVT = MVT::i16;
      ValReg = X86::AX;
      Val = (Val << 8) | Val;
      break;
    case 0: // DWORD aligned
      AVT = MVT::i32;
      ValReg = X86::EAX;
      Val = (Val << 8) | Val;
      Val = (Val << 16) | Val;
      if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned
        AVT = MVT::i64;
        ValReg = X86::RAX;
        Val = (Val << 32) | Val;
      }
      break;
    default: // Byte aligned
      AVT = MVT::i8;
      ValReg = X86::AL;
      Count = DAG.getIntPtrConstant(SizeVal, dl);
      break;
    }

    if (AVT.bitsGT(MVT::i8)) {
      unsigned UBytes = AVT.getSizeInBits() / 8;
      Count = DAG.getIntPtrConstant(SizeVal / UBytes, dl);
      BytesLeft = SizeVal % UBytes;
    }

    Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT),
                             InFlag);
    InFlag = Chain.getValue(1);
  } else {
    AVT = MVT::i8;
    Count = DAG.getIntPtrConstant(SizeVal, dl);
    Chain = DAG.getCopyToReg(Chain, dl, X86::AL, Val, InFlag);
    InFlag = Chain.getValue(1);
  }
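
  // REP STOS writes the value in AL/AX/EAX/RAX to [E/R]DI, [E/R]CX times.
  // Materialize the count and destination in those registers, glued together
  // so the register setup stays adjacent to the REP_STOS node.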
  bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RCX : X86::ECX,
                           Count, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, Use64BitRegs ? X86::RDI : X86::EDI,
                           Dst, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = { Chain, DAG.getValueType(AVT), InFlag };
  Chain = DAG.getNode(X86ISD::REP_STOS, dl, Tys, Ops);

  if (BytesLeft) {
    // Handle the last 1 - 7 bytes.
    unsigned Offset = SizeVal - BytesLeft;
    EVT AddrVT = Dst.getValueType();
    EVT SizeVT = Size.getValueType();

    Chain = DAG.getMemset(Chain, dl,
                          DAG.getNode(ISD::ADD, dl, AddrVT, Dst,
                                      DAG.getConstant(Offset, dl, AddrVT)),
                          Val,
                          DAG.getConstant(BytesLeft, dl, SizeVT),
                          Align, isVolatile, false,
                          DstPtrInfo.getWithOffset(Offset));
  }

  // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain.
  return Chain;
}

/// Emit a single REP MOVS{B,W,D,Q} instruction.
static SDValue emitRepmovs(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl, SDValue Chain, SDValue Dst,
                           SDValue Src, SDValue Size, MVT AVT) {
  const bool Use64BitRegs = Subtarget.isTarget64BitLP64();
  const unsigned CX = Use64BitRegs ? X86::RCX : X86::ECX;
  const unsigned DI = Use64BitRegs ? X86::RDI : X86::EDI;
  const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
  SDValue InFlag;
  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
  InFlag = Chain.getValue(1);
  Chain = DAG.getCopyToReg(Chain, dl, SI, Src, InFlag);
  InFlag = Chain.getValue(1);

  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue Ops[] = {Chain, DAG.getValueType(AVT), InFlag};
  return DAG.getNode(X86ISD::REP_MOVS, dl, Tys, Ops);
}

/// Emit a single REP MOVSB instruction for a particular constant size.
static SDValue emitRepmovsB(const X86Subtarget &Subtarget, SelectionDAG &DAG,
                            const SDLoc &dl, SDValue Chain, SDValue Dst,
                            SDValue Src, uint64_t Size) {
  return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                     DAG.getIntPtrConstant(Size, dl), MVT::i8);
}

/// Returns the best type to use with repmovs depending on alignment.
static MVT getOptimalRepmovsType(const X86Subtarget &Subtarget,
                                 uint64_t Align) {
  assert((Align != 0) && "Align is normalized");
  assert(isPowerOf2_64(Align) && "Align is a power of 2");
  switch (Align) {
  case 1:
    return MVT::i8;
  case 2:
    return MVT::i16;
  case 4:
    return MVT::i32;
  default:
    return Subtarget.is64Bit() ? MVT::i64 : MVT::i32;
  }
}

/// Returns a REP MOVS instruction, possibly with a few load/stores to implement
/// a constant size memory copy. In some cases where we know REP MOVS is
/// inefficient we return an empty SDValue so the calling code can either
/// generate a load/store sequence or call the runtime memcpy function.
static SDValue emitConstantSizeRepmov(
    SelectionDAG &DAG, const X86Subtarget &Subtarget, const SDLoc &dl,
    SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, EVT SizeVT,
    unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) {

  /// TODO: Revisit next line: big copies with ERMSB on march >= haswell are
  /// very efficient.
  if (!AlwaysInline && Size > Subtarget.getMaxInlineSizeThreshold())
    return SDValue();
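
  // ERMSB ("Enhanced REP MOVSB/STOSB") makes REP MOVSB at least as fast as
  // the wider REP MOVS forms, so a plain byte copy suffices and no block-size
  // selection is needed.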
  /// If we have enhanced repmovs we use it.
  if (Subtarget.hasERMSB())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  assert(!Subtarget.hasERMSB() && "No efficient RepMovs");
  /// We assume runtime memcpy will do a better job for unaligned copies when
  /// ERMS is not present.
  if (!AlwaysInline && (Align & 3) != 0)
    return SDValue();

  const MVT BlockType = getOptimalRepmovsType(Subtarget, Align);
  const uint64_t BlockBytes = BlockType.getSizeInBits() / 8;
  const uint64_t BlockCount = Size / BlockBytes;
  const uint64_t BytesLeft = Size % BlockBytes;
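  // For example, a 30-byte copy with an 8-byte block type becomes 3 REP MOVSQ
  // iterations (24 bytes) plus 6 leftover bytes handled below.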
  SDValue RepMovs =
      emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src,
                  DAG.getIntPtrConstant(BlockCount, dl), BlockType);

  /// RepMov can process the whole length.
  if (BytesLeft == 0)
    return RepMovs;

  assert(BytesLeft && "We have leftover at this point");

  /// In case we optimize for size we use repmovsb even if it's less efficient
  /// so we can save the loads/stores of the leftover.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return emitRepmovsB(Subtarget, DAG, dl, Chain, Dst, Src, Size);

  // Handle the last 1 - 7 bytes.
  SmallVector<SDValue, 4> Results;
  Results.push_back(RepMovs);
  unsigned Offset = Size - BytesLeft;
  EVT DstVT = Dst.getValueType();
  EVT SrcVT = Src.getValueType();
  Results.push_back(DAG.getMemcpy(
      Chain, dl,
      DAG.getNode(ISD::ADD, dl, DstVT, Dst, DAG.getConstant(Offset, dl, DstVT)),
      DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, dl, SrcVT)),
      DAG.getConstant(BytesLeft, dl, SizeVT), Align, isVolatile,
      /*AlwaysInline*/ true, /*isTailCall*/ false,
      DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset)));
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Results);
}

SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy(
    SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
    SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
  // If to a segment-relative address space, use the default lowering.
  if (DstPtrInfo.getAddrSpace() >= 256 || SrcPtrInfo.getAddrSpace() >= 256)
    return SDValue();

  // If the base register might conflict with our physical registers, use the
  // default lowering.
  const MCPhysReg ClobberSet[] = {X86::RCX, X86::RSI, X86::RDI,
                                  X86::ECX, X86::ESI, X86::EDI};
  if (isBaseRegConflictPossible(DAG, ClobberSet))
    return SDValue();

  const X86Subtarget &Subtarget =
      DAG.getMachineFunction().getSubtarget<X86Subtarget>();

  /// Handle constant sizes.
  if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
    return emitConstantSizeRepmov(DAG, Subtarget, dl, Chain, Dst, Src,
                                  ConstantSize->getZExtValue(),
                                  Size.getValueType(), Align, isVolatile,
                                  AlwaysInline, DstPtrInfo, SrcPtrInfo);

  // Variable-size copies fall back to the generic lowering (load/store
  // expansion or a memcpy libcall).
  return SDValue();
}