1 //===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This implements the TargetLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/CodeGen/TargetLowering.h"
14 #include "llvm/ADT/BitVector.h"
15 #include "llvm/ADT/STLExtras.h"
16 #include "llvm/CodeGen/CallingConvLower.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineFunction.h"
19 #include "llvm/CodeGen/MachineJumpTableInfo.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/SelectionDAG.h"
22 #include "llvm/CodeGen/TargetRegisterInfo.h"
23 #include "llvm/CodeGen/TargetSubtargetInfo.h"
24 #include "llvm/IR/DataLayout.h"
25 #include "llvm/IR/DerivedTypes.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/LLVMContext.h"
28 #include "llvm/MC/MCAsmInfo.h"
29 #include "llvm/MC/MCExpr.h"
30 #include "llvm/Support/ErrorHandling.h"
31 #include "llvm/Support/KnownBits.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/Target/TargetLoweringObjectFile.h"
34 #include "llvm/Target/TargetMachine.h"
38 /// NOTE: The TargetMachine owns TLOF.
39 TargetLowering::TargetLowering(const TargetMachine
&tm
)
40 : TargetLoweringBase(tm
) {}
42 const char *TargetLowering::getTargetNodeName(unsigned Opcode
) const {
46 bool TargetLowering::isPositionIndependent() const {
47 return getTargetMachine().isPositionIndependent();
50 /// Check whether a given call node is in tail position within its function. If
51 /// so, it sets Chain to the input chain of the tail call.
52 bool TargetLowering::isInTailCallPosition(SelectionDAG
&DAG
, SDNode
*Node
,
53 SDValue
&Chain
) const {
54 const Function
&F
= DAG
.getMachineFunction().getFunction();
56 // Conservatively require the attributes of the call to match those of
57 // the return. Ignore NoAlias and NonNull because they don't affect the
59 AttributeList CallerAttrs
= F
.getAttributes();
60 if (AttrBuilder(CallerAttrs
, AttributeList::ReturnIndex
)
61 .removeAttribute(Attribute::NoAlias
)
62 .removeAttribute(Attribute::NonNull
)
66 // It's not safe to eliminate the sign / zero extension of the return value.
67 if (CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::ZExt
) ||
68 CallerAttrs
.hasAttribute(AttributeList::ReturnIndex
, Attribute::SExt
))
71 // Check if the only use is a function return node.
72 return isUsedByReturnOnly(Node
, Chain
);
75 bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo
&MRI
,
76 const uint32_t *CallerPreservedMask
,
77 const SmallVectorImpl
<CCValAssign
> &ArgLocs
,
78 const SmallVectorImpl
<SDValue
> &OutVals
) const {
79 for (unsigned I
= 0, E
= ArgLocs
.size(); I
!= E
; ++I
) {
80 const CCValAssign
&ArgLoc
= ArgLocs
[I
];
81 if (!ArgLoc
.isRegLoc())
83 unsigned Reg
= ArgLoc
.getLocReg();
84 // Only look at callee saved registers.
85 if (MachineOperand::clobbersPhysReg(CallerPreservedMask
, Reg
))
87 // Check that we pass the value used for the caller.
88 // (We look for a CopyFromReg reading a virtual register that is used
89 // for the function live-in value of register Reg)
90 SDValue Value
= OutVals
[I
];
91 if (Value
->getOpcode() != ISD::CopyFromReg
)
93 unsigned ArgReg
= cast
<RegisterSDNode
>(Value
->getOperand(1))->getReg();
94 if (MRI
.getLiveInPhysReg(ArgReg
) != Reg
)
100 /// Set CallLoweringInfo attribute flags based on a call instruction
101 /// and called function attributes.
102 void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase
*Call
,
104 IsSExt
= Call
->paramHasAttr(ArgIdx
, Attribute::SExt
);
105 IsZExt
= Call
->paramHasAttr(ArgIdx
, Attribute::ZExt
);
106 IsInReg
= Call
->paramHasAttr(ArgIdx
, Attribute::InReg
);
107 IsSRet
= Call
->paramHasAttr(ArgIdx
, Attribute::StructRet
);
108 IsNest
= Call
->paramHasAttr(ArgIdx
, Attribute::Nest
);
109 IsByVal
= Call
->paramHasAttr(ArgIdx
, Attribute::ByVal
);
110 IsInAlloca
= Call
->paramHasAttr(ArgIdx
, Attribute::InAlloca
);
111 IsReturned
= Call
->paramHasAttr(ArgIdx
, Attribute::Returned
);
112 IsSwiftSelf
= Call
->paramHasAttr(ArgIdx
, Attribute::SwiftSelf
);
113 IsSwiftError
= Call
->paramHasAttr(ArgIdx
, Attribute::SwiftError
);
114 Alignment
= Call
->getParamAlignment(ArgIdx
);
117 /// Generate a libcall taking the given operands as arguments and returning a
118 /// result of type RetVT.
119 std::pair
<SDValue
, SDValue
>
120 TargetLowering::makeLibCall(SelectionDAG
&DAG
, RTLIB::Libcall LC
, EVT RetVT
,
121 ArrayRef
<SDValue
> Ops
, bool isSigned
,
122 const SDLoc
&dl
, bool doesNotReturn
,
123 bool isReturnValueUsed
,
124 bool isPostTypeLegalization
) const {
125 TargetLowering::ArgListTy Args
;
126 Args
.reserve(Ops
.size());
128 TargetLowering::ArgListEntry Entry
;
129 for (SDValue Op
: Ops
) {
131 Entry
.Ty
= Entry
.Node
.getValueType().getTypeForEVT(*DAG
.getContext());
132 Entry
.IsSExt
= shouldSignExtendTypeInLibCall(Op
.getValueType(), isSigned
);
133 Entry
.IsZExt
= !shouldSignExtendTypeInLibCall(Op
.getValueType(), isSigned
);
134 Args
.push_back(Entry
);
137 if (LC
== RTLIB::UNKNOWN_LIBCALL
)
138 report_fatal_error("Unsupported library call operation!");
139 SDValue Callee
= DAG
.getExternalSymbol(getLibcallName(LC
),
140 getPointerTy(DAG
.getDataLayout()));
142 Type
*RetTy
= RetVT
.getTypeForEVT(*DAG
.getContext());
143 TargetLowering::CallLoweringInfo
CLI(DAG
);
144 bool signExtend
= shouldSignExtendTypeInLibCall(RetVT
, isSigned
);
146 .setChain(DAG
.getEntryNode())
147 .setLibCallee(getLibcallCallingConv(LC
), RetTy
, Callee
, std::move(Args
))
148 .setNoReturn(doesNotReturn
)
149 .setDiscardResult(!isReturnValueUsed
)
150 .setIsPostTypeLegalization(isPostTypeLegalization
)
151 .setSExtResult(signExtend
)
152 .setZExtResult(!signExtend
);
153 return LowerCallTo(CLI
);
156 /// Soften the operands of a comparison. This code is shared among BR_CC,
157 /// SELECT_CC, and SETCC handlers.
158 void TargetLowering::softenSetCCOperands(SelectionDAG
&DAG
, EVT VT
,
159 SDValue
&NewLHS
, SDValue
&NewRHS
,
160 ISD::CondCode
&CCCode
,
161 const SDLoc
&dl
) const {
162 assert((VT
== MVT::f32
|| VT
== MVT::f64
|| VT
== MVT::f128
|| VT
== MVT::ppcf128
)
163 && "Unsupported setcc type!");
165 // Expand into one or more soft-fp libcall(s).
166 RTLIB::Libcall LC1
= RTLIB::UNKNOWN_LIBCALL
, LC2
= RTLIB::UNKNOWN_LIBCALL
;
167 bool ShouldInvertCC
= false;
171 LC1
= (VT
== MVT::f32
) ? RTLIB::OEQ_F32
:
172 (VT
== MVT::f64
) ? RTLIB::OEQ_F64
:
173 (VT
== MVT::f128
) ? RTLIB::OEQ_F128
: RTLIB::OEQ_PPCF128
;
177 LC1
= (VT
== MVT::f32
) ? RTLIB::UNE_F32
:
178 (VT
== MVT::f64
) ? RTLIB::UNE_F64
:
179 (VT
== MVT::f128
) ? RTLIB::UNE_F128
: RTLIB::UNE_PPCF128
;
183 LC1
= (VT
== MVT::f32
) ? RTLIB::OGE_F32
:
184 (VT
== MVT::f64
) ? RTLIB::OGE_F64
:
185 (VT
== MVT::f128
) ? RTLIB::OGE_F128
: RTLIB::OGE_PPCF128
;
189 LC1
= (VT
== MVT::f32
) ? RTLIB::OLT_F32
:
190 (VT
== MVT::f64
) ? RTLIB::OLT_F64
:
191 (VT
== MVT::f128
) ? RTLIB::OLT_F128
: RTLIB::OLT_PPCF128
;
195 LC1
= (VT
== MVT::f32
) ? RTLIB::OLE_F32
:
196 (VT
== MVT::f64
) ? RTLIB::OLE_F64
:
197 (VT
== MVT::f128
) ? RTLIB::OLE_F128
: RTLIB::OLE_PPCF128
;
201 LC1
= (VT
== MVT::f32
) ? RTLIB::OGT_F32
:
202 (VT
== MVT::f64
) ? RTLIB::OGT_F64
:
203 (VT
== MVT::f128
) ? RTLIB::OGT_F128
: RTLIB::OGT_PPCF128
;
206 LC1
= (VT
== MVT::f32
) ? RTLIB::UO_F32
:
207 (VT
== MVT::f64
) ? RTLIB::UO_F64
:
208 (VT
== MVT::f128
) ? RTLIB::UO_F128
: RTLIB::UO_PPCF128
;
211 LC1
= (VT
== MVT::f32
) ? RTLIB::O_F32
:
212 (VT
== MVT::f64
) ? RTLIB::O_F64
:
213 (VT
== MVT::f128
) ? RTLIB::O_F128
: RTLIB::O_PPCF128
;
216 // SETONE = SETOLT | SETOGT
217 LC1
= (VT
== MVT::f32
) ? RTLIB::OLT_F32
:
218 (VT
== MVT::f64
) ? RTLIB::OLT_F64
:
219 (VT
== MVT::f128
) ? RTLIB::OLT_F128
: RTLIB::OLT_PPCF128
;
220 LC2
= (VT
== MVT::f32
) ? RTLIB::OGT_F32
:
221 (VT
== MVT::f64
) ? RTLIB::OGT_F64
:
222 (VT
== MVT::f128
) ? RTLIB::OGT_F128
: RTLIB::OGT_PPCF128
;
225 LC1
= (VT
== MVT::f32
) ? RTLIB::UO_F32
:
226 (VT
== MVT::f64
) ? RTLIB::UO_F64
:
227 (VT
== MVT::f128
) ? RTLIB::UO_F128
: RTLIB::UO_PPCF128
;
228 LC2
= (VT
== MVT::f32
) ? RTLIB::OEQ_F32
:
229 (VT
== MVT::f64
) ? RTLIB::OEQ_F64
:
230 (VT
== MVT::f128
) ? RTLIB::OEQ_F128
: RTLIB::OEQ_PPCF128
;
233 // Invert CC for unordered comparisons
234 ShouldInvertCC
= true;
237 LC1
= (VT
== MVT::f32
) ? RTLIB::OGE_F32
:
238 (VT
== MVT::f64
) ? RTLIB::OGE_F64
:
239 (VT
== MVT::f128
) ? RTLIB::OGE_F128
: RTLIB::OGE_PPCF128
;
242 LC1
= (VT
== MVT::f32
) ? RTLIB::OGT_F32
:
243 (VT
== MVT::f64
) ? RTLIB::OGT_F64
:
244 (VT
== MVT::f128
) ? RTLIB::OGT_F128
: RTLIB::OGT_PPCF128
;
247 LC1
= (VT
== MVT::f32
) ? RTLIB::OLE_F32
:
248 (VT
== MVT::f64
) ? RTLIB::OLE_F64
:
249 (VT
== MVT::f128
) ? RTLIB::OLE_F128
: RTLIB::OLE_PPCF128
;
252 LC1
= (VT
== MVT::f32
) ? RTLIB::OLT_F32
:
253 (VT
== MVT::f64
) ? RTLIB::OLT_F64
:
254 (VT
== MVT::f128
) ? RTLIB::OLT_F128
: RTLIB::OLT_PPCF128
;
256 default: llvm_unreachable("Do not know how to soften this setcc!");
260 // Use the target specific return value for comparions lib calls.
261 EVT RetVT
= getCmpLibcallReturnType();
262 SDValue Ops
[2] = {NewLHS
, NewRHS
};
263 NewLHS
= makeLibCall(DAG
, LC1
, RetVT
, Ops
, false /*sign irrelevant*/,
265 NewRHS
= DAG
.getConstant(0, dl
, RetVT
);
267 CCCode
= getCmpLibcallCC(LC1
);
269 CCCode
= getSetCCInverse(CCCode
, /*isInteger=*/true);
271 if (LC2
!= RTLIB::UNKNOWN_LIBCALL
) {
272 SDValue Tmp
= DAG
.getNode(
274 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), RetVT
),
275 NewLHS
, NewRHS
, DAG
.getCondCode(CCCode
));
276 NewLHS
= makeLibCall(DAG
, LC2
, RetVT
, Ops
, false/*sign irrelevant*/,
278 NewLHS
= DAG
.getNode(
280 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), RetVT
),
281 NewLHS
, NewRHS
, DAG
.getCondCode(getCmpLibcallCC(LC2
)));
282 NewLHS
= DAG
.getNode(ISD::OR
, dl
, Tmp
.getValueType(), Tmp
, NewLHS
);
287 /// Return the entry encoding for a jump table in the current function. The
288 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
289 unsigned TargetLowering::getJumpTableEncoding() const {
290 // In non-pic modes, just use the address of a block.
291 if (!isPositionIndependent())
292 return MachineJumpTableInfo::EK_BlockAddress
;
294 // In PIC mode, if the target supports a GPRel32 directive, use it.
295 if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
296 return MachineJumpTableInfo::EK_GPRel32BlockAddress
;
298 // Otherwise, use a label difference.
299 return MachineJumpTableInfo::EK_LabelDifference32
;
302 SDValue
TargetLowering::getPICJumpTableRelocBase(SDValue Table
,
303 SelectionDAG
&DAG
) const {
304 // If our PIC model is GP relative, use the global offset table as the base.
305 unsigned JTEncoding
= getJumpTableEncoding();
307 if ((JTEncoding
== MachineJumpTableInfo::EK_GPRel64BlockAddress
) ||
308 (JTEncoding
== MachineJumpTableInfo::EK_GPRel32BlockAddress
))
309 return DAG
.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG
.getDataLayout()));
314 /// This returns the relocation base for the given PIC jumptable, the same as
315 /// getPICJumpTableRelocBase, but as an MCExpr.
317 TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction
*MF
,
318 unsigned JTI
,MCContext
&Ctx
) const{
319 // The normal PIC reloc base is the label at the start of the jump table.
320 return MCSymbolRefExpr::create(MF
->getJTISymbol(JTI
, Ctx
), Ctx
);
324 TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode
*GA
) const {
325 const TargetMachine
&TM
= getTargetMachine();
326 const GlobalValue
*GV
= GA
->getGlobal();
328 // If the address is not even local to this DSO we will have to load it from
329 // a got and then add the offset.
330 if (!TM
.shouldAssumeDSOLocal(*GV
->getParent(), GV
))
333 // If the code is position independent we will have to add a base register.
334 if (isPositionIndependent())
337 // Otherwise we can do it.
341 //===----------------------------------------------------------------------===//
342 // Optimization Methods
343 //===----------------------------------------------------------------------===//
345 /// If the specified instruction has a constant integer operand and there are
346 /// bits set in that constant that are not demanded, then clear those bits and
348 bool TargetLowering::ShrinkDemandedConstant(SDValue Op
, const APInt
&Demanded
,
349 TargetLoweringOpt
&TLO
) const {
350 SelectionDAG
&DAG
= TLO
.DAG
;
352 unsigned Opcode
= Op
.getOpcode();
354 // Do target-specific constant optimization.
355 if (targetShrinkDemandedConstant(Op
, Demanded
, TLO
))
356 return TLO
.New
.getNode();
358 // FIXME: ISD::SELECT, ISD::SELECT_CC
365 auto *Op1C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
369 // If this is a 'not' op, don't touch it because that's a canonical form.
370 const APInt
&C
= Op1C
->getAPIntValue();
371 if (Opcode
== ISD::XOR
&& Demanded
.isSubsetOf(C
))
374 if (!C
.isSubsetOf(Demanded
)) {
375 EVT VT
= Op
.getValueType();
376 SDValue NewC
= DAG
.getConstant(Demanded
& C
, DL
, VT
);
377 SDValue NewOp
= DAG
.getNode(Opcode
, DL
, VT
, Op
.getOperand(0), NewC
);
378 return TLO
.CombineTo(Op
, NewOp
);
388 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
389 /// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
390 /// generalized for targets with other types of implicit widening casts.
391 bool TargetLowering::ShrinkDemandedOp(SDValue Op
, unsigned BitWidth
,
392 const APInt
&Demanded
,
393 TargetLoweringOpt
&TLO
) const {
394 assert(Op
.getNumOperands() == 2 &&
395 "ShrinkDemandedOp only supports binary operators!");
396 assert(Op
.getNode()->getNumValues() == 1 &&
397 "ShrinkDemandedOp only supports nodes with one result!");
399 SelectionDAG
&DAG
= TLO
.DAG
;
402 // Early return, as this function cannot handle vector types.
403 if (Op
.getValueType().isVector())
406 // Don't do this if the node has another user, which may require the
408 if (!Op
.getNode()->hasOneUse())
411 // Search for the smallest integer type with free casts to and from
412 // Op's type. For expedience, just check power-of-2 integer types.
413 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
414 unsigned DemandedSize
= Demanded
.getActiveBits();
415 unsigned SmallVTBits
= DemandedSize
;
416 if (!isPowerOf2_32(SmallVTBits
))
417 SmallVTBits
= NextPowerOf2(SmallVTBits
);
418 for (; SmallVTBits
< BitWidth
; SmallVTBits
= NextPowerOf2(SmallVTBits
)) {
419 EVT SmallVT
= EVT::getIntegerVT(*DAG
.getContext(), SmallVTBits
);
420 if (TLI
.isTruncateFree(Op
.getValueType(), SmallVT
) &&
421 TLI
.isZExtFree(SmallVT
, Op
.getValueType())) {
422 // We found a type with free casts.
423 SDValue X
= DAG
.getNode(
424 Op
.getOpcode(), dl
, SmallVT
,
425 DAG
.getNode(ISD::TRUNCATE
, dl
, SmallVT
, Op
.getOperand(0)),
426 DAG
.getNode(ISD::TRUNCATE
, dl
, SmallVT
, Op
.getOperand(1)));
427 assert(DemandedSize
<= SmallVTBits
&& "Narrowed below demanded bits?");
428 SDValue Z
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, Op
.getValueType(), X
);
429 return TLO
.CombineTo(Op
, Z
);
435 bool TargetLowering::SimplifyDemandedBits(SDValue Op
, const APInt
&DemandedBits
,
436 DAGCombinerInfo
&DCI
) const {
437 SelectionDAG
&DAG
= DCI
.DAG
;
438 TargetLoweringOpt
TLO(DAG
, !DCI
.isBeforeLegalize(),
439 !DCI
.isBeforeLegalizeOps());
442 bool Simplified
= SimplifyDemandedBits(Op
, DemandedBits
, Known
, TLO
);
444 DCI
.AddToWorklist(Op
.getNode());
445 DCI
.CommitTargetLoweringOpt(TLO
);
450 bool TargetLowering::SimplifyDemandedBits(SDValue Op
, const APInt
&DemandedBits
,
452 TargetLoweringOpt
&TLO
,
454 bool AssumeSingleUse
) const {
455 EVT VT
= Op
.getValueType();
456 APInt DemandedElts
= VT
.isVector()
457 ? APInt::getAllOnesValue(VT
.getVectorNumElements())
459 return SimplifyDemandedBits(Op
, DemandedBits
, DemandedElts
, Known
, TLO
, Depth
,
463 /// Look at Op. At this point, we know that only the OriginalDemandedBits of the
464 /// result of Op are ever used downstream. If we can use this information to
465 /// simplify Op, create a new simplified DAG node and return true, returning the
466 /// original and new nodes in Old and New. Otherwise, analyze the expression and
467 /// return a mask of Known bits for the expression (used to simplify the
468 /// caller). The Known bits may only be accurate for those bits in the
469 /// OriginalDemandedBits and OriginalDemandedElts.
470 bool TargetLowering::SimplifyDemandedBits(
471 SDValue Op
, const APInt
&OriginalDemandedBits
,
472 const APInt
&OriginalDemandedElts
, KnownBits
&Known
, TargetLoweringOpt
&TLO
,
473 unsigned Depth
, bool AssumeSingleUse
) const {
474 unsigned BitWidth
= OriginalDemandedBits
.getBitWidth();
475 assert(Op
.getScalarValueSizeInBits() == BitWidth
&&
476 "Mask size mismatches value type size!");
478 unsigned NumElts
= OriginalDemandedElts
.getBitWidth();
479 assert((!Op
.getValueType().isVector() ||
480 NumElts
== Op
.getValueType().getVectorNumElements()) &&
481 "Unexpected vector size");
483 APInt DemandedBits
= OriginalDemandedBits
;
484 APInt DemandedElts
= OriginalDemandedElts
;
486 auto &DL
= TLO
.DAG
.getDataLayout();
488 // Don't know anything.
489 Known
= KnownBits(BitWidth
);
491 if (Op
.getOpcode() == ISD::Constant
) {
492 // We know all of the bits for a constant!
493 Known
.One
= cast
<ConstantSDNode
>(Op
)->getAPIntValue();
494 Known
.Zero
= ~Known
.One
;
498 // Other users may use these bits.
499 EVT VT
= Op
.getValueType();
500 if (!Op
.getNode()->hasOneUse() && !AssumeSingleUse
) {
502 // If not at the root, Just compute the Known bits to
503 // simplify things downstream.
504 Known
= TLO
.DAG
.computeKnownBits(Op
, DemandedElts
, Depth
);
507 // If this is the root being simplified, allow it to have multiple uses,
508 // just set the DemandedBits/Elts to all bits.
509 DemandedBits
= APInt::getAllOnesValue(BitWidth
);
510 DemandedElts
= APInt::getAllOnesValue(NumElts
);
511 } else if (OriginalDemandedBits
== 0 || OriginalDemandedElts
== 0) {
512 // Not demanding any bits/elts from Op.
514 return TLO
.CombineTo(Op
, TLO
.DAG
.getUNDEF(VT
));
516 } else if (Depth
== 6) { // Limit search depth.
520 KnownBits Known2
, KnownOut
;
521 switch (Op
.getOpcode()) {
522 case ISD::BUILD_VECTOR
:
523 // Collect the known bits that are shared by every constant vector element.
524 Known
.Zero
.setAllBits(); Known
.One
.setAllBits();
525 for (SDValue SrcOp
: Op
->ops()) {
526 if (!isa
<ConstantSDNode
>(SrcOp
)) {
527 // We can only handle all constant values - bail out with no known bits.
528 Known
= KnownBits(BitWidth
);
531 Known2
.One
= cast
<ConstantSDNode
>(SrcOp
)->getAPIntValue();
532 Known2
.Zero
= ~Known2
.One
;
534 // BUILD_VECTOR can implicitly truncate sources, we must handle this.
535 if (Known2
.One
.getBitWidth() != BitWidth
) {
536 assert(Known2
.getBitWidth() > BitWidth
&&
537 "Expected BUILD_VECTOR implicit truncation");
538 Known2
= Known2
.trunc(BitWidth
);
541 // Known bits are the values that are shared by every element.
542 // TODO: support per-element known bits.
543 Known
.One
&= Known2
.One
;
544 Known
.Zero
&= Known2
.Zero
;
546 return false; // Don't fall through, will infinitely loop.
547 case ISD::CONCAT_VECTORS
: {
548 Known
.Zero
.setAllBits();
549 Known
.One
.setAllBits();
550 EVT SubVT
= Op
.getOperand(0).getValueType();
551 unsigned NumSubVecs
= Op
.getNumOperands();
552 unsigned NumSubElts
= SubVT
.getVectorNumElements();
553 for (unsigned i
= 0; i
!= NumSubVecs
; ++i
) {
554 APInt DemandedSubElts
=
555 DemandedElts
.extractBits(NumSubElts
, i
* NumSubElts
);
556 if (SimplifyDemandedBits(Op
.getOperand(i
), DemandedBits
, DemandedSubElts
,
557 Known2
, TLO
, Depth
+ 1))
559 // Known bits are shared by every demanded subvector element.
560 if (!!DemandedSubElts
) {
561 Known
.One
&= Known2
.One
;
562 Known
.Zero
&= Known2
.Zero
;
567 case ISD::VECTOR_SHUFFLE
: {
568 ArrayRef
<int> ShuffleMask
= cast
<ShuffleVectorSDNode
>(Op
)->getMask();
570 // Collect demanded elements from shuffle operands..
571 APInt
DemandedLHS(NumElts
, 0);
572 APInt
DemandedRHS(NumElts
, 0);
573 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
574 if (!DemandedElts
[i
])
576 int M
= ShuffleMask
[i
];
578 // For UNDEF elements, we don't know anything about the common state of
579 // the shuffle result.
580 DemandedLHS
.clearAllBits();
581 DemandedRHS
.clearAllBits();
584 assert(0 <= M
&& M
< (int)(2 * NumElts
) && "Shuffle index out of range");
585 if (M
< (int)NumElts
)
586 DemandedLHS
.setBit(M
);
588 DemandedRHS
.setBit(M
- NumElts
);
591 if (!!DemandedLHS
|| !!DemandedRHS
) {
592 Known
.Zero
.setAllBits();
593 Known
.One
.setAllBits();
595 if (SimplifyDemandedBits(Op
.getOperand(0), DemandedBits
, DemandedLHS
,
596 Known2
, TLO
, Depth
+ 1))
598 Known
.One
&= Known2
.One
;
599 Known
.Zero
&= Known2
.Zero
;
602 if (SimplifyDemandedBits(Op
.getOperand(1), DemandedBits
, DemandedRHS
,
603 Known2
, TLO
, Depth
+ 1))
605 Known
.One
&= Known2
.One
;
606 Known
.Zero
&= Known2
.Zero
;
612 SDValue Op0
= Op
.getOperand(0);
613 SDValue Op1
= Op
.getOperand(1);
615 // If the RHS is a constant, check to see if the LHS would be zero without
616 // using the bits from the RHS. Below, we use knowledge about the RHS to
617 // simplify the LHS, here we're using information from the LHS to simplify
619 if (ConstantSDNode
*RHSC
= isConstOrConstSplat(Op1
)) {
620 // Do not increment Depth here; that can cause an infinite loop.
621 KnownBits LHSKnown
= TLO
.DAG
.computeKnownBits(Op0
, DemandedElts
, Depth
);
622 // If the LHS already has zeros where RHSC does, this 'and' is dead.
623 if ((LHSKnown
.Zero
& DemandedBits
) ==
624 (~RHSC
->getAPIntValue() & DemandedBits
))
625 return TLO
.CombineTo(Op
, Op0
);
627 // If any of the set bits in the RHS are known zero on the LHS, shrink
629 if (ShrinkDemandedConstant(Op
, ~LHSKnown
.Zero
& DemandedBits
, TLO
))
632 // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
633 // constant, but if this 'and' is only clearing bits that were just set by
634 // the xor, then this 'and' can be eliminated by shrinking the mask of
635 // the xor. For example, for a 32-bit X:
636 // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
637 if (isBitwiseNot(Op0
) && Op0
.hasOneUse() &&
638 LHSKnown
.One
== ~RHSC
->getAPIntValue()) {
639 SDValue Xor
= TLO
.DAG
.getNode(ISD::XOR
, dl
, VT
, Op0
.getOperand(0), Op1
);
640 return TLO
.CombineTo(Op
, Xor
);
644 if (SimplifyDemandedBits(Op1
, DemandedBits
, DemandedElts
, Known
, TLO
,
647 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
648 if (SimplifyDemandedBits(Op0
, ~Known
.Zero
& DemandedBits
, DemandedElts
,
649 Known2
, TLO
, Depth
+ 1))
651 assert(!Known2
.hasConflict() && "Bits known to be one AND zero?");
653 // If all of the demanded bits are known one on one side, return the other.
654 // These bits cannot contribute to the result of the 'and'.
655 if (DemandedBits
.isSubsetOf(Known2
.Zero
| Known
.One
))
656 return TLO
.CombineTo(Op
, Op0
);
657 if (DemandedBits
.isSubsetOf(Known
.Zero
| Known2
.One
))
658 return TLO
.CombineTo(Op
, Op1
);
659 // If all of the demanded bits in the inputs are known zeros, return zero.
660 if (DemandedBits
.isSubsetOf(Known
.Zero
| Known2
.Zero
))
661 return TLO
.CombineTo(Op
, TLO
.DAG
.getConstant(0, dl
, VT
));
662 // If the RHS is a constant, see if we can simplify it.
663 if (ShrinkDemandedConstant(Op
, ~Known2
.Zero
& DemandedBits
, TLO
))
665 // If the operation can be done in a smaller type, do so.
666 if (ShrinkDemandedOp(Op
, BitWidth
, DemandedBits
, TLO
))
669 // Output known-1 bits are only known if set in both the LHS & RHS.
670 Known
.One
&= Known2
.One
;
671 // Output known-0 are known to be clear if zero in either the LHS | RHS.
672 Known
.Zero
|= Known2
.Zero
;
676 SDValue Op0
= Op
.getOperand(0);
677 SDValue Op1
= Op
.getOperand(1);
679 if (SimplifyDemandedBits(Op1
, DemandedBits
, DemandedElts
, Known
, TLO
,
682 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
683 if (SimplifyDemandedBits(Op0
, ~Known
.One
& DemandedBits
, DemandedElts
,
684 Known2
, TLO
, Depth
+ 1))
686 assert(!Known2
.hasConflict() && "Bits known to be one AND zero?");
688 // If all of the demanded bits are known zero on one side, return the other.
689 // These bits cannot contribute to the result of the 'or'.
690 if (DemandedBits
.isSubsetOf(Known2
.One
| Known
.Zero
))
691 return TLO
.CombineTo(Op
, Op0
);
692 if (DemandedBits
.isSubsetOf(Known
.One
| Known2
.Zero
))
693 return TLO
.CombineTo(Op
, Op1
);
694 // If the RHS is a constant, see if we can simplify it.
695 if (ShrinkDemandedConstant(Op
, DemandedBits
, TLO
))
697 // If the operation can be done in a smaller type, do so.
698 if (ShrinkDemandedOp(Op
, BitWidth
, DemandedBits
, TLO
))
701 // Output known-0 bits are only known if clear in both the LHS & RHS.
702 Known
.Zero
&= Known2
.Zero
;
703 // Output known-1 are known to be set if set in either the LHS | RHS.
704 Known
.One
|= Known2
.One
;
708 SDValue Op0
= Op
.getOperand(0);
709 SDValue Op1
= Op
.getOperand(1);
711 if (SimplifyDemandedBits(Op1
, DemandedBits
, DemandedElts
, Known
, TLO
,
714 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
715 if (SimplifyDemandedBits(Op0
, DemandedBits
, DemandedElts
, Known2
, TLO
,
718 assert(!Known2
.hasConflict() && "Bits known to be one AND zero?");
720 // If all of the demanded bits are known zero on one side, return the other.
721 // These bits cannot contribute to the result of the 'xor'.
722 if (DemandedBits
.isSubsetOf(Known
.Zero
))
723 return TLO
.CombineTo(Op
, Op0
);
724 if (DemandedBits
.isSubsetOf(Known2
.Zero
))
725 return TLO
.CombineTo(Op
, Op1
);
726 // If the operation can be done in a smaller type, do so.
727 if (ShrinkDemandedOp(Op
, BitWidth
, DemandedBits
, TLO
))
730 // If all of the unknown bits are known to be zero on one side or the other
731 // (but not both) turn this into an *inclusive* or.
732 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
733 if (DemandedBits
.isSubsetOf(Known
.Zero
| Known2
.Zero
))
734 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::OR
, dl
, VT
, Op0
, Op1
));
736 // Output known-0 bits are known if clear or set in both the LHS & RHS.
737 KnownOut
.Zero
= (Known
.Zero
& Known2
.Zero
) | (Known
.One
& Known2
.One
);
738 // Output known-1 are known to be set if set in only one of the LHS, RHS.
739 KnownOut
.One
= (Known
.Zero
& Known2
.One
) | (Known
.One
& Known2
.Zero
);
741 if (ConstantSDNode
*C
= isConstOrConstSplat(Op1
)) {
742 // If one side is a constant, and all of the known set bits on the other
743 // side are also set in the constant, turn this into an AND, as we know
744 // the bits will be cleared.
745 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
746 // NB: it is okay if more bits are known than are requested
747 if (C
->getAPIntValue() == Known2
.One
) {
749 TLO
.DAG
.getConstant(~C
->getAPIntValue() & DemandedBits
, dl
, VT
);
750 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::AND
, dl
, VT
, Op0
, ANDC
));
753 // If the RHS is a constant, see if we can change it. Don't alter a -1
754 // constant because that's a 'not' op, and that is better for combining
756 if (!C
->isAllOnesValue()) {
757 if (DemandedBits
.isSubsetOf(C
->getAPIntValue())) {
758 // We're flipping all demanded bits. Flip the undemanded bits too.
759 SDValue New
= TLO
.DAG
.getNOT(dl
, Op0
, VT
);
760 return TLO
.CombineTo(Op
, New
);
762 // If we can't turn this into a 'not', try to shrink the constant.
763 if (ShrinkDemandedConstant(Op
, DemandedBits
, TLO
))
768 Known
= std::move(KnownOut
);
772 if (SimplifyDemandedBits(Op
.getOperand(2), DemandedBits
, Known
, TLO
,
775 if (SimplifyDemandedBits(Op
.getOperand(1), DemandedBits
, Known2
, TLO
,
778 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
779 assert(!Known2
.hasConflict() && "Bits known to be one AND zero?");
781 // If the operands are constants, see if we can simplify them.
782 if (ShrinkDemandedConstant(Op
, DemandedBits
, TLO
))
785 // Only known if known in both the LHS and RHS.
786 Known
.One
&= Known2
.One
;
787 Known
.Zero
&= Known2
.Zero
;
790 if (SimplifyDemandedBits(Op
.getOperand(3), DemandedBits
, Known
, TLO
,
793 if (SimplifyDemandedBits(Op
.getOperand(2), DemandedBits
, Known2
, TLO
,
796 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
797 assert(!Known2
.hasConflict() && "Bits known to be one AND zero?");
799 // If the operands are constants, see if we can simplify them.
800 if (ShrinkDemandedConstant(Op
, DemandedBits
, TLO
))
803 // Only known if known in both the LHS and RHS.
804 Known
.One
&= Known2
.One
;
805 Known
.Zero
&= Known2
.Zero
;
808 SDValue Op0
= Op
.getOperand(0);
809 SDValue Op1
= Op
.getOperand(1);
810 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
811 // If (1) we only need the sign-bit, (2) the setcc operands are the same
812 // width as the setcc result, and (3) the result of a setcc conforms to 0 or
813 // -1, we may be able to bypass the setcc.
814 if (DemandedBits
.isSignMask() &&
815 Op0
.getScalarValueSizeInBits() == BitWidth
&&
816 getBooleanContents(VT
) ==
817 BooleanContent::ZeroOrNegativeOneBooleanContent
) {
818 // If we're testing X < 0, then this compare isn't needed - just use X!
819 // FIXME: We're limiting to integer types here, but this should also work
820 // if we don't care about FP signed-zero. The use of SETLT with FP means
821 // that we don't care about NaNs.
822 if (CC
== ISD::SETLT
&& Op1
.getValueType().isInteger() &&
823 (isNullConstant(Op1
) || ISD::isBuildVectorAllZeros(Op1
.getNode())))
824 return TLO
.CombineTo(Op
, Op0
);
826 // TODO: Should we check for other forms of sign-bit comparisons?
827 // Examples: X <= -1, X >= 0
829 if (getBooleanContents(Op0
.getValueType()) ==
830 TargetLowering::ZeroOrOneBooleanContent
&&
832 Known
.Zero
.setBitsFrom(1);
836 SDValue Op0
= Op
.getOperand(0);
837 SDValue Op1
= Op
.getOperand(1);
839 if (ConstantSDNode
*SA
= isConstOrConstSplat(Op1
)) {
840 // If the shift count is an invalid immediate, don't do anything.
841 if (SA
->getAPIntValue().uge(BitWidth
))
844 unsigned ShAmt
= SA
->getZExtValue();
846 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
847 // single shift. We can do this if the bottom bits (which are shifted
848 // out) are never demanded.
849 if (Op0
.getOpcode() == ISD::SRL
) {
851 (DemandedBits
& APInt::getLowBitsSet(BitWidth
, ShAmt
)) == 0) {
852 if (ConstantSDNode
*SA2
= isConstOrConstSplat(Op0
.getOperand(1))) {
853 if (SA2
->getAPIntValue().ult(BitWidth
)) {
854 unsigned C1
= SA2
->getZExtValue();
855 unsigned Opc
= ISD::SHL
;
856 int Diff
= ShAmt
- C1
;
862 SDValue NewSA
= TLO
.DAG
.getConstant(Diff
, dl
, Op1
.getValueType());
863 return TLO
.CombineTo(
864 Op
, TLO
.DAG
.getNode(Opc
, dl
, VT
, Op0
.getOperand(0), NewSA
));
870 if (SimplifyDemandedBits(Op0
, DemandedBits
.lshr(ShAmt
), DemandedElts
,
871 Known
, TLO
, Depth
+ 1))
874 // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
875 // are not demanded. This will likely allow the anyext to be folded away.
876 if (Op0
.getOpcode() == ISD::ANY_EXTEND
) {
877 SDValue InnerOp
= Op0
.getOperand(0);
878 EVT InnerVT
= InnerOp
.getValueType();
879 unsigned InnerBits
= InnerVT
.getScalarSizeInBits();
880 if (ShAmt
< InnerBits
&& DemandedBits
.getActiveBits() <= InnerBits
&&
881 isTypeDesirableForOp(ISD::SHL
, InnerVT
)) {
882 EVT ShTy
= getShiftAmountTy(InnerVT
, DL
);
883 if (!APInt(BitWidth
, ShAmt
).isIntN(ShTy
.getSizeInBits()))
886 TLO
.DAG
.getNode(ISD::SHL
, dl
, InnerVT
, InnerOp
,
887 TLO
.DAG
.getConstant(ShAmt
, dl
, ShTy
));
888 return TLO
.CombineTo(
889 Op
, TLO
.DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
, NarrowShl
));
891 // Repeat the SHL optimization above in cases where an extension
892 // intervenes: (shl (anyext (shr x, c1)), c2) to
893 // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
894 // aren't demanded (as above) and that the shifted upper c1 bits of
895 // x aren't demanded.
896 if (Op0
.hasOneUse() && InnerOp
.getOpcode() == ISD::SRL
&&
897 InnerOp
.hasOneUse()) {
898 if (ConstantSDNode
*SA2
=
899 isConstOrConstSplat(InnerOp
.getOperand(1))) {
900 unsigned InnerShAmt
= SA2
->getLimitedValue(InnerBits
);
901 if (InnerShAmt
< ShAmt
&& InnerShAmt
< InnerBits
&&
902 DemandedBits
.getActiveBits() <=
903 (InnerBits
- InnerShAmt
+ ShAmt
) &&
904 DemandedBits
.countTrailingZeros() >= ShAmt
) {
905 SDValue NewSA
= TLO
.DAG
.getConstant(ShAmt
- InnerShAmt
, dl
,
907 SDValue NewExt
= TLO
.DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
,
908 InnerOp
.getOperand(0));
909 return TLO
.CombineTo(
910 Op
, TLO
.DAG
.getNode(ISD::SHL
, dl
, VT
, NewExt
, NewSA
));
916 Known
.Zero
<<= ShAmt
;
918 // low bits known zero.
919 Known
.Zero
.setLowBits(ShAmt
);
924 SDValue Op0
= Op
.getOperand(0);
925 SDValue Op1
= Op
.getOperand(1);
927 if (ConstantSDNode
*SA
= isConstOrConstSplat(Op1
)) {
928 // If the shift count is an invalid immediate, don't do anything.
929 if (SA
->getAPIntValue().uge(BitWidth
))
932 unsigned ShAmt
= SA
->getZExtValue();
933 APInt InDemandedMask
= (DemandedBits
<< ShAmt
);
935 // If the shift is exact, then it does demand the low bits (and knows that
937 if (Op
->getFlags().hasExact())
938 InDemandedMask
.setLowBits(ShAmt
);
940 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
941 // single shift. We can do this if the top bits (which are shifted out)
942 // are never demanded.
943 if (Op0
.getOpcode() == ISD::SHL
) {
944 if (ConstantSDNode
*SA2
= isConstOrConstSplat(Op0
.getOperand(1))) {
946 (DemandedBits
& APInt::getHighBitsSet(BitWidth
, ShAmt
)) == 0) {
947 if (SA2
->getAPIntValue().ult(BitWidth
)) {
948 unsigned C1
= SA2
->getZExtValue();
949 unsigned Opc
= ISD::SRL
;
950 int Diff
= ShAmt
- C1
;
956 SDValue NewSA
= TLO
.DAG
.getConstant(Diff
, dl
, Op1
.getValueType());
957 return TLO
.CombineTo(
958 Op
, TLO
.DAG
.getNode(Opc
, dl
, VT
, Op0
.getOperand(0), NewSA
));
964 // Compute the new bits that are at the top now.
965 if (SimplifyDemandedBits(Op0
, InDemandedMask
, DemandedElts
, Known
, TLO
,
968 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
969 Known
.Zero
.lshrInPlace(ShAmt
);
970 Known
.One
.lshrInPlace(ShAmt
);
972 Known
.Zero
.setHighBits(ShAmt
); // High bits known zero.
977 SDValue Op0
= Op
.getOperand(0);
978 SDValue Op1
= Op
.getOperand(1);
980 // If this is an arithmetic shift right and only the low-bit is set, we can
981 // always convert this into a logical shr, even if the shift amount is
982 // variable. The low bit of the shift cannot be an input sign bit unless
983 // the shift amount is >= the size of the datatype, which is undefined.
984 if (DemandedBits
.isOneValue())
985 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::SRL
, dl
, VT
, Op0
, Op1
));
987 if (ConstantSDNode
*SA
= isConstOrConstSplat(Op1
)) {
988 // If the shift count is an invalid immediate, don't do anything.
989 if (SA
->getAPIntValue().uge(BitWidth
))
992 unsigned ShAmt
= SA
->getZExtValue();
993 APInt InDemandedMask
= (DemandedBits
<< ShAmt
);
995 // If the shift is exact, then it does demand the low bits (and knows that
997 if (Op
->getFlags().hasExact())
998 InDemandedMask
.setLowBits(ShAmt
);
1000 // If any of the demanded bits are produced by the sign extension, we also
1001 // demand the input sign bit.
1002 if (DemandedBits
.countLeadingZeros() < ShAmt
)
1003 InDemandedMask
.setSignBit();
1005 if (SimplifyDemandedBits(Op0
, InDemandedMask
, DemandedElts
, Known
, TLO
,
1008 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1009 Known
.Zero
.lshrInPlace(ShAmt
);
1010 Known
.One
.lshrInPlace(ShAmt
);
1012 // If the input sign bit is known to be zero, or if none of the top bits
1013 // are demanded, turn this into an unsigned shift right.
1014 if (Known
.Zero
[BitWidth
- ShAmt
- 1] ||
1015 DemandedBits
.countLeadingZeros() >= ShAmt
) {
1017 Flags
.setExact(Op
->getFlags().hasExact());
1018 return TLO
.CombineTo(
1019 Op
, TLO
.DAG
.getNode(ISD::SRL
, dl
, VT
, Op0
, Op1
, Flags
));
1022 int Log2
= DemandedBits
.exactLogBase2();
1024 // The bit must come from the sign.
1026 TLO
.DAG
.getConstant(BitWidth
- 1 - Log2
, dl
, Op1
.getValueType());
1027 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::SRL
, dl
, VT
, Op0
, NewSA
));
1030 if (Known
.One
[BitWidth
- ShAmt
- 1])
1031 // New bits are known one.
1032 Known
.One
.setHighBits(ShAmt
);
1038 SDValue Op0
= Op
.getOperand(0);
1039 SDValue Op1
= Op
.getOperand(1);
1040 SDValue Op2
= Op
.getOperand(2);
1041 bool IsFSHL
= (Op
.getOpcode() == ISD::FSHL
);
1043 if (ConstantSDNode
*SA
= isConstOrConstSplat(Op2
)) {
1044 unsigned Amt
= SA
->getAPIntValue().urem(BitWidth
);
1046 // For fshl, 0-shift returns the 1st arg.
1047 // For fshr, 0-shift returns the 2nd arg.
1049 if (SimplifyDemandedBits(IsFSHL
? Op0
: Op1
, DemandedBits
, DemandedElts
,
1050 Known
, TLO
, Depth
+ 1))
1055 // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
1056 // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
1057 APInt Demanded0
= DemandedBits
.lshr(IsFSHL
? Amt
: (BitWidth
- Amt
));
1058 APInt Demanded1
= DemandedBits
<< (IsFSHL
? (BitWidth
- Amt
) : Amt
);
1059 if (SimplifyDemandedBits(Op0
, Demanded0
, DemandedElts
, Known2
, TLO
,
1062 if (SimplifyDemandedBits(Op1
, Demanded1
, DemandedElts
, Known
, TLO
,
1066 Known2
.One
<<= (IsFSHL
? Amt
: (BitWidth
- Amt
));
1067 Known2
.Zero
<<= (IsFSHL
? Amt
: (BitWidth
- Amt
));
1068 Known
.One
.lshrInPlace(IsFSHL
? (BitWidth
- Amt
) : Amt
);
1069 Known
.Zero
.lshrInPlace(IsFSHL
? (BitWidth
- Amt
) : Amt
);
1070 Known
.One
|= Known2
.One
;
1071 Known
.Zero
|= Known2
.Zero
;
1075 case ISD::SIGN_EXTEND_INREG
: {
1076 SDValue Op0
= Op
.getOperand(0);
1077 EVT ExVT
= cast
<VTSDNode
>(Op
.getOperand(1))->getVT();
1078 unsigned ExVTBits
= ExVT
.getScalarSizeInBits();
1080 // If we only care about the highest bit, don't bother shifting right.
1081 if (DemandedBits
.isSignMask()) {
1082 bool AlreadySignExtended
=
1083 TLO
.DAG
.ComputeNumSignBits(Op0
) >= BitWidth
- ExVTBits
+ 1;
1084 // However if the input is already sign extended we expect the sign
1085 // extension to be dropped altogether later and do not simplify.
1086 if (!AlreadySignExtended
) {
1087 // Compute the correct shift amount type, which must be getShiftAmountTy
1088 // for scalar types after legalization.
1089 EVT ShiftAmtTy
= VT
;
1090 if (TLO
.LegalTypes() && !ShiftAmtTy
.isVector())
1091 ShiftAmtTy
= getShiftAmountTy(ShiftAmtTy
, DL
);
1094 TLO
.DAG
.getConstant(BitWidth
- ExVTBits
, dl
, ShiftAmtTy
);
1095 return TLO
.CombineTo(Op
,
1096 TLO
.DAG
.getNode(ISD::SHL
, dl
, VT
, Op0
, ShiftAmt
));
1100 // If none of the extended bits are demanded, eliminate the sextinreg.
1101 if (DemandedBits
.getActiveBits() <= ExVTBits
)
1102 return TLO
.CombineTo(Op
, Op0
);
1104 APInt InputDemandedBits
= DemandedBits
.getLoBits(ExVTBits
);
1106 // Since the sign extended bits are demanded, we know that the sign
1108 InputDemandedBits
.setBit(ExVTBits
- 1);
1110 if (SimplifyDemandedBits(Op0
, InputDemandedBits
, Known
, TLO
, Depth
+ 1))
1112 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1114 // If the sign bit of the input is known set or clear, then we know the
1115 // top bits of the result.
1117 // If the input sign bit is known zero, convert this into a zero extension.
1118 if (Known
.Zero
[ExVTBits
- 1])
1119 return TLO
.CombineTo(
1120 Op
, TLO
.DAG
.getZeroExtendInReg(Op0
, dl
, ExVT
.getScalarType()));
1122 APInt Mask
= APInt::getLowBitsSet(BitWidth
, ExVTBits
);
1123 if (Known
.One
[ExVTBits
- 1]) { // Input sign bit known set
1124 Known
.One
.setBitsFrom(ExVTBits
);
1126 } else { // Input sign bit unknown
1132 case ISD::BUILD_PAIR
: {
1133 EVT HalfVT
= Op
.getOperand(0).getValueType();
1134 unsigned HalfBitWidth
= HalfVT
.getScalarSizeInBits();
1136 APInt MaskLo
= DemandedBits
.getLoBits(HalfBitWidth
).trunc(HalfBitWidth
);
1137 APInt MaskHi
= DemandedBits
.getHiBits(HalfBitWidth
).trunc(HalfBitWidth
);
1139 KnownBits KnownLo
, KnownHi
;
1141 if (SimplifyDemandedBits(Op
.getOperand(0), MaskLo
, KnownLo
, TLO
, Depth
+ 1))
1144 if (SimplifyDemandedBits(Op
.getOperand(1), MaskHi
, KnownHi
, TLO
, Depth
+ 1))
1147 Known
.Zero
= KnownLo
.Zero
.zext(BitWidth
) |
1148 KnownHi
.Zero
.zext(BitWidth
).shl(HalfBitWidth
);
1150 Known
.One
= KnownLo
.One
.zext(BitWidth
) |
1151 KnownHi
.One
.zext(BitWidth
).shl(HalfBitWidth
);
1154 case ISD::ZERO_EXTEND
: {
1155 SDValue Src
= Op
.getOperand(0);
1156 unsigned InBits
= Src
.getScalarValueSizeInBits();
1158 // If none of the top bits are demanded, convert this into an any_extend.
1159 if (DemandedBits
.getActiveBits() <= InBits
)
1160 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
, Src
));
1162 APInt InDemandedBits
= DemandedBits
.trunc(InBits
);
1163 if (SimplifyDemandedBits(Src
, InDemandedBits
, Known
, TLO
, Depth
+ 1))
1165 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1166 Known
= Known
.zext(BitWidth
);
1167 Known
.Zero
.setBitsFrom(InBits
);
1170 case ISD::SIGN_EXTEND
: {
1171 SDValue Src
= Op
.getOperand(0);
1172 unsigned InBits
= Src
.getScalarValueSizeInBits();
1174 // If none of the top bits are demanded, convert this into an any_extend.
1175 if (DemandedBits
.getActiveBits() <= InBits
)
1176 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
, Src
));
1178 // Since some of the sign extended bits are demanded, we know that the sign
1180 APInt InDemandedBits
= DemandedBits
.trunc(InBits
);
1181 InDemandedBits
.setBit(InBits
- 1);
1183 if (SimplifyDemandedBits(Src
, InDemandedBits
, Known
, TLO
, Depth
+ 1))
1185 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1186 // If the sign bit is known one, the top bits match.
1187 Known
= Known
.sext(BitWidth
);
1189 // If the sign bit is known zero, convert this to a zero extend.
1190 if (Known
.isNonNegative())
1191 return TLO
.CombineTo(Op
, TLO
.DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Src
));
1194 case ISD::SIGN_EXTEND_VECTOR_INREG
: {
1195 // TODO - merge this with SIGN_EXTEND above?
1196 SDValue Src
= Op
.getOperand(0);
1197 unsigned InBits
= Src
.getScalarValueSizeInBits();
1199 APInt InDemandedBits
= DemandedBits
.trunc(InBits
);
1201 // If some of the sign extended bits are demanded, we know that the sign
1203 if (InBits
< DemandedBits
.getActiveBits())
1204 InDemandedBits
.setBit(InBits
- 1);
1206 if (SimplifyDemandedBits(Src
, InDemandedBits
, Known
, TLO
, Depth
+ 1))
1208 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1209 // If the sign bit is known one, the top bits match.
1210 Known
= Known
.sext(BitWidth
);
1213 case ISD::ANY_EXTEND
: {
1214 SDValue Src
= Op
.getOperand(0);
1215 unsigned InBits
= Src
.getScalarValueSizeInBits();
1216 APInt InDemandedBits
= DemandedBits
.trunc(InBits
);
1217 if (SimplifyDemandedBits(Src
, InDemandedBits
, Known
, TLO
, Depth
+ 1))
1219 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1220 Known
= Known
.zext(BitWidth
);
1223 case ISD::TRUNCATE
: {
1224 SDValue Src
= Op
.getOperand(0);
1226 // Simplify the input, using demanded bit information, and compute the known
1227 // zero/one bits live out.
1228 unsigned OperandBitWidth
= Src
.getScalarValueSizeInBits();
1229 APInt TruncMask
= DemandedBits
.zext(OperandBitWidth
);
1230 if (SimplifyDemandedBits(Src
, TruncMask
, Known
, TLO
, Depth
+ 1))
1232 Known
= Known
.trunc(BitWidth
);
1234 // If the input is only used by this truncate, see if we can shrink it based
1235 // on the known demanded bits.
1236 if (Src
.getNode()->hasOneUse()) {
1237 switch (Src
.getOpcode()) {
1241 // Shrink SRL by a constant if none of the high bits shifted in are
1243 if (TLO
.LegalTypes() && !isTypeDesirableForOp(ISD::SRL
, VT
))
1244 // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
1247 ConstantSDNode
*ShAmt
= dyn_cast
<ConstantSDNode
>(Src
.getOperand(1));
1250 SDValue Shift
= Src
.getOperand(1);
1251 if (TLO
.LegalTypes()) {
1252 uint64_t ShVal
= ShAmt
->getZExtValue();
1253 Shift
= TLO
.DAG
.getConstant(ShVal
, dl
, getShiftAmountTy(VT
, DL
));
1256 if (ShAmt
->getZExtValue() < BitWidth
) {
1257 APInt HighBits
= APInt::getHighBitsSet(OperandBitWidth
,
1258 OperandBitWidth
- BitWidth
);
1259 HighBits
.lshrInPlace(ShAmt
->getZExtValue());
1260 HighBits
= HighBits
.trunc(BitWidth
);
1262 if (!(HighBits
& DemandedBits
)) {
1263 // None of the shifted in bits are needed. Add a truncate of the
1264 // shift input, then shift it.
1266 TLO
.DAG
.getNode(ISD::TRUNCATE
, dl
, VT
, Src
.getOperand(0));
1267 return TLO
.CombineTo(
1268 Op
, TLO
.DAG
.getNode(ISD::SRL
, dl
, VT
, NewTrunc
, Shift
));
1275 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1278 case ISD::AssertZext
: {
1279 // AssertZext demands all of the high bits, plus any of the low bits
1280 // demanded by its users.
1281 EVT ZVT
= cast
<VTSDNode
>(Op
.getOperand(1))->getVT();
1282 APInt InMask
= APInt::getLowBitsSet(BitWidth
, ZVT
.getSizeInBits());
1283 if (SimplifyDemandedBits(Op
.getOperand(0), ~InMask
| DemandedBits
, Known
,
1286 assert(!Known
.hasConflict() && "Bits known to be one AND zero?");
1288 Known
.Zero
|= ~InMask
;
1291 case ISD::EXTRACT_VECTOR_ELT
: {
1292 SDValue Src
= Op
.getOperand(0);
1293 SDValue Idx
= Op
.getOperand(1);
1294 unsigned NumSrcElts
= Src
.getValueType().getVectorNumElements();
1295 unsigned EltBitWidth
= Src
.getScalarValueSizeInBits();
1297 // Demand the bits from every vector element without a constant index.
1298 APInt DemandedSrcElts
= APInt::getAllOnesValue(NumSrcElts
);
1299 if (auto *CIdx
= dyn_cast
<ConstantSDNode
>(Idx
))
1300 if (CIdx
->getAPIntValue().ult(NumSrcElts
))
1301 DemandedSrcElts
= APInt::getOneBitSet(NumSrcElts
, CIdx
->getZExtValue());
1303 // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
1304 // anything about the extended bits.
1305 APInt DemandedSrcBits
= DemandedBits
;
1306 if (BitWidth
> EltBitWidth
)
1307 DemandedSrcBits
= DemandedSrcBits
.trunc(EltBitWidth
);
1309 if (SimplifyDemandedBits(Src
, DemandedSrcBits
, DemandedSrcElts
, Known2
, TLO
,
1314 if (BitWidth
> EltBitWidth
)
1315 Known
= Known
.zext(BitWidth
);
1318 case ISD::BITCAST
: {
1319 SDValue Src
= Op
.getOperand(0);
1320 EVT SrcVT
= Src
.getValueType();
1321 unsigned NumSrcEltBits
= SrcVT
.getScalarSizeInBits();
1323 // If this is an FP->Int bitcast and if the sign bit is the only
1324 // thing demanded, turn this into a FGETSIGN.
1325 if (!TLO
.LegalOperations() && !VT
.isVector() && !SrcVT
.isVector() &&
1326 DemandedBits
== APInt::getSignMask(Op
.getValueSizeInBits()) &&
1327 SrcVT
.isFloatingPoint()) {
1328 bool OpVTLegal
= isOperationLegalOrCustom(ISD::FGETSIGN
, VT
);
1329 bool i32Legal
= isOperationLegalOrCustom(ISD::FGETSIGN
, MVT::i32
);
1330 if ((OpVTLegal
|| i32Legal
) && VT
.isSimple() && SrcVT
!= MVT::f16
&&
1331 SrcVT
!= MVT::f128
) {
1332 // Cannot eliminate/lower SHL for f128 yet.
1333 EVT Ty
= OpVTLegal
? VT
: MVT::i32
;
1334 // Make a FGETSIGN + SHL to move the sign bit into the appropriate
1335 // place. We expect the SHL to be eliminated by other optimizations.
1336 SDValue Sign
= TLO
.DAG
.getNode(ISD::FGETSIGN
, dl
, Ty
, Src
);
1337 unsigned OpVTSizeInBits
= Op
.getValueSizeInBits();
1338 if (!OpVTLegal
&& OpVTSizeInBits
> 32)
1339 Sign
= TLO
.DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Sign
);
1340 unsigned ShVal
= Op
.getValueSizeInBits() - 1;
1341 SDValue ShAmt
= TLO
.DAG
.getConstant(ShVal
, dl
, VT
);
1342 return TLO
.CombineTo(Op
,
1343 TLO
.DAG
.getNode(ISD::SHL
, dl
, VT
, Sign
, ShAmt
));
1346 // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
1347 // demanding the element if any bits from it are demanded.
1348 // TODO - bigendian once we have test coverage.
1349 // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
1350 if (SrcVT
.isVector() && NumSrcEltBits
> 1 &&
1351 (BitWidth
% NumSrcEltBits
) == 0 &&
1352 TLO
.DAG
.getDataLayout().isLittleEndian()) {
1353 unsigned Scale
= BitWidth
/ NumSrcEltBits
;
1354 auto GetDemandedSubMask
= [&](APInt
&DemandedSubElts
) -> bool {
1355 DemandedSubElts
= APInt::getNullValue(Scale
);
1356 for (unsigned i
= 0; i
!= Scale
; ++i
) {
1357 unsigned Offset
= i
* NumSrcEltBits
;
1358 APInt Sub
= DemandedBits
.extractBits(NumSrcEltBits
, Offset
);
1359 if (!Sub
.isNullValue())
1360 DemandedSubElts
.setBit(i
);
1365 APInt DemandedSubElts
;
1366 if (GetDemandedSubMask(DemandedSubElts
)) {
1367 unsigned NumSrcElts
= SrcVT
.getVectorNumElements();
1368 APInt DemandedElts
= APInt::getSplat(NumSrcElts
, DemandedSubElts
);
1370 APInt KnownUndef
, KnownZero
;
1371 if (SimplifyDemandedVectorElts(Src
, DemandedElts
, KnownUndef
, KnownZero
,
1376 // If this is a bitcast, let computeKnownBits handle it. Only do this on a
1377 // recursive call where Known may be useful to the caller.
1379 Known
= TLO
.DAG
.computeKnownBits(Op
, Depth
);
1387 // Add, Sub, and Mul don't demand any bits in positions beyond that
1388 // of the highest bit demanded of them.
1389 SDValue Op0
= Op
.getOperand(0), Op1
= Op
.getOperand(1);
1390 unsigned DemandedBitsLZ
= DemandedBits
.countLeadingZeros();
1391 APInt LoMask
= APInt::getLowBitsSet(BitWidth
, BitWidth
- DemandedBitsLZ
);
1392 if (SimplifyDemandedBits(Op0
, LoMask
, DemandedElts
, Known2
, TLO
,
1394 SimplifyDemandedBits(Op1
, LoMask
, DemandedElts
, Known2
, TLO
,
1396 // See if the operation should be performed at a smaller bit width.
1397 ShrinkDemandedOp(Op
, BitWidth
, DemandedBits
, TLO
)) {
1398 SDNodeFlags Flags
= Op
.getNode()->getFlags();
1399 if (Flags
.hasNoSignedWrap() || Flags
.hasNoUnsignedWrap()) {
1400 // Disable the nsw and nuw flags. We can no longer guarantee that we
1401 // won't wrap after simplification.
1402 Flags
.setNoSignedWrap(false);
1403 Flags
.setNoUnsignedWrap(false);
1405 TLO
.DAG
.getNode(Op
.getOpcode(), dl
, VT
, Op0
, Op1
, Flags
);
1406 return TLO
.CombineTo(Op
, NewOp
);
1411 // If we have a constant operand, we may be able to turn it into -1 if we
1412 // do not demand the high bits. This can make the constant smaller to
1413 // encode, allow more general folding, or match specialized instruction
1414 // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
1415 // is probably not useful (and could be detrimental).
1416 ConstantSDNode
*C
= isConstOrConstSplat(Op1
);
1417 APInt HighMask
= APInt::getHighBitsSet(BitWidth
, DemandedBitsLZ
);
1418 if (C
&& !C
->isAllOnesValue() && !C
->isOne() &&
1419 (C
->getAPIntValue() | HighMask
).isAllOnesValue()) {
1420 SDValue Neg1
= TLO
.DAG
.getAllOnesConstant(dl
, VT
);
1421 // We can't guarantee that the new math op doesn't wrap, so explicitly
1422 // clear those flags to prevent folding with a potential existing node
1423 // that has those flags set.
1425 Flags
.setNoSignedWrap(false);
1426 Flags
.setNoUnsignedWrap(false);
1427 SDValue NewOp
= TLO
.DAG
.getNode(Op
.getOpcode(), dl
, VT
, Op0
, Neg1
, Flags
);
1428 return TLO
.CombineTo(Op
, NewOp
);
1434 if (Op
.getOpcode() >= ISD::BUILTIN_OP_END
) {
1435 if (SimplifyDemandedBitsForTargetNode(Op
, DemandedBits
, DemandedElts
,
1441 // Just use computeKnownBits to compute output bits.
1442 Known
= TLO
.DAG
.computeKnownBits(Op
, DemandedElts
, Depth
);
1446 // If we know the value of all of the demanded bits, return this as a
1448 if (DemandedBits
.isSubsetOf(Known
.Zero
| Known
.One
)) {
1449 // Avoid folding to a constant if any OpaqueConstant is involved.
1450 const SDNode
*N
= Op
.getNode();
1451 for (SDNodeIterator I
= SDNodeIterator::begin(N
),
1452 E
= SDNodeIterator::end(N
);
1455 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
))
1459 // TODO: Handle float bits as well.
1461 return TLO
.CombineTo(Op
, TLO
.DAG
.getConstant(Known
.One
, dl
, VT
));
1467 bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op
,
1468 const APInt
&DemandedElts
,
1471 DAGCombinerInfo
&DCI
) const {
1472 SelectionDAG
&DAG
= DCI
.DAG
;
1473 TargetLoweringOpt
TLO(DAG
, !DCI
.isBeforeLegalize(),
1474 !DCI
.isBeforeLegalizeOps());
1477 SimplifyDemandedVectorElts(Op
, DemandedElts
, KnownUndef
, KnownZero
, TLO
);
1479 DCI
.AddToWorklist(Op
.getNode());
1480 DCI
.CommitTargetLoweringOpt(TLO
);
1485 /// Given a vector binary operation and known undefined elements for each input
1486 /// operand, compute whether each element of the output is undefined.
1487 static APInt
getKnownUndefForVectorBinop(SDValue BO
, SelectionDAG
&DAG
,
1488 const APInt
&UndefOp0
,
1489 const APInt
&UndefOp1
) {
1490 EVT VT
= BO
.getValueType();
1491 assert(ISD::isBinaryOp(BO
.getNode()) && VT
.isVector() && "Vector binop only");
1493 EVT EltVT
= VT
.getVectorElementType();
1494 unsigned NumElts
= VT
.getVectorNumElements();
1495 assert(UndefOp0
.getBitWidth() == NumElts
&&
1496 UndefOp1
.getBitWidth() == NumElts
&& "Bad type for undef analysis");
1498 auto getUndefOrConstantElt
= [&](SDValue V
, unsigned Index
,
1499 const APInt
&UndefVals
) {
1500 if (UndefVals
[Index
])
1501 return DAG
.getUNDEF(EltVT
);
1503 if (auto *BV
= dyn_cast
<BuildVectorSDNode
>(V
)) {
1504 // Try hard to make sure that the getNode() call is not creating temporary
1505 // nodes. Ignore opaque integers because they do not constant fold.
1506 SDValue Elt
= BV
->getOperand(Index
);
1507 auto *C
= dyn_cast
<ConstantSDNode
>(Elt
);
1508 if (isa
<ConstantFPSDNode
>(Elt
) || Elt
.isUndef() || (C
&& !C
->isOpaque()))
1515 APInt KnownUndef
= APInt::getNullValue(NumElts
);
1516 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1517 // If both inputs for this element are either constant or undef and match
1518 // the element type, compute the constant/undef result for this element of
1520 // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
1521 // not handle FP constants. The code within getNode() should be refactored
1522 // to avoid the danger of creating a bogus temporary node here.
1523 SDValue C0
= getUndefOrConstantElt(BO
.getOperand(0), i
, UndefOp0
);
1524 SDValue C1
= getUndefOrConstantElt(BO
.getOperand(1), i
, UndefOp1
);
1525 if (C0
&& C1
&& C0
.getValueType() == EltVT
&& C1
.getValueType() == EltVT
)
1526 if (DAG
.getNode(BO
.getOpcode(), SDLoc(BO
), EltVT
, C0
, C1
).isUndef())
1527 KnownUndef
.setBit(i
);
1532 bool TargetLowering::SimplifyDemandedVectorElts(
1533 SDValue Op
, const APInt
&DemandedEltMask
, APInt
&KnownUndef
,
1534 APInt
&KnownZero
, TargetLoweringOpt
&TLO
, unsigned Depth
,
1535 bool AssumeSingleUse
) const {
1536 EVT VT
= Op
.getValueType();
1537 APInt DemandedElts
= DemandedEltMask
;
1538 unsigned NumElts
= DemandedElts
.getBitWidth();
1539 assert(VT
.isVector() && "Expected vector op");
1540 assert(VT
.getVectorNumElements() == NumElts
&&
1541 "Mask size mismatches value type element count!");
1543 KnownUndef
= KnownZero
= APInt::getNullValue(NumElts
);
1547 KnownUndef
.setAllBits();
1551 // If Op has other users, assume that all elements are needed.
1552 if (!Op
.getNode()->hasOneUse() && !AssumeSingleUse
)
1553 DemandedElts
.setAllBits();
1555 // Not demanding any elements from Op.
1556 if (DemandedElts
== 0) {
1557 KnownUndef
.setAllBits();
1558 return TLO
.CombineTo(Op
, TLO
.DAG
.getUNDEF(VT
));
1561 // Limit search depth.
1566 unsigned EltSizeInBits
= VT
.getScalarSizeInBits();
1568 switch (Op
.getOpcode()) {
1569 case ISD::SCALAR_TO_VECTOR
: {
1570 if (!DemandedElts
[0]) {
1571 KnownUndef
.setAllBits();
1572 return TLO
.CombineTo(Op
, TLO
.DAG
.getUNDEF(VT
));
1574 KnownUndef
.setHighBits(NumElts
- 1);
1577 case ISD::BITCAST
: {
1578 SDValue Src
= Op
.getOperand(0);
1579 EVT SrcVT
= Src
.getValueType();
1581 // We only handle vectors here.
1582 // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
1583 if (!SrcVT
.isVector())
1586 // Fast handling of 'identity' bitcasts.
1587 unsigned NumSrcElts
= SrcVT
.getVectorNumElements();
1588 if (NumSrcElts
== NumElts
)
1589 return SimplifyDemandedVectorElts(Src
, DemandedElts
, KnownUndef
,
1590 KnownZero
, TLO
, Depth
+ 1);
1592 APInt SrcZero
, SrcUndef
;
1593 APInt SrcDemandedElts
= APInt::getNullValue(NumSrcElts
);
1595 // Bitcast from 'large element' src vector to 'small element' vector, we
1596 // must demand a source element if any DemandedElt maps to it.
1597 if ((NumElts
% NumSrcElts
) == 0) {
1598 unsigned Scale
= NumElts
/ NumSrcElts
;
1599 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1600 if (DemandedElts
[i
])
1601 SrcDemandedElts
.setBit(i
/ Scale
);
1603 if (SimplifyDemandedVectorElts(Src
, SrcDemandedElts
, SrcUndef
, SrcZero
,
1607 // Try calling SimplifyDemandedBits, converting demanded elts to the bits
1608 // of the large element.
1609 // TODO - bigendian once we have test coverage.
1610 if (TLO
.DAG
.getDataLayout().isLittleEndian()) {
1611 unsigned SrcEltSizeInBits
= SrcVT
.getScalarSizeInBits();
1612 APInt SrcDemandedBits
= APInt::getNullValue(SrcEltSizeInBits
);
1613 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1614 if (DemandedElts
[i
]) {
1615 unsigned Ofs
= (i
% Scale
) * EltSizeInBits
;
1616 SrcDemandedBits
.setBits(Ofs
, Ofs
+ EltSizeInBits
);
1620 if (SimplifyDemandedBits(Src
, SrcDemandedBits
, Known
, TLO
, Depth
+ 1))
1624 // If the src element is zero/undef then all the output elements will be -
1625 // only demanded elements are guaranteed to be correct.
1626 for (unsigned i
= 0; i
!= NumSrcElts
; ++i
) {
1627 if (SrcDemandedElts
[i
]) {
1629 KnownZero
.setBits(i
* Scale
, (i
+ 1) * Scale
);
1631 KnownUndef
.setBits(i
* Scale
, (i
+ 1) * Scale
);
1636 // Bitcast from 'small element' src vector to 'large element' vector, we
1637 // demand all smaller source elements covered by the larger demanded element
1639 if ((NumSrcElts
% NumElts
) == 0) {
1640 unsigned Scale
= NumSrcElts
/ NumElts
;
1641 for (unsigned i
= 0; i
!= NumElts
; ++i
)
1642 if (DemandedElts
[i
])
1643 SrcDemandedElts
.setBits(i
* Scale
, (i
+ 1) * Scale
);
1645 if (SimplifyDemandedVectorElts(Src
, SrcDemandedElts
, SrcUndef
, SrcZero
,
1649 // If all the src elements covering an output element are zero/undef, then
1650 // the output element will be as well, assuming it was demanded.
1651 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1652 if (DemandedElts
[i
]) {
1653 if (SrcZero
.extractBits(Scale
, i
* Scale
).isAllOnesValue())
1654 KnownZero
.setBit(i
);
1655 if (SrcUndef
.extractBits(Scale
, i
* Scale
).isAllOnesValue())
1656 KnownUndef
.setBit(i
);
1662 case ISD::BUILD_VECTOR
: {
1663 // Check all elements and simplify any unused elements with UNDEF.
1664 if (!DemandedElts
.isAllOnesValue()) {
1665 // Don't simplify BROADCASTS.
1666 if (llvm::any_of(Op
->op_values(),
1667 [&](SDValue Elt
) { return Op
.getOperand(0) != Elt
; })) {
1668 SmallVector
<SDValue
, 32> Ops(Op
->op_begin(), Op
->op_end());
1669 bool Updated
= false;
1670 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1671 if (!DemandedElts
[i
] && !Ops
[i
].isUndef()) {
1672 Ops
[i
] = TLO
.DAG
.getUNDEF(Ops
[0].getValueType());
1673 KnownUndef
.setBit(i
);
1678 return TLO
.CombineTo(Op
, TLO
.DAG
.getBuildVector(VT
, DL
, Ops
));
1681 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1682 SDValue SrcOp
= Op
.getOperand(i
);
1683 if (SrcOp
.isUndef()) {
1684 KnownUndef
.setBit(i
);
1685 } else if (EltSizeInBits
== SrcOp
.getScalarValueSizeInBits() &&
1686 (isNullConstant(SrcOp
) || isNullFPConstant(SrcOp
))) {
1687 KnownZero
.setBit(i
);
1692 case ISD::CONCAT_VECTORS
: {
1693 EVT SubVT
= Op
.getOperand(0).getValueType();
1694 unsigned NumSubVecs
= Op
.getNumOperands();
1695 unsigned NumSubElts
= SubVT
.getVectorNumElements();
1696 for (unsigned i
= 0; i
!= NumSubVecs
; ++i
) {
1697 SDValue SubOp
= Op
.getOperand(i
);
1698 APInt SubElts
= DemandedElts
.extractBits(NumSubElts
, i
* NumSubElts
);
1699 APInt SubUndef
, SubZero
;
1700 if (SimplifyDemandedVectorElts(SubOp
, SubElts
, SubUndef
, SubZero
, TLO
,
1703 KnownUndef
.insertBits(SubUndef
, i
* NumSubElts
);
1704 KnownZero
.insertBits(SubZero
, i
* NumSubElts
);
1708 case ISD::INSERT_SUBVECTOR
: {
1709 if (!isa
<ConstantSDNode
>(Op
.getOperand(2)))
1711 SDValue Base
= Op
.getOperand(0);
1712 SDValue Sub
= Op
.getOperand(1);
1713 EVT SubVT
= Sub
.getValueType();
1714 unsigned NumSubElts
= SubVT
.getVectorNumElements();
1715 const APInt
&Idx
= cast
<ConstantSDNode
>(Op
.getOperand(2))->getAPIntValue();
1716 if (Idx
.ugt(NumElts
- NumSubElts
))
1718 unsigned SubIdx
= Idx
.getZExtValue();
1719 APInt SubElts
= DemandedElts
.extractBits(NumSubElts
, SubIdx
);
1720 APInt SubUndef
, SubZero
;
1721 if (SimplifyDemandedVectorElts(Sub
, SubElts
, SubUndef
, SubZero
, TLO
,
1724 APInt BaseElts
= DemandedElts
;
1725 BaseElts
.insertBits(APInt::getNullValue(NumSubElts
), SubIdx
);
1726 if (SimplifyDemandedVectorElts(Base
, BaseElts
, KnownUndef
, KnownZero
, TLO
,
1729 KnownUndef
.insertBits(SubUndef
, SubIdx
);
1730 KnownZero
.insertBits(SubZero
, SubIdx
);
1733 case ISD::EXTRACT_SUBVECTOR
: {
1734 SDValue Src
= Op
.getOperand(0);
1735 ConstantSDNode
*SubIdx
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
1736 unsigned NumSrcElts
= Src
.getValueType().getVectorNumElements();
1737 if (SubIdx
&& SubIdx
->getAPIntValue().ule(NumSrcElts
- NumElts
)) {
1738 // Offset the demanded elts by the subvector index.
1739 uint64_t Idx
= SubIdx
->getZExtValue();
1740 APInt SrcElts
= DemandedElts
.zextOrSelf(NumSrcElts
).shl(Idx
);
1741 APInt SrcUndef
, SrcZero
;
1742 if (SimplifyDemandedVectorElts(Src
, SrcElts
, SrcUndef
, SrcZero
, TLO
,
1745 KnownUndef
= SrcUndef
.extractBits(NumElts
, Idx
);
1746 KnownZero
= SrcZero
.extractBits(NumElts
, Idx
);
1750 case ISD::INSERT_VECTOR_ELT
: {
1751 SDValue Vec
= Op
.getOperand(0);
1752 SDValue Scl
= Op
.getOperand(1);
1753 auto *CIdx
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
1755 // For a legal, constant insertion index, if we don't need this insertion
1756 // then strip it, else remove it from the demanded elts.
1757 if (CIdx
&& CIdx
->getAPIntValue().ult(NumElts
)) {
1758 unsigned Idx
= CIdx
->getZExtValue();
1759 if (!DemandedElts
[Idx
])
1760 return TLO
.CombineTo(Op
, Vec
);
1762 APInt
DemandedVecElts(DemandedElts
);
1763 DemandedVecElts
.clearBit(Idx
);
1764 if (SimplifyDemandedVectorElts(Vec
, DemandedVecElts
, KnownUndef
,
1765 KnownZero
, TLO
, Depth
+ 1))
1768 KnownUndef
.clearBit(Idx
);
1770 KnownUndef
.setBit(Idx
);
1772 KnownZero
.clearBit(Idx
);
1773 if (isNullConstant(Scl
) || isNullFPConstant(Scl
))
1774 KnownZero
.setBit(Idx
);
1778 APInt VecUndef
, VecZero
;
1779 if (SimplifyDemandedVectorElts(Vec
, DemandedElts
, VecUndef
, VecZero
, TLO
,
1782 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
1785 case ISD::VSELECT
: {
1786 // Try to transform the select condition based on the current demanded
1788 // TODO: If a condition element is undef, we can choose from one arm of the
1789 // select (and if one arm is undef, then we can propagate that to the
1791 // TODO - add support for constant vselect masks (see IR version of this).
1792 APInt UnusedUndef
, UnusedZero
;
1793 if (SimplifyDemandedVectorElts(Op
.getOperand(0), DemandedElts
, UnusedUndef
,
1794 UnusedZero
, TLO
, Depth
+ 1))
1797 // See if we can simplify either vselect operand.
1798 APInt
DemandedLHS(DemandedElts
);
1799 APInt
DemandedRHS(DemandedElts
);
1800 APInt UndefLHS
, ZeroLHS
;
1801 APInt UndefRHS
, ZeroRHS
;
1802 if (SimplifyDemandedVectorElts(Op
.getOperand(1), DemandedLHS
, UndefLHS
,
1803 ZeroLHS
, TLO
, Depth
+ 1))
1805 if (SimplifyDemandedVectorElts(Op
.getOperand(2), DemandedRHS
, UndefRHS
,
1806 ZeroRHS
, TLO
, Depth
+ 1))
1809 KnownUndef
= UndefLHS
& UndefRHS
;
1810 KnownZero
= ZeroLHS
& ZeroRHS
;
1813 case ISD::VECTOR_SHUFFLE
: {
1814 ArrayRef
<int> ShuffleMask
= cast
<ShuffleVectorSDNode
>(Op
)->getMask();
1816 // Collect demanded elements from shuffle operands..
1817 APInt
DemandedLHS(NumElts
, 0);
1818 APInt
DemandedRHS(NumElts
, 0);
1819 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1820 int M
= ShuffleMask
[i
];
1821 if (M
< 0 || !DemandedElts
[i
])
1823 assert(0 <= M
&& M
< (int)(2 * NumElts
) && "Shuffle index out of range");
1824 if (M
< (int)NumElts
)
1825 DemandedLHS
.setBit(M
);
1827 DemandedRHS
.setBit(M
- NumElts
);
1830 // See if we can simplify either shuffle operand.
1831 APInt UndefLHS
, ZeroLHS
;
1832 APInt UndefRHS
, ZeroRHS
;
1833 if (SimplifyDemandedVectorElts(Op
.getOperand(0), DemandedLHS
, UndefLHS
,
1834 ZeroLHS
, TLO
, Depth
+ 1))
1836 if (SimplifyDemandedVectorElts(Op
.getOperand(1), DemandedRHS
, UndefRHS
,
1837 ZeroRHS
, TLO
, Depth
+ 1))
1840 // Simplify mask using undef elements from LHS/RHS.
1841 bool Updated
= false;
1842 bool IdentityLHS
= true, IdentityRHS
= true;
1843 SmallVector
<int, 32> NewMask(ShuffleMask
.begin(), ShuffleMask
.end());
1844 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1845 int &M
= NewMask
[i
];
1848 if (!DemandedElts
[i
] || (M
< (int)NumElts
&& UndefLHS
[M
]) ||
1849 (M
>= (int)NumElts
&& UndefRHS
[M
- NumElts
])) {
1853 IdentityLHS
&= (M
< 0) || (M
== (int)i
);
1854 IdentityRHS
&= (M
< 0) || ((M
- NumElts
) == i
);
1857 // Update legal shuffle masks based on demanded elements if it won't reduce
1858 // to Identity which can cause premature removal of the shuffle mask.
1859 if (Updated
&& !IdentityLHS
&& !IdentityRHS
&& !TLO
.LegalOps
&&
1860 isShuffleMaskLegal(NewMask
, VT
))
1861 return TLO
.CombineTo(Op
,
1862 TLO
.DAG
.getVectorShuffle(VT
, DL
, Op
.getOperand(0),
1863 Op
.getOperand(1), NewMask
));
1865 // Propagate undef/zero elements from LHS/RHS.
1866 for (unsigned i
= 0; i
!= NumElts
; ++i
) {
1867 int M
= ShuffleMask
[i
];
1869 KnownUndef
.setBit(i
);
1870 } else if (M
< (int)NumElts
) {
1872 KnownUndef
.setBit(i
);
1874 KnownZero
.setBit(i
);
1876 if (UndefRHS
[M
- NumElts
])
1877 KnownUndef
.setBit(i
);
1878 if (ZeroRHS
[M
- NumElts
])
1879 KnownZero
.setBit(i
);
1884 case ISD::SIGN_EXTEND_VECTOR_INREG
:
1885 case ISD::ZERO_EXTEND_VECTOR_INREG
: {
1886 APInt SrcUndef
, SrcZero
;
1887 SDValue Src
= Op
.getOperand(0);
1888 unsigned NumSrcElts
= Src
.getValueType().getVectorNumElements();
1889 APInt DemandedSrcElts
= DemandedElts
.zextOrSelf(NumSrcElts
);
1890 if (SimplifyDemandedVectorElts(Src
, DemandedSrcElts
, SrcUndef
, SrcZero
, TLO
,
1893 KnownZero
= SrcZero
.zextOrTrunc(NumElts
);
1894 KnownUndef
= SrcUndef
.zextOrTrunc(NumElts
);
1896 if (Op
.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG
) {
1897 // zext(undef) upper bits are guaranteed to be zero.
1898 if (DemandedElts
.isSubsetOf(KnownUndef
))
1899 return TLO
.CombineTo(Op
, TLO
.DAG
.getConstant(0, SDLoc(Op
), VT
));
1900 KnownUndef
.clearAllBits();
1905 // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
1906 // MAX, saturated math, etc.
1916 APInt UndefRHS
, ZeroRHS
;
1917 if (SimplifyDemandedVectorElts(Op
.getOperand(1), DemandedElts
, UndefRHS
,
1918 ZeroRHS
, TLO
, Depth
+ 1))
1920 APInt UndefLHS
, ZeroLHS
;
1921 if (SimplifyDemandedVectorElts(Op
.getOperand(0), DemandedElts
, UndefLHS
,
1922 ZeroLHS
, TLO
, Depth
+ 1))
1925 KnownZero
= ZeroLHS
& ZeroRHS
;
1926 KnownUndef
= getKnownUndefForVectorBinop(Op
, TLO
.DAG
, UndefLHS
, UndefRHS
);
1930 APInt SrcUndef
, SrcZero
;
1931 if (SimplifyDemandedVectorElts(Op
.getOperand(1), DemandedElts
, SrcUndef
,
1932 SrcZero
, TLO
, Depth
+ 1))
1934 if (SimplifyDemandedVectorElts(Op
.getOperand(0), DemandedElts
, KnownUndef
,
1935 KnownZero
, TLO
, Depth
+ 1))
1938 // If either side has a zero element, then the result element is zero, even
1939 // if the other is an UNDEF.
1940 // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
1941 // and then handle 'and' nodes with the rest of the binop opcodes.
1942 KnownZero
|= SrcZero
;
1943 KnownUndef
&= SrcUndef
;
1944 KnownUndef
&= ~KnownZero
;
1948 case ISD::SIGN_EXTEND
:
1949 case ISD::ZERO_EXTEND
:
1950 if (SimplifyDemandedVectorElts(Op
.getOperand(0), DemandedElts
, KnownUndef
,
1951 KnownZero
, TLO
, Depth
+ 1))
1954 if (Op
.getOpcode() == ISD::ZERO_EXTEND
) {
1955 // zext(undef) upper bits are guaranteed to be zero.
1956 if (DemandedElts
.isSubsetOf(KnownUndef
))
1957 return TLO
.CombineTo(Op
, TLO
.DAG
.getConstant(0, SDLoc(Op
), VT
));
1958 KnownUndef
.clearAllBits();
1962 if (Op
.getOpcode() >= ISD::BUILTIN_OP_END
) {
1963 if (SimplifyDemandedVectorEltsForTargetNode(Op
, DemandedElts
, KnownUndef
,
1964 KnownZero
, TLO
, Depth
))
1968 APInt DemandedBits
= APInt::getAllOnesValue(EltSizeInBits
);
1969 if (SimplifyDemandedBits(Op
, DemandedBits
, DemandedEltMask
, Known
, TLO
,
1970 Depth
, AssumeSingleUse
))
1976 assert((KnownUndef
& KnownZero
) == 0 && "Elements flagged as undef AND zero");
1978 // Constant fold all undef cases.
1979 // TODO: Handle zero cases as well.
1980 if (DemandedElts
.isSubsetOf(KnownUndef
))
1981 return TLO
.CombineTo(Op
, TLO
.DAG
.getUNDEF(VT
));
1986 /// Determine which of the bits specified in Mask are known to be either zero or
1987 /// one and return them in the Known.
1988 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op
,
1990 const APInt
&DemandedElts
,
1991 const SelectionDAG
&DAG
,
1992 unsigned Depth
) const {
1993 assert((Op
.getOpcode() >= ISD::BUILTIN_OP_END
||
1994 Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
1995 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
||
1996 Op
.getOpcode() == ISD::INTRINSIC_VOID
) &&
1997 "Should use MaskedValueIsZero if you don't know whether Op"
1998 " is a target node!");
2002 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op
,
2004 const APInt
&DemandedElts
,
2005 const SelectionDAG
&DAG
,
2006 unsigned Depth
) const {
2007 assert(isa
<FrameIndexSDNode
>(Op
) && "expected FrameIndex");
2009 if (unsigned Align
= DAG
.InferPtrAlignment(Op
)) {
2010 // The low bits are known zero if the pointer is aligned.
2011 Known
.Zero
.setLowBits(Log2_32(Align
));
2015 /// This method can be implemented by targets that want to expose additional
2016 /// information about sign bits to the DAG Combiner.
2017 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op
,
2019 const SelectionDAG
&,
2020 unsigned Depth
) const {
2021 assert((Op
.getOpcode() >= ISD::BUILTIN_OP_END
||
2022 Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
2023 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
||
2024 Op
.getOpcode() == ISD::INTRINSIC_VOID
) &&
2025 "Should use ComputeNumSignBits if you don't know whether Op"
2026 " is a target node!");
2030 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
2031 SDValue Op
, const APInt
&DemandedElts
, APInt
&KnownUndef
, APInt
&KnownZero
,
2032 TargetLoweringOpt
&TLO
, unsigned Depth
) const {
2033 assert((Op
.getOpcode() >= ISD::BUILTIN_OP_END
||
2034 Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
2035 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
||
2036 Op
.getOpcode() == ISD::INTRINSIC_VOID
) &&
2037 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
2038 " is a target node!");
2042 bool TargetLowering::SimplifyDemandedBitsForTargetNode(
2043 SDValue Op
, const APInt
&DemandedBits
, const APInt
&DemandedElts
,
2044 KnownBits
&Known
, TargetLoweringOpt
&TLO
, unsigned Depth
) const {
2045 assert((Op
.getOpcode() >= ISD::BUILTIN_OP_END
||
2046 Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
2047 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
||
2048 Op
.getOpcode() == ISD::INTRINSIC_VOID
) &&
2049 "Should use SimplifyDemandedBits if you don't know whether Op"
2050 " is a target node!");
2051 computeKnownBitsForTargetNode(Op
, Known
, DemandedElts
, TLO
.DAG
, Depth
);
2055 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op
,
2056 const SelectionDAG
&DAG
,
2058 unsigned Depth
) const {
2059 assert((Op
.getOpcode() >= ISD::BUILTIN_OP_END
||
2060 Op
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
||
2061 Op
.getOpcode() == ISD::INTRINSIC_W_CHAIN
||
2062 Op
.getOpcode() == ISD::INTRINSIC_VOID
) &&
2063 "Should use isKnownNeverNaN if you don't know whether Op"
2064 " is a target node!");
2068 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
2069 // work with truncating build vectors and vectors with elements of less than
2071 bool TargetLowering::isConstTrueVal(const SDNode
*N
) const {
2076 if (auto *CN
= dyn_cast
<ConstantSDNode
>(N
)) {
2077 CVal
= CN
->getAPIntValue();
2078 } else if (auto *BV
= dyn_cast
<BuildVectorSDNode
>(N
)) {
2079 auto *CN
= BV
->getConstantSplatNode();
2083 // If this is a truncating build vector, truncate the splat value.
2084 // Otherwise, we may fail to match the expected values below.
2085 unsigned BVEltWidth
= BV
->getValueType(0).getScalarSizeInBits();
2086 CVal
= CN
->getAPIntValue();
2087 if (BVEltWidth
< CVal
.getBitWidth())
2088 CVal
= CVal
.trunc(BVEltWidth
);
2093 switch (getBooleanContents(N
->getValueType(0))) {
2094 case UndefinedBooleanContent
:
2096 case ZeroOrOneBooleanContent
:
2097 return CVal
.isOneValue();
2098 case ZeroOrNegativeOneBooleanContent
:
2099 return CVal
.isAllOnesValue();
2102 llvm_unreachable("Invalid boolean contents");
2105 bool TargetLowering::isConstFalseVal(const SDNode
*N
) const {
2109 const ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(N
);
2111 const BuildVectorSDNode
*BV
= dyn_cast
<BuildVectorSDNode
>(N
);
2115 // Only interested in constant splats, we don't care about undef
2116 // elements in identifying boolean constants and getConstantSplatNode
2117 // returns NULL if all ops are undef;
2118 CN
= BV
->getConstantSplatNode();
2123 if (getBooleanContents(N
->getValueType(0)) == UndefinedBooleanContent
)
2124 return !CN
->getAPIntValue()[0];
2126 return CN
->isNullValue();
2129 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode
*N
, EVT VT
,
2134 TargetLowering::BooleanContent Cnt
= getBooleanContents(VT
);
2136 case TargetLowering::ZeroOrOneBooleanContent
:
2137 // An extended value of 1 is always true, unless its original type is i1,
2138 // in which case it will be sign extended to -1.
2139 return (N
->isOne() && !SExt
) || (SExt
&& (N
->getValueType(0) != MVT::i1
));
2140 case TargetLowering::UndefinedBooleanContent
:
2141 case TargetLowering::ZeroOrNegativeOneBooleanContent
:
2142 return N
->isAllOnesValue() && SExt
;
2144 llvm_unreachable("Unexpected enumeration.");
2147 /// This helper function of SimplifySetCC tries to optimize the comparison when
2148 /// either operand of the SetCC node is a bitwise-and instruction.
2149 SDValue
TargetLowering::foldSetCCWithAnd(EVT VT
, SDValue N0
, SDValue N1
,
2150 ISD::CondCode Cond
, const SDLoc
&DL
,
2151 DAGCombinerInfo
&DCI
) const {
2152 // Match these patterns in any of their permutations:
2155 if (N1
.getOpcode() == ISD::AND
&& N0
.getOpcode() != ISD::AND
)
2158 EVT OpVT
= N0
.getValueType();
2159 if (N0
.getOpcode() != ISD::AND
|| !OpVT
.isInteger() ||
2160 (Cond
!= ISD::SETEQ
&& Cond
!= ISD::SETNE
))
2164 if (N0
.getOperand(0) == N1
) {
2165 X
= N0
.getOperand(1);
2166 Y
= N0
.getOperand(0);
2167 } else if (N0
.getOperand(1) == N1
) {
2168 X
= N0
.getOperand(0);
2169 Y
= N0
.getOperand(1);
2174 SelectionDAG
&DAG
= DCI
.DAG
;
2175 SDValue Zero
= DAG
.getConstant(0, DL
, OpVT
);
2176 if (DAG
.isKnownToBeAPowerOfTwo(Y
)) {
2177 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
2178 // Note that where Y is variable and is known to have at most one bit set
2179 // (for example, if it is Z & 1) we cannot do this; the expressions are not
2180 // equivalent when Y == 0.
2181 Cond
= ISD::getSetCCInverse(Cond
, /*isInteger=*/true);
2182 if (DCI
.isBeforeLegalizeOps() ||
2183 isCondCodeLegal(Cond
, N0
.getSimpleValueType()))
2184 return DAG
.getSetCC(DL
, VT
, N0
, Zero
, Cond
);
2185 } else if (N0
.hasOneUse() && hasAndNotCompare(Y
)) {
2186 // If the target supports an 'and-not' or 'and-complement' logic operation,
2187 // try to use that to make a comparison operation more efficient.
2188 // But don't do this transform if the mask is a single bit because there are
2189 // more efficient ways to deal with that case (for example, 'bt' on x86 or
2190 // 'rlwinm' on PPC).
2192 // Bail out if the compare operand that we want to turn into a zero is
2193 // already a zero (otherwise, infinite loop).
2194 auto *YConst
= dyn_cast
<ConstantSDNode
>(Y
);
2195 if (YConst
&& YConst
->isNullValue())
2198 // Transform this into: ~X & Y == 0.
2199 SDValue NotX
= DAG
.getNOT(SDLoc(X
), X
, OpVT
);
2200 SDValue NewAnd
= DAG
.getNode(ISD::AND
, SDLoc(N0
), OpVT
, NotX
, Y
);
2201 return DAG
.getSetCC(DL
, VT
, NewAnd
, Zero
, Cond
);
2207 /// There are multiple IR patterns that could be checking whether certain
2208 /// truncation of a signed number would be lossy or not. The pattern which is
2209 /// best at IR level, may not lower optimally. Thus, we want to unfold it.
2210 /// We are looking for the following pattern: (KeptBits is a constant)
2211 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
2212 /// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
2213 /// KeptBits also can't be 1, that would have been folded to %x dstcond 0
2214 /// We will unfold it into the natural trunc+sext pattern:
2215 /// ((%x << C) a>> C) dstcond %x
2216 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
2217 SDValue
TargetLowering::optimizeSetCCOfSignedTruncationCheck(
2218 EVT SCCVT
, SDValue N0
, SDValue N1
, ISD::CondCode Cond
, DAGCombinerInfo
&DCI
,
2219 const SDLoc
&DL
) const {
2220 // We must be comparing with a constant.
2222 if (!(C1
= dyn_cast
<ConstantSDNode
>(N1
)))
2225 // N0 should be: add %x, (1 << (KeptBits-1))
2226 if (N0
->getOpcode() != ISD::ADD
)
2229 // And we must be 'add'ing a constant.
2230 ConstantSDNode
*C01
;
2231 if (!(C01
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1))))
2234 SDValue X
= N0
->getOperand(0);
2235 EVT XVT
= X
.getValueType();
2237 // Validate constants ...
2239 APInt I1
= C1
->getAPIntValue();
2241 ISD::CondCode NewCond
;
2242 if (Cond
== ISD::CondCode::SETULT
) {
2243 NewCond
= ISD::CondCode::SETEQ
;
2244 } else if (Cond
== ISD::CondCode::SETULE
) {
2245 NewCond
= ISD::CondCode::SETEQ
;
2246 // But need to 'canonicalize' the constant.
2248 } else if (Cond
== ISD::CondCode::SETUGT
) {
2249 NewCond
= ISD::CondCode::SETNE
;
2250 // But need to 'canonicalize' the constant.
2252 } else if (Cond
== ISD::CondCode::SETUGE
) {
2253 NewCond
= ISD::CondCode::SETNE
;
2257 APInt I01
= C01
->getAPIntValue();
2259 auto checkConstants
= [&I1
, &I01
]() -> bool {
2260 // Both of them must be power-of-two, and the constant from setcc is bigger.
2261 return I1
.ugt(I01
) && I1
.isPowerOf2() && I01
.isPowerOf2();
2264 if (checkConstants()) {
2265 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
2267 // What if we invert constants? (and the target predicate)
2270 NewCond
= getSetCCInverse(NewCond
, /*isInteger=*/true);
2271 if (!checkConstants())
2273 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
2276 // They are power-of-two, so which bit is set?
2277 const unsigned KeptBits
= I1
.logBase2();
2278 const unsigned KeptBitsMinusOne
= I01
.logBase2();
2281 if (KeptBits
!= (KeptBitsMinusOne
+ 1))
2283 assert(KeptBits
> 0 && KeptBits
< XVT
.getSizeInBits() && "unreachable");
2285 // We don't want to do this in every single case.
2286 SelectionDAG
&DAG
= DCI
.DAG
;
2287 if (!DAG
.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
2291 const unsigned MaskedBits
= XVT
.getSizeInBits() - KeptBits
;
2292 assert(MaskedBits
> 0 && MaskedBits
< XVT
.getSizeInBits() && "unreachable");
2294 // Unfold into: ((%x << C) a>> C) cond %x
2295 // Where 'cond' will be either 'eq' or 'ne'.
2296 SDValue ShiftAmt
= DAG
.getConstant(MaskedBits
, DL
, XVT
);
2297 SDValue T0
= DAG
.getNode(ISD::SHL
, DL
, XVT
, X
, ShiftAmt
);
2298 SDValue T1
= DAG
.getNode(ISD::SRA
, DL
, XVT
, T0
, ShiftAmt
);
2299 SDValue T2
= DAG
.getSetCC(DL
, SCCVT
, T1
, X
, NewCond
);
2304 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
2305 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
2306 /// handle the commuted versions of these patterns.
2307 SDValue
TargetLowering::foldSetCCWithBinOp(EVT VT
, SDValue N0
, SDValue N1
,
2308 ISD::CondCode Cond
, const SDLoc
&DL
,
2309 DAGCombinerInfo
&DCI
) const {
2310 unsigned BOpcode
= N0
.getOpcode();
2311 assert((BOpcode
== ISD::ADD
|| BOpcode
== ISD::SUB
|| BOpcode
== ISD::XOR
) &&
2312 "Unexpected binop");
2313 assert((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) && "Unexpected condcode");
2315 // (X + Y) == X --> Y == 0
2316 // (X - Y) == X --> Y == 0
2317 // (X ^ Y) == X --> Y == 0
2318 SelectionDAG
&DAG
= DCI
.DAG
;
2319 EVT OpVT
= N0
.getValueType();
2320 SDValue X
= N0
.getOperand(0);
2321 SDValue Y
= N0
.getOperand(1);
2323 return DAG
.getSetCC(DL
, VT
, Y
, DAG
.getConstant(0, DL
, OpVT
), Cond
);
2328 // (X + Y) == Y --> X == 0
2329 // (X ^ Y) == Y --> X == 0
2330 if (BOpcode
== ISD::ADD
|| BOpcode
== ISD::XOR
)
2331 return DAG
.getSetCC(DL
, VT
, X
, DAG
.getConstant(0, DL
, OpVT
), Cond
);
2333 // The shift would not be valid if the operands are boolean (i1).
2334 if (!N0
.hasOneUse() || OpVT
.getScalarSizeInBits() == 1)
2337 // (X - Y) == Y --> X == Y << 1
2338 EVT ShiftVT
= getShiftAmountTy(OpVT
, DAG
.getDataLayout(),
2339 !DCI
.isBeforeLegalize());
2340 SDValue One
= DAG
.getConstant(1, DL
, ShiftVT
);
2341 SDValue YShl1
= DAG
.getNode(ISD::SHL
, DL
, N1
.getValueType(), Y
, One
);
2342 if (!DCI
.isCalledByLegalizer())
2343 DCI
.AddToWorklist(YShl1
.getNode());
2344 return DAG
.getSetCC(DL
, VT
, X
, YShl1
, Cond
);
2347 /// Try to simplify a setcc built with the specified operands and cc. If it is
2348 /// unable to simplify it, return a null SDValue.
2349 SDValue
TargetLowering::SimplifySetCC(EVT VT
, SDValue N0
, SDValue N1
,
2350 ISD::CondCode Cond
, bool foldBooleans
,
2351 DAGCombinerInfo
&DCI
,
2352 const SDLoc
&dl
) const {
2353 SelectionDAG
&DAG
= DCI
.DAG
;
2354 EVT OpVT
= N0
.getValueType();
2356 // These setcc operations always fold.
2360 case ISD::SETFALSE2
: return DAG
.getBoolConstant(false, dl
, VT
, OpVT
);
2362 case ISD::SETTRUE2
: return DAG
.getBoolConstant(true, dl
, VT
, OpVT
);
2365 // Ensure that the constant occurs on the RHS and fold constant comparisons.
2366 // TODO: Handle non-splat vector constants. All undef causes trouble.
2367 ISD::CondCode SwappedCC
= ISD::getSetCCSwappedOperands(Cond
);
2368 if (isConstOrConstSplat(N0
) &&
2369 (DCI
.isBeforeLegalizeOps() ||
2370 isCondCodeLegal(SwappedCC
, N0
.getSimpleValueType())))
2371 return DAG
.getSetCC(dl
, VT
, N1
, N0
, SwappedCC
);
2373 if (auto *N1C
= dyn_cast
<ConstantSDNode
>(N1
.getNode())) {
2374 const APInt
&C1
= N1C
->getAPIntValue();
2376 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
2377 // equality comparison, then we're just comparing whether X itself is
2379 if (N0
.getOpcode() == ISD::SRL
&& (C1
.isNullValue() || C1
.isOneValue()) &&
2380 N0
.getOperand(0).getOpcode() == ISD::CTLZ
&&
2381 N0
.getOperand(1).getOpcode() == ISD::Constant
) {
2383 = cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
2384 if ((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) &&
2385 ShAmt
== Log2_32(N0
.getValueSizeInBits())) {
2386 if ((C1
== 0) == (Cond
== ISD::SETEQ
)) {
2387 // (srl (ctlz x), 5) == 0 -> X != 0
2388 // (srl (ctlz x), 5) != 1 -> X != 0
2391 // (srl (ctlz x), 5) != 0 -> X == 0
2392 // (srl (ctlz x), 5) == 1 -> X == 0
2395 SDValue Zero
= DAG
.getConstant(0, dl
, N0
.getValueType());
2396 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0).getOperand(0),
2402 // Look through truncs that don't change the value of a ctpop.
2403 if (N0
.hasOneUse() && N0
.getOpcode() == ISD::TRUNCATE
)
2404 CTPOP
= N0
.getOperand(0);
2406 if (CTPOP
.hasOneUse() && CTPOP
.getOpcode() == ISD::CTPOP
&&
2408 N0
.getValueSizeInBits() > Log2_32_Ceil(CTPOP
.getValueSizeInBits()))) {
2409 EVT CTVT
= CTPOP
.getValueType();
2410 SDValue CTOp
= CTPOP
.getOperand(0);
2412 // (ctpop x) u< 2 -> (x & x-1) == 0
2413 // (ctpop x) u> 1 -> (x & x-1) != 0
2414 if ((Cond
== ISD::SETULT
&& C1
== 2) || (Cond
== ISD::SETUGT
&& C1
== 1)){
2415 SDValue Sub
= DAG
.getNode(ISD::SUB
, dl
, CTVT
, CTOp
,
2416 DAG
.getConstant(1, dl
, CTVT
));
2417 SDValue And
= DAG
.getNode(ISD::AND
, dl
, CTVT
, CTOp
, Sub
);
2418 ISD::CondCode CC
= Cond
== ISD::SETULT
? ISD::SETEQ
: ISD::SETNE
;
2419 return DAG
.getSetCC(dl
, VT
, And
, DAG
.getConstant(0, dl
, CTVT
), CC
);
2422 // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
2425 // (zext x) == C --> x == (trunc C)
2426 // (sext x) == C --> x == (trunc C)
2427 if ((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) &&
2428 DCI
.isBeforeLegalize() && N0
->hasOneUse()) {
2429 unsigned MinBits
= N0
.getValueSizeInBits();
2431 bool Signed
= false;
2432 if (N0
->getOpcode() == ISD::ZERO_EXTEND
) {
2434 MinBits
= N0
->getOperand(0).getValueSizeInBits();
2435 PreExt
= N0
->getOperand(0);
2436 } else if (N0
->getOpcode() == ISD::AND
) {
2437 // DAGCombine turns costly ZExts into ANDs
2438 if (auto *C
= dyn_cast
<ConstantSDNode
>(N0
->getOperand(1)))
2439 if ((C
->getAPIntValue()+1).isPowerOf2()) {
2440 MinBits
= C
->getAPIntValue().countTrailingOnes();
2441 PreExt
= N0
->getOperand(0);
2443 } else if (N0
->getOpcode() == ISD::SIGN_EXTEND
) {
2445 MinBits
= N0
->getOperand(0).getValueSizeInBits();
2446 PreExt
= N0
->getOperand(0);
2448 } else if (auto *LN0
= dyn_cast
<LoadSDNode
>(N0
)) {
2449 // ZEXTLOAD / SEXTLOAD
2450 if (LN0
->getExtensionType() == ISD::ZEXTLOAD
) {
2451 MinBits
= LN0
->getMemoryVT().getSizeInBits();
2453 } else if (LN0
->getExtensionType() == ISD::SEXTLOAD
) {
2455 MinBits
= LN0
->getMemoryVT().getSizeInBits();
2460 // Figure out how many bits we need to preserve this constant.
2461 unsigned ReqdBits
= Signed
?
2462 C1
.getBitWidth() - C1
.getNumSignBits() + 1 :
2465 // Make sure we're not losing bits from the constant.
2467 MinBits
< C1
.getBitWidth() &&
2468 MinBits
>= ReqdBits
) {
2469 EVT MinVT
= EVT::getIntegerVT(*DAG
.getContext(), MinBits
);
2470 if (isTypeDesirableForOp(ISD::SETCC
, MinVT
)) {
2471 // Will get folded away.
2472 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, dl
, MinVT
, PreExt
);
2473 if (MinBits
== 1 && C1
== 1)
2474 // Invert the condition.
2475 return DAG
.getSetCC(dl
, VT
, Trunc
, DAG
.getConstant(0, dl
, MVT::i1
),
2476 Cond
== ISD::SETEQ
? ISD::SETNE
: ISD::SETEQ
);
2477 SDValue C
= DAG
.getConstant(C1
.trunc(MinBits
), dl
, MinVT
);
2478 return DAG
.getSetCC(dl
, VT
, Trunc
, C
, Cond
);
2481 // If truncating the setcc operands is not desirable, we can still
2482 // simplify the expression in some cases:
2483 // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
2484 // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
2485 // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
2486 // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
2487 // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
2488 // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
2489 SDValue TopSetCC
= N0
->getOperand(0);
2490 unsigned N0Opc
= N0
->getOpcode();
2491 bool SExt
= (N0Opc
== ISD::SIGN_EXTEND
);
2492 if (TopSetCC
.getValueType() == MVT::i1
&& VT
== MVT::i1
&&
2493 TopSetCC
.getOpcode() == ISD::SETCC
&&
2494 (N0Opc
== ISD::ZERO_EXTEND
|| N0Opc
== ISD::SIGN_EXTEND
) &&
2495 (isConstFalseVal(N1C
) ||
2496 isExtendedTrueVal(N1C
, N0
->getValueType(0), SExt
))) {
2498 bool Inverse
= (N1C
->isNullValue() && Cond
== ISD::SETEQ
) ||
2499 (!N1C
->isNullValue() && Cond
== ISD::SETNE
);
2504 ISD::CondCode InvCond
= ISD::getSetCCInverse(
2505 cast
<CondCodeSDNode
>(TopSetCC
.getOperand(2))->get(),
2506 TopSetCC
.getOperand(0).getValueType().isInteger());
2507 return DAG
.getSetCC(dl
, VT
, TopSetCC
.getOperand(0),
2508 TopSetCC
.getOperand(1),
2514 // If the LHS is '(and load, const)', the RHS is 0, the test is for
2515 // equality or unsigned, and all 1 bits of the const are in the same
2516 // partial word, see if we can shorten the load.
2517 if (DCI
.isBeforeLegalize() &&
2518 !ISD::isSignedIntSetCC(Cond
) &&
2519 N0
.getOpcode() == ISD::AND
&& C1
== 0 &&
2520 N0
.getNode()->hasOneUse() &&
2521 isa
<LoadSDNode
>(N0
.getOperand(0)) &&
2522 N0
.getOperand(0).getNode()->hasOneUse() &&
2523 isa
<ConstantSDNode
>(N0
.getOperand(1))) {
2524 LoadSDNode
*Lod
= cast
<LoadSDNode
>(N0
.getOperand(0));
2526 unsigned bestWidth
= 0, bestOffset
= 0;
2527 if (!Lod
->isVolatile() && Lod
->isUnindexed()) {
2528 unsigned origWidth
= N0
.getValueSizeInBits();
2529 unsigned maskWidth
= origWidth
;
2530 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
2531 // 8 bits, but have to be careful...
2532 if (Lod
->getExtensionType() != ISD::NON_EXTLOAD
)
2533 origWidth
= Lod
->getMemoryVT().getSizeInBits();
2535 cast
<ConstantSDNode
>(N0
.getOperand(1))->getAPIntValue();
2536 for (unsigned width
= origWidth
/ 2; width
>=8; width
/= 2) {
2537 APInt newMask
= APInt::getLowBitsSet(maskWidth
, width
);
2538 for (unsigned offset
=0; offset
<origWidth
/width
; offset
++) {
2539 if (Mask
.isSubsetOf(newMask
)) {
2540 if (DAG
.getDataLayout().isLittleEndian())
2541 bestOffset
= (uint64_t)offset
* (width
/8);
2543 bestOffset
= (origWidth
/width
- offset
- 1) * (width
/8);
2544 bestMask
= Mask
.lshr(offset
* (width
/8) * 8);
2553 EVT newVT
= EVT::getIntegerVT(*DAG
.getContext(), bestWidth
);
2554 if (newVT
.isRound() &&
2555 shouldReduceLoadWidth(Lod
, ISD::NON_EXTLOAD
, newVT
)) {
2556 EVT PtrType
= Lod
->getOperand(1).getValueType();
2557 SDValue Ptr
= Lod
->getBasePtr();
2558 if (bestOffset
!= 0)
2559 Ptr
= DAG
.getNode(ISD::ADD
, dl
, PtrType
, Lod
->getBasePtr(),
2560 DAG
.getConstant(bestOffset
, dl
, PtrType
));
2561 unsigned NewAlign
= MinAlign(Lod
->getAlignment(), bestOffset
);
2562 SDValue NewLoad
= DAG
.getLoad(
2563 newVT
, dl
, Lod
->getChain(), Ptr
,
2564 Lod
->getPointerInfo().getWithOffset(bestOffset
), NewAlign
);
2565 return DAG
.getSetCC(dl
, VT
,
2566 DAG
.getNode(ISD::AND
, dl
, newVT
, NewLoad
,
2567 DAG
.getConstant(bestMask
.trunc(bestWidth
),
2569 DAG
.getConstant(0LL, dl
, newVT
), Cond
);
2574 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
2575 if (N0
.getOpcode() == ISD::ZERO_EXTEND
) {
2576 unsigned InSize
= N0
.getOperand(0).getValueSizeInBits();
2578 // If the comparison constant has bits in the upper part, the
2579 // zero-extended value could never match.
2580 if (C1
.intersects(APInt::getHighBitsSet(C1
.getBitWidth(),
2581 C1
.getBitWidth() - InSize
))) {
2586 return DAG
.getConstant(0, dl
, VT
);
2590 return DAG
.getConstant(1, dl
, VT
);
2593 // True if the sign bit of C1 is set.
2594 return DAG
.getConstant(C1
.isNegative(), dl
, VT
);
2597 // True if the sign bit of C1 isn't set.
2598 return DAG
.getConstant(C1
.isNonNegative(), dl
, VT
);
2604 // Otherwise, we can perform the comparison with the low bits.
2612 EVT newVT
= N0
.getOperand(0).getValueType();
2613 if (DCI
.isBeforeLegalizeOps() ||
2614 (isOperationLegal(ISD::SETCC
, newVT
) &&
2615 isCondCodeLegal(Cond
, newVT
.getSimpleVT()))) {
2617 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), newVT
);
2618 SDValue NewConst
= DAG
.getConstant(C1
.trunc(InSize
), dl
, newVT
);
2620 SDValue NewSetCC
= DAG
.getSetCC(dl
, NewSetCCVT
, N0
.getOperand(0),
2622 return DAG
.getBoolExtOrTrunc(NewSetCC
, dl
, VT
, N0
.getValueType());
2627 break; // todo, be more careful with signed comparisons
2629 } else if (N0
.getOpcode() == ISD::SIGN_EXTEND_INREG
&&
2630 (Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
)) {
2631 EVT ExtSrcTy
= cast
<VTSDNode
>(N0
.getOperand(1))->getVT();
2632 unsigned ExtSrcTyBits
= ExtSrcTy
.getSizeInBits();
2633 EVT ExtDstTy
= N0
.getValueType();
2634 unsigned ExtDstTyBits
= ExtDstTy
.getSizeInBits();
2636 // If the constant doesn't fit into the number of bits for the source of
2637 // the sign extension, it is impossible for both sides to be equal.
2638 if (C1
.getMinSignedBits() > ExtSrcTyBits
)
2639 return DAG
.getConstant(Cond
== ISD::SETNE
, dl
, VT
);
2642 EVT Op0Ty
= N0
.getOperand(0).getValueType();
2643 if (Op0Ty
== ExtSrcTy
) {
2644 ZextOp
= N0
.getOperand(0);
2646 APInt Imm
= APInt::getLowBitsSet(ExtDstTyBits
, ExtSrcTyBits
);
2647 ZextOp
= DAG
.getNode(ISD::AND
, dl
, Op0Ty
, N0
.getOperand(0),
2648 DAG
.getConstant(Imm
, dl
, Op0Ty
));
2650 if (!DCI
.isCalledByLegalizer())
2651 DCI
.AddToWorklist(ZextOp
.getNode());
2652 // Otherwise, make this a use of a zext.
2653 return DAG
.getSetCC(dl
, VT
, ZextOp
,
2654 DAG
.getConstant(C1
& APInt::getLowBitsSet(
2659 } else if ((N1C
->isNullValue() || N1C
->isOne()) &&
2660 (Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
)) {
2661 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
2662 if (N0
.getOpcode() == ISD::SETCC
&&
2663 isTypeLegal(VT
) && VT
.bitsLE(N0
.getValueType())) {
2664 bool TrueWhenTrue
= (Cond
== ISD::SETEQ
) ^ (!N1C
->isOne());
2666 return DAG
.getNode(ISD::TRUNCATE
, dl
, VT
, N0
);
2667 // Invert the condition.
2668 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N0
.getOperand(2))->get();
2669 CC
= ISD::getSetCCInverse(CC
,
2670 N0
.getOperand(0).getValueType().isInteger());
2671 if (DCI
.isBeforeLegalizeOps() ||
2672 isCondCodeLegal(CC
, N0
.getOperand(0).getSimpleValueType()))
2673 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0), N0
.getOperand(1), CC
);
2676 if ((N0
.getOpcode() == ISD::XOR
||
2677 (N0
.getOpcode() == ISD::AND
&&
2678 N0
.getOperand(0).getOpcode() == ISD::XOR
&&
2679 N0
.getOperand(1) == N0
.getOperand(0).getOperand(1))) &&
2680 isa
<ConstantSDNode
>(N0
.getOperand(1)) &&
2681 cast
<ConstantSDNode
>(N0
.getOperand(1))->isOne()) {
2682 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
2683 // can only do this if the top bits are known zero.
2684 unsigned BitWidth
= N0
.getValueSizeInBits();
2685 if (DAG
.MaskedValueIsZero(N0
,
2686 APInt::getHighBitsSet(BitWidth
,
2688 // Okay, get the un-inverted input value.
2690 if (N0
.getOpcode() == ISD::XOR
) {
2691 Val
= N0
.getOperand(0);
2693 assert(N0
.getOpcode() == ISD::AND
&&
2694 N0
.getOperand(0).getOpcode() == ISD::XOR
);
2695 // ((X^1)&1)^1 -> X & 1
2696 Val
= DAG
.getNode(ISD::AND
, dl
, N0
.getValueType(),
2697 N0
.getOperand(0).getOperand(0),
2701 return DAG
.getSetCC(dl
, VT
, Val
, N1
,
2702 Cond
== ISD::SETEQ
? ISD::SETNE
: ISD::SETEQ
);
2704 } else if (N1C
->isOne() &&
2706 getBooleanContents(N0
->getValueType(0)) ==
2707 ZeroOrOneBooleanContent
)) {
2709 if (Op0
.getOpcode() == ISD::TRUNCATE
)
2710 Op0
= Op0
.getOperand(0);
2712 if ((Op0
.getOpcode() == ISD::XOR
) &&
2713 Op0
.getOperand(0).getOpcode() == ISD::SETCC
&&
2714 Op0
.getOperand(1).getOpcode() == ISD::SETCC
) {
2715 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
2716 Cond
= (Cond
== ISD::SETEQ
) ? ISD::SETNE
: ISD::SETEQ
;
2717 return DAG
.getSetCC(dl
, VT
, Op0
.getOperand(0), Op0
.getOperand(1),
2720 if (Op0
.getOpcode() == ISD::AND
&&
2721 isa
<ConstantSDNode
>(Op0
.getOperand(1)) &&
2722 cast
<ConstantSDNode
>(Op0
.getOperand(1))->isOne()) {
2723 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
2724 if (Op0
.getValueType().bitsGT(VT
))
2725 Op0
= DAG
.getNode(ISD::AND
, dl
, VT
,
2726 DAG
.getNode(ISD::TRUNCATE
, dl
, VT
, Op0
.getOperand(0)),
2727 DAG
.getConstant(1, dl
, VT
));
2728 else if (Op0
.getValueType().bitsLT(VT
))
2729 Op0
= DAG
.getNode(ISD::AND
, dl
, VT
,
2730 DAG
.getNode(ISD::ANY_EXTEND
, dl
, VT
, Op0
.getOperand(0)),
2731 DAG
.getConstant(1, dl
, VT
));
2733 return DAG
.getSetCC(dl
, VT
, Op0
,
2734 DAG
.getConstant(0, dl
, Op0
.getValueType()),
2735 Cond
== ISD::SETEQ
? ISD::SETNE
: ISD::SETEQ
);
2737 if (Op0
.getOpcode() == ISD::AssertZext
&&
2738 cast
<VTSDNode
>(Op0
.getOperand(1))->getVT() == MVT::i1
)
2739 return DAG
.getSetCC(dl
, VT
, Op0
,
2740 DAG
.getConstant(0, dl
, Op0
.getValueType()),
2741 Cond
== ISD::SETEQ
? ISD::SETNE
: ISD::SETEQ
);
2746 optimizeSetCCOfSignedTruncationCheck(VT
, N0
, N1
, Cond
, DCI
, dl
))
2750 // These simplifications apply to splat vectors as well.
2751 // TODO: Handle more splat vector cases.
2752 if (auto *N1C
= isConstOrConstSplat(N1
)) {
2753 const APInt
&C1
= N1C
->getAPIntValue();
2755 APInt MinVal
, MaxVal
;
2756 unsigned OperandBitSize
= N1C
->getValueType(0).getScalarSizeInBits();
2757 if (ISD::isSignedIntSetCC(Cond
)) {
2758 MinVal
= APInt::getSignedMinValue(OperandBitSize
);
2759 MaxVal
= APInt::getSignedMaxValue(OperandBitSize
);
2761 MinVal
= APInt::getMinValue(OperandBitSize
);
2762 MaxVal
= APInt::getMaxValue(OperandBitSize
);
2765 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
2766 if (Cond
== ISD::SETGE
|| Cond
== ISD::SETUGE
) {
2767 // X >= MIN --> true
2769 return DAG
.getBoolConstant(true, dl
, VT
, OpVT
);
2771 if (!VT
.isVector()) { // TODO: Support this for vectors.
2772 // X >= C0 --> X > (C0 - 1)
2774 ISD::CondCode NewCC
= (Cond
== ISD::SETGE
) ? ISD::SETGT
: ISD::SETUGT
;
2775 if ((DCI
.isBeforeLegalizeOps() ||
2776 isCondCodeLegal(NewCC
, VT
.getSimpleVT())) &&
2777 (!N1C
->isOpaque() || (C
.getBitWidth() <= 64 &&
2778 isLegalICmpImmediate(C
.getSExtValue())))) {
2779 return DAG
.getSetCC(dl
, VT
, N0
,
2780 DAG
.getConstant(C
, dl
, N1
.getValueType()),
2786 if (Cond
== ISD::SETLE
|| Cond
== ISD::SETULE
) {
2787 // X <= MAX --> true
2789 return DAG
.getBoolConstant(true, dl
, VT
, OpVT
);
2791 // X <= C0 --> X < (C0 + 1)
2792 if (!VT
.isVector()) { // TODO: Support this for vectors.
2794 ISD::CondCode NewCC
= (Cond
== ISD::SETLE
) ? ISD::SETLT
: ISD::SETULT
;
2795 if ((DCI
.isBeforeLegalizeOps() ||
2796 isCondCodeLegal(NewCC
, VT
.getSimpleVT())) &&
2797 (!N1C
->isOpaque() || (C
.getBitWidth() <= 64 &&
2798 isLegalICmpImmediate(C
.getSExtValue())))) {
2799 return DAG
.getSetCC(dl
, VT
, N0
,
2800 DAG
.getConstant(C
, dl
, N1
.getValueType()),
2806 if (Cond
== ISD::SETLT
|| Cond
== ISD::SETULT
) {
2808 return DAG
.getBoolConstant(false, dl
, VT
, OpVT
); // X < MIN --> false
2810 // TODO: Support this for vectors after legalize ops.
2811 if (!VT
.isVector() || DCI
.isBeforeLegalizeOps()) {
2812 // Canonicalize setlt X, Max --> setne X, Max
2814 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETNE
);
2816 // If we have setult X, 1, turn it into seteq X, 0
2818 return DAG
.getSetCC(dl
, VT
, N0
,
2819 DAG
.getConstant(MinVal
, dl
, N0
.getValueType()),
2824 if (Cond
== ISD::SETGT
|| Cond
== ISD::SETUGT
) {
2826 return DAG
.getBoolConstant(false, dl
, VT
, OpVT
); // X > MAX --> false
2828 // TODO: Support this for vectors after legalize ops.
2829 if (!VT
.isVector() || DCI
.isBeforeLegalizeOps()) {
2830 // Canonicalize setgt X, Min --> setne X, Min
2832 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETNE
);
2834 // If we have setugt X, Max-1, turn it into seteq X, Max
2836 return DAG
.getSetCC(dl
, VT
, N0
,
2837 DAG
.getConstant(MaxVal
, dl
, N0
.getValueType()),
2842 // If we have "setcc X, C0", check to see if we can shrink the immediate
2844 // TODO: Support this for vectors after legalize ops.
2845 if (!VT
.isVector() || DCI
.isBeforeLegalizeOps()) {
2846 // SETUGT X, SINTMAX -> SETLT X, 0
2847 if (Cond
== ISD::SETUGT
&&
2848 C1
== APInt::getSignedMaxValue(OperandBitSize
))
2849 return DAG
.getSetCC(dl
, VT
, N0
,
2850 DAG
.getConstant(0, dl
, N1
.getValueType()),
2853 // SETULT X, SINTMIN -> SETGT X, -1
2854 if (Cond
== ISD::SETULT
&&
2855 C1
== APInt::getSignedMinValue(OperandBitSize
)) {
2856 SDValue ConstMinusOne
=
2857 DAG
.getConstant(APInt::getAllOnesValue(OperandBitSize
), dl
,
2859 return DAG
.getSetCC(dl
, VT
, N0
, ConstMinusOne
, ISD::SETGT
);
2864 // Back to non-vector simplifications.
2865 // TODO: Can we do these for vector splats?
2866 if (auto *N1C
= dyn_cast
<ConstantSDNode
>(N1
.getNode())) {
2867 const APInt
&C1
= N1C
->getAPIntValue();
2869 // Fold bit comparisons when we can.
2870 if ((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) &&
2871 (VT
== N0
.getValueType() ||
2872 (isTypeLegal(VT
) && VT
.bitsLE(N0
.getValueType()))) &&
2873 N0
.getOpcode() == ISD::AND
) {
2874 auto &DL
= DAG
.getDataLayout();
2875 if (auto *AndRHS
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1))) {
2876 EVT ShiftTy
= getShiftAmountTy(N0
.getValueType(), DL
,
2877 !DCI
.isBeforeLegalize());
2878 if (Cond
== ISD::SETNE
&& C1
== 0) {// (X & 8) != 0 --> (X & 8) >> 3
2879 // Perform the xform if the AND RHS is a single bit.
2880 if (AndRHS
->getAPIntValue().isPowerOf2()) {
2881 return DAG
.getNode(ISD::TRUNCATE
, dl
, VT
,
2882 DAG
.getNode(ISD::SRL
, dl
, N0
.getValueType(), N0
,
2883 DAG
.getConstant(AndRHS
->getAPIntValue().logBase2(), dl
,
2886 } else if (Cond
== ISD::SETEQ
&& C1
== AndRHS
->getAPIntValue()) {
2887 // (X & 8) == 8 --> (X & 8) >> 3
2888 // Perform the xform if C1 is a single bit.
2889 if (C1
.isPowerOf2()) {
2890 return DAG
.getNode(ISD::TRUNCATE
, dl
, VT
,
2891 DAG
.getNode(ISD::SRL
, dl
, N0
.getValueType(), N0
,
2892 DAG
.getConstant(C1
.logBase2(), dl
,
2899 if (C1
.getMinSignedBits() <= 64 &&
2900 !isLegalICmpImmediate(C1
.getSExtValue())) {
2901 // (X & -256) == 256 -> (X >> 8) == 1
2902 if ((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) &&
2903 N0
.getOpcode() == ISD::AND
&& N0
.hasOneUse()) {
2904 if (auto *AndRHS
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1))) {
2905 const APInt
&AndRHSC
= AndRHS
->getAPIntValue();
2906 if ((-AndRHSC
).isPowerOf2() && (AndRHSC
& C1
) == C1
) {
2907 unsigned ShiftBits
= AndRHSC
.countTrailingZeros();
2908 auto &DL
= DAG
.getDataLayout();
2909 EVT ShiftTy
= getShiftAmountTy(N0
.getValueType(), DL
,
2910 !DCI
.isBeforeLegalize());
2911 EVT CmpTy
= N0
.getValueType();
2912 SDValue Shift
= DAG
.getNode(ISD::SRL
, dl
, CmpTy
, N0
.getOperand(0),
2913 DAG
.getConstant(ShiftBits
, dl
,
2915 SDValue CmpRHS
= DAG
.getConstant(C1
.lshr(ShiftBits
), dl
, CmpTy
);
2916 return DAG
.getSetCC(dl
, VT
, Shift
, CmpRHS
, Cond
);
2919 } else if (Cond
== ISD::SETULT
|| Cond
== ISD::SETUGE
||
2920 Cond
== ISD::SETULE
|| Cond
== ISD::SETUGT
) {
2921 bool AdjOne
= (Cond
== ISD::SETULE
|| Cond
== ISD::SETUGT
);
2922 // X < 0x100000000 -> (X >> 32) < 1
2923 // X >= 0x100000000 -> (X >> 32) >= 1
2924 // X <= 0x0ffffffff -> (X >> 32) < 1
2925 // X > 0x0ffffffff -> (X >> 32) >= 1
2928 ISD::CondCode NewCond
= Cond
;
2930 ShiftBits
= C1
.countTrailingOnes();
2932 NewCond
= (Cond
== ISD::SETULE
) ? ISD::SETULT
: ISD::SETUGE
;
2934 ShiftBits
= C1
.countTrailingZeros();
2936 NewC
.lshrInPlace(ShiftBits
);
2937 if (ShiftBits
&& NewC
.getMinSignedBits() <= 64 &&
2938 isLegalICmpImmediate(NewC
.getSExtValue())) {
2939 auto &DL
= DAG
.getDataLayout();
2940 EVT ShiftTy
= getShiftAmountTy(N0
.getValueType(), DL
,
2941 !DCI
.isBeforeLegalize());
2942 EVT CmpTy
= N0
.getValueType();
2943 SDValue Shift
= DAG
.getNode(ISD::SRL
, dl
, CmpTy
, N0
,
2944 DAG
.getConstant(ShiftBits
, dl
, ShiftTy
));
2945 SDValue CmpRHS
= DAG
.getConstant(NewC
, dl
, CmpTy
);
2946 return DAG
.getSetCC(dl
, VT
, Shift
, CmpRHS
, NewCond
);
2952 if (isa
<ConstantFPSDNode
>(N0
.getNode())) {
2953 // Constant fold or commute setcc.
2954 SDValue O
= DAG
.FoldSetCC(VT
, N0
, N1
, Cond
, dl
);
2955 if (O
.getNode()) return O
;
2956 } else if (auto *CFP
= dyn_cast
<ConstantFPSDNode
>(N1
.getNode())) {
2957 // If the RHS of an FP comparison is a constant, simplify it away in
2959 if (CFP
->getValueAPF().isNaN()) {
2960 // If an operand is known to be a nan, we can fold it.
2961 switch (ISD::getUnorderedFlavor(Cond
)) {
2962 default: llvm_unreachable("Unknown flavor!");
2963 case 0: // Known false.
2964 return DAG
.getBoolConstant(false, dl
, VT
, OpVT
);
2965 case 1: // Known true.
2966 return DAG
.getBoolConstant(true, dl
, VT
, OpVT
);
2967 case 2: // Undefined.
2968 return DAG
.getUNDEF(VT
);
2972 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
2973 // constant if knowing that the operand is non-nan is enough. We prefer to
2974 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
2976 if (Cond
== ISD::SETO
|| Cond
== ISD::SETUO
)
2977 return DAG
.getSetCC(dl
, VT
, N0
, N0
, Cond
);
2979 // setcc (fneg x), C -> setcc swap(pred) x, -C
2980 if (N0
.getOpcode() == ISD::FNEG
) {
2981 ISD::CondCode SwapCond
= ISD::getSetCCSwappedOperands(Cond
);
2982 if (DCI
.isBeforeLegalizeOps() ||
2983 isCondCodeLegal(SwapCond
, N0
.getSimpleValueType())) {
2984 SDValue NegN1
= DAG
.getNode(ISD::FNEG
, dl
, N0
.getValueType(), N1
);
2985 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0), NegN1
, SwapCond
);
2989 // If the condition is not legal, see if we can find an equivalent one
2991 if (!isCondCodeLegal(Cond
, N0
.getSimpleValueType())) {
2992 // If the comparison was an awkward floating-point == or != and one of
2993 // the comparison operands is infinity or negative infinity, convert the
2994 // condition to a less-awkward <= or >=.
2995 if (CFP
->getValueAPF().isInfinity()) {
2996 if (CFP
->getValueAPF().isNegative()) {
2997 if (Cond
== ISD::SETOEQ
&&
2998 isCondCodeLegal(ISD::SETOLE
, N0
.getSimpleValueType()))
2999 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETOLE
);
3000 if (Cond
== ISD::SETUEQ
&&
3001 isCondCodeLegal(ISD::SETOLE
, N0
.getSimpleValueType()))
3002 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETULE
);
3003 if (Cond
== ISD::SETUNE
&&
3004 isCondCodeLegal(ISD::SETUGT
, N0
.getSimpleValueType()))
3005 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETUGT
);
3006 if (Cond
== ISD::SETONE
&&
3007 isCondCodeLegal(ISD::SETUGT
, N0
.getSimpleValueType()))
3008 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETOGT
);
3010 if (Cond
== ISD::SETOEQ
&&
3011 isCondCodeLegal(ISD::SETOGE
, N0
.getSimpleValueType()))
3012 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETOGE
);
3013 if (Cond
== ISD::SETUEQ
&&
3014 isCondCodeLegal(ISD::SETOGE
, N0
.getSimpleValueType()))
3015 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETUGE
);
3016 if (Cond
== ISD::SETUNE
&&
3017 isCondCodeLegal(ISD::SETULT
, N0
.getSimpleValueType()))
3018 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETULT
);
3019 if (Cond
== ISD::SETONE
&&
3020 isCondCodeLegal(ISD::SETULT
, N0
.getSimpleValueType()))
3021 return DAG
.getSetCC(dl
, VT
, N0
, N1
, ISD::SETOLT
);
3028 // The sext(setcc()) => setcc() optimization relies on the appropriate
3029 // constant being emitted.
3031 bool EqTrue
= ISD::isTrueWhenEqual(Cond
);
3033 // We can always fold X == X for integer setcc's.
3034 if (N0
.getValueType().isInteger())
3035 return DAG
.getBoolConstant(EqTrue
, dl
, VT
, OpVT
);
3037 unsigned UOF
= ISD::getUnorderedFlavor(Cond
);
3038 if (UOF
== 2) // FP operators that are undefined on NaNs.
3039 return DAG
.getBoolConstant(EqTrue
, dl
, VT
, OpVT
);
3040 if (UOF
== unsigned(EqTrue
))
3041 return DAG
.getBoolConstant(EqTrue
, dl
, VT
, OpVT
);
3042 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
3043 // if it is not already.
3044 ISD::CondCode NewCond
= UOF
== 0 ? ISD::SETO
: ISD::SETUO
;
3045 if (NewCond
!= Cond
&&
3046 (DCI
.isBeforeLegalizeOps() ||
3047 isCondCodeLegal(NewCond
, N0
.getSimpleValueType())))
3048 return DAG
.getSetCC(dl
, VT
, N0
, N1
, NewCond
);
3051 if ((Cond
== ISD::SETEQ
|| Cond
== ISD::SETNE
) &&
3052 N0
.getValueType().isInteger()) {
3053 if (N0
.getOpcode() == ISD::ADD
|| N0
.getOpcode() == ISD::SUB
||
3054 N0
.getOpcode() == ISD::XOR
) {
3055 // Simplify (X+Y) == (X+Z) --> Y == Z
3056 if (N0
.getOpcode() == N1
.getOpcode()) {
3057 if (N0
.getOperand(0) == N1
.getOperand(0))
3058 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(1), N1
.getOperand(1), Cond
);
3059 if (N0
.getOperand(1) == N1
.getOperand(1))
3060 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0), N1
.getOperand(0), Cond
);
3061 if (isCommutativeBinOp(N0
.getOpcode())) {
3062 // If X op Y == Y op X, try other combinations.
3063 if (N0
.getOperand(0) == N1
.getOperand(1))
3064 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(1), N1
.getOperand(0),
3066 if (N0
.getOperand(1) == N1
.getOperand(0))
3067 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0), N1
.getOperand(1),
3072 // If RHS is a legal immediate value for a compare instruction, we need
3073 // to be careful about increasing register pressure needlessly.
3074 bool LegalRHSImm
= false;
3076 if (auto *RHSC
= dyn_cast
<ConstantSDNode
>(N1
)) {
3077 if (auto *LHSR
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(1))) {
3078 // Turn (X+C1) == C2 --> X == C2-C1
3079 if (N0
.getOpcode() == ISD::ADD
&& N0
.getNode()->hasOneUse()) {
3080 return DAG
.getSetCC(dl
, VT
, N0
.getOperand(0),
3081 DAG
.getConstant(RHSC
->getAPIntValue()-
3082 LHSR
->getAPIntValue(),
3083 dl
, N0
.getValueType()), Cond
);
3086 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
3087 if (N0
.getOpcode() == ISD::XOR
)
3088 // If we know that all of the inverted bits are zero, don't bother
3089 // performing the inversion.
3090 if (DAG
.MaskedValueIsZero(N0
.getOperand(0), ~LHSR
->getAPIntValue()))
3092 DAG
.getSetCC(dl
, VT
, N0
.getOperand(0),
3093 DAG
.getConstant(LHSR
->getAPIntValue() ^
3094 RHSC
->getAPIntValue(),
3095 dl
, N0
.getValueType()),
3099 // Turn (C1-X) == C2 --> X == C1-C2
3100 if (auto *SUBC
= dyn_cast
<ConstantSDNode
>(N0
.getOperand(0))) {
3101 if (N0
.getOpcode() == ISD::SUB
&& N0
.getNode()->hasOneUse()) {
3103 DAG
.getSetCC(dl
, VT
, N0
.getOperand(1),
3104 DAG
.getConstant(SUBC
->getAPIntValue() -
3105 RHSC
->getAPIntValue(),
3106 dl
, N0
.getValueType()),
3111 // Could RHSC fold directly into a compare?
3112 if (RHSC
->getValueType(0).getSizeInBits() <= 64)
3113 LegalRHSImm
= isLegalICmpImmediate(RHSC
->getSExtValue());
3116 // (X+Y) == X --> Y == 0 and similar folds.
3117 // Don't do this if X is an immediate that can fold into a cmp
3118 // instruction and X+Y has other uses. It could be an induction variable
3119 // chain, and the transform would increase register pressure.
3120 if (!LegalRHSImm
|| N0
.hasOneUse())
3121 if (SDValue V
= foldSetCCWithBinOp(VT
, N0
, N1
, Cond
, dl
, DCI
))
3125 if (N1
.getOpcode() == ISD::ADD
|| N1
.getOpcode() == ISD::SUB
||
3126 N1
.getOpcode() == ISD::XOR
)
3127 if (SDValue V
= foldSetCCWithBinOp(VT
, N1
, N0
, Cond
, dl
, DCI
))
3130 if (SDValue V
= foldSetCCWithAnd(VT
, N0
, N1
, Cond
, dl
, DCI
))
3134 // Fold away ALL boolean setcc's.
3136 if (N0
.getValueType().getScalarType() == MVT::i1
&& foldBooleans
) {
3137 EVT OpVT
= N0
.getValueType();
3139 default: llvm_unreachable("Unknown integer setcc!");
3140 case ISD::SETEQ
: // X == Y -> ~(X^Y)
3141 Temp
= DAG
.getNode(ISD::XOR
, dl
, OpVT
, N0
, N1
);
3142 N0
= DAG
.getNOT(dl
, Temp
, OpVT
);
3143 if (!DCI
.isCalledByLegalizer())
3144 DCI
.AddToWorklist(Temp
.getNode());
3146 case ISD::SETNE
: // X != Y --> (X^Y)
3147 N0
= DAG
.getNode(ISD::XOR
, dl
, OpVT
, N0
, N1
);
3149 case ISD::SETGT
: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
3150 case ISD::SETULT
: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
3151 Temp
= DAG
.getNOT(dl
, N0
, OpVT
);
3152 N0
= DAG
.getNode(ISD::AND
, dl
, OpVT
, N1
, Temp
);
3153 if (!DCI
.isCalledByLegalizer())
3154 DCI
.AddToWorklist(Temp
.getNode());
3156 case ISD::SETLT
: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
3157 case ISD::SETUGT
: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
3158 Temp
= DAG
.getNOT(dl
, N1
, OpVT
);
3159 N0
= DAG
.getNode(ISD::AND
, dl
, OpVT
, N0
, Temp
);
3160 if (!DCI
.isCalledByLegalizer())
3161 DCI
.AddToWorklist(Temp
.getNode());
3163 case ISD::SETULE
: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
3164 case ISD::SETGE
: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
3165 Temp
= DAG
.getNOT(dl
, N0
, OpVT
);
3166 N0
= DAG
.getNode(ISD::OR
, dl
, OpVT
, N1
, Temp
);
3167 if (!DCI
.isCalledByLegalizer())
3168 DCI
.AddToWorklist(Temp
.getNode());
3170 case ISD::SETUGE
: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
3171 case ISD::SETLE
: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
3172 Temp
= DAG
.getNOT(dl
, N1
, OpVT
);
3173 N0
= DAG
.getNode(ISD::OR
, dl
, OpVT
, N0
, Temp
);
3176 if (VT
.getScalarType() != MVT::i1
) {
3177 if (!DCI
.isCalledByLegalizer())
3178 DCI
.AddToWorklist(N0
.getNode());
3179 // FIXME: If running after legalize, we probably can't do this.
3180 ISD::NodeType ExtendCode
= getExtendForContent(getBooleanContents(OpVT
));
3181 N0
= DAG
.getNode(ExtendCode
, dl
, VT
, N0
);
3186 // Could not fold it.
3190 /// Returns true (and the GlobalValue and the offset) if the node is a
3191 /// GlobalAddress + offset.
3192 bool TargetLowering::isGAPlusOffset(SDNode
*WN
, const GlobalValue
*&GA
,
3193 int64_t &Offset
) const {
3195 SDNode
*N
= unwrapAddress(SDValue(WN
, 0)).getNode();
3197 if (auto *GASD
= dyn_cast
<GlobalAddressSDNode
>(N
)) {
3198 GA
= GASD
->getGlobal();
3199 Offset
+= GASD
->getOffset();
3203 if (N
->getOpcode() == ISD::ADD
) {
3204 SDValue N1
= N
->getOperand(0);
3205 SDValue N2
= N
->getOperand(1);
3206 if (isGAPlusOffset(N1
.getNode(), GA
, Offset
)) {
3207 if (auto *V
= dyn_cast
<ConstantSDNode
>(N2
)) {
3208 Offset
+= V
->getSExtValue();
3211 } else if (isGAPlusOffset(N2
.getNode(), GA
, Offset
)) {
3212 if (auto *V
= dyn_cast
<ConstantSDNode
>(N1
)) {
3213 Offset
+= V
->getSExtValue();
3222 SDValue
TargetLowering::PerformDAGCombine(SDNode
*N
,
3223 DAGCombinerInfo
&DCI
) const {
3224 // Default implementation: no optimization.
3228 //===----------------------------------------------------------------------===//
3229 // Inline Assembler Implementation Methods
3230 //===----------------------------------------------------------------------===//
3232 TargetLowering::ConstraintType
3233 TargetLowering::getConstraintType(StringRef Constraint
) const {
3234 unsigned S
= Constraint
.size();
3237 switch (Constraint
[0]) {
3239 case 'r': return C_RegisterClass
;
3241 case 'o': // offsetable
3242 case 'V': // not offsetable
3244 case 'i': // Simple Integer or Relocatable Constant
3245 case 'n': // Simple Integer
3246 case 'E': // Floating Point Constant
3247 case 'F': // Floating Point Constant
3248 case 's': // Relocatable Constant
3249 case 'p': // Address.
3250 case 'X': // Allow ANY value.
3251 case 'I': // Target registers.
3265 if (S
> 1 && Constraint
[0] == '{' && Constraint
[S
- 1] == '}') {
3266 if (S
== 8 && Constraint
.substr(1, 6) == "memory") // "{memory}"
3273 /// Try to replace an X constraint, which matches anything, with another that
3274 /// has more specific requirements based on the type of the corresponding
3276 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT
) const {
3277 if (ConstraintVT
.isInteger())
3279 if (ConstraintVT
.isFloatingPoint())
3280 return "f"; // works for many targets
3284 SDValue
TargetLowering::LowerAsmOutputForConstraint(
3285 SDValue
&Chain
, SDValue
&Flag
, SDLoc DL
, const AsmOperandInfo
&OpInfo
,
3286 SelectionDAG
&DAG
) const {
3290 /// Lower the specified operand into the Ops vector.
3291 /// If it is invalid, don't add anything to Ops.
3292 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op
,
3293 std::string
&Constraint
,
3294 std::vector
<SDValue
> &Ops
,
3295 SelectionDAG
&DAG
) const {
3297 if (Constraint
.length() > 1) return;
3299 char ConstraintLetter
= Constraint
[0];
3300 switch (ConstraintLetter
) {
3302 case 'X': // Allows any operand; labels (basic block) use this.
3303 if (Op
.getOpcode() == ISD::BasicBlock
||
3304 Op
.getOpcode() == ISD::TargetBlockAddress
) {
3309 case 'i': // Simple Integer or Relocatable Constant
3310 case 'n': // Simple Integer
3311 case 's': { // Relocatable Constant
3312 // These operands are interested in values of the form (GV+C), where C may
3313 // be folded in as an offset of GV, or it may be explicitly added. Also, it
3314 // is possible and fine if either GV or C are missing.
3315 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
);
3316 GlobalAddressSDNode
*GA
= dyn_cast
<GlobalAddressSDNode
>(Op
);
3318 // If we have "(add GV, C)", pull out GV/C
3319 if (Op
.getOpcode() == ISD::ADD
) {
3320 C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1));
3321 GA
= dyn_cast
<GlobalAddressSDNode
>(Op
.getOperand(0));
3323 C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(0));
3324 GA
= dyn_cast
<GlobalAddressSDNode
>(Op
.getOperand(1));
3332 // If we find a valid operand, map to the TargetXXX version so that the
3333 // value itself doesn't get selected.
3334 if (GA
) { // Either &GV or &GV+C
3335 if (ConstraintLetter
!= 'n') {
3336 int64_t Offs
= GA
->getOffset();
3337 if (C
) Offs
+= C
->getZExtValue();
3338 Ops
.push_back(DAG
.getTargetGlobalAddress(GA
->getGlobal(),
3339 C
? SDLoc(C
) : SDLoc(),
3340 Op
.getValueType(), Offs
));
3344 if (C
) { // just C, no GV.
3345 // Simple constants are not allowed for 's'.
3346 if (ConstraintLetter
!= 's') {
3347 // gcc prints these as sign extended. Sign extend value to 64 bits
3348 // now; without this it would get ZExt'd later in
3349 // ScheduleDAGSDNodes::EmitNode, which is very generic.
3350 Ops
.push_back(DAG
.getTargetConstant(C
->getSExtValue(),
3351 SDLoc(C
), MVT::i64
));
3360 std::pair
<unsigned, const TargetRegisterClass
*>
3361 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
*RI
,
3362 StringRef Constraint
,
3364 if (Constraint
.empty() || Constraint
[0] != '{')
3365 return std::make_pair(0u, static_cast<TargetRegisterClass
*>(nullptr));
3366 assert(*(Constraint
.end() - 1) == '}' && "Not a brace enclosed constraint?");
3368 // Remove the braces from around the name.
3369 StringRef
RegName(Constraint
.data() + 1, Constraint
.size() - 2);
3371 std::pair
<unsigned, const TargetRegisterClass
*> R
=
3372 std::make_pair(0u, static_cast<const TargetRegisterClass
*>(nullptr));
3374 // Figure out which register class contains this reg.
3375 for (const TargetRegisterClass
*RC
: RI
->regclasses()) {
3376 // If none of the value types for this register class are valid, we
3377 // can't use it. For example, 64-bit reg classes on 32-bit targets.
3378 if (!isLegalRC(*RI
, *RC
))
3381 for (TargetRegisterClass::iterator I
= RC
->begin(), E
= RC
->end();
3383 if (RegName
.equals_lower(RI
->getRegAsmName(*I
))) {
3384 std::pair
<unsigned, const TargetRegisterClass
*> S
=
3385 std::make_pair(*I
, RC
);
3387 // If this register class has the requested value type, return it,
3388 // otherwise keep searching and return the first class found
3389 // if no other is found which explicitly has the requested type.
3390 if (RI
->isTypeLegalForClass(*RC
, VT
))
3401 //===----------------------------------------------------------------------===//
3402 // Constraint Selection.
3404 /// Return true of this is an input operand that is a matching constraint like
3406 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
3407 assert(!ConstraintCode
.empty() && "No known constraint!");
3408 return isdigit(static_cast<unsigned char>(ConstraintCode
[0]));
3411 /// If this is an input matching constraint, this method returns the output
3412 /// operand it matches.
3413 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
3414 assert(!ConstraintCode
.empty() && "No known constraint!");
3415 return atoi(ConstraintCode
.c_str());
3418 /// Split up the constraint string from the inline assembly value into the
3419 /// specific constraints and their prefixes, and also tie in the associated
3421 /// If this returns an empty vector, and if the constraint string itself
3422 /// isn't empty, there was an error parsing.
3423 TargetLowering::AsmOperandInfoVector
3424 TargetLowering::ParseConstraints(const DataLayout
&DL
,
3425 const TargetRegisterInfo
*TRI
,
3426 ImmutableCallSite CS
) const {
3427 /// Information about all of the constraints.
3428 AsmOperandInfoVector ConstraintOperands
;
3429 const InlineAsm
*IA
= cast
<InlineAsm
>(CS
.getCalledValue());
3430 unsigned maCount
= 0; // Largest number of multiple alternative constraints.
3432 // Do a prepass over the constraints, canonicalizing them, and building up the
3433 // ConstraintOperands list.
3434 unsigned ArgNo
= 0; // ArgNo - The argument of the CallInst.
3435 unsigned ResNo
= 0; // ResNo - The result number of the next output.
3437 for (InlineAsm::ConstraintInfo
&CI
: IA
->ParseConstraints()) {
3438 ConstraintOperands
.emplace_back(std::move(CI
));
3439 AsmOperandInfo
&OpInfo
= ConstraintOperands
.back();
3441 // Update multiple alternative constraint count.
3442 if (OpInfo
.multipleAlternatives
.size() > maCount
)
3443 maCount
= OpInfo
.multipleAlternatives
.size();
3445 OpInfo
.ConstraintVT
= MVT::Other
;
3447 // Compute the value type for each operand.
3448 switch (OpInfo
.Type
) {
3449 case InlineAsm::isOutput
:
3450 // Indirect outputs just consume an argument.
3451 if (OpInfo
.isIndirect
) {
3452 OpInfo
.CallOperandVal
= const_cast<Value
*>(CS
.getArgument(ArgNo
++));
3456 // The return value of the call is this value. As such, there is no
3457 // corresponding argument.
3458 assert(!CS
.getType()->isVoidTy() &&
3460 if (StructType
*STy
= dyn_cast
<StructType
>(CS
.getType())) {
3461 OpInfo
.ConstraintVT
=
3462 getSimpleValueType(DL
, STy
->getElementType(ResNo
));
3464 assert(ResNo
== 0 && "Asm only has one result!");
3465 OpInfo
.ConstraintVT
= getSimpleValueType(DL
, CS
.getType());
3469 case InlineAsm::isInput
:
3470 OpInfo
.CallOperandVal
= const_cast<Value
*>(CS
.getArgument(ArgNo
++));
3472 case InlineAsm::isClobber
:
3477 if (OpInfo
.CallOperandVal
) {
3478 llvm::Type
*OpTy
= OpInfo
.CallOperandVal
->getType();
3479 if (OpInfo
.isIndirect
) {
3480 llvm::PointerType
*PtrTy
= dyn_cast
<PointerType
>(OpTy
);
3482 report_fatal_error("Indirect operand for inline asm not a pointer!");
3483 OpTy
= PtrTy
->getElementType();
3486 // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
3487 if (StructType
*STy
= dyn_cast
<StructType
>(OpTy
))
3488 if (STy
->getNumElements() == 1)
3489 OpTy
= STy
->getElementType(0);
3491 // If OpTy is not a single value, it may be a struct/union that we
3492 // can tile with integers.
3493 if (!OpTy
->isSingleValueType() && OpTy
->isSized()) {
3494 unsigned BitSize
= DL
.getTypeSizeInBits(OpTy
);
3503 OpInfo
.ConstraintVT
=
3504 MVT::getVT(IntegerType::get(OpTy
->getContext(), BitSize
), true);
3507 } else if (PointerType
*PT
= dyn_cast
<PointerType
>(OpTy
)) {
3508 unsigned PtrSize
= DL
.getPointerSizeInBits(PT
->getAddressSpace());
3509 OpInfo
.ConstraintVT
= MVT::getIntegerVT(PtrSize
);
3511 OpInfo
.ConstraintVT
= MVT::getVT(OpTy
, true);
3516 // If we have multiple alternative constraints, select the best alternative.
3517 if (!ConstraintOperands
.empty()) {
3519 unsigned bestMAIndex
= 0;
3520 int bestWeight
= -1;
3521 // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
3524 // Compute the sums of the weights for each alternative, keeping track
3525 // of the best (highest weight) one so far.
3526 for (maIndex
= 0; maIndex
< maCount
; ++maIndex
) {
3528 for (unsigned cIndex
= 0, eIndex
= ConstraintOperands
.size();
3529 cIndex
!= eIndex
; ++cIndex
) {
3530 AsmOperandInfo
&OpInfo
= ConstraintOperands
[cIndex
];
3531 if (OpInfo
.Type
== InlineAsm::isClobber
)
3534 // If this is an output operand with a matching input operand,
3535 // look up the matching input. If their types mismatch, e.g. one
3536 // is an integer, the other is floating point, or their sizes are
3537 // different, flag it as an maCantMatch.
3538 if (OpInfo
.hasMatchingInput()) {
3539 AsmOperandInfo
&Input
= ConstraintOperands
[OpInfo
.MatchingInput
];
3540 if (OpInfo
.ConstraintVT
!= Input
.ConstraintVT
) {
3541 if ((OpInfo
.ConstraintVT
.isInteger() !=
3542 Input
.ConstraintVT
.isInteger()) ||
3543 (OpInfo
.ConstraintVT
.getSizeInBits() !=
3544 Input
.ConstraintVT
.getSizeInBits())) {
3545 weightSum
= -1; // Can't match.
3550 weight
= getMultipleConstraintMatchWeight(OpInfo
, maIndex
);
3555 weightSum
+= weight
;
3558 if (weightSum
> bestWeight
) {
3559 bestWeight
= weightSum
;
3560 bestMAIndex
= maIndex
;
3564 // Now select chosen alternative in each constraint.
3565 for (unsigned cIndex
= 0, eIndex
= ConstraintOperands
.size();
3566 cIndex
!= eIndex
; ++cIndex
) {
3567 AsmOperandInfo
&cInfo
= ConstraintOperands
[cIndex
];
3568 if (cInfo
.Type
== InlineAsm::isClobber
)
3570 cInfo
.selectAlternative(bestMAIndex
);
3575 // Check and hook up tied operands, choose constraint code to use.
3576 for (unsigned cIndex
= 0, eIndex
= ConstraintOperands
.size();
3577 cIndex
!= eIndex
; ++cIndex
) {
3578 AsmOperandInfo
&OpInfo
= ConstraintOperands
[cIndex
];
3580 // If this is an output operand with a matching input operand, look up the
3581 // matching input. If their types mismatch, e.g. one is an integer, the
3582 // other is floating point, or their sizes are different, flag it as an
3584 if (OpInfo
.hasMatchingInput()) {
3585 AsmOperandInfo
&Input
= ConstraintOperands
[OpInfo
.MatchingInput
];
3587 if (OpInfo
.ConstraintVT
!= Input
.ConstraintVT
) {
3588 std::pair
<unsigned, const TargetRegisterClass
*> MatchRC
=
3589 getRegForInlineAsmConstraint(TRI
, OpInfo
.ConstraintCode
,
3590 OpInfo
.ConstraintVT
);
3591 std::pair
<unsigned, const TargetRegisterClass
*> InputRC
=
3592 getRegForInlineAsmConstraint(TRI
, Input
.ConstraintCode
,
3593 Input
.ConstraintVT
);
3594 if ((OpInfo
.ConstraintVT
.isInteger() !=
3595 Input
.ConstraintVT
.isInteger()) ||
3596 (MatchRC
.second
!= InputRC
.second
)) {
3597 report_fatal_error("Unsupported asm: input constraint"
3598 " with a matching output constraint of"
3599 " incompatible type!");
3605 return ConstraintOperands
;
3608 /// Return an integer indicating how general CT is.
3609 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT
) {
3611 case TargetLowering::C_Other
:
3612 case TargetLowering::C_Unknown
:
3614 case TargetLowering::C_Register
:
3616 case TargetLowering::C_RegisterClass
:
3618 case TargetLowering::C_Memory
:
3621 llvm_unreachable("Invalid constraint type");
3624 /// Examine constraint type and operand type and determine a weight value.
3625 /// This object must already have been set up with the operand type
3626 /// and the current alternative constraint selected.
3627 TargetLowering::ConstraintWeight
3628 TargetLowering::getMultipleConstraintMatchWeight(
3629 AsmOperandInfo
&info
, int maIndex
) const {
3630 InlineAsm::ConstraintCodeVector
*rCodes
;
3631 if (maIndex
>= (int)info
.multipleAlternatives
.size())
3632 rCodes
= &info
.Codes
;
3634 rCodes
= &info
.multipleAlternatives
[maIndex
].Codes
;
3635 ConstraintWeight BestWeight
= CW_Invalid
;
3637 // Loop over the options, keeping track of the most general one.
3638 for (unsigned i
= 0, e
= rCodes
->size(); i
!= e
; ++i
) {
3639 ConstraintWeight weight
=
3640 getSingleConstraintMatchWeight(info
, (*rCodes
)[i
].c_str());
3641 if (weight
> BestWeight
)
3642 BestWeight
= weight
;
3648 /// Examine constraint type and operand type and determine a weight value.
3649 /// This object must already have been set up with the operand type
3650 /// and the current alternative constraint selected.
3651 TargetLowering::ConstraintWeight
3652 TargetLowering::getSingleConstraintMatchWeight(
3653 AsmOperandInfo
&info
, const char *constraint
) const {
3654 ConstraintWeight weight
= CW_Invalid
;
3655 Value
*CallOperandVal
= info
.CallOperandVal
;
3656 // If we don't have a value, we can't do a match,
3657 // but allow it at the lowest weight.
3658 if (!CallOperandVal
)
3660 // Look at the constraint type.
3661 switch (*constraint
) {
3662 case 'i': // immediate integer.
3663 case 'n': // immediate integer with a known value.
3664 if (isa
<ConstantInt
>(CallOperandVal
))
3665 weight
= CW_Constant
;
3667 case 's': // non-explicit intregal immediate.
3668 if (isa
<GlobalValue
>(CallOperandVal
))
3669 weight
= CW_Constant
;
3671 case 'E': // immediate float if host format.
3672 case 'F': // immediate float.
3673 if (isa
<ConstantFP
>(CallOperandVal
))
3674 weight
= CW_Constant
;
3676 case '<': // memory operand with autodecrement.
3677 case '>': // memory operand with autoincrement.
3678 case 'm': // memory operand.
3679 case 'o': // offsettable memory operand
3680 case 'V': // non-offsettable memory operand
3683 case 'r': // general register.
3684 case 'g': // general register, memory operand or immediate integer.
3685 // note: Clang converts "g" to "imr".
3686 if (CallOperandVal
->getType()->isIntegerTy())
3687 weight
= CW_Register
;
3689 case 'X': // any operand.
3691 weight
= CW_Default
;
3697 /// If there are multiple different constraints that we could pick for this
3698 /// operand (e.g. "imr") try to pick the 'best' one.
3699 /// This is somewhat tricky: constraints fall into four classes:
3700 /// Other -> immediates and magic values
3701 /// Register -> one specific register
3702 /// RegisterClass -> a group of regs
3703 /// Memory -> memory
3704 /// Ideally, we would pick the most specific constraint possible: if we have
3705 /// something that fits into a register, we would pick it. The problem here
3706 /// is that if we have something that could either be in a register or in
3707 /// memory that use of the register could cause selection of *other*
3708 /// operands to fail: they might only succeed if we pick memory. Because of
3709 /// this the heuristic we use is:
3711 /// 1) If there is an 'other' constraint, and if the operand is valid for
3712 /// that constraint, use it. This makes us take advantage of 'i'
3713 /// constraints when available.
3714 /// 2) Otherwise, pick the most general constraint present. This prefers
3715 /// 'm' over 'r', for example.
3717 static void ChooseConstraint(TargetLowering::AsmOperandInfo
&OpInfo
,
3718 const TargetLowering
&TLI
,
3719 SDValue Op
, SelectionDAG
*DAG
) {
3720 assert(OpInfo
.Codes
.size() > 1 && "Doesn't have multiple constraint options");
3721 unsigned BestIdx
= 0;
3722 TargetLowering::ConstraintType BestType
= TargetLowering::C_Unknown
;
3723 int BestGenerality
= -1;
3725 // Loop over the options, keeping track of the most general one.
3726 for (unsigned i
= 0, e
= OpInfo
.Codes
.size(); i
!= e
; ++i
) {
3727 TargetLowering::ConstraintType CType
=
3728 TLI
.getConstraintType(OpInfo
.Codes
[i
]);
3730 // If this is an 'other' constraint, see if the operand is valid for it.
3731 // For example, on X86 we might have an 'rI' constraint. If the operand
3732 // is an integer in the range [0..31] we want to use I (saving a load
3733 // of a register), otherwise we must use 'r'.
3734 if (CType
== TargetLowering::C_Other
&& Op
.getNode()) {
3735 assert(OpInfo
.Codes
[i
].size() == 1 &&
3736 "Unhandled multi-letter 'other' constraint");
3737 std::vector
<SDValue
> ResultOps
;
3738 TLI
.LowerAsmOperandForConstraint(Op
, OpInfo
.Codes
[i
],
3740 if (!ResultOps
.empty()) {
3747 // Things with matching constraints can only be registers, per gcc
3748 // documentation. This mainly affects "g" constraints.
3749 if (CType
== TargetLowering::C_Memory
&& OpInfo
.hasMatchingInput())
3752 // This constraint letter is more general than the previous one, use it.
3753 int Generality
= getConstraintGenerality(CType
);
3754 if (Generality
> BestGenerality
) {
3757 BestGenerality
= Generality
;
3761 OpInfo
.ConstraintCode
= OpInfo
.Codes
[BestIdx
];
3762 OpInfo
.ConstraintType
= BestType
;
3765 /// Determines the constraint code and constraint type to use for the specific
3766 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
3767 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo
&OpInfo
,
3769 SelectionDAG
*DAG
) const {
3770 assert(!OpInfo
.Codes
.empty() && "Must have at least one constraint");
3772 // Single-letter constraints ('r') are very common.
3773 if (OpInfo
.Codes
.size() == 1) {
3774 OpInfo
.ConstraintCode
= OpInfo
.Codes
[0];
3775 OpInfo
.ConstraintType
= getConstraintType(OpInfo
.ConstraintCode
);
3777 ChooseConstraint(OpInfo
, *this, Op
, DAG
);
3780 // 'X' matches anything.
3781 if (OpInfo
.ConstraintCode
== "X" && OpInfo
.CallOperandVal
) {
3782 // Labels and constants are handled elsewhere ('X' is the only thing
3783 // that matches labels). For Functions, the type here is the type of
3784 // the result, which is not what we want to look at; leave them alone.
3785 Value
*v
= OpInfo
.CallOperandVal
;
3786 if (isa
<BasicBlock
>(v
) || isa
<ConstantInt
>(v
) || isa
<Function
>(v
)) {
3787 OpInfo
.CallOperandVal
= v
;
3791 if (Op
.getNode() && Op
.getOpcode() == ISD::TargetBlockAddress
)
3794 // Otherwise, try to resolve it to something we know about by looking at
3795 // the actual operand type.
3796 if (const char *Repl
= LowerXConstraint(OpInfo
.ConstraintVT
)) {
3797 OpInfo
.ConstraintCode
= Repl
;
3798 OpInfo
.ConstraintType
= getConstraintType(OpInfo
.ConstraintCode
);
3803 /// Given an exact SDIV by a constant, create a multiplication
3804 /// with the multiplicative inverse of the constant.
3805 static SDValue
BuildExactSDIV(const TargetLowering
&TLI
, SDNode
*N
,
3806 const SDLoc
&dl
, SelectionDAG
&DAG
,
3807 SmallVectorImpl
<SDNode
*> &Created
) {
3808 SDValue Op0
= N
->getOperand(0);
3809 SDValue Op1
= N
->getOperand(1);
3810 EVT VT
= N
->getValueType(0);
3811 EVT SVT
= VT
.getScalarType();
3812 EVT ShVT
= TLI
.getShiftAmountTy(VT
, DAG
.getDataLayout());
3813 EVT ShSVT
= ShVT
.getScalarType();
3815 bool UseSRA
= false;
3816 SmallVector
<SDValue
, 16> Shifts
, Factors
;
3818 auto BuildSDIVPattern
= [&](ConstantSDNode
*C
) {
3819 if (C
->isNullValue())
3821 APInt Divisor
= C
->getAPIntValue();
3822 unsigned Shift
= Divisor
.countTrailingZeros();
3824 Divisor
.ashrInPlace(Shift
);
3827 // Calculate the multiplicative inverse, using Newton's method.
3829 APInt Factor
= Divisor
;
3830 while ((t
= Divisor
* Factor
) != 1)
3831 Factor
*= APInt(Divisor
.getBitWidth(), 2) - t
;
3832 Shifts
.push_back(DAG
.getConstant(Shift
, dl
, ShSVT
));
3833 Factors
.push_back(DAG
.getConstant(Factor
, dl
, SVT
));
3837 // Collect all magic values from the build vector.
3838 if (!ISD::matchUnaryPredicate(Op1
, BuildSDIVPattern
))
3841 SDValue Shift
, Factor
;
3842 if (VT
.isVector()) {
3843 Shift
= DAG
.getBuildVector(ShVT
, dl
, Shifts
);
3844 Factor
= DAG
.getBuildVector(VT
, dl
, Factors
);
3847 Factor
= Factors
[0];
3852 // Shift the value upfront if it is even, so the LSB is one.
3854 // TODO: For UDIV use SRL instead of SRA.
3856 Flags
.setExact(true);
3857 Res
= DAG
.getNode(ISD::SRA
, dl
, VT
, Res
, Shift
, Flags
);
3858 Created
.push_back(Res
.getNode());
3861 return DAG
.getNode(ISD::MUL
, dl
, VT
, Res
, Factor
);
3864 SDValue
TargetLowering::BuildSDIVPow2(SDNode
*N
, const APInt
&Divisor
,
3866 SmallVectorImpl
<SDNode
*> &Created
) const {
3867 AttributeList Attr
= DAG
.getMachineFunction().getFunction().getAttributes();
3868 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
3869 if (TLI
.isIntDivCheap(N
->getValueType(0), Attr
))
3870 return SDValue(N
, 0); // Lower SDIV as SDIV
3874 /// Given an ISD::SDIV node expressing a divide by constant,
3875 /// return a DAG expression to select that will generate the same value by
3876 /// multiplying by a magic number.
3877 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
3878 SDValue
TargetLowering::BuildSDIV(SDNode
*N
, SelectionDAG
&DAG
,
3879 bool IsAfterLegalization
,
3880 SmallVectorImpl
<SDNode
*> &Created
) const {
3882 EVT VT
= N
->getValueType(0);
3883 EVT SVT
= VT
.getScalarType();
3884 EVT ShVT
= getShiftAmountTy(VT
, DAG
.getDataLayout());
3885 EVT ShSVT
= ShVT
.getScalarType();
3886 unsigned EltBits
= VT
.getScalarSizeInBits();
3888 // Check to see if we can do this.
3889 // FIXME: We should be more aggressive here.
3890 if (!isTypeLegal(VT
))
3893 // If the sdiv has an 'exact' bit we can use a simpler lowering.
3894 if (N
->getFlags().hasExact())
3895 return BuildExactSDIV(*this, N
, dl
, DAG
, Created
);
3897 SmallVector
<SDValue
, 16> MagicFactors
, Factors
, Shifts
, ShiftMasks
;
3899 auto BuildSDIVPattern
= [&](ConstantSDNode
*C
) {
3900 if (C
->isNullValue())
3903 const APInt
&Divisor
= C
->getAPIntValue();
3904 APInt::ms magics
= Divisor
.magic();
3905 int NumeratorFactor
= 0;
3908 if (Divisor
.isOneValue() || Divisor
.isAllOnesValue()) {
3909 // If d is +1/-1, we just multiply the numerator by +1/-1.
3910 NumeratorFactor
= Divisor
.getSExtValue();
3914 } else if (Divisor
.isStrictlyPositive() && magics
.m
.isNegative()) {
3915 // If d > 0 and m < 0, add the numerator.
3916 NumeratorFactor
= 1;
3917 } else if (Divisor
.isNegative() && magics
.m
.isStrictlyPositive()) {
3918 // If d < 0 and m > 0, subtract the numerator.
3919 NumeratorFactor
= -1;
3922 MagicFactors
.push_back(DAG
.getConstant(magics
.m
, dl
, SVT
));
3923 Factors
.push_back(DAG
.getConstant(NumeratorFactor
, dl
, SVT
));
3924 Shifts
.push_back(DAG
.getConstant(magics
.s
, dl
, ShSVT
));
3925 ShiftMasks
.push_back(DAG
.getConstant(ShiftMask
, dl
, SVT
));
3929 SDValue N0
= N
->getOperand(0);
3930 SDValue N1
= N
->getOperand(1);
3932 // Collect the shifts / magic values from each element.
3933 if (!ISD::matchUnaryPredicate(N1
, BuildSDIVPattern
))
3936 SDValue MagicFactor
, Factor
, Shift
, ShiftMask
;
3937 if (VT
.isVector()) {
3938 MagicFactor
= DAG
.getBuildVector(VT
, dl
, MagicFactors
);
3939 Factor
= DAG
.getBuildVector(VT
, dl
, Factors
);
3940 Shift
= DAG
.getBuildVector(ShVT
, dl
, Shifts
);
3941 ShiftMask
= DAG
.getBuildVector(VT
, dl
, ShiftMasks
);
3943 MagicFactor
= MagicFactors
[0];
3944 Factor
= Factors
[0];
3946 ShiftMask
= ShiftMasks
[0];
3949 // Multiply the numerator (operand 0) by the magic value.
3950 // FIXME: We should support doing a MUL in a wider type.
3952 if (IsAfterLegalization
? isOperationLegal(ISD::MULHS
, VT
)
3953 : isOperationLegalOrCustom(ISD::MULHS
, VT
))
3954 Q
= DAG
.getNode(ISD::MULHS
, dl
, VT
, N0
, MagicFactor
);
3955 else if (IsAfterLegalization
? isOperationLegal(ISD::SMUL_LOHI
, VT
)
3956 : isOperationLegalOrCustom(ISD::SMUL_LOHI
, VT
)) {
3958 DAG
.getNode(ISD::SMUL_LOHI
, dl
, DAG
.getVTList(VT
, VT
), N0
, MagicFactor
);
3959 Q
= SDValue(LoHi
.getNode(), 1);
3961 return SDValue(); // No mulhs or equivalent.
3962 Created
.push_back(Q
.getNode());
3964 // (Optionally) Add/subtract the numerator using Factor.
3965 Factor
= DAG
.getNode(ISD::MUL
, dl
, VT
, N0
, Factor
);
3966 Created
.push_back(Factor
.getNode());
3967 Q
= DAG
.getNode(ISD::ADD
, dl
, VT
, Q
, Factor
);
3968 Created
.push_back(Q
.getNode());
3970 // Shift right algebraic by shift value.
3971 Q
= DAG
.getNode(ISD::SRA
, dl
, VT
, Q
, Shift
);
3972 Created
.push_back(Q
.getNode());
3974 // Extract the sign bit, mask it and add it to the quotient.
3975 SDValue SignShift
= DAG
.getConstant(EltBits
- 1, dl
, ShVT
);
3976 SDValue T
= DAG
.getNode(ISD::SRL
, dl
, VT
, Q
, SignShift
);
3977 Created
.push_back(T
.getNode());
3978 T
= DAG
.getNode(ISD::AND
, dl
, VT
, T
, ShiftMask
);
3979 Created
.push_back(T
.getNode());
3980 return DAG
.getNode(ISD::ADD
, dl
, VT
, Q
, T
);
3983 /// Given an ISD::UDIV node expressing a divide by constant,
3984 /// return a DAG expression to select that will generate the same value by
3985 /// multiplying by a magic number.
3986 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
3987 SDValue
TargetLowering::BuildUDIV(SDNode
*N
, SelectionDAG
&DAG
,
3988 bool IsAfterLegalization
,
3989 SmallVectorImpl
<SDNode
*> &Created
) const {
3991 EVT VT
= N
->getValueType(0);
3992 EVT SVT
= VT
.getScalarType();
3993 EVT ShVT
= getShiftAmountTy(VT
, DAG
.getDataLayout());
3994 EVT ShSVT
= ShVT
.getScalarType();
3995 unsigned EltBits
= VT
.getScalarSizeInBits();
3997 // Check to see if we can do this.
3998 // FIXME: We should be more aggressive here.
3999 if (!isTypeLegal(VT
))
4002 bool UseNPQ
= false;
4003 SmallVector
<SDValue
, 16> PreShifts
, PostShifts
, MagicFactors
, NPQFactors
;
4005 auto BuildUDIVPattern
= [&](ConstantSDNode
*C
) {
4006 if (C
->isNullValue())
4008 // FIXME: We should use a narrower constant when the upper
4009 // bits are known to be zero.
4010 APInt Divisor
= C
->getAPIntValue();
4011 APInt::mu magics
= Divisor
.magicu();
4012 unsigned PreShift
= 0, PostShift
= 0;
4014 // If the divisor is even, we can avoid using the expensive fixup by
4015 // shifting the divided value upfront.
4016 if (magics
.a
!= 0 && !Divisor
[0]) {
4017 PreShift
= Divisor
.countTrailingZeros();
4018 // Get magic number for the shifted divisor.
4019 magics
= Divisor
.lshr(PreShift
).magicu(PreShift
);
4020 assert(magics
.a
== 0 && "Should use cheap fixup now");
4023 APInt Magic
= magics
.m
;
4026 if (magics
.a
== 0 || Divisor
.isOneValue()) {
4027 assert(magics
.s
< Divisor
.getBitWidth() &&
4028 "We shouldn't generate an undefined shift!");
4029 PostShift
= magics
.s
;
4032 PostShift
= magics
.s
- 1;
4036 PreShifts
.push_back(DAG
.getConstant(PreShift
, dl
, ShSVT
));
4037 MagicFactors
.push_back(DAG
.getConstant(Magic
, dl
, SVT
));
4038 NPQFactors
.push_back(
4039 DAG
.getConstant(SelNPQ
? APInt::getOneBitSet(EltBits
, EltBits
- 1)
4040 : APInt::getNullValue(EltBits
),
4042 PostShifts
.push_back(DAG
.getConstant(PostShift
, dl
, ShSVT
));
4047 SDValue N0
= N
->getOperand(0);
4048 SDValue N1
= N
->getOperand(1);
4050 // Collect the shifts/magic values from each element.
4051 if (!ISD::matchUnaryPredicate(N1
, BuildUDIVPattern
))
4054 SDValue PreShift
, PostShift
, MagicFactor
, NPQFactor
;
4055 if (VT
.isVector()) {
4056 PreShift
= DAG
.getBuildVector(ShVT
, dl
, PreShifts
);
4057 MagicFactor
= DAG
.getBuildVector(VT
, dl
, MagicFactors
);
4058 NPQFactor
= DAG
.getBuildVector(VT
, dl
, NPQFactors
);
4059 PostShift
= DAG
.getBuildVector(ShVT
, dl
, PostShifts
);
4061 PreShift
= PreShifts
[0];
4062 MagicFactor
= MagicFactors
[0];
4063 PostShift
= PostShifts
[0];
4067 Q
= DAG
.getNode(ISD::SRL
, dl
, VT
, Q
, PreShift
);
4068 Created
.push_back(Q
.getNode());
4070 // FIXME: We should support doing a MUL in a wider type.
4071 auto GetMULHU
= [&](SDValue X
, SDValue Y
) {
4072 if (IsAfterLegalization
? isOperationLegal(ISD::MULHU
, VT
)
4073 : isOperationLegalOrCustom(ISD::MULHU
, VT
))
4074 return DAG
.getNode(ISD::MULHU
, dl
, VT
, X
, Y
);
4075 if (IsAfterLegalization
? isOperationLegal(ISD::UMUL_LOHI
, VT
)
4076 : isOperationLegalOrCustom(ISD::UMUL_LOHI
, VT
)) {
4078 DAG
.getNode(ISD::UMUL_LOHI
, dl
, DAG
.getVTList(VT
, VT
), X
, Y
);
4079 return SDValue(LoHi
.getNode(), 1);
4081 return SDValue(); // No mulhu or equivalent
4084 // Multiply the numerator (operand 0) by the magic value.
4085 Q
= GetMULHU(Q
, MagicFactor
);
4089 Created
.push_back(Q
.getNode());
4092 SDValue NPQ
= DAG
.getNode(ISD::SUB
, dl
, VT
, N0
, Q
);
4093 Created
.push_back(NPQ
.getNode());
4095 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
4096 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
4098 NPQ
= GetMULHU(NPQ
, NPQFactor
);
4100 NPQ
= DAG
.getNode(ISD::SRL
, dl
, VT
, NPQ
, DAG
.getConstant(1, dl
, ShVT
));
4102 Created
.push_back(NPQ
.getNode());
4104 Q
= DAG
.getNode(ISD::ADD
, dl
, VT
, NPQ
, Q
);
4105 Created
.push_back(Q
.getNode());
4108 Q
= DAG
.getNode(ISD::SRL
, dl
, VT
, Q
, PostShift
);
4109 Created
.push_back(Q
.getNode());
4111 SDValue One
= DAG
.getConstant(1, dl
, VT
);
4112 SDValue IsOne
= DAG
.getSetCC(dl
, VT
, N1
, One
, ISD::SETEQ
);
4113 return DAG
.getSelect(dl
, VT
, IsOne
, N0
, Q
);
4116 bool TargetLowering::
4117 verifyReturnAddressArgumentIsConstant(SDValue Op
, SelectionDAG
&DAG
) const {
4118 if (!isa
<ConstantSDNode
>(Op
.getOperand(0))) {
4119 DAG
.getContext()->emitError("argument to '__builtin_return_address' must "
4120 "be a constant integer");
4127 //===----------------------------------------------------------------------===//
4128 // Legalization Utilities
4129 //===----------------------------------------------------------------------===//
4131 bool TargetLowering::expandMUL_LOHI(unsigned Opcode
, EVT VT
, SDLoc dl
,
4132 SDValue LHS
, SDValue RHS
,
4133 SmallVectorImpl
<SDValue
> &Result
,
4134 EVT HiLoVT
, SelectionDAG
&DAG
,
4135 MulExpansionKind Kind
, SDValue LL
,
4136 SDValue LH
, SDValue RL
, SDValue RH
) const {
4137 assert(Opcode
== ISD::MUL
|| Opcode
== ISD::UMUL_LOHI
||
4138 Opcode
== ISD::SMUL_LOHI
);
4140 bool HasMULHS
= (Kind
== MulExpansionKind::Always
) ||
4141 isOperationLegalOrCustom(ISD::MULHS
, HiLoVT
);
4142 bool HasMULHU
= (Kind
== MulExpansionKind::Always
) ||
4143 isOperationLegalOrCustom(ISD::MULHU
, HiLoVT
);
4144 bool HasSMUL_LOHI
= (Kind
== MulExpansionKind::Always
) ||
4145 isOperationLegalOrCustom(ISD::SMUL_LOHI
, HiLoVT
);
4146 bool HasUMUL_LOHI
= (Kind
== MulExpansionKind::Always
) ||
4147 isOperationLegalOrCustom(ISD::UMUL_LOHI
, HiLoVT
);
4149 if (!HasMULHU
&& !HasMULHS
&& !HasUMUL_LOHI
&& !HasSMUL_LOHI
)
4152 unsigned OuterBitSize
= VT
.getScalarSizeInBits();
4153 unsigned InnerBitSize
= HiLoVT
.getScalarSizeInBits();
4154 unsigned LHSSB
= DAG
.ComputeNumSignBits(LHS
);
4155 unsigned RHSSB
= DAG
.ComputeNumSignBits(RHS
);
4157 // LL, LH, RL, and RH must be either all NULL or all set to a value.
4158 assert((LL
.getNode() && LH
.getNode() && RL
.getNode() && RH
.getNode()) ||
4159 (!LL
.getNode() && !LH
.getNode() && !RL
.getNode() && !RH
.getNode()));
4161 SDVTList VTs
= DAG
.getVTList(HiLoVT
, HiLoVT
);
4162 auto MakeMUL_LOHI
= [&](SDValue L
, SDValue R
, SDValue
&Lo
, SDValue
&Hi
,
4163 bool Signed
) -> bool {
4164 if ((Signed
&& HasSMUL_LOHI
) || (!Signed
&& HasUMUL_LOHI
)) {
4165 Lo
= DAG
.getNode(Signed
? ISD::SMUL_LOHI
: ISD::UMUL_LOHI
, dl
, VTs
, L
, R
);
4166 Hi
= SDValue(Lo
.getNode(), 1);
4169 if ((Signed
&& HasMULHS
) || (!Signed
&& HasMULHU
)) {
4170 Lo
= DAG
.getNode(ISD::MUL
, dl
, HiLoVT
, L
, R
);
4171 Hi
= DAG
.getNode(Signed
? ISD::MULHS
: ISD::MULHU
, dl
, HiLoVT
, L
, R
);
4179 if (!LL
.getNode() && !RL
.getNode() &&
4180 isOperationLegalOrCustom(ISD::TRUNCATE
, HiLoVT
)) {
4181 LL
= DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, LHS
);
4182 RL
= DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, RHS
);
4188 APInt HighMask
= APInt::getHighBitsSet(OuterBitSize
, InnerBitSize
);
4189 if (DAG
.MaskedValueIsZero(LHS
, HighMask
) &&
4190 DAG
.MaskedValueIsZero(RHS
, HighMask
)) {
4191 // The inputs are both zero-extended.
4192 if (MakeMUL_LOHI(LL
, RL
, Lo
, Hi
, false)) {
4193 Result
.push_back(Lo
);
4194 Result
.push_back(Hi
);
4195 if (Opcode
!= ISD::MUL
) {
4196 SDValue Zero
= DAG
.getConstant(0, dl
, HiLoVT
);
4197 Result
.push_back(Zero
);
4198 Result
.push_back(Zero
);
4204 if (!VT
.isVector() && Opcode
== ISD::MUL
&& LHSSB
> InnerBitSize
&&
4205 RHSSB
> InnerBitSize
) {
4206 // The input values are both sign-extended.
4207 // TODO non-MUL case?
4208 if (MakeMUL_LOHI(LL
, RL
, Lo
, Hi
, true)) {
4209 Result
.push_back(Lo
);
4210 Result
.push_back(Hi
);
4215 unsigned ShiftAmount
= OuterBitSize
- InnerBitSize
;
4216 EVT ShiftAmountTy
= getShiftAmountTy(VT
, DAG
.getDataLayout());
4217 if (APInt::getMaxValue(ShiftAmountTy
.getSizeInBits()).ult(ShiftAmount
)) {
4218 // FIXME getShiftAmountTy does not always return a sensible result when VT
4219 // is an illegal type, and so the type may be too small to fit the shift
4220 // amount. Override it with i32. The shift will have to be legalized.
4221 ShiftAmountTy
= MVT::i32
;
4223 SDValue Shift
= DAG
.getConstant(ShiftAmount
, dl
, ShiftAmountTy
);
4225 if (!LH
.getNode() && !RH
.getNode() &&
4226 isOperationLegalOrCustom(ISD::SRL
, VT
) &&
4227 isOperationLegalOrCustom(ISD::TRUNCATE
, HiLoVT
)) {
4228 LH
= DAG
.getNode(ISD::SRL
, dl
, VT
, LHS
, Shift
);
4229 LH
= DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, LH
);
4230 RH
= DAG
.getNode(ISD::SRL
, dl
, VT
, RHS
, Shift
);
4231 RH
= DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, RH
);
4237 if (!MakeMUL_LOHI(LL
, RL
, Lo
, Hi
, false))
4240 Result
.push_back(Lo
);
4242 if (Opcode
== ISD::MUL
) {
4243 RH
= DAG
.getNode(ISD::MUL
, dl
, HiLoVT
, LL
, RH
);
4244 LH
= DAG
.getNode(ISD::MUL
, dl
, HiLoVT
, LH
, RL
);
4245 Hi
= DAG
.getNode(ISD::ADD
, dl
, HiLoVT
, Hi
, RH
);
4246 Hi
= DAG
.getNode(ISD::ADD
, dl
, HiLoVT
, Hi
, LH
);
4247 Result
.push_back(Hi
);
4251 // Compute the full width result.
4252 auto Merge
= [&](SDValue Lo
, SDValue Hi
) -> SDValue
{
4253 Lo
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Lo
);
4254 Hi
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Hi
);
4255 Hi
= DAG
.getNode(ISD::SHL
, dl
, VT
, Hi
, Shift
);
4256 return DAG
.getNode(ISD::OR
, dl
, VT
, Lo
, Hi
);
4259 SDValue Next
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Hi
);
4260 if (!MakeMUL_LOHI(LL
, RH
, Lo
, Hi
, false))
4263 // This is effectively the add part of a multiply-add of half-sized operands,
4264 // so it cannot overflow.
4265 Next
= DAG
.getNode(ISD::ADD
, dl
, VT
, Next
, Merge(Lo
, Hi
));
4267 if (!MakeMUL_LOHI(LH
, RL
, Lo
, Hi
, false))
4270 SDValue Zero
= DAG
.getConstant(0, dl
, HiLoVT
);
4271 EVT BoolType
= getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), VT
);
4273 bool UseGlue
= (isOperationLegalOrCustom(ISD::ADDC
, VT
) &&
4274 isOperationLegalOrCustom(ISD::ADDE
, VT
));
4276 Next
= DAG
.getNode(ISD::ADDC
, dl
, DAG
.getVTList(VT
, MVT::Glue
), Next
,
4279 Next
= DAG
.getNode(ISD::ADDCARRY
, dl
, DAG
.getVTList(VT
, BoolType
), Next
,
4280 Merge(Lo
, Hi
), DAG
.getConstant(0, dl
, BoolType
));
4282 SDValue Carry
= Next
.getValue(1);
4283 Result
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, Next
));
4284 Next
= DAG
.getNode(ISD::SRL
, dl
, VT
, Next
, Shift
);
4286 if (!MakeMUL_LOHI(LH
, RH
, Lo
, Hi
, Opcode
== ISD::SMUL_LOHI
))
4290 Hi
= DAG
.getNode(ISD::ADDE
, dl
, DAG
.getVTList(HiLoVT
, MVT::Glue
), Hi
, Zero
,
4293 Hi
= DAG
.getNode(ISD::ADDCARRY
, dl
, DAG
.getVTList(HiLoVT
, BoolType
), Hi
,
4296 Next
= DAG
.getNode(ISD::ADD
, dl
, VT
, Next
, Merge(Lo
, Hi
));
4298 if (Opcode
== ISD::SMUL_LOHI
) {
4299 SDValue NextSub
= DAG
.getNode(ISD::SUB
, dl
, VT
, Next
,
4300 DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, RL
));
4301 Next
= DAG
.getSelectCC(dl
, LH
, Zero
, NextSub
, Next
, ISD::SETLT
);
4303 NextSub
= DAG
.getNode(ISD::SUB
, dl
, VT
, Next
,
4304 DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, LL
));
4305 Next
= DAG
.getSelectCC(dl
, RH
, Zero
, NextSub
, Next
, ISD::SETLT
);
4308 Result
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, Next
));
4309 Next
= DAG
.getNode(ISD::SRL
, dl
, VT
, Next
, Shift
);
4310 Result
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, HiLoVT
, Next
));
4314 bool TargetLowering::expandMUL(SDNode
*N
, SDValue
&Lo
, SDValue
&Hi
, EVT HiLoVT
,
4315 SelectionDAG
&DAG
, MulExpansionKind Kind
,
4316 SDValue LL
, SDValue LH
, SDValue RL
,
4318 SmallVector
<SDValue
, 2> Result
;
4319 bool Ok
= expandMUL_LOHI(N
->getOpcode(), N
->getValueType(0), N
,
4320 N
->getOperand(0), N
->getOperand(1), Result
, HiLoVT
,
4321 DAG
, Kind
, LL
, LH
, RL
, RH
);
4323 assert(Result
.size() == 2);
4330 bool TargetLowering::expandFunnelShift(SDNode
*Node
, SDValue
&Result
,
4331 SelectionDAG
&DAG
) const {
4332 EVT VT
= Node
->getValueType(0);
4334 if (VT
.isVector() && (!isOperationLegalOrCustom(ISD::SHL
, VT
) ||
4335 !isOperationLegalOrCustom(ISD::SRL
, VT
) ||
4336 !isOperationLegalOrCustom(ISD::SUB
, VT
) ||
4337 !isOperationLegalOrCustomOrPromote(ISD::OR
, VT
)))
4340 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
4341 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
4342 SDValue X
= Node
->getOperand(0);
4343 SDValue Y
= Node
->getOperand(1);
4344 SDValue Z
= Node
->getOperand(2);
4346 unsigned EltSizeInBits
= VT
.getScalarSizeInBits();
4347 bool IsFSHL
= Node
->getOpcode() == ISD::FSHL
;
4348 SDLoc
DL(SDValue(Node
, 0));
4350 EVT ShVT
= Z
.getValueType();
4351 SDValue BitWidthC
= DAG
.getConstant(EltSizeInBits
, DL
, ShVT
);
4352 SDValue Zero
= DAG
.getConstant(0, DL
, ShVT
);
4355 if (isPowerOf2_32(EltSizeInBits
)) {
4356 SDValue Mask
= DAG
.getConstant(EltSizeInBits
- 1, DL
, ShVT
);
4357 ShAmt
= DAG
.getNode(ISD::AND
, DL
, ShVT
, Z
, Mask
);
4359 ShAmt
= DAG
.getNode(ISD::UREM
, DL
, ShVT
, Z
, BitWidthC
);
4362 SDValue InvShAmt
= DAG
.getNode(ISD::SUB
, DL
, ShVT
, BitWidthC
, ShAmt
);
4363 SDValue ShX
= DAG
.getNode(ISD::SHL
, DL
, VT
, X
, IsFSHL
? ShAmt
: InvShAmt
);
4364 SDValue ShY
= DAG
.getNode(ISD::SRL
, DL
, VT
, Y
, IsFSHL
? InvShAmt
: ShAmt
);
4365 SDValue Or
= DAG
.getNode(ISD::OR
, DL
, VT
, ShX
, ShY
);
4367 // If (Z % BW == 0), then the opposite direction shift is shift-by-bitwidth,
4368 // and that is undefined. We must compare and select to avoid UB.
4369 EVT CCVT
= getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), ShVT
);
4371 // For fshl, 0-shift returns the 1st arg (X).
4372 // For fshr, 0-shift returns the 2nd arg (Y).
4373 SDValue IsZeroShift
= DAG
.getSetCC(DL
, CCVT
, ShAmt
, Zero
, ISD::SETEQ
);
4374 Result
= DAG
.getSelect(DL
, VT
, IsZeroShift
, IsFSHL
? X
: Y
, Or
);
4378 // TODO: Merge with expandFunnelShift.
4379 bool TargetLowering::expandROT(SDNode
*Node
, SDValue
&Result
,
4380 SelectionDAG
&DAG
) const {
4381 EVT VT
= Node
->getValueType(0);
4382 unsigned EltSizeInBits
= VT
.getScalarSizeInBits();
4383 bool IsLeft
= Node
->getOpcode() == ISD::ROTL
;
4384 SDValue Op0
= Node
->getOperand(0);
4385 SDValue Op1
= Node
->getOperand(1);
4386 SDLoc
DL(SDValue(Node
, 0));
4388 EVT ShVT
= Op1
.getValueType();
4389 SDValue BitWidthC
= DAG
.getConstant(EltSizeInBits
, DL
, ShVT
);
4391 // If a rotate in the other direction is legal, use it.
4392 unsigned RevRot
= IsLeft
? ISD::ROTR
: ISD::ROTL
;
4393 if (isOperationLegal(RevRot
, VT
)) {
4394 SDValue Sub
= DAG
.getNode(ISD::SUB
, DL
, ShVT
, BitWidthC
, Op1
);
4395 Result
= DAG
.getNode(RevRot
, DL
, VT
, Op0
, Sub
);
4399 if (VT
.isVector() && (!isOperationLegalOrCustom(ISD::SHL
, VT
) ||
4400 !isOperationLegalOrCustom(ISD::SRL
, VT
) ||
4401 !isOperationLegalOrCustom(ISD::SUB
, VT
) ||
4402 !isOperationLegalOrCustomOrPromote(ISD::OR
, VT
) ||
4403 !isOperationLegalOrCustomOrPromote(ISD::AND
, VT
)))
4407 // (rotl x, c) -> (or (shl x, (and c, w-1)), (srl x, (and w-c, w-1)))
4408 // (rotr x, c) -> (or (srl x, (and c, w-1)), (shl x, (and w-c, w-1)))
4410 assert(isPowerOf2_32(EltSizeInBits
) && EltSizeInBits
> 1 &&
4411 "Expecting the type bitwidth to be a power of 2");
4412 unsigned ShOpc
= IsLeft
? ISD::SHL
: ISD::SRL
;
4413 unsigned HsOpc
= IsLeft
? ISD::SRL
: ISD::SHL
;
4414 SDValue BitWidthMinusOneC
= DAG
.getConstant(EltSizeInBits
- 1, DL
, ShVT
);
4415 SDValue NegOp1
= DAG
.getNode(ISD::SUB
, DL
, ShVT
, BitWidthC
, Op1
);
4416 SDValue And0
= DAG
.getNode(ISD::AND
, DL
, ShVT
, Op1
, BitWidthMinusOneC
);
4417 SDValue And1
= DAG
.getNode(ISD::AND
, DL
, ShVT
, NegOp1
, BitWidthMinusOneC
);
4418 Result
= DAG
.getNode(ISD::OR
, DL
, VT
, DAG
.getNode(ShOpc
, DL
, VT
, Op0
, And0
),
4419 DAG
.getNode(HsOpc
, DL
, VT
, Op0
, And1
));
4423 bool TargetLowering::expandFP_TO_SINT(SDNode
*Node
, SDValue
&Result
,
4424 SelectionDAG
&DAG
) const {
4425 SDValue Src
= Node
->getOperand(0);
4426 EVT SrcVT
= Src
.getValueType();
4427 EVT DstVT
= Node
->getValueType(0);
4428 SDLoc
dl(SDValue(Node
, 0));
4430 // FIXME: Only f32 to i64 conversions are supported.
4431 if (SrcVT
!= MVT::f32
|| DstVT
!= MVT::i64
)
4434 // Expand f32 -> i64 conversion
4435 // This algorithm comes from compiler-rt's implementation of fixsfdi:
4436 // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
4437 unsigned SrcEltBits
= SrcVT
.getScalarSizeInBits();
4438 EVT IntVT
= SrcVT
.changeTypeToInteger();
4439 EVT IntShVT
= getShiftAmountTy(IntVT
, DAG
.getDataLayout());
4441 SDValue ExponentMask
= DAG
.getConstant(0x7F800000, dl
, IntVT
);
4442 SDValue ExponentLoBit
= DAG
.getConstant(23, dl
, IntVT
);
4443 SDValue Bias
= DAG
.getConstant(127, dl
, IntVT
);
4444 SDValue SignMask
= DAG
.getConstant(APInt::getSignMask(SrcEltBits
), dl
, IntVT
);
4445 SDValue SignLowBit
= DAG
.getConstant(SrcEltBits
- 1, dl
, IntVT
);
4446 SDValue MantissaMask
= DAG
.getConstant(0x007FFFFF, dl
, IntVT
);
4448 SDValue Bits
= DAG
.getNode(ISD::BITCAST
, dl
, IntVT
, Src
);
4450 SDValue ExponentBits
= DAG
.getNode(
4451 ISD::SRL
, dl
, IntVT
, DAG
.getNode(ISD::AND
, dl
, IntVT
, Bits
, ExponentMask
),
4452 DAG
.getZExtOrTrunc(ExponentLoBit
, dl
, IntShVT
));
4453 SDValue Exponent
= DAG
.getNode(ISD::SUB
, dl
, IntVT
, ExponentBits
, Bias
);
4455 SDValue Sign
= DAG
.getNode(ISD::SRA
, dl
, IntVT
,
4456 DAG
.getNode(ISD::AND
, dl
, IntVT
, Bits
, SignMask
),
4457 DAG
.getZExtOrTrunc(SignLowBit
, dl
, IntShVT
));
4458 Sign
= DAG
.getSExtOrTrunc(Sign
, dl
, DstVT
);
4460 SDValue R
= DAG
.getNode(ISD::OR
, dl
, IntVT
,
4461 DAG
.getNode(ISD::AND
, dl
, IntVT
, Bits
, MantissaMask
),
4462 DAG
.getConstant(0x00800000, dl
, IntVT
));
4464 R
= DAG
.getZExtOrTrunc(R
, dl
, DstVT
);
4466 R
= DAG
.getSelectCC(
4467 dl
, Exponent
, ExponentLoBit
,
4468 DAG
.getNode(ISD::SHL
, dl
, DstVT
, R
,
4470 DAG
.getNode(ISD::SUB
, dl
, IntVT
, Exponent
, ExponentLoBit
),
4472 DAG
.getNode(ISD::SRL
, dl
, DstVT
, R
,
4474 DAG
.getNode(ISD::SUB
, dl
, IntVT
, ExponentLoBit
, Exponent
),
4478 SDValue Ret
= DAG
.getNode(ISD::SUB
, dl
, DstVT
,
4479 DAG
.getNode(ISD::XOR
, dl
, DstVT
, R
, Sign
), Sign
);
4481 Result
= DAG
.getSelectCC(dl
, Exponent
, DAG
.getConstant(0, dl
, IntVT
),
4482 DAG
.getConstant(0, dl
, DstVT
), Ret
, ISD::SETLT
);
4486 bool TargetLowering::expandFP_TO_UINT(SDNode
*Node
, SDValue
&Result
,
4487 SelectionDAG
&DAG
) const {
4488 SDLoc
dl(SDValue(Node
, 0));
4489 SDValue Src
= Node
->getOperand(0);
4491 EVT SrcVT
= Src
.getValueType();
4492 EVT DstVT
= Node
->getValueType(0);
4494 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), SrcVT
);
4496 // Only expand vector types if we have the appropriate vector bit operations.
4497 if (DstVT
.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT
, DstVT
) ||
4498 !isOperationLegalOrCustomOrPromote(ISD::XOR
, SrcVT
)))
4501 // If the maximum float value is smaller then the signed integer range,
4502 // the destination signmask can't be represented by the float, so we can
4503 // just use FP_TO_SINT directly.
4504 const fltSemantics
&APFSem
= DAG
.EVTToAPFloatSemantics(SrcVT
);
4505 APFloat
APF(APFSem
, APInt::getNullValue(SrcVT
.getScalarSizeInBits()));
4506 APInt SignMask
= APInt::getSignMask(DstVT
.getScalarSizeInBits());
4507 if (APFloat::opOverflow
&
4508 APF
.convertFromAPInt(SignMask
, false, APFloat::rmNearestTiesToEven
)) {
4509 Result
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, DstVT
, Src
);
4513 SDValue Cst
= DAG
.getConstantFP(APF
, dl
, SrcVT
);
4514 SDValue Sel
= DAG
.getSetCC(dl
, SetCCVT
, Src
, Cst
, ISD::SETLT
);
4516 bool Strict
= shouldUseStrictFP_TO_INT(SrcVT
, DstVT
, /*IsSigned*/ false);
4518 // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
4519 // signmask then offset (the result of which should be fully representable).
4520 // Sel = Src < 0x8000000000000000
4521 // Val = select Sel, Src, Src - 0x8000000000000000
4522 // Ofs = select Sel, 0, 0x8000000000000000
4523 // Result = fp_to_sint(Val) ^ Ofs
4525 // TODO: Should any fast-math-flags be set for the FSUB?
4526 SDValue Val
= DAG
.getSelect(dl
, SrcVT
, Sel
, Src
,
4527 DAG
.getNode(ISD::FSUB
, dl
, SrcVT
, Src
, Cst
));
4528 SDValue Ofs
= DAG
.getSelect(dl
, DstVT
, Sel
, DAG
.getConstant(0, dl
, DstVT
),
4529 DAG
.getConstant(SignMask
, dl
, DstVT
));
4530 Result
= DAG
.getNode(ISD::XOR
, dl
, DstVT
,
4531 DAG
.getNode(ISD::FP_TO_SINT
, dl
, DstVT
, Val
), Ofs
);
4533 // Expand based on maximum range of FP_TO_SINT:
4534 // True = fp_to_sint(Src)
4535 // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
4536 // Result = select (Src < 0x8000000000000000), True, False
4538 SDValue True
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, DstVT
, Src
);
4539 // TODO: Should any fast-math-flags be set for the FSUB?
4540 SDValue False
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, DstVT
,
4541 DAG
.getNode(ISD::FSUB
, dl
, SrcVT
, Src
, Cst
));
4542 False
= DAG
.getNode(ISD::XOR
, dl
, DstVT
, False
,
4543 DAG
.getConstant(SignMask
, dl
, DstVT
));
4544 Result
= DAG
.getSelect(dl
, DstVT
, Sel
, True
, False
);
4549 bool TargetLowering::expandUINT_TO_FP(SDNode
*Node
, SDValue
&Result
,
4550 SelectionDAG
&DAG
) const {
4551 SDValue Src
= Node
->getOperand(0);
4552 EVT SrcVT
= Src
.getValueType();
4553 EVT DstVT
= Node
->getValueType(0);
4555 if (SrcVT
.getScalarType() != MVT::i64
)
4558 SDLoc
dl(SDValue(Node
, 0));
4559 EVT ShiftVT
= getShiftAmountTy(SrcVT
, DAG
.getDataLayout());
4561 if (DstVT
.getScalarType() == MVT::f32
) {
4562 // Only expand vector types if we have the appropriate vector bit
4564 if (SrcVT
.isVector() &&
4565 (!isOperationLegalOrCustom(ISD::SRL
, SrcVT
) ||
4566 !isOperationLegalOrCustom(ISD::FADD
, DstVT
) ||
4567 !isOperationLegalOrCustom(ISD::SINT_TO_FP
, SrcVT
) ||
4568 !isOperationLegalOrCustomOrPromote(ISD::OR
, SrcVT
) ||
4569 !isOperationLegalOrCustomOrPromote(ISD::AND
, SrcVT
)))
4572 // For unsigned conversions, convert them to signed conversions using the
4573 // algorithm from the x86_64 __floatundidf in compiler_rt.
4574 SDValue Fast
= DAG
.getNode(ISD::SINT_TO_FP
, dl
, DstVT
, Src
);
4576 SDValue ShiftConst
= DAG
.getConstant(1, dl
, ShiftVT
);
4577 SDValue Shr
= DAG
.getNode(ISD::SRL
, dl
, SrcVT
, Src
, ShiftConst
);
4578 SDValue AndConst
= DAG
.getConstant(1, dl
, SrcVT
);
4579 SDValue And
= DAG
.getNode(ISD::AND
, dl
, SrcVT
, Src
, AndConst
);
4580 SDValue Or
= DAG
.getNode(ISD::OR
, dl
, SrcVT
, And
, Shr
);
4582 SDValue SignCvt
= DAG
.getNode(ISD::SINT_TO_FP
, dl
, DstVT
, Or
);
4583 SDValue Slow
= DAG
.getNode(ISD::FADD
, dl
, DstVT
, SignCvt
, SignCvt
);
4585 // TODO: This really should be implemented using a branch rather than a
4586 // select. We happen to get lucky and machinesink does the right
4587 // thing most of the time. This would be a good candidate for a
4588 // pseudo-op, or, even better, for whole-function isel.
4590 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), SrcVT
);
4592 SDValue SignBitTest
= DAG
.getSetCC(
4593 dl
, SetCCVT
, Src
, DAG
.getConstant(0, dl
, SrcVT
), ISD::SETLT
);
4594 Result
= DAG
.getSelect(dl
, DstVT
, SignBitTest
, Slow
, Fast
);
4598 if (DstVT
.getScalarType() == MVT::f64
) {
4599 // Only expand vector types if we have the appropriate vector bit
4601 if (SrcVT
.isVector() &&
4602 (!isOperationLegalOrCustom(ISD::SRL
, SrcVT
) ||
4603 !isOperationLegalOrCustom(ISD::FADD
, DstVT
) ||
4604 !isOperationLegalOrCustom(ISD::FSUB
, DstVT
) ||
4605 !isOperationLegalOrCustomOrPromote(ISD::OR
, SrcVT
) ||
4606 !isOperationLegalOrCustomOrPromote(ISD::AND
, SrcVT
)))
4609 // Implementation of unsigned i64 to f64 following the algorithm in
4610 // __floatundidf in compiler_rt. This implementation has the advantage
4611 // of performing rounding correctly, both in the default rounding mode
4612 // and in all alternate rounding modes.
4613 SDValue TwoP52
= DAG
.getConstant(UINT64_C(0x4330000000000000), dl
, SrcVT
);
4614 SDValue TwoP84PlusTwoP52
= DAG
.getConstantFP(
4615 BitsToDouble(UINT64_C(0x4530000000100000)), dl
, DstVT
);
4616 SDValue TwoP84
= DAG
.getConstant(UINT64_C(0x4530000000000000), dl
, SrcVT
);
4617 SDValue LoMask
= DAG
.getConstant(UINT64_C(0x00000000FFFFFFFF), dl
, SrcVT
);
4618 SDValue HiShift
= DAG
.getConstant(32, dl
, ShiftVT
);
4620 SDValue Lo
= DAG
.getNode(ISD::AND
, dl
, SrcVT
, Src
, LoMask
);
4621 SDValue Hi
= DAG
.getNode(ISD::SRL
, dl
, SrcVT
, Src
, HiShift
);
4622 SDValue LoOr
= DAG
.getNode(ISD::OR
, dl
, SrcVT
, Lo
, TwoP52
);
4623 SDValue HiOr
= DAG
.getNode(ISD::OR
, dl
, SrcVT
, Hi
, TwoP84
);
4624 SDValue LoFlt
= DAG
.getBitcast(DstVT
, LoOr
);
4625 SDValue HiFlt
= DAG
.getBitcast(DstVT
, HiOr
);
4626 SDValue HiSub
= DAG
.getNode(ISD::FSUB
, dl
, DstVT
, HiFlt
, TwoP84PlusTwoP52
);
4627 Result
= DAG
.getNode(ISD::FADD
, dl
, DstVT
, LoFlt
, HiSub
);
4634 SDValue
TargetLowering::expandFMINNUM_FMAXNUM(SDNode
*Node
,
4635 SelectionDAG
&DAG
) const {
4637 unsigned NewOp
= Node
->getOpcode() == ISD::FMINNUM
?
4638 ISD::FMINNUM_IEEE
: ISD::FMAXNUM_IEEE
;
4639 EVT VT
= Node
->getValueType(0);
4640 if (isOperationLegalOrCustom(NewOp
, VT
)) {
4641 SDValue Quiet0
= Node
->getOperand(0);
4642 SDValue Quiet1
= Node
->getOperand(1);
4644 if (!Node
->getFlags().hasNoNaNs()) {
4645 // Insert canonicalizes if it's possible we need to quiet to get correct
4647 if (!DAG
.isKnownNeverSNaN(Quiet0
)) {
4648 Quiet0
= DAG
.getNode(ISD::FCANONICALIZE
, dl
, VT
, Quiet0
,
4651 if (!DAG
.isKnownNeverSNaN(Quiet1
)) {
4652 Quiet1
= DAG
.getNode(ISD::FCANONICALIZE
, dl
, VT
, Quiet1
,
4657 return DAG
.getNode(NewOp
, dl
, VT
, Quiet0
, Quiet1
, Node
->getFlags());
4663 bool TargetLowering::expandCTPOP(SDNode
*Node
, SDValue
&Result
,
4664 SelectionDAG
&DAG
) const {
4666 EVT VT
= Node
->getValueType(0);
4667 EVT ShVT
= getShiftAmountTy(VT
, DAG
.getDataLayout());
4668 SDValue Op
= Node
->getOperand(0);
4669 unsigned Len
= VT
.getScalarSizeInBits();
4670 assert(VT
.isInteger() && "CTPOP not implemented for this type.");
4672 // TODO: Add support for irregular type lengths.
4673 if (!(Len
<= 128 && Len
% 8 == 0))
4676 // Only expand vector types if we have the appropriate vector bit operations.
4677 if (VT
.isVector() && (!isOperationLegalOrCustom(ISD::ADD
, VT
) ||
4678 !isOperationLegalOrCustom(ISD::SUB
, VT
) ||
4679 !isOperationLegalOrCustom(ISD::SRL
, VT
) ||
4680 (Len
!= 8 && !isOperationLegalOrCustom(ISD::MUL
, VT
)) ||
4681 !isOperationLegalOrCustomOrPromote(ISD::AND
, VT
)))
4684 // This is the "best" algorithm from
4685 // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
4687 DAG
.getConstant(APInt::getSplat(Len
, APInt(8, 0x55)), dl
, VT
);
4689 DAG
.getConstant(APInt::getSplat(Len
, APInt(8, 0x33)), dl
, VT
);
4691 DAG
.getConstant(APInt::getSplat(Len
, APInt(8, 0x0F)), dl
, VT
);
4693 DAG
.getConstant(APInt::getSplat(Len
, APInt(8, 0x01)), dl
, VT
);
4695 // v = v - ((v >> 1) & 0x55555555...)
4696 Op
= DAG
.getNode(ISD::SUB
, dl
, VT
, Op
,
4697 DAG
.getNode(ISD::AND
, dl
, VT
,
4698 DAG
.getNode(ISD::SRL
, dl
, VT
, Op
,
4699 DAG
.getConstant(1, dl
, ShVT
)),
4701 // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
4702 Op
= DAG
.getNode(ISD::ADD
, dl
, VT
, DAG
.getNode(ISD::AND
, dl
, VT
, Op
, Mask33
),
4703 DAG
.getNode(ISD::AND
, dl
, VT
,
4704 DAG
.getNode(ISD::SRL
, dl
, VT
, Op
,
4705 DAG
.getConstant(2, dl
, ShVT
)),
4707 // v = (v + (v >> 4)) & 0x0F0F0F0F...
4708 Op
= DAG
.getNode(ISD::AND
, dl
, VT
,
4709 DAG
.getNode(ISD::ADD
, dl
, VT
, Op
,
4710 DAG
.getNode(ISD::SRL
, dl
, VT
, Op
,
4711 DAG
.getConstant(4, dl
, ShVT
))),
4713 // v = (v * 0x01010101...) >> (Len - 8)
4716 DAG
.getNode(ISD::SRL
, dl
, VT
, DAG
.getNode(ISD::MUL
, dl
, VT
, Op
, Mask01
),
4717 DAG
.getConstant(Len
- 8, dl
, ShVT
));
4723 bool TargetLowering::expandCTLZ(SDNode
*Node
, SDValue
&Result
,
4724 SelectionDAG
&DAG
) const {
4726 EVT VT
= Node
->getValueType(0);
4727 EVT ShVT
= getShiftAmountTy(VT
, DAG
.getDataLayout());
4728 SDValue Op
= Node
->getOperand(0);
4729 unsigned NumBitsPerElt
= VT
.getScalarSizeInBits();
4731 // If the non-ZERO_UNDEF version is supported we can use that instead.
4732 if (Node
->getOpcode() == ISD::CTLZ_ZERO_UNDEF
&&
4733 isOperationLegalOrCustom(ISD::CTLZ
, VT
)) {
4734 Result
= DAG
.getNode(ISD::CTLZ
, dl
, VT
, Op
);
4738 // If the ZERO_UNDEF version is supported use that and handle the zero case.
4739 if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF
, VT
)) {
4741 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), VT
);
4742 SDValue CTLZ
= DAG
.getNode(ISD::CTLZ_ZERO_UNDEF
, dl
, VT
, Op
);
4743 SDValue Zero
= DAG
.getConstant(0, dl
, VT
);
4744 SDValue SrcIsZero
= DAG
.getSetCC(dl
, SetCCVT
, Op
, Zero
, ISD::SETEQ
);
4745 Result
= DAG
.getNode(ISD::SELECT
, dl
, VT
, SrcIsZero
,
4746 DAG
.getConstant(NumBitsPerElt
, dl
, VT
), CTLZ
);
4750 // Only expand vector types if we have the appropriate vector bit operations.
4751 if (VT
.isVector() && (!isPowerOf2_32(NumBitsPerElt
) ||
4752 !isOperationLegalOrCustom(ISD::CTPOP
, VT
) ||
4753 !isOperationLegalOrCustom(ISD::SRL
, VT
) ||
4754 !isOperationLegalOrCustomOrPromote(ISD::OR
, VT
)))
4757 // for now, we do this:
4758 // x = x | (x >> 1);
4759 // x = x | (x >> 2);
4761 // x = x | (x >>16);
4762 // x = x | (x >>32); // for 64-bit input
4763 // return popcount(~x);
4765 // Ref: "Hacker's Delight" by Henry Warren
4766 for (unsigned i
= 0; (1U << i
) <= (NumBitsPerElt
/ 2); ++i
) {
4767 SDValue Tmp
= DAG
.getConstant(1ULL << i
, dl
, ShVT
);
4768 Op
= DAG
.getNode(ISD::OR
, dl
, VT
, Op
,
4769 DAG
.getNode(ISD::SRL
, dl
, VT
, Op
, Tmp
));
4771 Op
= DAG
.getNOT(dl
, Op
, VT
);
4772 Result
= DAG
.getNode(ISD::CTPOP
, dl
, VT
, Op
);
4776 bool TargetLowering::expandCTTZ(SDNode
*Node
, SDValue
&Result
,
4777 SelectionDAG
&DAG
) const {
4779 EVT VT
= Node
->getValueType(0);
4780 SDValue Op
= Node
->getOperand(0);
4781 unsigned NumBitsPerElt
= VT
.getScalarSizeInBits();
4783 // If the non-ZERO_UNDEF version is supported we can use that instead.
4784 if (Node
->getOpcode() == ISD::CTTZ_ZERO_UNDEF
&&
4785 isOperationLegalOrCustom(ISD::CTTZ
, VT
)) {
4786 Result
= DAG
.getNode(ISD::CTTZ
, dl
, VT
, Op
);
4790 // If the ZERO_UNDEF version is supported use that and handle the zero case.
4791 if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF
, VT
)) {
4793 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), VT
);
4794 SDValue CTTZ
= DAG
.getNode(ISD::CTTZ_ZERO_UNDEF
, dl
, VT
, Op
);
4795 SDValue Zero
= DAG
.getConstant(0, dl
, VT
);
4796 SDValue SrcIsZero
= DAG
.getSetCC(dl
, SetCCVT
, Op
, Zero
, ISD::SETEQ
);
4797 Result
= DAG
.getNode(ISD::SELECT
, dl
, VT
, SrcIsZero
,
4798 DAG
.getConstant(NumBitsPerElt
, dl
, VT
), CTTZ
);
4802 // Only expand vector types if we have the appropriate vector bit operations.
4803 if (VT
.isVector() && (!isPowerOf2_32(NumBitsPerElt
) ||
4804 (!isOperationLegalOrCustom(ISD::CTPOP
, VT
) &&
4805 !isOperationLegalOrCustom(ISD::CTLZ
, VT
)) ||
4806 !isOperationLegalOrCustom(ISD::SUB
, VT
) ||
4807 !isOperationLegalOrCustomOrPromote(ISD::AND
, VT
) ||
4808 !isOperationLegalOrCustomOrPromote(ISD::XOR
, VT
)))
4811 // for now, we use: { return popcount(~x & (x - 1)); }
4812 // unless the target has ctlz but not ctpop, in which case we use:
4813 // { return 32 - nlz(~x & (x-1)); }
4814 // Ref: "Hacker's Delight" by Henry Warren
4815 SDValue Tmp
= DAG
.getNode(
4816 ISD::AND
, dl
, VT
, DAG
.getNOT(dl
, Op
, VT
),
4817 DAG
.getNode(ISD::SUB
, dl
, VT
, Op
, DAG
.getConstant(1, dl
, VT
)));
4819 // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
4820 if (isOperationLegal(ISD::CTLZ
, VT
) && !isOperationLegal(ISD::CTPOP
, VT
)) {
4822 DAG
.getNode(ISD::SUB
, dl
, VT
, DAG
.getConstant(NumBitsPerElt
, dl
, VT
),
4823 DAG
.getNode(ISD::CTLZ
, dl
, VT
, Tmp
));
4827 Result
= DAG
.getNode(ISD::CTPOP
, dl
, VT
, Tmp
);
4831 bool TargetLowering::expandABS(SDNode
*N
, SDValue
&Result
,
4832 SelectionDAG
&DAG
) const {
4834 EVT VT
= N
->getValueType(0);
4835 EVT ShVT
= getShiftAmountTy(VT
, DAG
.getDataLayout());
4836 SDValue Op
= N
->getOperand(0);
4838 // Only expand vector types if we have the appropriate vector operations.
4839 if (VT
.isVector() && (!isOperationLegalOrCustom(ISD::SRA
, VT
) ||
4840 !isOperationLegalOrCustom(ISD::ADD
, VT
) ||
4841 !isOperationLegalOrCustomOrPromote(ISD::XOR
, VT
)))
4845 DAG
.getNode(ISD::SRA
, dl
, VT
, Op
,
4846 DAG
.getConstant(VT
.getScalarSizeInBits() - 1, dl
, ShVT
));
4847 SDValue Add
= DAG
.getNode(ISD::ADD
, dl
, VT
, Op
, Shift
);
4848 Result
= DAG
.getNode(ISD::XOR
, dl
, VT
, Add
, Shift
);
4852 SDValue
TargetLowering::scalarizeVectorLoad(LoadSDNode
*LD
,
4853 SelectionDAG
&DAG
) const {
4855 SDValue Chain
= LD
->getChain();
4856 SDValue BasePTR
= LD
->getBasePtr();
4857 EVT SrcVT
= LD
->getMemoryVT();
4858 ISD::LoadExtType ExtType
= LD
->getExtensionType();
4860 unsigned NumElem
= SrcVT
.getVectorNumElements();
4862 EVT SrcEltVT
= SrcVT
.getScalarType();
4863 EVT DstEltVT
= LD
->getValueType(0).getScalarType();
4865 unsigned Stride
= SrcEltVT
.getSizeInBits() / 8;
4866 assert(SrcEltVT
.isByteSized());
4868 SmallVector
<SDValue
, 8> Vals
;
4869 SmallVector
<SDValue
, 8> LoadChains
;
4871 for (unsigned Idx
= 0; Idx
< NumElem
; ++Idx
) {
4872 SDValue ScalarLoad
=
4873 DAG
.getExtLoad(ExtType
, SL
, DstEltVT
, Chain
, BasePTR
,
4874 LD
->getPointerInfo().getWithOffset(Idx
* Stride
),
4875 SrcEltVT
, MinAlign(LD
->getAlignment(), Idx
* Stride
),
4876 LD
->getMemOperand()->getFlags(), LD
->getAAInfo());
4878 BasePTR
= DAG
.getObjectPtrOffset(SL
, BasePTR
, Stride
);
4880 Vals
.push_back(ScalarLoad
.getValue(0));
4881 LoadChains
.push_back(ScalarLoad
.getValue(1));
4884 SDValue NewChain
= DAG
.getNode(ISD::TokenFactor
, SL
, MVT::Other
, LoadChains
);
4885 SDValue Value
= DAG
.getBuildVector(LD
->getValueType(0), SL
, Vals
);
4887 return DAG
.getMergeValues({Value
, NewChain
}, SL
);
4890 SDValue
TargetLowering::scalarizeVectorStore(StoreSDNode
*ST
,
4891 SelectionDAG
&DAG
) const {
4894 SDValue Chain
= ST
->getChain();
4895 SDValue BasePtr
= ST
->getBasePtr();
4896 SDValue Value
= ST
->getValue();
4897 EVT StVT
= ST
->getMemoryVT();
4899 // The type of the data we want to save
4900 EVT RegVT
= Value
.getValueType();
4901 EVT RegSclVT
= RegVT
.getScalarType();
4903 // The type of data as saved in memory.
4904 EVT MemSclVT
= StVT
.getScalarType();
4906 EVT IdxVT
= getVectorIdxTy(DAG
.getDataLayout());
4907 unsigned NumElem
= StVT
.getVectorNumElements();
4909 // A vector must always be stored in memory as-is, i.e. without any padding
4910 // between the elements, since various code depend on it, e.g. in the
4911 // handling of a bitcast of a vector type to int, which may be done with a
4912 // vector store followed by an integer load. A vector that does not have
4913 // elements that are byte-sized must therefore be stored as an integer
4914 // built out of the extracted vector elements.
4915 if (!MemSclVT
.isByteSized()) {
4916 unsigned NumBits
= StVT
.getSizeInBits();
4917 EVT IntVT
= EVT::getIntegerVT(*DAG
.getContext(), NumBits
);
4919 SDValue CurrVal
= DAG
.getConstant(0, SL
, IntVT
);
4921 for (unsigned Idx
= 0; Idx
< NumElem
; ++Idx
) {
4922 SDValue Elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SL
, RegSclVT
, Value
,
4923 DAG
.getConstant(Idx
, SL
, IdxVT
));
4924 SDValue Trunc
= DAG
.getNode(ISD::TRUNCATE
, SL
, MemSclVT
, Elt
);
4925 SDValue ExtElt
= DAG
.getNode(ISD::ZERO_EXTEND
, SL
, IntVT
, Trunc
);
4926 unsigned ShiftIntoIdx
=
4927 (DAG
.getDataLayout().isBigEndian() ? (NumElem
- 1) - Idx
: Idx
);
4928 SDValue ShiftAmount
=
4929 DAG
.getConstant(ShiftIntoIdx
* MemSclVT
.getSizeInBits(), SL
, IntVT
);
4930 SDValue ShiftedElt
=
4931 DAG
.getNode(ISD::SHL
, SL
, IntVT
, ExtElt
, ShiftAmount
);
4932 CurrVal
= DAG
.getNode(ISD::OR
, SL
, IntVT
, CurrVal
, ShiftedElt
);
4935 return DAG
.getStore(Chain
, SL
, CurrVal
, BasePtr
, ST
->getPointerInfo(),
4936 ST
->getAlignment(), ST
->getMemOperand()->getFlags(),
4940 // Store Stride in bytes
4941 unsigned Stride
= MemSclVT
.getSizeInBits() / 8;
4942 assert(Stride
&& "Zero stride!");
4943 // Extract each of the elements from the original vector and save them into
4944 // memory individually.
4945 SmallVector
<SDValue
, 8> Stores
;
4946 for (unsigned Idx
= 0; Idx
< NumElem
; ++Idx
) {
4947 SDValue Elt
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, SL
, RegSclVT
, Value
,
4948 DAG
.getConstant(Idx
, SL
, IdxVT
));
4950 SDValue Ptr
= DAG
.getObjectPtrOffset(SL
, BasePtr
, Idx
* Stride
);
4952 // This scalar TruncStore may be illegal, but we legalize it later.
4953 SDValue Store
= DAG
.getTruncStore(
4954 Chain
, SL
, Elt
, Ptr
, ST
->getPointerInfo().getWithOffset(Idx
* Stride
),
4955 MemSclVT
, MinAlign(ST
->getAlignment(), Idx
* Stride
),
4956 ST
->getMemOperand()->getFlags(), ST
->getAAInfo());
4958 Stores
.push_back(Store
);
4961 return DAG
.getNode(ISD::TokenFactor
, SL
, MVT::Other
, Stores
);
4964 std::pair
<SDValue
, SDValue
>
4965 TargetLowering::expandUnalignedLoad(LoadSDNode
*LD
, SelectionDAG
&DAG
) const {
4966 assert(LD
->getAddressingMode() == ISD::UNINDEXED
&&
4967 "unaligned indexed loads not implemented!");
4968 SDValue Chain
= LD
->getChain();
4969 SDValue Ptr
= LD
->getBasePtr();
4970 EVT VT
= LD
->getValueType(0);
4971 EVT LoadedVT
= LD
->getMemoryVT();
4973 auto &MF
= DAG
.getMachineFunction();
4975 if (VT
.isFloatingPoint() || VT
.isVector()) {
4976 EVT intVT
= EVT::getIntegerVT(*DAG
.getContext(), LoadedVT
.getSizeInBits());
4977 if (isTypeLegal(intVT
) && isTypeLegal(LoadedVT
)) {
4978 if (!isOperationLegalOrCustom(ISD::LOAD
, intVT
) &&
4979 LoadedVT
.isVector()) {
4980 // Scalarize the load and let the individual components be handled.
4981 SDValue Scalarized
= scalarizeVectorLoad(LD
, DAG
);
4982 if (Scalarized
->getOpcode() == ISD::MERGE_VALUES
)
4983 return std::make_pair(Scalarized
.getOperand(0), Scalarized
.getOperand(1));
4984 return std::make_pair(Scalarized
.getValue(0), Scalarized
.getValue(1));
4987 // Expand to a (misaligned) integer load of the same size,
4988 // then bitconvert to floating point or vector.
4989 SDValue newLoad
= DAG
.getLoad(intVT
, dl
, Chain
, Ptr
,
4990 LD
->getMemOperand());
4991 SDValue Result
= DAG
.getNode(ISD::BITCAST
, dl
, LoadedVT
, newLoad
);
4993 Result
= DAG
.getNode(VT
.isFloatingPoint() ? ISD::FP_EXTEND
:
4994 ISD::ANY_EXTEND
, dl
, VT
, Result
);
4996 return std::make_pair(Result
, newLoad
.getValue(1));
4999 // Copy the value to a (aligned) stack slot using (unaligned) integer
5000 // loads and stores, then do a (aligned) load from the stack slot.
5001 MVT RegVT
= getRegisterType(*DAG
.getContext(), intVT
);
5002 unsigned LoadedBytes
= LoadedVT
.getStoreSize();
5003 unsigned RegBytes
= RegVT
.getSizeInBits() / 8;
5004 unsigned NumRegs
= (LoadedBytes
+ RegBytes
- 1) / RegBytes
;
5006 // Make sure the stack slot is also aligned for the register type.
5007 SDValue StackBase
= DAG
.CreateStackTemporary(LoadedVT
, RegVT
);
5008 auto FrameIndex
= cast
<FrameIndexSDNode
>(StackBase
.getNode())->getIndex();
5009 SmallVector
<SDValue
, 8> Stores
;
5010 SDValue StackPtr
= StackBase
;
5011 unsigned Offset
= 0;
5013 EVT PtrVT
= Ptr
.getValueType();
5014 EVT StackPtrVT
= StackPtr
.getValueType();
5016 SDValue PtrIncrement
= DAG
.getConstant(RegBytes
, dl
, PtrVT
);
5017 SDValue StackPtrIncrement
= DAG
.getConstant(RegBytes
, dl
, StackPtrVT
);
5019 // Do all but one copies using the full register width.
5020 for (unsigned i
= 1; i
< NumRegs
; i
++) {
5021 // Load one integer register's worth from the original location.
5022 SDValue Load
= DAG
.getLoad(
5023 RegVT
, dl
, Chain
, Ptr
, LD
->getPointerInfo().getWithOffset(Offset
),
5024 MinAlign(LD
->getAlignment(), Offset
), LD
->getMemOperand()->getFlags(),
5026 // Follow the load with a store to the stack slot. Remember the store.
5027 Stores
.push_back(DAG
.getStore(
5028 Load
.getValue(1), dl
, Load
, StackPtr
,
5029 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, Offset
)));
5030 // Increment the pointers.
5033 Ptr
= DAG
.getObjectPtrOffset(dl
, Ptr
, PtrIncrement
);
5034 StackPtr
= DAG
.getObjectPtrOffset(dl
, StackPtr
, StackPtrIncrement
);
5037 // The last copy may be partial. Do an extending load.
5038 EVT MemVT
= EVT::getIntegerVT(*DAG
.getContext(),
5039 8 * (LoadedBytes
- Offset
));
5041 DAG
.getExtLoad(ISD::EXTLOAD
, dl
, RegVT
, Chain
, Ptr
,
5042 LD
->getPointerInfo().getWithOffset(Offset
), MemVT
,
5043 MinAlign(LD
->getAlignment(), Offset
),
5044 LD
->getMemOperand()->getFlags(), LD
->getAAInfo());
5045 // Follow the load with a store to the stack slot. Remember the store.
5046 // On big-endian machines this requires a truncating store to ensure
5047 // that the bits end up in the right place.
5048 Stores
.push_back(DAG
.getTruncStore(
5049 Load
.getValue(1), dl
, Load
, StackPtr
,
5050 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, Offset
), MemVT
));
5052 // The order of the stores doesn't matter - say it with a TokenFactor.
5053 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
5055 // Finally, perform the original load only redirected to the stack slot.
5056 Load
= DAG
.getExtLoad(LD
->getExtensionType(), dl
, VT
, TF
, StackBase
,
5057 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, 0),
5060 // Callers expect a MERGE_VALUES node.
5061 return std::make_pair(Load
, TF
);
5064 assert(LoadedVT
.isInteger() && !LoadedVT
.isVector() &&
5065 "Unaligned load of unsupported type.");
5067 // Compute the new VT that is half the size of the old one. This is an
5069 unsigned NumBits
= LoadedVT
.getSizeInBits();
5071 NewLoadedVT
= EVT::getIntegerVT(*DAG
.getContext(), NumBits
/2);
5074 unsigned Alignment
= LD
->getAlignment();
5075 unsigned IncrementSize
= NumBits
/ 8;
5076 ISD::LoadExtType HiExtType
= LD
->getExtensionType();
5078 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
5079 if (HiExtType
== ISD::NON_EXTLOAD
)
5080 HiExtType
= ISD::ZEXTLOAD
;
5082 // Load the value in two parts
5084 if (DAG
.getDataLayout().isLittleEndian()) {
5085 Lo
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, VT
, Chain
, Ptr
, LD
->getPointerInfo(),
5086 NewLoadedVT
, Alignment
, LD
->getMemOperand()->getFlags(),
5089 Ptr
= DAG
.getObjectPtrOffset(dl
, Ptr
, IncrementSize
);
5090 Hi
= DAG
.getExtLoad(HiExtType
, dl
, VT
, Chain
, Ptr
,
5091 LD
->getPointerInfo().getWithOffset(IncrementSize
),
5092 NewLoadedVT
, MinAlign(Alignment
, IncrementSize
),
5093 LD
->getMemOperand()->getFlags(), LD
->getAAInfo());
5095 Hi
= DAG
.getExtLoad(HiExtType
, dl
, VT
, Chain
, Ptr
, LD
->getPointerInfo(),
5096 NewLoadedVT
, Alignment
, LD
->getMemOperand()->getFlags(),
5099 Ptr
= DAG
.getObjectPtrOffset(dl
, Ptr
, IncrementSize
);
5100 Lo
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, VT
, Chain
, Ptr
,
5101 LD
->getPointerInfo().getWithOffset(IncrementSize
),
5102 NewLoadedVT
, MinAlign(Alignment
, IncrementSize
),
5103 LD
->getMemOperand()->getFlags(), LD
->getAAInfo());
5106 // aggregate the two parts
5107 SDValue ShiftAmount
=
5108 DAG
.getConstant(NumBits
, dl
, getShiftAmountTy(Hi
.getValueType(),
5109 DAG
.getDataLayout()));
5110 SDValue Result
= DAG
.getNode(ISD::SHL
, dl
, VT
, Hi
, ShiftAmount
);
5111 Result
= DAG
.getNode(ISD::OR
, dl
, VT
, Result
, Lo
);
5113 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Lo
.getValue(1),
5116 return std::make_pair(Result
, TF
);
5119 SDValue
TargetLowering::expandUnalignedStore(StoreSDNode
*ST
,
5120 SelectionDAG
&DAG
) const {
5121 assert(ST
->getAddressingMode() == ISD::UNINDEXED
&&
5122 "unaligned indexed stores not implemented!");
5123 SDValue Chain
= ST
->getChain();
5124 SDValue Ptr
= ST
->getBasePtr();
5125 SDValue Val
= ST
->getValue();
5126 EVT VT
= Val
.getValueType();
5127 int Alignment
= ST
->getAlignment();
5128 auto &MF
= DAG
.getMachineFunction();
5129 EVT MemVT
= ST
->getMemoryVT();
5132 if (MemVT
.isFloatingPoint() || MemVT
.isVector()) {
5133 EVT intVT
= EVT::getIntegerVT(*DAG
.getContext(), VT
.getSizeInBits());
5134 if (isTypeLegal(intVT
)) {
5135 if (!isOperationLegalOrCustom(ISD::STORE
, intVT
) &&
5137 // Scalarize the store and let the individual components be handled.
5138 SDValue Result
= scalarizeVectorStore(ST
, DAG
);
5142 // Expand to a bitconvert of the value to the integer type of the
5143 // same size, then a (misaligned) int store.
5144 // FIXME: Does not handle truncating floating point stores!
5145 SDValue Result
= DAG
.getNode(ISD::BITCAST
, dl
, intVT
, Val
);
5146 Result
= DAG
.getStore(Chain
, dl
, Result
, Ptr
, ST
->getPointerInfo(),
5147 Alignment
, ST
->getMemOperand()->getFlags());
5150 // Do a (aligned) store to a stack slot, then copy from the stack slot
5151 // to the final destination using (unaligned) integer loads and stores.
5152 EVT StoredVT
= ST
->getMemoryVT();
5154 getRegisterType(*DAG
.getContext(),
5155 EVT::getIntegerVT(*DAG
.getContext(),
5156 StoredVT
.getSizeInBits()));
5157 EVT PtrVT
= Ptr
.getValueType();
5158 unsigned StoredBytes
= StoredVT
.getStoreSize();
5159 unsigned RegBytes
= RegVT
.getSizeInBits() / 8;
5160 unsigned NumRegs
= (StoredBytes
+ RegBytes
- 1) / RegBytes
;
5162 // Make sure the stack slot is also aligned for the register type.
5163 SDValue StackPtr
= DAG
.CreateStackTemporary(StoredVT
, RegVT
);
5164 auto FrameIndex
= cast
<FrameIndexSDNode
>(StackPtr
.getNode())->getIndex();
5166 // Perform the original store, only redirected to the stack slot.
5167 SDValue Store
= DAG
.getTruncStore(
5168 Chain
, dl
, Val
, StackPtr
,
5169 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, 0), StoredVT
);
5171 EVT StackPtrVT
= StackPtr
.getValueType();
5173 SDValue PtrIncrement
= DAG
.getConstant(RegBytes
, dl
, PtrVT
);
5174 SDValue StackPtrIncrement
= DAG
.getConstant(RegBytes
, dl
, StackPtrVT
);
5175 SmallVector
<SDValue
, 8> Stores
;
5176 unsigned Offset
= 0;
5178 // Do all but one copies using the full register width.
5179 for (unsigned i
= 1; i
< NumRegs
; i
++) {
5180 // Load one integer register's worth from the stack slot.
5181 SDValue Load
= DAG
.getLoad(
5182 RegVT
, dl
, Store
, StackPtr
,
5183 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, Offset
));
5184 // Store it to the final location. Remember the store.
5185 Stores
.push_back(DAG
.getStore(Load
.getValue(1), dl
, Load
, Ptr
,
5186 ST
->getPointerInfo().getWithOffset(Offset
),
5187 MinAlign(ST
->getAlignment(), Offset
),
5188 ST
->getMemOperand()->getFlags()));
5189 // Increment the pointers.
5191 StackPtr
= DAG
.getObjectPtrOffset(dl
, StackPtr
, StackPtrIncrement
);
5192 Ptr
= DAG
.getObjectPtrOffset(dl
, Ptr
, PtrIncrement
);
5195 // The last store may be partial. Do a truncating store. On big-endian
5196 // machines this requires an extending load from the stack slot to ensure
5197 // that the bits are in the right place.
5198 EVT MemVT
= EVT::getIntegerVT(*DAG
.getContext(),
5199 8 * (StoredBytes
- Offset
));
5201 // Load from the stack slot.
5202 SDValue Load
= DAG
.getExtLoad(
5203 ISD::EXTLOAD
, dl
, RegVT
, Store
, StackPtr
,
5204 MachinePointerInfo::getFixedStack(MF
, FrameIndex
, Offset
), MemVT
);
5207 DAG
.getTruncStore(Load
.getValue(1), dl
, Load
, Ptr
,
5208 ST
->getPointerInfo().getWithOffset(Offset
), MemVT
,
5209 MinAlign(ST
->getAlignment(), Offset
),
5210 ST
->getMemOperand()->getFlags(), ST
->getAAInfo()));
5211 // The order of the stores doesn't matter - say it with a TokenFactor.
5212 SDValue Result
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
5216 assert(ST
->getMemoryVT().isInteger() &&
5217 !ST
->getMemoryVT().isVector() &&
5218 "Unaligned store of unknown type.");
5219 // Get the half-size VT
5220 EVT NewStoredVT
= ST
->getMemoryVT().getHalfSizedIntegerVT(*DAG
.getContext());
5221 int NumBits
= NewStoredVT
.getSizeInBits();
5222 int IncrementSize
= NumBits
/ 8;
5224 // Divide the stored value in two parts.
5225 SDValue ShiftAmount
=
5226 DAG
.getConstant(NumBits
, dl
, getShiftAmountTy(Val
.getValueType(),
5227 DAG
.getDataLayout()));
5229 SDValue Hi
= DAG
.getNode(ISD::SRL
, dl
, VT
, Val
, ShiftAmount
);
5231 // Store the two parts
5232 SDValue Store1
, Store2
;
5233 Store1
= DAG
.getTruncStore(Chain
, dl
,
5234 DAG
.getDataLayout().isLittleEndian() ? Lo
: Hi
,
5235 Ptr
, ST
->getPointerInfo(), NewStoredVT
, Alignment
,
5236 ST
->getMemOperand()->getFlags());
5238 Ptr
= DAG
.getObjectPtrOffset(dl
, Ptr
, IncrementSize
);
5239 Alignment
= MinAlign(Alignment
, IncrementSize
);
5240 Store2
= DAG
.getTruncStore(
5241 Chain
, dl
, DAG
.getDataLayout().isLittleEndian() ? Hi
: Lo
, Ptr
,
5242 ST
->getPointerInfo().getWithOffset(IncrementSize
), NewStoredVT
, Alignment
,
5243 ST
->getMemOperand()->getFlags(), ST
->getAAInfo());
5246 DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Store1
, Store2
);
5251 TargetLowering::IncrementMemoryAddress(SDValue Addr
, SDValue Mask
,
5252 const SDLoc
&DL
, EVT DataVT
,
5254 bool IsCompressedMemory
) const {
5256 EVT AddrVT
= Addr
.getValueType();
5257 EVT MaskVT
= Mask
.getValueType();
5258 assert(DataVT
.getVectorNumElements() == MaskVT
.getVectorNumElements() &&
5259 "Incompatible types of Data and Mask");
5260 if (IsCompressedMemory
) {
5261 // Incrementing the pointer according to number of '1's in the mask.
5262 EVT MaskIntVT
= EVT::getIntegerVT(*DAG
.getContext(), MaskVT
.getSizeInBits());
5263 SDValue MaskInIntReg
= DAG
.getBitcast(MaskIntVT
, Mask
);
5264 if (MaskIntVT
.getSizeInBits() < 32) {
5265 MaskInIntReg
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i32
, MaskInIntReg
);
5266 MaskIntVT
= MVT::i32
;
5269 // Count '1's with POPCNT.
5270 Increment
= DAG
.getNode(ISD::CTPOP
, DL
, MaskIntVT
, MaskInIntReg
);
5271 Increment
= DAG
.getZExtOrTrunc(Increment
, DL
, AddrVT
);
5272 // Scale is an element size in bytes.
5273 SDValue Scale
= DAG
.getConstant(DataVT
.getScalarSizeInBits() / 8, DL
,
5275 Increment
= DAG
.getNode(ISD::MUL
, DL
, AddrVT
, Increment
, Scale
);
5277 Increment
= DAG
.getConstant(DataVT
.getStoreSize(), DL
, AddrVT
);
5279 return DAG
.getNode(ISD::ADD
, DL
, AddrVT
, Addr
, Increment
);
5282 static SDValue
clampDynamicVectorIndex(SelectionDAG
&DAG
,
5286 if (isa
<ConstantSDNode
>(Idx
))
5289 EVT IdxVT
= Idx
.getValueType();
5290 unsigned NElts
= VecVT
.getVectorNumElements();
5291 if (isPowerOf2_32(NElts
)) {
5292 APInt Imm
= APInt::getLowBitsSet(IdxVT
.getSizeInBits(),
5294 return DAG
.getNode(ISD::AND
, dl
, IdxVT
, Idx
,
5295 DAG
.getConstant(Imm
, dl
, IdxVT
));
5298 return DAG
.getNode(ISD::UMIN
, dl
, IdxVT
, Idx
,
5299 DAG
.getConstant(NElts
- 1, dl
, IdxVT
));
5302 SDValue
TargetLowering::getVectorElementPointer(SelectionDAG
&DAG
,
5303 SDValue VecPtr
, EVT VecVT
,
5304 SDValue Index
) const {
5306 // Make sure the index type is big enough to compute in.
5307 Index
= DAG
.getZExtOrTrunc(Index
, dl
, VecPtr
.getValueType());
5309 EVT EltVT
= VecVT
.getVectorElementType();
5311 // Calculate the element offset and add it to the pointer.
5312 unsigned EltSize
= EltVT
.getSizeInBits() / 8; // FIXME: should be ABI size.
5313 assert(EltSize
* 8 == EltVT
.getSizeInBits() &&
5314 "Converting bits to bytes lost precision");
5316 Index
= clampDynamicVectorIndex(DAG
, Index
, VecVT
, dl
);
5318 EVT IdxVT
= Index
.getValueType();
5320 Index
= DAG
.getNode(ISD::MUL
, dl
, IdxVT
, Index
,
5321 DAG
.getConstant(EltSize
, dl
, IdxVT
));
5322 return DAG
.getNode(ISD::ADD
, dl
, IdxVT
, VecPtr
, Index
);
5325 //===----------------------------------------------------------------------===//
5326 // Implementation of Emulated TLS Model
5327 //===----------------------------------------------------------------------===//
5329 SDValue
TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode
*GA
,
5330 SelectionDAG
&DAG
) const {
5331 // Access to address of TLS varialbe xyz is lowered to a function call:
5332 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
5333 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5334 PointerType
*VoidPtrType
= Type::getInt8PtrTy(*DAG
.getContext());
5339 std::string NameString
= ("__emutls_v." + GA
->getGlobal()->getName()).str();
5340 Module
*VariableModule
= const_cast<Module
*>(GA
->getGlobal()->getParent());
5341 StringRef
EmuTlsVarName(NameString
);
5342 GlobalVariable
*EmuTlsVar
= VariableModule
->getNamedGlobal(EmuTlsVarName
);
5343 assert(EmuTlsVar
&& "Cannot find EmuTlsVar ");
5344 Entry
.Node
= DAG
.getGlobalAddress(EmuTlsVar
, dl
, PtrVT
);
5345 Entry
.Ty
= VoidPtrType
;
5346 Args
.push_back(Entry
);
5348 SDValue EmuTlsGetAddr
= DAG
.getExternalSymbol("__emutls_get_address", PtrVT
);
5350 TargetLowering::CallLoweringInfo
CLI(DAG
);
5351 CLI
.setDebugLoc(dl
).setChain(DAG
.getEntryNode());
5352 CLI
.setLibCallee(CallingConv::C
, VoidPtrType
, EmuTlsGetAddr
, std::move(Args
));
5353 std::pair
<SDValue
, SDValue
> CallResult
= LowerCallTo(CLI
);
5355 // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
5356 // At last for X86 targets, maybe good for other targets too?
5357 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
5358 MFI
.setAdjustsStack(true); // Is this only for X86 target?
5359 MFI
.setHasCalls(true);
5361 assert((GA
->getOffset() == 0) &&
5362 "Emulated TLS must have zero offset in GlobalAddressSDNode");
5363 return CallResult
.first
;
5366 SDValue
TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op
,
5367 SelectionDAG
&DAG
) const {
5368 assert((Op
->getOpcode() == ISD::SETCC
) && "Input has to be a SETCC node.");
5371 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
5373 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1))) {
5374 if (C
->isNullValue() && CC
== ISD::SETEQ
) {
5375 EVT VT
= Op
.getOperand(0).getValueType();
5376 SDValue Zext
= Op
.getOperand(0);
5377 if (VT
.bitsLT(MVT::i32
)) {
5379 Zext
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VT
, Op
.getOperand(0));
5381 unsigned Log2b
= Log2_32(VT
.getSizeInBits());
5382 SDValue Clz
= DAG
.getNode(ISD::CTLZ
, dl
, VT
, Zext
);
5383 SDValue Scc
= DAG
.getNode(ISD::SRL
, dl
, VT
, Clz
,
5384 DAG
.getConstant(Log2b
, dl
, MVT::i32
));
5385 return DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, Scc
);
5391 SDValue
TargetLowering::expandAddSubSat(SDNode
*Node
, SelectionDAG
&DAG
) const {
5392 unsigned Opcode
= Node
->getOpcode();
5393 SDValue LHS
= Node
->getOperand(0);
5394 SDValue RHS
= Node
->getOperand(1);
5395 EVT VT
= LHS
.getValueType();
5398 assert(LHS
.getValueType().isInteger() && "Expected operands to be integers");
5399 assert(RHS
.getValueType().isInteger() && "Expected operands to be integers");
5400 assert(LHS
.getValueType() == RHS
.getValueType() &&
5401 "Expected both operands to be the same type");
5403 // usub.sat(a, b) -> umax(a, b) - b
5404 if (Opcode
== ISD::USUBSAT
&& isOperationLegalOrCustom(ISD::UMAX
, VT
)) {
5405 SDValue Max
= DAG
.getNode(ISD::UMAX
, dl
, VT
, LHS
, RHS
);
5406 return DAG
.getNode(ISD::SUB
, dl
, VT
, Max
, RHS
);
5409 if (Opcode
== ISD::UADDSAT
&& isOperationLegalOrCustom(ISD::UMIN
, VT
)) {
5410 SDValue InvRHS
= DAG
.getNOT(dl
, RHS
, VT
);
5411 SDValue Min
= DAG
.getNode(ISD::UMIN
, dl
, VT
, LHS
, InvRHS
);
5412 return DAG
.getNode(ISD::ADD
, dl
, VT
, Min
, RHS
);
5415 unsigned OverflowOp
;
5418 OverflowOp
= ISD::SADDO
;
5421 OverflowOp
= ISD::UADDO
;
5424 OverflowOp
= ISD::SSUBO
;
5427 OverflowOp
= ISD::USUBO
;
5430 llvm_unreachable("Expected method to receive signed or unsigned saturation "
5431 "addition or subtraction node.");
5434 unsigned BitWidth
= LHS
.getScalarValueSizeInBits();
5435 EVT ResultType
= LHS
.getValueType();
5437 getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), ResultType
);
5439 DAG
.getNode(OverflowOp
, dl
, DAG
.getVTList(ResultType
, BoolVT
), LHS
, RHS
);
5440 SDValue SumDiff
= Result
.getValue(0);
5441 SDValue Overflow
= Result
.getValue(1);
5442 SDValue Zero
= DAG
.getConstant(0, dl
, ResultType
);
5444 if (Opcode
== ISD::UADDSAT
) {
5445 // Just need to check overflow for SatMax.
5446 APInt MaxVal
= APInt::getMaxValue(BitWidth
);
5447 SDValue SatMax
= DAG
.getConstant(MaxVal
, dl
, ResultType
);
5448 return DAG
.getSelect(dl
, ResultType
, Overflow
, SatMax
, SumDiff
);
5449 } else if (Opcode
== ISD::USUBSAT
) {
5450 // Just need to check overflow for SatMin.
5451 APInt MinVal
= APInt::getMinValue(BitWidth
);
5452 SDValue SatMin
= DAG
.getConstant(MinVal
, dl
, ResultType
);
5453 return DAG
.getSelect(dl
, ResultType
, Overflow
, SatMin
, SumDiff
);
5455 // SatMax -> Overflow && SumDiff < 0
5456 // SatMin -> Overflow && SumDiff >= 0
5457 APInt MinVal
= APInt::getSignedMinValue(BitWidth
);
5458 APInt MaxVal
= APInt::getSignedMaxValue(BitWidth
);
5459 SDValue SatMin
= DAG
.getConstant(MinVal
, dl
, ResultType
);
5460 SDValue SatMax
= DAG
.getConstant(MaxVal
, dl
, ResultType
);
5461 SDValue SumNeg
= DAG
.getSetCC(dl
, BoolVT
, SumDiff
, Zero
, ISD::SETLT
);
5462 Result
= DAG
.getSelect(dl
, ResultType
, SumNeg
, SatMax
, SatMin
);
5463 return DAG
.getSelect(dl
, ResultType
, Overflow
, Result
, SumDiff
);
5468 TargetLowering::expandFixedPointMul(SDNode
*Node
, SelectionDAG
&DAG
) const {
5469 assert((Node
->getOpcode() == ISD::SMULFIX
||
5470 Node
->getOpcode() == ISD::UMULFIX
) &&
5471 "Expected opcode to be SMULFIX or UMULFIX.");
5474 SDValue LHS
= Node
->getOperand(0);
5475 SDValue RHS
= Node
->getOperand(1);
5476 EVT VT
= LHS
.getValueType();
5477 unsigned Scale
= Node
->getConstantOperandVal(2);
5479 // [us]mul.fix(a, b, 0) -> mul(a, b)
5481 if (VT
.isVector() && !isOperationLegalOrCustom(ISD::MUL
, VT
))
5483 return DAG
.getNode(ISD::MUL
, dl
, VT
, LHS
, RHS
);
5486 unsigned VTSize
= VT
.getScalarSizeInBits();
5487 bool Signed
= Node
->getOpcode() == ISD::SMULFIX
;
5489 assert(((Signed
&& Scale
< VTSize
) || (!Signed
&& Scale
<= VTSize
)) &&
5490 "Expected scale to be less than the number of bits if signed or at "
5491 "most the number of bits if unsigned.");
5492 assert(LHS
.getValueType() == RHS
.getValueType() &&
5493 "Expected both operands to be the same type");
5495 // Get the upper and lower bits of the result.
5497 unsigned LoHiOp
= Signed
? ISD::SMUL_LOHI
: ISD::UMUL_LOHI
;
5498 unsigned HiOp
= Signed
? ISD::MULHS
: ISD::MULHU
;
5499 if (isOperationLegalOrCustom(LoHiOp
, VT
)) {
5500 SDValue Result
= DAG
.getNode(LoHiOp
, dl
, DAG
.getVTList(VT
, VT
), LHS
, RHS
);
5501 Lo
= Result
.getValue(0);
5502 Hi
= Result
.getValue(1);
5503 } else if (isOperationLegalOrCustom(HiOp
, VT
)) {
5504 Lo
= DAG
.getNode(ISD::MUL
, dl
, VT
, LHS
, RHS
);
5505 Hi
= DAG
.getNode(HiOp
, dl
, VT
, LHS
, RHS
);
5506 } else if (VT
.isVector()) {
5509 report_fatal_error("Unable to expand fixed point multiplication.");
5512 if (Scale
== VTSize
)
5513 // Result is just the top half since we'd be shifting by the width of the
5517 // The result will need to be shifted right by the scale since both operands
5518 // are scaled. The result is given to us in 2 halves, so we only want part of
5519 // both in the result.
5520 EVT ShiftTy
= getShiftAmountTy(VT
, DAG
.getDataLayout());
5521 return DAG
.getNode(ISD::FSHR
, dl
, VT
, Hi
, Lo
,
5522 DAG
.getConstant(Scale
, dl
, ShiftTy
));
5525 std::pair
<SDValue
, SDValue
> TargetLowering::expandMULO(
5526 SDNode
*Node
, SelectionDAG
&DAG
) const {
5528 EVT VT
= Node
->getValueType(0);
5529 EVT WideVT
= EVT::getIntegerVT(*DAG
.getContext(), VT
.getSizeInBits() * 2);
5530 SDValue LHS
= Node
->getOperand(0);
5531 SDValue RHS
= Node
->getOperand(1);
5534 static const unsigned Ops
[2][3] =
5535 { { ISD::MULHU
, ISD::UMUL_LOHI
, ISD::ZERO_EXTEND
},
5536 { ISD::MULHS
, ISD::SMUL_LOHI
, ISD::SIGN_EXTEND
}};
5537 bool isSigned
= Node
->getOpcode() == ISD::SMULO
;
5538 if (isOperationLegalOrCustom(Ops
[isSigned
][0], VT
)) {
5539 BottomHalf
= DAG
.getNode(ISD::MUL
, dl
, VT
, LHS
, RHS
);
5540 TopHalf
= DAG
.getNode(Ops
[isSigned
][0], dl
, VT
, LHS
, RHS
);
5541 } else if (isOperationLegalOrCustom(Ops
[isSigned
][1], VT
)) {
5542 BottomHalf
= DAG
.getNode(Ops
[isSigned
][1], dl
, DAG
.getVTList(VT
, VT
), LHS
,
5544 TopHalf
= BottomHalf
.getValue(1);
5545 } else if (isTypeLegal(WideVT
)) {
5546 LHS
= DAG
.getNode(Ops
[isSigned
][2], dl
, WideVT
, LHS
);
5547 RHS
= DAG
.getNode(Ops
[isSigned
][2], dl
, WideVT
, RHS
);
5548 SDValue Mul
= DAG
.getNode(ISD::MUL
, dl
, WideVT
, LHS
, RHS
);
5549 BottomHalf
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, VT
, Mul
,
5550 DAG
.getIntPtrConstant(0, dl
));
5551 TopHalf
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
, VT
, Mul
,
5552 DAG
.getIntPtrConstant(1, dl
));
5554 // We can fall back to a libcall with an illegal type for the MUL if we
5555 // have a libcall big enough.
5556 // Also, we can fall back to a division in some cases, but that's a big
5557 // performance hit in the general case.
5558 RTLIB::Libcall LC
= RTLIB::UNKNOWN_LIBCALL
;
5559 if (WideVT
== MVT::i16
)
5560 LC
= RTLIB::MUL_I16
;
5561 else if (WideVT
== MVT::i32
)
5562 LC
= RTLIB::MUL_I32
;
5563 else if (WideVT
== MVT::i64
)
5564 LC
= RTLIB::MUL_I64
;
5565 else if (WideVT
== MVT::i128
)
5566 LC
= RTLIB::MUL_I128
;
5567 assert(LC
!= RTLIB::UNKNOWN_LIBCALL
&& "Cannot expand this operation!");
5572 // The high part is obtained by SRA'ing all but one of the bits of low
5574 unsigned LoSize
= VT
.getSizeInBits();
5576 DAG
.getNode(ISD::SRA
, dl
, VT
, LHS
,
5577 DAG
.getConstant(LoSize
- 1, dl
,
5578 getPointerTy(DAG
.getDataLayout())));
5580 DAG
.getNode(ISD::SRA
, dl
, VT
, RHS
,
5581 DAG
.getConstant(LoSize
- 1, dl
,
5582 getPointerTy(DAG
.getDataLayout())));
5584 HiLHS
= DAG
.getConstant(0, dl
, VT
);
5585 HiRHS
= DAG
.getConstant(0, dl
, VT
);
5588 // Here we're passing the 2 arguments explicitly as 4 arguments that are
5589 // pre-lowered to the correct types. This all depends upon WideVT not
5590 // being a legal type for the architecture and thus has to be split to
5593 if (DAG
.getDataLayout().isLittleEndian()) {
5594 // Halves of WideVT are packed into registers in different order
5595 // depending on platform endianness. This is usually handled by
5596 // the C calling convention, but we can't defer to it in
5598 SDValue Args
[] = { LHS
, HiLHS
, RHS
, HiRHS
};
5599 Ret
= makeLibCall(DAG
, LC
, WideVT
, Args
, isSigned
, dl
,
5600 /* doesNotReturn */ false, /* isReturnValueUsed */ true,
5601 /* isPostTypeLegalization */ true).first
;
5603 SDValue Args
[] = { HiLHS
, LHS
, HiRHS
, RHS
};
5604 Ret
= makeLibCall(DAG
, LC
, WideVT
, Args
, isSigned
, dl
,
5605 /* doesNotReturn */ false, /* isReturnValueUsed */ true,
5606 /* isPostTypeLegalization */ true).first
;
5608 assert(Ret
.getOpcode() == ISD::MERGE_VALUES
&&
5609 "Ret value is a collection of constituent nodes holding result.");
5610 if (DAG
.getDataLayout().isLittleEndian()) {
5612 BottomHalf
= Ret
.getOperand(0);
5613 TopHalf
= Ret
.getOperand(1);
5615 BottomHalf
= Ret
.getOperand(1);
5616 TopHalf
= Ret
.getOperand(0);
5620 EVT SetCCVT
= getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(), VT
);
5622 SDValue ShiftAmt
= DAG
.getConstant(
5623 VT
.getSizeInBits() - 1, dl
,
5624 getShiftAmountTy(BottomHalf
.getValueType(), DAG
.getDataLayout()));
5625 SDValue Sign
= DAG
.getNode(ISD::SRA
, dl
, VT
, BottomHalf
, ShiftAmt
);
5626 TopHalf
= DAG
.getSetCC(dl
, SetCCVT
, TopHalf
, Sign
, ISD::SETNE
);
5628 TopHalf
= DAG
.getSetCC(dl
, SetCCVT
, TopHalf
,
5629 DAG
.getConstant(0, dl
, VT
), ISD::SETNE
);
5632 // Truncate the result if SetCC returns a larger type than needed.
5633 EVT RType
= Node
->getValueType(1);
5634 if (RType
.getSizeInBits() < TopHalf
.getValueSizeInBits())
5635 TopHalf
= DAG
.getNode(ISD::TRUNCATE
, dl
, RType
, TopHalf
);
5637 assert(RType
.getSizeInBits() == TopHalf
.getValueSizeInBits() &&
5638 "Unexpected result type for S/UMULO legalization");
5640 return std::make_pair(BottomHalf
, TopHalf
);