//===-- TargetLowering.cpp - Implement the TargetLowering class ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}
bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // First, check if tail calls have been disabled in this function.
  if (F.getFnAttribute("disable-tail-calls").getValueAsBool())
    return false;

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore the following attributes because they don't affect the
  // call sequence.
  AttrBuilder CallerAttrs(F.getAttributes(), AttributeList::ReturnIndex);
  for (const auto &Attr : {Attribute::Alignment, Attribute::Dereferenceable,
                           Attribute::DereferenceableOrNull, Attribute::NoAlias,
                           Attribute::NonNull})
    CallerAttrs.removeAttribute(Attr);

  if (CallerAttrs.hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.contains(Attribute::ZExt) ||
      CallerAttrs.contains(Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}
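
// Illustrative usage (a sketch, not a prescribed API): a caller that wants to
// fold a libcall into a tail call can thread the chain through this check,
// along the lines of:
//
//   SDValue TCChain = InChain;
//   bool IsTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain);
//   if (IsTailCall)
//     InChain = TCChain; // Use the chain that feeds the return directly.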
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    MCRegister Reg = ArgLoc.getLocReg();
    // Only look at callee-saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    Register ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }

  return true;
}
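
// Illustrative usage (hypothetical names): a backend's tail-call eligibility
// check would typically call this after computing argument locations:
//
//   SmallVector<CCValAssign, 16> ArgLocs;
//   CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, Ctx);
//   CCInfo.AnalyzeCallOperands(Outs, CC_MyTarget); // CC_MyTarget is made up.
//   const uint32_t *Mask = TRI->getCallPreservedMask(MF, CalleeCC);
//   if (!parametersInCSRMatch(MRI, Mask, ArgLocs, OutVals))
//     return false; // Not safe to emit this as a sibling call.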
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
                                                     unsigned ArgIdx) {
  IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
  IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftAsync = Call->paramHasAttr(ArgIdx, Attribute::SwiftAsync);
  IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = Call->getParamStackAlign(ArgIdx);
  IndirectType = nullptr;
  assert(IsByVal + IsPreallocated + IsInAlloca <= 1 &&
         "multiple ABI attributes?");
  if (IsByVal) {
    IndirectType = Call->getParamByValType(ArgIdx);
    if (!Alignment)
      Alignment = Call->getParamAlign(ArgIdx);
  }
  if (IsPreallocated)
    IndirectType = Call->getParamPreallocatedType(ArgIdx);
  if (IsInAlloca)
    IndirectType = Call->getParamInAllocaType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops,
                            MakeLibCallOptions CallOptions,
                            const SDLoc &dl,
                            SDValue InChain) const {
  if (!InChain)
    InChain = DAG.getEntryNode();

  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (unsigned i = 0; i < Ops.size(); ++i) {
    SDValue NewOp = Ops[i];
    Entry.Node = NewOp;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
                                                 CallOptions.IsSExt);
    Entry.IsZExt = !Entry.IsSExt;

    if (CallOptions.IsSoften &&
        !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
      Entry.IsSExt = Entry.IsZExt = false;
    }
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
  bool zeroExtend = !signExtend;

  if (CallOptions.IsSoften &&
      !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
    signExtend = zeroExtend = false;
  }

  CLI.setDebugLoc(dl)
      .setChain(InChain)
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(CallOptions.DoesNotReturn)
      .setDiscardResult(!CallOptions.IsReturnValueUsed)
      .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
      .setSExtResult(signExtend)
      .setZExtResult(zeroExtend);
  return LowerCallTo(CLI);
}
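
// Illustrative usage (a sketch with hypothetical operands): expanding a
// 128-bit multiply through the __multi3 runtime routine might look like:
//
//   SDValue Ops[2] = {N->getOperand(0), N->getOperand(1)};
//   TargetLowering::MakeLibCallOptions CallOptions;
//   std::pair<SDValue, SDValue> Ret =
//       makeLibCall(DAG, RTLIB::MUL_I128, MVT::i128, Ops, CallOptions, dl);
//
// Ret.first is the call's result value and Ret.second the output chain.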
bool TargetLowering::findOptimalMemOpLowering(
    std::vector<EVT> &MemOps, unsigned Limit, const MemOp &Op, unsigned DstAS,
    unsigned SrcAS, const AttributeList &FuncAttributes) const {
  if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
    return false;

  EVT VT = getOptimalMemOpType(Op, FuncAttributes);

  if (VT == MVT::Other) {
    // Use the largest integer type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater or
    // equal to DstAlign (or zero).
    VT = MVT::i64;
    if (Op.isFixedDstAlign())
      while (Op.getDstAlign() < (VT.getSizeInBits() / 8) &&
             !allowsMisalignedMemoryAccesses(VT, DstAS, Op.getDstAlign()))
        VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
    assert(VT.isInteger());

    // Find the largest legal integer type.
    MVT LVT = MVT::i64;
    while (!isTypeLegal(LVT))
      LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
    assert(LVT.isInteger());

    // If the type we've chosen is larger than the largest legal integer type
    // then use that instead.
    if (VT.bitsGT(LVT))
      VT = LVT;
  }

  unsigned NumMemOps = 0;
  uint64_t Size = Op.size();
  while (Size) {
    unsigned VTSize = VT.getSizeInBits() / 8;
    while (VTSize > Size) {
      // For now, only use non-vector load / store's for the left-over pieces.
      EVT NewVT = VT;
      unsigned NewVTSize;

      bool Found = false;
      if (VT.isVector() || VT.isFloatingPoint()) {
        NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
        if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
            isSafeMemOpType(NewVT.getSimpleVT()))
          Found = true;
        else if (NewVT == MVT::i64 &&
                 isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
                 isSafeMemOpType(MVT::f64)) {
          // i64 is usually not legal on 32-bit targets, but f64 may be.
          NewVT = MVT::f64;
          Found = true;
        }
      }

      if (!Found) {
        do {
          NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
          if (NewVT == MVT::i8)
            break;
        } while (!isSafeMemOpType(NewVT.getSimpleVT()));
      }
      NewVTSize = NewVT.getSizeInBits() / 8;

      // If the new VT cannot cover all of the remaining bits, then consider
      // issuing a (or a pair of) unaligned and overlapping load / store.
      bool Fast;
      if (NumMemOps && Op.allowOverlap() && NewVTSize < Size &&
          allowsMisalignedMemoryAccesses(
              VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
              MachineMemOperand::MONone, &Fast) &&
          Fast)
        VTSize = Size;
      else {
        VT = NewVT;
        VTSize = NewVTSize;
      }
    }

    if (++NumMemOps > Limit)
      return false;

    MemOps.push_back(VT);
    Size -= VTSize;
  }

  return true;
}
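
// Worked example (illustrative): for a 13-byte memcpy on a 64-bit target
// where misaligned accesses are fast and Op.allowOverlap() is true, the loops
// above typically produce {i64, i64}: the trailing 5 bytes are covered by a
// second i64 whose VTSize is clamped to Size, i.e. an overlapping access.
// Without overlap it would decompose as {i64, i32, i8} instead.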
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS) const {
  SDValue Chain;
  return softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, dl, OldLHS,
                             OldRHS, Chain);
}
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl, const SDValue OldLHS,
                                         const SDValue OldRHS, SDValue &Chain,
                                         bool IsSignaling) const {
  // FIXME: Currently we cannot really respect all IEEE predicates due to
  // libgcc not supporting them. We can update this code once libgcc provides
  // such functions.

  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 ||
          VT == MVT::ppcf128) &&
         "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETO:
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = O && UNE
    ShouldInvertCC = true;
    LLVM_FALLTHROUGH;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }
  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  TargetLowering::MakeLibCallOptions CallOptions;
  EVT OpsVT[2] = { OldLHS.getValueType(),
                   OldRHS.getValueType() };
  CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
  auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
  NewLHS = Call.first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC) {
    assert(RetVT.isInteger());
    CCCode = getSetCCInverse(CCCode, RetVT);
  }

  if (LC2 == RTLIB::UNKNOWN_LIBCALL) {
    // Update Chain.
    Chain = Call.second;
  } else {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT);
    SDValue Tmp = DAG.getSetCC(dl, SetCCVT, NewLHS, NewRHS, CCCode);
    auto Call2 = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl, Chain);
    CCCode = getCmpLibcallCC(LC2);
    if (ShouldInvertCC)
      CCCode = getSetCCInverse(CCCode, RetVT);
    NewLHS = DAG.getSetCC(dl, SetCCVT, Call2.first, NewRHS, CCCode);
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Call.second,
                          Call2.second);
    NewLHS = DAG.getNode(ShouldInvertCC ? ISD::AND : ISD::OR, dl,
                         Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}
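
// Worked example (illustrative): softening (setcc f32 x, y, setueq) picks
// LC1 = UO_F32 (__unordsf2) and LC2 = OEQ_F32 (__eqsf2), yielding
//
//   (or (setne (call __unordsf2, x, y), 0),
//       (seteq (call __eqsf2,    x, y), 0))
//
// i.e. "unordered or equal", with both call chains merged via a TokenFactor
// when a chain is being tracked.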
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}
SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}
/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI,
                                             MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}
bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}
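
// For example, with a static relocation model and a dso_local global, this
// allows folding (add (globaladdr @g), 16) into a single global address node
// with offset 16, which the target can emit as one absolute relocation.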
//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//
/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            const APInt &DemandedElts,
                                            TargetLoweringOpt &TLO) const {
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C || Op1C->isOpaque())
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && DemandedBits.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(DemandedBits)) {
      EVT VT = Op.getValueType();
      SDValue NewC = TLO.DAG.getConstant(DemandedBits & C, DL, VT);
      SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}
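
// Worked example (illustrative): if only the low byte of (and X, 0xFFF0) is
// demanded, 0xFFF0 is not a subset of the demanded mask 0xFF, so the constant
// shrinks to 0xFFF0 & 0xFF = 0xF0 and the node is rebuilt as (and X, 0xF0),
// which may then simplify further.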
bool TargetLowering::ShrinkDemandedConstant(SDValue Op,
                                            const APInt &DemandedBits,
                                            TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO);
}
/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}
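
// Worked example (illustrative): with BitWidth == 64 and only the low 16 bits
// demanded, the loop tries i16 and then i32; on a target where truncation
// from i64 and zero-extension from i32 are free, (add i64 x, y) becomes
// (any_extend (add i32 (trunc x), (trunc y))).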
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedBits, Known, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }
  return Simplified;
}
bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (VT.isScalableVector()) {
    // Pretend we don't know anything for now.
    Known = KnownBits(DemandedBits.getBitWidth());
    return false;
  }

  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO, Depth,
                              AssumeSingleUse);
}
// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
// TODO: Under what circumstances can we create nodes? Constant folding?
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Ignore UNDEFs.
  if (Op.isUndef())
    return SDValue();

  // Not demanding any bits/elts from Op.
  if (DemandedBits == 0 || DemandedElts == 0)
    return DAG.getUNDEF(Op.getValueType());

  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned BitWidth = DemandedBits.getBitWidth();
  KnownBits LHSKnown, RHSKnown;
  switch (Op.getOpcode()) {
  case ISD::BITCAST: {
    SDValue Src = peekThroughBitcasts(Op.getOperand(0));
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (SrcVT == DstVT)
      return Src;

    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
    unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
    if (NumSrcEltBits == NumDstEltBits)
      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedBits, DemandedElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);

    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumDstEltBits / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    // TODO - bigendian once we have test coverage.
    if ((NumSrcEltBits % NumDstEltBits) == 0 &&
        DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / NumDstEltBits;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * NumDstEltBits;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SDValue V = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
        return DAG.getBitcast(DstVT, V);
    }

    break;
  }
  case ISD::AND: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known 1 on one side, return the other.
    // These bits cannot contribute to the result of the 'and' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
      return Op.getOperand(1);
    break;
  }
  case ISD::OR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other. These bits cannot contribute to the result of the 'or' in this
    // context.
    if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::XOR: {
    LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);

    // If all of the demanded bits are known zero on one side, return the
    // other.
    if (DemandedBits.isSubsetOf(RHSKnown.Zero))
      return Op.getOperand(0);
    if (DemandedBits.isSubsetOf(LHSKnown.Zero))
      return Op.getOperand(1);
    break;
  }
  case ISD::SHL: {
    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      SDValue Op0 = Op.getOperand(0);
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return Op0;
    }
    break;
  }
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return Op0;
    }
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    // If none of the extended bits are demanded, eliminate the sextinreg.
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExBits = ExVT.getScalarSizeInBits();
    if (DemandedBits.getActiveBits() <= ExBits)
      return Op0;
    // If the input is already sign extended, just drop the extension.
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    if (NumSignBits >= (BitWidth - ExBits + 1))
      return Op0;
    break;
  }
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    // If we only want the lowest element and none of extended bits, then we can
    // return the bitcasted source vector.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    EVT DstVT = Op.getValueType();
    if (DemandedElts == 1 && DstVT.getSizeInBits() == SrcVT.getSizeInBits() &&
        DAG.getDataLayout().isLittleEndian() &&
        DemandedBits.getActiveBits() <= SrcVT.getScalarSizeInBits()) {
      return DAG.getBitcast(DstVT, Src);
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // If we don't demand the inserted subvector, return the base vector.
    SDValue Vec = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    if (DemandedElts.extractBits(NumSubElts, Idx) == 0)
      return Vec;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // If all the demanded elts are from one operand and are inline,
    // then we can use the operand directly.
    bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      AllUndef = false;
      IdentityLHS &= (M == (int)i);
      IdentityRHS &= ((M - NumElts) == i);
    }

    if (AllUndef)
      return DAG.getUNDEF(Op.getValueType());
    if (IdentityLHS)
      return Op.getOperand(0);
    if (IdentityRHS)
      return Op.getOperand(1);
    break;
  }
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
      if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
              Op, DemandedBits, DemandedElts, DAG, Depth))
        return V;
    break;
  }
  return SDValue();
}
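
// Worked example (illustrative): for (and X, C) where the demanded bits are
// all covered by the known-one bits of C, the ISD::AND case above returns X
// directly; the caller gets to look through the mask without modifying the
// possibly multi-use AND node itself.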
SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
    SDValue Op, const APInt &DemandedBits, SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = VT.isVector()
                           ? APInt::getAllOnesValue(VT.getVectorNumElements())
                           : APInt(1, 1);
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
SDValue TargetLowering::SimplifyMultipleUseDemandedVectorElts(
    SDValue Op, const APInt &DemandedElts, SelectionDAG &DAG,
    unsigned Depth) const {
  APInt DemandedBits = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
  return SimplifyMultipleUseDemandedBits(Op, DemandedBits, DemandedElts, DAG,
                                         Depth);
}
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
/// original and new nodes in Old and New. Otherwise, analyze the expression and
/// return a mask of Known bits for the expression (used to simplify the
/// caller). The Known bits may only be accurate for those bits in the
/// OriginalDemandedBits and OriginalDemandedElts.
bool TargetLowering::SimplifyDemandedBits(
    SDValue Op, const APInt &OriginalDemandedBits,
    const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO,
    unsigned Depth, bool AssumeSingleUse) const {
  unsigned BitWidth = OriginalDemandedBits.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");

  // Don't know anything.
  Known = KnownBits(BitWidth);

  // TODO: We can probably do more work on calculating the known bits and
  // simplifying the operations for scalable vectors, but for now we just
  // bail out.
  if (Op.getValueType().isScalableVector())
    return false;

  unsigned NumElts = OriginalDemandedElts.getBitWidth();
  assert((!Op.getValueType().isVector() ||
          NumElts == Op.getValueType().getVectorNumElements()) &&
         "Unexpected vector size");

  APInt DemandedBits = OriginalDemandedBits;
  APInt DemandedElts = OriginalDemandedElts;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Undef operand.
  if (Op.isUndef())
    return false;

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known = KnownBits::makeConstant(cast<ConstantSDNode>(Op)->getAPIntValue());
    return false;
  }

  if (Op.getOpcode() == ISD::ConstantFP) {
    // We know all of the bits for a floating point constant!
    Known = KnownBits::makeConstant(
        cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt());
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, Just compute the Known bits to
      // simplify things downstream.
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the DemandedBits/Elts to all bits.
    DemandedBits = APInt::getAllOnesValue(BitWidth);
    DemandedElts = APInt::getAllOnesValue(NumElts);
  } else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
    // Not demanding any bits/elts from Op.
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
    // Limit search depth.
    return false;
  }

  KnownBits Known2;
  switch (Op.getOpcode()) {
  case ISD::TargetConstant:
    llvm_unreachable("Can't simplify this node");
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0])
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

    KnownBits SrcKnown;
    SDValue Src = Op.getOperand(0);
    unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
    APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
    if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
      return true;

    // Upper elements are undef, so only get the knownbits if we just demand
    // the bottom element.
    if (DemandedElts == 1)
      Known = SrcKnown.anyextOrTrunc(BitWidth);
    break;
  }
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every demanded element.
    // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    return false; // Don't fall through, will infinitely loop.
  case ISD::LOAD: {
    auto *LD = cast<LoadSDNode>(Op);
    if (getTargetConstantFromLoad(LD)) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false; // Don't fall through, will infinitely loop.
    }
    if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
      // If this is a ZEXTLoad and we are looking at the loaded value.
      EVT MemVT = LD->getMemoryVT();
      unsigned MemBits = MemVT.getScalarSizeInBits();
      Known.Zero.setBitsFrom(MemBits);
      return false; // Don't fall through, will infinitely loop.
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    EVT VecVT = Vec.getValueType();

    // If index isn't constant, assume we need all vector elements AND the
    // inserted element.
    APInt DemandedVecElts(DemandedElts);
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
      unsigned Idx = CIdx->getZExtValue();
      DemandedVecElts.clearBit(Idx);

      // Inserted element is not required.
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);
    }

    KnownBits KnownScl;
    unsigned NumSclBits = Scl.getScalarValueSizeInBits();
    APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
    if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
      return true;

    Known = KnownScl.anyextOrTrunc(BitWidth);

    KnownBits KnownVec;
    if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
                             Depth + 1))
      return true;

    if (!!DemandedVecElts)
      Known = KnownBits::commonBits(Known, KnownVec);

    return false;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src
    // it's inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    KnownBits KnownSub, KnownSrc;
    if (SimplifyDemandedBits(Sub, DemandedBits, DemandedSubElts, KnownSub, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Src, DemandedBits, DemandedSrcElts, KnownSrc, TLO,
                             Depth + 1))
      return true;

    Known.Zero.setAllBits();
    Known.One.setAllBits();
    if (!!DemandedSubElts)
      Known = KnownBits::commonBits(Known, KnownSub);
    if (!!DemandedSrcElts)
      Known = KnownBits::commonBits(Known, KnownSrc);

    // Attempt to avoid multi-use src if we don't need anything from it.
    if (!DemandedBits.isAllOnesValue() || !DemandedSubElts.isAllOnesValue() ||
        !DemandedSrcElts.isAllOnesValue()) {
      SDValue NewSub = SimplifyMultipleUseDemandedBits(
          Sub, DemandedBits, DemandedSubElts, TLO.DAG, Depth + 1);
      SDValue NewSrc = SimplifyMultipleUseDemandedBits(
          Src, DemandedBits, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSub || NewSrc) {
        NewSub = NewSub ? NewSub : Sub;
        NewSrc = NewSrc ? NewSrc : Src;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc, NewSub,
                                        Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
: {
1076 // Offset the demanded elts by the subvector index.
1077 SDValue Src
= Op
.getOperand(0);
1078 if (Src
.getValueType().isScalableVector())
1080 uint64_t Idx
= Op
.getConstantOperandVal(1);
1081 unsigned NumSrcElts
= Src
.getValueType().getVectorNumElements();
1082 APInt DemandedSrcElts
= DemandedElts
.zextOrSelf(NumSrcElts
).shl(Idx
);
1084 if (SimplifyDemandedBits(Src
, DemandedBits
, DemandedSrcElts
, Known
, TLO
,
1088 // Attempt to avoid multi-use src if we don't need anything from it.
1089 if (!DemandedBits
.isAllOnesValue() || !DemandedSrcElts
.isAllOnesValue()) {
1090 SDValue DemandedSrc
= SimplifyMultipleUseDemandedBits(
1091 Src
, DemandedBits
, DemandedSrcElts
, TLO
.DAG
, Depth
+ 1);
1093 SDValue NewOp
= TLO
.DAG
.getNode(Op
.getOpcode(), dl
, VT
, DemandedSrc
,
1095 return TLO
.CombineTo(Op
, NewOp
);
1100 case ISD::CONCAT_VECTORS
: {
1101 Known
.Zero
.setAllBits();
1102 Known
.One
.setAllBits();
1103 EVT SubVT
= Op
.getOperand(0).getValueType();
1104 unsigned NumSubVecs
= Op
.getNumOperands();
1105 unsigned NumSubElts
= SubVT
.getVectorNumElements();
1106 for (unsigned i
= 0; i
!= NumSubVecs
; ++i
) {
1107 APInt DemandedSubElts
=
1108 DemandedElts
.extractBits(NumSubElts
, i
* NumSubElts
);
1109 if (SimplifyDemandedBits(Op
.getOperand(i
), DemandedBits
, DemandedSubElts
,
1110 Known2
, TLO
, Depth
+ 1))
1112 // Known bits are shared by every demanded subvector element.
1113 if (!!DemandedSubElts
)
1114 Known
= KnownBits::commonBits(Known
, Known2
);
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      if (!DemandedElts[i])
        continue;
      int M = ShuffleMask[i];
      if (M < 0) {
        // For UNDEF elements, we don't know anything about the common state of
        // the shuffle result.
        DemandedLHS.clearAllBits();
        DemandedRHS.clearAllBits();
        break;
      }
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    if (!!DemandedLHS || !!DemandedRHS) {
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);

      Known.Zero.setAllBits();
      Known.One.setAllBits();
      if (!!DemandedLHS) {
        if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }
      if (!!DemandedRHS) {
        if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
                                 Depth + 1))
          return true;
        Known = KnownBits::commonBits(Known, Known2);
      }

      // Attempt to avoid multi-use ops if we don't need anything from them.
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op1)) {
      // Do not increment Depth here; that can cause an infinite loop.
      KnownBits LHSKnown = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & DemandedBits) ==
          (~RHSC->getAPIntValue() & DemandedBits))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & DemandedBits,
                                 DemandedElts, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0), Op1);
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op1);
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & DemandedBits, DemandedElts,
                               TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known &= Known2;
    break;
  }
  case ISD::OR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
                             Known2, TLO, Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    Known |= Known2;
    break;
  }
  case ISD::XOR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (DemandedBits.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op0);
    if (DemandedBits.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op1);
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // turn this into an *inclusive* or.
    //    e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (DemandedBits.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT, Op0, Op1));

    ConstantSDNode *C = isConstOrConstSplat(Op1, DemandedElts);
    if (C) {
      // If one side is a constant, and all of the set bits in the constant are
      // also known set on the other side, turn this into an AND, as we know
      // the bits will be cleared.
      //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC =
            TLO.DAG.getConstant(~C->getAPIntValue() & DemandedBits, dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT, Op0, ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue() &&
          DemandedBits.isSubsetOf(C->getAPIntValue())) {
        // We're flipping all demanded bits. Flip the undemanded bits too.
        SDValue New = TLO.DAG.getNOT(dl, Op0, VT);
        return TLO.CombineTo(Op, New);
      }
    }

    // If we can't turn this into a 'not', try to shrink the constant.
    if (!C || !C->isAllOnesValue())
      if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
        return true;

    Known ^= Known2;
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), DemandedBits, Known, TLO,
                             Depth + 1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), DemandedBits, Known2, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, DemandedBits, DemandedElts, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (DemandedBits.isSignMask() &&
        Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(Op0.getValueType()) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SRL) {
        if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SHL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SRL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = Op0.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && DemandedBits.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
              TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                              TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }

        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        // TODO - support non-uniform vector amounts.
        if (Op0.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(InnerOp, DemandedElts)) {
            unsigned InnerShAmt = SA2->getZExtValue();
            if (InnerShAmt < ShAmt && InnerShAmt < InnerBits &&
                DemandedBits.getActiveBits() <=
                    (InnerBits - InnerShAmt + ShAmt) &&
                DemandedBits.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                  TLO.DAG.getConstant(ShAmt - InnerShAmt, dl, ShiftVT);
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(
                  Op, TLO.DAG.getNode(ISD::SHL, dl, VT, NewExt, NewSA));
            }
          }
        }
      }

      APInt InDemandedMask = DemandedBits.lshr(ShAmt);
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);

      // Try shrinking the operation as long as the shift amount will still be
      // in range.
      if ((ShAmt < DemandedBits.getActiveBits()) &&
          ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
        return true;
    }

    // If we are only demanding sign bits then we can use the shift source
    // directly.
    if (const APInt *MaxSA =
            TLO.DAG.getValidMaximumShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = MaxSA->getZExtValue();
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
      if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= (UpperDemandedBits))
        return TLO.CombineTo(Op, Op0);
    }
    break;
  }
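
  // Worked example (illustrative): for (shl X, 8) on i32 where only result
  // bits 15:8 are demanded, InDemandedMask is DemandedBits >> 8 == 0xFF, so
  // only the low byte of X is simplified against, and the known bits are
  // shifted left by 8 with the low 8 result bits known zero.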
  case ISD::SRL: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
      // TODO - support non-uniform vector amounts.
      if (Op0.getOpcode() == ISD::SHL) {
        if (!DemandedBits.intersects(APInt::getHighBitsSet(BitWidth, ShAmt))) {
          if (const APInt *SA2 =
                  TLO.DAG.getValidShiftAmountConstant(Op0, DemandedElts)) {
            unsigned C1 = SA2->getZExtValue();
            unsigned Opc = ISD::SRL;
            int Diff = ShAmt - C1;
            if (Diff < 0) {
              Diff = -Diff;
              Opc = ISD::SHL;
            }
            SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
            return TLO.CombineTo(
                Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
          }
        }
      }

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits known zero.
      Known.Zero.setHighBits(ShAmt);
    }
    break;
  }
  case ISD::SRA: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    EVT ShiftVT = Op1.getValueType();

    // If we only want bits that already match the signbit then we don't need
    // to shift.
    unsigned NumHiDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1) >=
        NumHiDemandedBits)
      return TLO.CombineTo(Op, Op0);

    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (DemandedBits.isOneValue())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));

    if (const APInt *SA =
            TLO.DAG.getValidShiftAmountConstant(Op, DemandedElts)) {
      unsigned ShAmt = SA->getZExtValue();
      if (ShAmt == 0)
        return TLO.CombineTo(Op, Op0);

      APInt InDemandedMask = (DemandedBits << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows
      // that they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (DemandedBits.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          DemandedBits.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(
            Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1, Flags));
      }

      int Log2 = DemandedBits.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA = TLO.DAG.getConstant(BitWidth - 1 - Log2, dl, ShiftVT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);

      // Attempt to avoid multi-use ops if we don't need anything from them.
      if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
        SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
            Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1);
        if (DemandedOp0) {
          SDValue NewOp = TLO.DAG.getNode(ISD::SRA, dl, VT, DemandedOp0, Op1);
          return TLO.CombineTo(Op, NewOp);
        }
      }
    }
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    SDValue Op2 = Op.getOperand(2);
    bool IsFSHL = (Op.getOpcode() == ISD::FSHL);

    if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
      unsigned Amt = SA->getAPIntValue().urem(BitWidth);

      // For fshl, 0-shift returns the 1st arg.
      // For fshr, 0-shift returns the 2nd arg.
      if (Amt == 0) {
        if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
                                 Known, TLO, Depth + 1))
          return true;
        break;
      }

      // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
      // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
      APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
      APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
      if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
      if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
                               Depth + 1))
        return true;

      Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
      Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
      Known.One |= Known2.One;
      Known.Zero |= Known2.Zero;
    }

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op2.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op2, DemandedAmtBits, DemandedElts,
                               Known2, TLO, Depth + 1))
        return true;
    }
    break;
  }
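
  // Worked example for the funnel-shift masks above (illustrative values
  // only): for (fshl i8 %a, %b, 3) the result is (%a << 3) | (%b >> 5).
  // With DemandedBits = 0b00011110, bits [4:3] come from %a bits [1:0] and
  // bits [2:1] come from %b bits [7:6], so
  //   Demanded0 = 0b00011110 >> 3       = 0b00000011
  //   Demanded1 = 0b00011110 << (8 - 3) = 0b11000000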
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // If we're rotating an 0/-1 value, then it stays an 0/-1 value.
    if (BitWidth == TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1))
      return TLO.CombineTo(Op, Op0);

    // For pow-2 bitwidths we only demand the bottom modulo amt bits.
    if (isPowerOf2_32(BitWidth)) {
      APInt DemandedAmtBits(Op1.getScalarValueSizeInBits(), BitWidth - 1);
      if (SimplifyDemandedBits(Op1, DemandedAmtBits, DemandedElts, Known2, TLO,
                               Depth + 1))
        return true;
    }
    break;
  }
  case ISD::UMIN: {
    // Check if one arg is always less than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umin(Known0, Known1);
    if (Optional<bool> IsULE = KnownBits::ule(Known0, Known1))
      return TLO.CombineTo(Op, IsULE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsULT = KnownBits::ult(Known0, Known1))
      return TLO.CombineTo(Op, IsULT.getValue() ? Op0 : Op1);
    break;
  }
  case ISD::UMAX: {
    // Check if one arg is always greater than (or equal) to the other arg.
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    KnownBits Known0 = TLO.DAG.computeKnownBits(Op0, DemandedElts, Depth + 1);
    KnownBits Known1 = TLO.DAG.computeKnownBits(Op1, DemandedElts, Depth + 1);
    Known = KnownBits::umax(Known0, Known1);
    if (Optional<bool> IsUGE = KnownBits::uge(Known0, Known1))
      return TLO.CombineTo(Op, IsUGE.getValue() ? Op0 : Op1);
    if (Optional<bool> IsUGT = KnownBits::ugt(Known0, Known1))
      return TLO.CombineTo(Op, IsUGT.getValue() ? Op0 : Op1);
    break;
  }
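
  // Worked example for the KnownBits-based folds above (illustrative values
  // only): if %a has its high nibble known zero (max 0x0F) and %b has bit 4
  // known one (min 0x10), then KnownBits::ule(Known0, Known1) proves
  // %a <=u %b, so umin(%a, %b) folds to %a and umax(%a, %b) folds to %b.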
  case ISD::BITREVERSE: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.reverseBits();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.reverseBits();
    Known.Zero = Known2.Zero.reverseBits();
    break;
  }
  case ISD::BSWAP: {
    SDValue Src = Op.getOperand(0);
    APInt DemandedSrcBits = DemandedBits.byteSwap();
    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
                             Depth + 1))
      return true;
    Known.One = Known2.One.byteSwap();
    Known.Zero = Known2.Zero.byteSwap();
    break;
  }
  case ISD::CTPOP: {
    // If only 1 bit is demanded, replace with PARITY as long as we're before
    // op legalization.
    // FIXME: Limit to scalars for now.
    if (DemandedBits.isOneValue() && !TLO.LegalOps && !VT.isVector())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::PARITY, dl, VT,
                                               Op.getOperand(0)));

    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }
  case ISD::SIGN_EXTEND_INREG: {
    SDValue Op0 = Op.getOperand(0);
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
    if (DemandedBits.isSignMask()) {
      unsigned NumSignBits =
          TLO.DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
      bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
      // However if the input is already sign extended we expect the sign
      // extension to be dropped altogether later and do not simplify.
      if (!AlreadySignExtended) {
        // Compute the correct shift amount type, which must be getShiftAmountTy
        // for scalar types after legalization.
        EVT ShiftAmtTy = VT;
        if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
          ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);

        SDValue ShiftAmt =
            TLO.DAG.getConstant(BitWidth - ExVTBits, dl, ShiftAmtTy);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Op0, ShiftAmt));
      }
    }

    // If none of the extended bits are demanded, eliminate the sextinreg.
    if (DemandedBits.getActiveBits() <= ExVTBits)
      return TLO.CombineTo(Op, Op0);

    APInt InputDemandedBits = DemandedBits.getLoBits(ExVTBits);

    // Since the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InputDemandedBits.setBit(ExVTBits - 1);

    if (SimplifyDemandedBits(Op0, InputDemandedBits, Known, TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    // If the sign bit of the input is known set or clear, then we know the
    // top bits of the result.

    // If the input sign bit is known zero, convert this into a zero extension.
    if (Known.Zero[ExVTBits - 1])
      return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(Op0, dl, ExVT));

    APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
    if (Known.One[ExVTBits - 1]) { // Input sign bit known set
      Known.One.setBitsFrom(ExVTBits);
      Known.Zero &= Mask;
    } else { // Input sign bit unknown
      Known.Zero &= Mask;
      Known.One &= Mask;
    }
    break;
  }
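
  // Worked example for the sign-mask fast path above (illustrative values
  // only): for (sext_inreg i32 %x, i8) with DemandedBits = 0x80000000, the
  // result sign bit is just bit 7 of %x, so the node is rewritten as
  // (shl i32 %x, 24), which moves bit 7 into bit 31 without the usual
  // shl+sra pair.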
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = DemandedBits.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = DemandedBits.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
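
  // Worked example for the lo/hi mask split above (illustrative values
  // only): for (build_pair i64 %lo, %hi) with
  // DemandedBits = 0x0000000180000000, the query sent to %lo is
  // MaskLo = 0x80000000 (bit 31 of the pair) and the query sent to %hi is
  // MaskHi = 0x00000001 (bit 32 of the pair, i.e. bit 0 of %hi).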
  case ISD::ZERO_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.zext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::SIGN_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;

    // If none of the top bits are demanded, convert this into an any_extend.
    if (DemandedBits.getActiveBits() <= InBits) {
      // If we only need the non-extended bits of the bottom element
      // then we can just bitcast to the result.
      if (IsVecInReg && DemandedElts == 1 &&
          VT.getSizeInBits() == SrcVT.getSizeInBits() &&
          TLO.DAG.getDataLayout().isLittleEndian())
        return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

      unsigned Opc =
          IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");

    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative()) {
      unsigned Opc =
          IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
      if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
        return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned InBits = SrcVT.getScalarSizeInBits();
    unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
    bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;

    // If we only need the bottom element then we can just bitcast.
    // TODO: Handle ANY_EXTEND?
    if (IsVecInReg && DemandedElts == 1 &&
        VT.getSizeInBits() == SrcVT.getSizeInBits() &&
        TLO.DAG.getDataLayout().isLittleEndian())
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));

    APInt InDemandedBits = DemandedBits.trunc(InBits);
    APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
    if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(Known.getBitWidth() == InBits && "Src width has changed?");
    Known = Known.anyext(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, InDemandedBits, InDemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(Op.getOpcode(), dl, VT, NewSrc));
    break;
  }
  case ISD::TRUNCATE: {
    SDValue Src = Op.getOperand(0);

    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Src.getScalarValueSizeInBits();
    APInt TruncMask = DemandedBits.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, Known, TLO,
                             Depth + 1))
      return true;
    Known = Known.trunc(BitWidth);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
            Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Src.getNode()->hasOneUse()) {
      switch (Src.getOpcode()) {
      default:
        break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;

        const APInt *ShAmtC =
            TLO.DAG.getValidShiftAmountConstant(Src, DemandedElts);
        if (!ShAmtC || ShAmtC->uge(BitWidth))
          break;
        uint64_t ShVal = ShAmtC->getZExtValue();

        APInt HighBits =
            APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
        HighBits.lshrInPlace(ShVal);
        HighBits = HighBits.trunc(BitWidth);

        if (!(HighBits & DemandedBits)) {
          // None of the shifted in bits are needed. Add a truncate of the
          // shift input, then shift it.
          SDValue NewShAmt = TLO.DAG.getConstant(
              ShVal, dl, getShiftAmountTy(VT, DL, TLO.LegalTypes()));
          SDValue NewTrunc =
              TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
          return TLO.CombineTo(
              Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, NewShAmt));
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
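
  // Worked example for the truncate(srl) shrink above (illustrative values
  // only): for (trunc i64 (srl i64 %x, 8) to i32), HighBits starts as the
  // upper 32 bits of the i64, is shifted right by 8 and truncated, giving
  // 0xFF000000. If DemandedBits = 0x00FFFFFF, no shifted-in high bit is
  // demanded, so the node becomes (srl i32 (trunc i64 %x to i32), 8).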
  case ISD::AssertZext: {
    // AssertZext demands all of the high bits, plus any of the low bits
    // demanded by its users.
    EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
    if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
                             TLO, Depth + 1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");

    Known.Zero |= ~InMask;
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    SDValue Src = Op.getOperand(0);
    SDValue Idx = Op.getOperand(1);
    ElementCount SrcEltCnt = Src.getValueType().getVectorElementCount();
    unsigned EltBitWidth = Src.getScalarValueSizeInBits();

    if (SrcEltCnt.isScalable())
      return false;

    // Demand the bits from every vector element without a constant index.
    unsigned NumSrcElts = SrcEltCnt.getFixedValue();
    APInt DemandedSrcElts = APInt::getAllOnesValue(NumSrcElts);
    if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx))
      if (CIdx->getAPIntValue().ult(NumSrcElts))
        DemandedSrcElts = APInt::getOneBitSet(NumSrcElts, CIdx->getZExtValue());

    // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
    // anything about the extended bits.
    APInt DemandedSrcBits = DemandedBits;
    if (BitWidth > EltBitWidth)
      DemandedSrcBits = DemandedSrcBits.trunc(EltBitWidth);

    if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts, Known2, TLO,
                             Depth + 1))
      return true;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcBits.isAllOnesValue() ||
        !DemandedSrcElts.isAllOnesValue()) {
      if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
              Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    Known = Known2;
    if (BitWidth > EltBitWidth)
      Known = Known.anyext(BitWidth);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        DemandedBits == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }

    // Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
    // Demand the elt/bit if any of the original elts/bits are demanded.
    // TODO - bigendian once we have test coverage.
    if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != Scale; ++i) {
        unsigned Offset = i * NumSrcEltBits;
        APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
        if (!Sub.isNullValue()) {
          DemandedSrcBits |= Sub;
          for (unsigned j = 0; j != NumElts; ++j)
            if (DemandedElts[j])
              DemandedSrcElts.setBit((j * Scale) + i);
        }
      }

      APInt KnownSrcUndef, KnownSrcZero;
      if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                     KnownSrcZero, TLO, Depth + 1))
        return true;

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    } else if ((NumSrcEltBits % BitWidth) == 0 &&
               TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = NumSrcEltBits / BitWidth;
      unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
      APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
      APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i]) {
          unsigned Offset = (i % Scale) * BitWidth;
          DemandedSrcBits.insertBits(DemandedBits, Offset);
          DemandedSrcElts.setBit(i / Scale);
        }

      if (SrcVT.isVector()) {
        APInt KnownSrcUndef, KnownSrcZero;
        if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
                                       KnownSrcZero, TLO, Depth + 1))
          return true;
      }

      KnownBits KnownSrcBits;
      if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
                               KnownSrcBits, TLO, Depth + 1))
        return true;
    }

    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
      return false;
    }
    break;
  }
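
  // Worked example for the little-endian bit/element remapping above
  // (illustrative values only): for (i64 bitcast (v2i32 %v)), Scale = 2 and
  // demanding i64 bits [31:0] demands element 0 of %v, while demanding bits
  // [63:32] demands element 1; DemandedSrcBits collects, per 32-bit lane,
  // the union of the bits requested from each lane.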
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    SDNodeFlags Flags = Op.getNode()->getFlags();
    unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
    if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
                             Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
      SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
          Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
          Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
      if (DemandedOp0 || DemandedOp1) {
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        Op0 = DemandedOp0 ? DemandedOp0 : Op0;
        Op1 = DemandedOp1 ? DemandedOp1 : Op1;
        SDValue NewOp =
            TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
        return TLO.CombineTo(Op, NewOp);
      }
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(BitWidth, DemandedBitsLZ);
    if (C && !C->isAllOnesValue() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // Disable the nsw and nuw flags. We can no longer guarantee that we
      // won't wrap after simplification.
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
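
  // Worked example for the constant-to-allones rewrite above (illustrative
  // values only): for (add i8 %x, 127) where only the low 7 bits are
  // demanded, HighMask = 0x80 and (127 | 0x80) == 0xFF, so the constant is
  // replaced with -1. (add %x, -1) is cheaper to materialize on many
  // targets and feeds patterns such as x86's 'blsr' ((x - 1) & x).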
  default:
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedBitsForTargetNode(Op, DemandedBits, DemandedElts,
                                            Known, TLO, Depth))
        return true;
      break;
    }

    // Just use computeKnownBits to compute output bits.
    Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (DemandedBits.isSubsetOf(Known.Zero | Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNode *Op :
         llvm::make_range(SDNodeIterator::begin(N), SDNodeIterator::end(N))) {
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    if (VT.isInteger())
      return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
    if (VT.isFloatingPoint())
      return TLO.CombineTo(
          Op,
          TLO.DAG.getConstantFP(
              APFloat(TLO.DAG.EVTToAPFloatSemantics(VT), Known.One), dl, VT));
  }

  return false;
}

bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified) {
    DCI.AddToWorklist(Op.getNode());
    DCI.CommitTargetLoweringOpt(TLO);
  }

  return Simplified;
}

/// Given a vector binary operation and known undefined elements for each input
/// operand, compute whether each element of the output is undefined.
static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
                                         const APInt &UndefOp0,
                                         const APInt &UndefOp1) {
  EVT VT = BO.getValueType();
  assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
         "Vector binop only");

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  assert(UndefOp0.getBitWidth() == NumElts &&
         UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");

  auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
                                   const APInt &UndefVals) {
    if (UndefVals[Index])
      return DAG.getUNDEF(EltVT);

    if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
      // Try hard to make sure that the getNode() call is not creating temporary
      // nodes. Ignore opaque integers because they do not constant fold.
      SDValue Elt = BV->getOperand(Index);
      auto *C = dyn_cast<ConstantSDNode>(Elt);
      if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
        return Elt;
    }

    return SDValue();
  };

  APInt KnownUndef = APInt::getNullValue(NumElts);
  for (unsigned i = 0; i != NumElts; ++i) {
    // If both inputs for this element are either constant or undef and match
    // the element type, compute the constant/undef result for this element of
    // the vector.
    // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
    // not handle FP constants. The code within getNode() should be refactored
    // to avoid the danger of creating a bogus temporary node here.
    SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
    SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
    if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
      if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
        KnownUndef.setBit(i);
  }
  return KnownUndef;
}

bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  APInt DemandedElts = OriginalDemandedElts;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // TODO: For now we assume we know nothing about scalable vectors.
  if (VT.isScalableVector())
    return false;

  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Helper for demanding the specified elements and all the bits of both binary
  // operands.
  auto SimplifyDemandedVectorEltsBinOp = [&](SDValue Op0, SDValue Op1) {
    SDValue NewOp0 = SimplifyMultipleUseDemandedVectorElts(Op0, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    SDValue NewOp1 = SimplifyMultipleUseDemandedVectorElts(Op1, DemandedElts,
                                                           TLO.DAG, Depth + 1);
    if (NewOp0 || NewOp1) {
      SDValue NewOp = TLO.DAG.getNode(
          Opcode, SDLoc(Op), VT, NewOp0 ? NewOp0 : Op0, NewOp1 ? NewOp1 : Op1);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  };

  switch (Opcode) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    SDValue ScalarSrc = Op.getOperand(0);
    if (ScalarSrc.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      SDValue Src = ScalarSrc.getOperand(0);
      SDValue Idx = ScalarSrc.getOperand(1);
      EVT SrcVT = Src.getValueType();

      ElementCount SrcEltCnt = SrcVT.getVectorElementCount();

      if (SrcEltCnt.isScalable())
        return false;

      unsigned NumSrcElts = SrcEltCnt.getFixedValue();
      if (isNullConstant(Idx)) {
        APInt SrcDemandedElts = APInt::getOneBitSet(NumSrcElts, 0);
        APInt SrcUndef = KnownUndef.zextOrTrunc(NumSrcElts);
        APInt SrcZero = KnownZero.zextOrTrunc(NumSrcElts);
        if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // Try calling SimplifyDemandedBits, converting demanded elts to the bits
      // of the large element.
      // TODO - bigendian once we have test coverage.
      if (TLO.DAG.getDataLayout().isLittleEndian()) {
        unsigned SrcEltSizeInBits = SrcVT.getScalarSizeInBits();
        APInt SrcDemandedBits = APInt::getNullValue(SrcEltSizeInBits);
        for (unsigned i = 0; i != NumElts; ++i)
          if (DemandedElts[i]) {
            unsigned Ofs = (i % Scale) * EltSizeInBits;
            SrcDemandedBits.setBits(Ofs, Ofs + EltSizeInBits);
          }

        KnownBits Known;
        if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcDemandedElts, Known,
                                 TLO, Depth + 1))
          return true;
      }

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
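
  // Worked example for the element remapping above (illustrative values
  // only): for (v4i32 bitcast (v2i64 %v)), NumElts = 4 and NumSrcElts = 2,
  // so Scale = 2 and demanding v4i32 element 3 demands v2i64 element
  // 3 / 2 = 1. In the opposite direction, (v2i64 bitcast (v4i32 %w)) with
  // element 1 demanded marks source elements 2 and 3 as demanded.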
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    // Demand any elements from the subvector and the remainder from the src its
    // inserted into.
    SDValue Src = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    uint64_t Idx = Op.getConstantOperandVal(2);
    unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
    APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
    APInt DemandedSrcElts = DemandedElts;
    DemandedSrcElts.insertBits(APInt::getNullValue(NumSubElts), Idx);

    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, DemandedSubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;

    // If none of the src operand elements are demanded, replace it with undef.
    if (!DemandedSrcElts && !Src.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                                               TLO.DAG.getUNDEF(VT), Sub,
                                               Op.getOperand(2)));

    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, Idx);
    KnownZero.insertBits(SubZero, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedSrcElts.isAllOnesValue() ||
        !DemandedSubElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      SDValue NewSub = SimplifyMultipleUseDemandedVectorElts(
          Sub, DemandedSubElts, TLO.DAG, Depth + 1);
      if (NewSrc || NewSub) {
        NewSrc = NewSrc ? NewSrc : Src;
        NewSub = NewSub ? NewSub : Sub;
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        NewSub, Op.getOperand(2));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::EXTRACT_SUBVECTOR: {
    // Offset the demanded elts by the subvector index.
    SDValue Src = Op.getOperand(0);
    if (Src.getValueType().isScalableVector())
      break;
    uint64_t Idx = Op.getConstantOperandVal(1);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef = SrcUndef.extractBits(NumElts, Idx);
    KnownZero = SrcZero.extractBits(NumElts, Idx);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    if (!DemandedElts.isAllOnesValue()) {
      SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts(
          Src, DemandedSrcElts, TLO.DAG, Depth + 1);
      if (NewSrc) {
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, NewSrc,
                                        Op.getOperand(1));
        return TLO.CombineTo(Op, NewOp);
      }
    }
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    SDValue Vec = Op.getOperand(0);
    SDValue Scl = Op.getOperand(1);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));

    // For a legal, constant insertion index, if we don't need this insertion
    // then strip it, else remove it from the demanded elts.
    if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
      unsigned Idx = CIdx->getZExtValue();
      if (!DemandedElts[Idx])
        return TLO.CombineTo(Op, Vec);

      APInt DemandedVecElts(DemandedElts);
      DemandedVecElts.clearBit(Idx);
      if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef,
                                     KnownZero, TLO, Depth + 1))
        return true;

      KnownUndef.setBitVal(Idx, Scl.isUndef());

      KnownZero.setBitVal(Idx, isNullConstant(Scl) || isNullFPConstant(Scl));
      break;
    }

    APInt VecUndef, VecZero;
    if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
                                   Depth + 1))
      return true;
    // Without knowing the insertion index we can't set KnownUndef/KnownZero.
    break;
  }
  case ISD::VSELECT: {
    // Try to transform the select condition based on the current demanded
    // elements.
    // TODO: If a condition element is undef, we can choose from one arm of the
    //       select (and if one arm is undef, then we can propagate that to the
    //       result).
    // TODO - add support for constant vselect masks (see IR version of this).
    APInt UnusedUndef, UnusedZero;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
                                   UnusedZero, TLO, Depth + 1))
      return true;

    // See if we can simplify either vselect operand.
    APInt DemandedLHS(DemandedElts);
    APInt DemandedRHS(DemandedElts);
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    KnownUndef = UndefLHS & UndefRHS;
    KnownZero = ZeroLHS & ZeroRHS;
    break;
  }
  case ISD::VECTOR_SHUFFLE: {
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();

    // Collect demanded elements from shuffle operands..
    APInt DemandedLHS(NumElts, 0);
    APInt DemandedRHS(NumElts, 0);
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0 || !DemandedElts[i])
        continue;
      assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
      if (M < (int)NumElts)
        DemandedLHS.setBit(M);
      else
        DemandedRHS.setBit(M - NumElts);
    }

    // See if we can simplify either shuffle operand.
    APInt UndefLHS, ZeroLHS;
    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
                                   ZeroLHS, TLO, Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
                                   ZeroRHS, TLO, Depth + 1))
      return true;

    // Simplify mask using undef elements from LHS/RHS.
    bool Updated = false;
    bool IdentityLHS = true, IdentityRHS = true;
    SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
    for (unsigned i = 0; i != NumElts; ++i) {
      int &M = NewMask[i];
      if (M < 0)
        continue;
      if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
          (M >= (int)NumElts && UndefRHS[M - NumElts])) {
        Updated = true;
        M = -1;
      }
      IdentityLHS &= (M < 0) || (M == (int)i);
      IdentityRHS &= (M < 0) || ((M - NumElts) == i);
    }

    // Update legal shuffle masks based on demanded elements if it won't reduce
    // to Identity which can cause premature removal of the shuffle mask.
    if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
      SDValue LegalShuffle =
          buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
                                  NewMask, TLO.DAG);
      if (LegalShuffle)
        return TLO.CombineTo(Op, LegalShuffle);
    }

    // Propagate undef/zero elements from LHS/RHS.
    for (unsigned i = 0; i != NumElts; ++i) {
      int M = ShuffleMask[i];
      if (M < 0) {
        KnownUndef.setBit(i);
      } else if (M < (int)NumElts) {
        if (UndefLHS[M])
          KnownUndef.setBit(i);
        if (ZeroLHS[M])
          KnownZero.setBit(i);
      } else {
        if (UndefRHS[M - NumElts])
          KnownUndef.setBit(i);
        if (ZeroRHS[M - NumElts])
          KnownZero.setBit(i);
      }
    }
    break;
  }
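
  // Worked example for the mask simplification above (illustrative values
  // only): for a v4i32 shuffle with mask <0, 5, 2, 7>, lanes 1 and 3 read
  // operand 1 elements 1 and 3. If those two elements are known undef, the
  // mask becomes <0, -1, 2, -1>; that is an identity mask for operand 0, so
  // the rewrite is deliberately skipped and other combines may remove the
  // shuffle instead.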
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG: {
    APInt SrcUndef, SrcZero;
    SDValue Src = Op.getOperand(0);
    unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
    APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
    if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    KnownZero = SrcZero.zextOrTrunc(NumElts);
    KnownUndef = SrcUndef.zextOrTrunc(NumElts);

    if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
        Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
        DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
      // aext - if we just need the bottom element then we can bitcast.
      return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
    }

    if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  }

  // TODO: There are more binop opcodes that could be handled here - MIN,
  // MAX, saturated math, etc.
  case ISD::OR:
  case ISD::XOR:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS & ZeroRHS;
    KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::ROTL:
  case ISD::ROTR: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt UndefRHS, ZeroRHS;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, UndefRHS, ZeroRHS, TLO,
                                   Depth + 1))
      return true;
    APInt UndefLHS, ZeroLHS;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, UndefLHS, ZeroLHS, TLO,
                                   Depth + 1))
      return true;

    KnownZero = ZeroLHS;
    KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::MUL:
  case ISD::AND: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    APInt SrcUndef, SrcZero;
    if (SimplifyDemandedVectorElts(Op1, DemandedElts, SrcUndef, SrcZero, TLO,
                                   Depth + 1))
      return true;
    if (SimplifyDemandedVectorElts(Op0, DemandedElts, KnownUndef, KnownZero,
                                   TLO, Depth + 1))
      return true;

    // If either side has a zero element, then the result element is zero, even
    // if the other is an UNDEF.
    // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
    // and then handle 'and' nodes with the rest of the binop opcodes.
    KnownZero |= SrcZero;
    KnownUndef &= SrcUndef;
    KnownUndef &= ~KnownZero;

    // Attempt to avoid multi-use ops if we don't need anything from them.
    // TODO - use KnownUndef to relax the demandedelts?
    if (!DemandedElts.isAllOnesValue())
      if (SimplifyDemandedVectorEltsBinOp(Op0, Op1))
        return true;
    break;
  }
  case ISD::TRUNCATE:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
    if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
                                   KnownZero, TLO, Depth + 1))
      return true;

    if (Op.getOpcode() == ISD::ZERO_EXTEND) {
      // zext(undef) upper bits are guaranteed to be zero.
      if (DemandedElts.isSubsetOf(KnownUndef))
        return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT));
      KnownUndef.clearAllBits();
    }
    break;
  default: {
    if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
      if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
                                                  KnownZero, TLO, Depth))
        return true;
    } else {
      KnownBits Known;
      APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
      if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
                               TLO, Depth, AssumeSingleUse))
        return true;
    }
    break;
  }
  }
  assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");

  // Constant fold all undef cases.
  // TODO: Handle zero cases as well.
  if (DemandedElts.isSubsetOf(KnownUndef))
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));

  return false;
}

/// Determine which of the bits specified in Mask are known to be either zero or
/// one and return them in the Known.
void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                   KnownBits &Known,
                                                   const APInt &DemandedElts,
                                                   const SelectionDAG &DAG,
                                                   unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  Known.resetAll();
}

void TargetLowering::computeKnownBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, KnownBits &Known,
    const APInt &DemandedElts, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  Known.resetAll();
}

void TargetLowering::computeKnownBitsForFrameIndex(
    const int FrameIdx, KnownBits &Known, const MachineFunction &MF) const {
  // The low bits are known zero if the pointer is aligned.
  Known.Zero.setLowBits(Log2(MF.getFrameInfo().getObjectAlign(FrameIdx)));
}

Align TargetLowering::computeKnownAlignForTargetInstr(
    GISelKnownBits &Analysis, Register R, const MachineRegisterInfo &MRI,
    unsigned Depth) const {
  return Align(1);
}

/// This method can be implemented by targets that want to expose additional
/// information about sign bits to the DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                         const APInt &,
                                                         const SelectionDAG &,
                                                         unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  return 1;
}

unsigned TargetLowering::computeNumSignBitsForTargetInstr(
    GISelKnownBits &Analysis, Register R, const APInt &DemandedElts,
    const MachineRegisterInfo &MRI, unsigned Depth) const {
  return 1;
}

bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
    SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
    TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedVectorElts if you don't know whether Op"
         " is a target node!");
  return false;
}

bool TargetLowering::SimplifyDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    KnownBits &Known, TargetLoweringOpt &TLO, unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use SimplifyDemandedBits if you don't know whether Op"
         " is a target node!");
  computeKnownBitsForTargetNode(Op, Known, DemandedElts, TLO.DAG, Depth);
  return false;
}

SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
      " is a target node!");
  return SDValue();
}

SDValue
TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
                                        SDValue N1, MutableArrayRef<int> Mask,
                                        SelectionDAG &DAG) const {
  bool LegalMask = isShuffleMaskLegal(Mask, VT);
  if (!LegalMask) {
    std::swap(N0, N1);
    ShuffleVectorSDNode::commuteMask(Mask);
    LegalMask = isShuffleMaskLegal(Mask, VT);
  }

  if (!LegalMask)
    return SDValue();

  return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
}

const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
  return nullptr;
}

bool TargetLowering::isGuaranteedNotToBeUndefOrPoisonForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    bool PoisonOnly, unsigned Depth) const {
  assert(
      (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
       Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
       Op.getOpcode() == ISD::INTRINSIC_VOID) &&
      "Should use isGuaranteedNotToBeUndefOrPoison if you don't know whether Op"
      " is a target node!");
  return false;
}

bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
                                                  const SelectionDAG &DAG,
                                                  bool SNaN,
                                                  unsigned Depth) const {
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use isKnownNeverNaN if you don't know whether Op"
         " is a target node!");
  return false;
}

// FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
// work with truncating build vectors and vectors with elements of less than
// 8 bits.
bool TargetLowering::isConstTrueVal(const SDNode *N) const {
  if (!N)
    return false;

  APInt CVal;
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    CVal = CN->getAPIntValue();
  } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
    auto *CN = BV->getConstantSplatNode();
    if (!CN)
      return false;

    // If this is a truncating build vector, truncate the splat value.
    // Otherwise, we may fail to match the expected values below.
    unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
    CVal = CN->getAPIntValue();
    if (BVEltWidth < CVal.getBitWidth())
      CVal = CVal.trunc(BVEltWidth);
  } else {
    return false;
  }

  switch (getBooleanContents(N->getValueType(0))) {
  case UndefinedBooleanContent:
    return CVal[0];
  case ZeroOrOneBooleanContent:
    return CVal.isOneValue();
  case ZeroOrNegativeOneBooleanContent:
    return CVal.isAllOnesValue();
  }

  llvm_unreachable("Invalid boolean contents");
}

bool TargetLowering::isConstFalseVal(const SDNode *N) const {
  if (!N)
    return false;

  const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
  if (!CN) {
    const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
    if (!BV)
      return false;

    // Only interested in constant splats, we don't care about undef
    // elements in identifying boolean constants and getConstantSplatNode
    // returns NULL if all ops are undef;
    CN = BV->getConstantSplatNode();
    if (!CN)
      return false;
  }

  if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
    return !CN->getAPIntValue()[0];

  return CN->isNullValue();
}

bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
                                       bool SExt) const {
  if (VT == MVT::i1)
    return N->isOne();

  TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
  switch (Cnt) {
  case TargetLowering::ZeroOrOneBooleanContent:
    // An extended value of 1 is always true, unless its original type is i1,
    // in which case it will be sign extended to -1.
    return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
  case TargetLowering::UndefinedBooleanContent:
  case TargetLowering::ZeroOrNegativeOneBooleanContent:
    return N->isAllOnesValue() && SExt;
  }
  llvm_unreachable("Unexpected enumeration.");
}

/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
                                         ISD::CondCode Cond, const SDLoc &DL,
                                         DAGCombinerInfo &DCI) const {
  // Match these patterns in any of their permutations:
  // (X & Y) == Y
  // (X & Y) != Y
  if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
    std::swap(N0, N1);

  EVT OpVT = N0.getValueType();
  if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
      (Cond != ISD::SETEQ && Cond != ISD::SETNE))
    return SDValue();

  SDValue X, Y;
  if (N0.getOperand(0) == N1) {
    X = N0.getOperand(1);
    Y = N0.getOperand(0);
  } else if (N0.getOperand(1) == N1) {
    X = N0.getOperand(0);
    Y = N0.getOperand(1);
  } else {
    return SDValue();
  }

  SelectionDAG &DAG = DCI.DAG;
  SDValue Zero = DAG.getConstant(0, DL, OpVT);
  if (DAG.isKnownToBeAPowerOfTwo(Y)) {
    // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
    // Note that where Y is variable and is known to have at most one bit set
    // (for example, if it is Z & 1) we cannot do this; the expressions are not
    // equivalent when Y == 0.
    assert(OpVT.isInteger());
    Cond = ISD::getSetCCInverse(Cond, OpVT);
    if (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(Cond, N0.getSimpleValueType()))
      return DAG.getSetCC(DL, VT, N0, Zero, Cond);
  } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
    // If the target supports an 'and-not' or 'and-complement' logic operation,
    // try to use that to make a comparison operation more efficient.
    // But don't do this transform if the mask is a single bit because there are
    // more efficient ways to deal with that case (for example, 'bt' on x86 or
    // 'rlwinm' on PPC).

    // Bail out if the compare operand that we want to turn into a zero is
    // already a zero (otherwise, infinite loop).
    auto *YConst = dyn_cast<ConstantSDNode>(Y);
    if (YConst && YConst->isNullValue())
      return SDValue();

    // Transform this into: ~X & Y == 0.
    SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
    SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
    return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
  }

  return SDValue();
}
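
// Worked example for the folds above (illustrative values only): with
// Y = (shl 1, %z), which is known to be a power of two, (X & Y) == Y
// becomes (X & Y) != 0. On targets with an and-not instruction (for
// example x86 BMI's ANDN), a variable mask Y instead gives
// (X & Y) == Y --> (~X & Y) == 0, so the RHS of the compare becomes a
// cheap zero instead of Y.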
/// There are multiple IR patterns that could be checking whether certain
/// truncation of a signed number would be lossy or not. The pattern which is
/// best at IR level, may not lower optimally. Thus, we want to unfold it.
/// We are looking for the following pattern: (KeptBits is a constant)
///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
/// KeptBits won't be bitwidth(x), that will be constant-folded to true/false.
/// KeptBits also can't be 1, that would have been folded to %x dstcond 0.
/// We will unfold it into the natural trunc+sext pattern:
///   ((%x << C) a>> C) dstcond %x
/// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
    EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  // We must be comparing with a constant.
  ConstantSDNode *C1;
  if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
    return SDValue();

  // N0 should be:  add %x, (1 << (KeptBits-1))
  if (N0->getOpcode() != ISD::ADD)
    return SDValue();

  // And we must be 'add'ing a constant.
  ConstantSDNode *C01;
  if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
    return SDValue();

  SDValue X = N0->getOperand(0);
  EVT XVT = X.getValueType();

  // Validate constants ...

  APInt I1 = C1->getAPIntValue();

  ISD::CondCode NewCond;
  if (Cond == ISD::CondCode::SETULT) {
    NewCond = ISD::CondCode::SETEQ;
  } else if (Cond == ISD::CondCode::SETULE) {
    NewCond = ISD::CondCode::SETEQ;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGT) {
    NewCond = ISD::CondCode::SETNE;
    // But need to 'canonicalize' the constant.
    I1 += 1;
  } else if (Cond == ISD::CondCode::SETUGE) {
    NewCond = ISD::CondCode::SETNE;
  } else {
    return SDValue();
  }

  APInt I01 = C01->getAPIntValue();

  auto checkConstants = [&I1, &I01]() -> bool {
    // Both of them must be power-of-two, and the constant from setcc is bigger.
    return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
  };

  if (checkConstants()) {
    // Great, e.g. got  icmp ult i16 (add i16 %x, 128), 256
  } else {
    // What if we invert constants? (and the target predicate)
    I1.negate();
    I01.negate();
    assert(XVT.isInteger());
    NewCond = getSetCCInverse(NewCond, XVT);
    if (!checkConstants())
      return SDValue();
    // Great, e.g. got  icmp uge i16 (add i16 %x, -128), -256
  }

  // They are power-of-two, so which bit is set?
  const unsigned KeptBits = I1.logBase2();
  const unsigned KeptBitsMinusOne = I01.logBase2();

  if (KeptBits != (KeptBitsMinusOne + 1))
    return SDValue();
  assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");

  // We don't want to do this in every single case.
  SelectionDAG &DAG = DCI.DAG;
  if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
          XVT, KeptBits))
    return SDValue();

  const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
  assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");

  // Unfold into:  ((%x << C) a>> C) cond %x
  // Where 'cond' will be either 'eq' or 'ne'.
  SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
  SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
  SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
  return T2;
}
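
// Worked example: "does i16 %x fit in i8?" typically reaches the DAG as
//   icmp ult i16 (add i16 %x, 128), 256        (KeptBits == 8)
// which the routine above unfolds, with C = 16 - 8 = 8, into
//   icmp eq i16 (ashr (shl %x, 8), 8), %x
// i.e. "sign-extending the kept byte reproduces %x".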
// (X & (C l>>/<< Y)) ==/!= 0  -->  ((X <</l>> Y) & C) ==/!= 0
SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
    EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
    DAGCombinerInfo &DCI, const SDLoc &DL) const {
  assert(isConstOrConstSplat(N1C) &&
         isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
         "Should be a comparison with 0.");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Valid only for [in]equality comparisons.");

  unsigned NewShiftOpcode;
  SDValue X, C, Y;

  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Look for '(C l>>/<< Y)'.
  auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
    // The shift should be one-use.
    if (!V.hasOneUse())
      return false;
    unsigned OldShiftOpcode = V.getOpcode();
    switch (OldShiftOpcode) {
    case ISD::SHL:
      NewShiftOpcode = ISD::SRL;
      break;
    case ISD::SRL:
      NewShiftOpcode = ISD::SHL;
      break;
    default:
      return false; // must be a logical shift.
    }
    // We should be shifting a constant.
    // FIXME: best to use isConstantOrConstantVector().
    C = V.getOperand(0);
    ConstantSDNode *CC =
        isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    if (!CC)
      return false;
    Y = V.getOperand(1);

    ConstantSDNode *XC =
        isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
    return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
  };

  // LHS of comparison should be an one-use 'and'.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  X = N0.getOperand(0);
  SDValue Mask = N0.getOperand(1);

  // 'and' is commutative!
  if (!Match(Mask)) {
    std::swap(X, Mask);
    if (!Match(Mask))
      return SDValue();
  }

  EVT VT = X.getValueType();

  // Produce:
  // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
  SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
  SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
  SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
  return T2;
}
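
// Worked example: for i32 operands,
//   (X & (0xFF << Y)) == 0  -->  ((X >> Y) & 0xFF) == 0
// Hoisting the constant out of the variable shift leaves the mask as a plain
// immediate AND, which the target hook queried above may judge cheaper than
// materializing the shifted mask.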
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
                                           ISD::CondCode Cond, const SDLoc &DL,
                                           DAGCombinerInfo &DCI) const {
  unsigned BOpcode = N0.getOpcode();
  assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
         "Unexpected binop");
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");

  // (X + Y) == X --> Y == 0
  // (X - Y) == X --> Y == 0
  // (X ^ Y) == X --> Y == 0
  SelectionDAG &DAG = DCI.DAG;
  EVT OpVT = N0.getValueType();
  SDValue X = N0.getOperand(0);
  SDValue Y = N0.getOperand(1);
  if (X == N1)
    return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);

  if (Y != N1)
    return SDValue();

  // (X + Y) == Y --> X == 0
  // (X ^ Y) == Y --> X == 0
  if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
    return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);

  // The shift would not be valid if the operands are boolean (i1).
  if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
    return SDValue();

  // (X - Y) == Y --> X == Y << 1
  EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
                                 !DCI.isBeforeLegalize());
  SDValue One = DAG.getConstant(1, DL, ShiftVT);
  SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
  if (!DCI.isCalledByLegalizer())
    DCI.AddToWorklist(YShl1.getNode());
  return DAG.getSetCC(DL, VT, X, YShl1, Cond);
}
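
// Worked example of the last fold: (X - Y) == Y becomes X == (Y << 1); e.g.
// with X = 6, Y = 3: 6 - 3 == 3 and 6 == (3 << 1). i1 operands are rejected
// above because a shift by one is out of range for a 1-bit type.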
static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
                                      SDValue N0, const APInt &C1,
                                      ISD::CondCode Cond, const SDLoc &dl,
                                      SelectionDAG &DAG) {
  // Look through truncs that don't change the value of a ctpop.
  // FIXME: Add vector support? Need to be careful with setcc result type below.
  SDValue CTPOP = N0;
  if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() && !VT.isVector() &&
      N0.getScalarValueSizeInBits() >
          Log2_32(N0.getOperand(0).getScalarValueSizeInBits()))
    CTPOP = N0.getOperand(0);

  if (CTPOP.getOpcode() != ISD::CTPOP || !CTPOP.hasOneUse())
    return SDValue();

  EVT CTVT = CTPOP.getValueType();
  SDValue CTOp = CTPOP.getOperand(0);

  // If this is a vector CTPOP, keep the CTPOP if it is legal.
  // TODO: Should we check if CTPOP is legal(or custom) for scalars?
  if (VT.isVector() && TLI.isOperationLegal(ISD::CTPOP, CTVT))
    return SDValue();

  // (ctpop x) u< 2 -> (x & x-1) == 0
  // (ctpop x) u> 1 -> (x & x-1) != 0
  if (Cond == ISD::SETULT || Cond == ISD::SETUGT) {
    unsigned CostLimit = TLI.getCustomCtpopCost(CTVT, Cond);
    if (C1.ugt(CostLimit + (Cond == ISD::SETULT)))
      return SDValue();
    if (C1 == 0 && (Cond == ISD::SETULT))
      return SDValue(); // This is handled elsewhere.

    unsigned Passes = C1.getLimitedValue() - (Cond == ISD::SETULT);

    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    SDValue Result = CTOp;
    for (unsigned i = 0; i < Passes; i++) {
      SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, Result, NegOne);
      Result = DAG.getNode(ISD::AND, dl, CTVT, Result, Add);
    }
    ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
    return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
  }

  // If ctpop is not supported, expand a power-of-2 comparison based on it.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
    // For scalars, keep CTPOP if it is legal or custom.
    if (!VT.isVector() && TLI.isOperationLegalOrCustom(ISD::CTPOP, CTVT))
      return SDValue();
    // This is based on X86's custom lowering for CTPOP which produces more
    // instructions than the expansion here.

    // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
    // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
    SDValue Zero = DAG.getConstant(0, dl, CTVT);
    SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
    assert(CTVT.isInteger());
    ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
    SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
    SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
    SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
    SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
    unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
    return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
  }

  return SDValue();
}
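
// Worked example: for i32 x, "(ctpop x) == 1" asks "is x a nonzero power of
// two?", and the expansion above answers it as
//   (x != 0) && ((x & (x - 1)) == 0)
// using the identity that x & (x - 1) clears the lowest set bit of x.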
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                      ISD::CondCode Cond, bool foldBooleans,
                                      DAGCombinerInfo &DCI,
                                      const SDLoc &dl) const {
  SelectionDAG &DAG = DCI.DAG;
  const DataLayout &Layout = DAG.getDataLayout();
  EVT OpVT = N0.getValueType();

  // Constant fold or commute setcc.
  if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
    return Fold;

  // Ensure that the constant occurs on the RHS and fold constant comparisons.
  // TODO: Handle non-splat vector constants. All undef causes trouble.
  // FIXME: We can't yet fold constant scalable vector splats, so avoid an
  // infinite loop here when we encounter one.
  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
  if (isConstOrConstSplat(N0) &&
      (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  // If we have a subtract with the same 2 non-constant operands as this setcc
  // -- but in reverse order -- then try to commute the operands of this setcc
  // to match. A matching pair of setcc (cmp) and sub may be combined into 1
  // instruction on some targets.
  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
      (DCI.isBeforeLegalizeOps() ||
       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
      DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
      !DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);

  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    // Optimize some CTPOP cases.
    if (SDValue V = simplifySetCCWithCTPOP(*this, VT, N0, C1, Cond, dl, DAG))
      return V;

    // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
    // equality comparison, then we're just comparing whether X itself is
    // zero.
    if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
        N0.getOperand(0).getOpcode() == ISD::CTLZ &&
        isPowerOf2_32(N0.getScalarValueSizeInBits())) {
      if (ConstantSDNode *ShAmt = isConstOrConstSplat(N0.getOperand(1))) {
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt->getAPIntValue() == Log2_32(N0.getScalarValueSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0  -> X != 0
            // (srl (ctlz x), 5) != 1  -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0  -> X == 0
            // (srl (ctlz x), 5) == 1  -> X == 0
            Cond = ISD::SETEQ;
          }
          SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
          return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0), Zero,
                              Cond);
        }
      }
    }
  }
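
  // Worked example for the ctlz fold above: with i32 x, ctlz returns 32
  // (binary 100000) only when x == 0, so bit 5 of the result -- that is,
  // (srl (ctlz x), 5) -- is exactly the "x is zero" predicate.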
  // FIXME: Support vectors.
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const APInt &C1 = N1C->getAPIntValue();

    // (zext x) == C --> x == (trunc C)
    // (sext x) == C --> x == (trunc C)
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        DCI.isBeforeLegalize() && N0->hasOneUse()) {
      unsigned MinBits = N0.getValueSizeInBits();
      SDValue PreExt;
      bool Signed = false;
      if (N0->getOpcode() == ISD::ZERO_EXTEND) {
        // ZExt
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (N0->getOpcode() == ISD::AND) {
        // DAGCombine turns costly ZExts into ANDs
        if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
          if ((C->getAPIntValue() + 1).isPowerOf2()) {
            MinBits = C->getAPIntValue().countTrailingOnes();
            PreExt = N0->getOperand(0);
          }
      } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
        // SExt
        Signed = true;
        MinBits = N0->getOperand(0).getValueSizeInBits();
        PreExt = N0->getOperand(0);
      } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
        // ZEXTLOAD / SEXTLOAD
        if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
          Signed = true;
          MinBits = LN0->getMemoryVT().getSizeInBits();
          PreExt = N0;
        }
      }

      // Figure out how many bits we need to preserve this constant.
      unsigned ReqdBits = Signed ?
        C1.getBitWidth() - C1.getNumSignBits() + 1 :
        C1.getActiveBits();

      // Make sure we're not losing bits from the constant.
      if (MinBits > 0 &&
          MinBits < C1.getBitWidth() &&
          MinBits >= ReqdBits) {
        EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
        if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
          // Will get folded away.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
          if (MinBits == 1 && C1 == 1)
            // Invert the condition.
            return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
          return DAG.getSetCC(dl, VT, Trunc, C, Cond);
        }

        // If truncating the setcc operands is not desirable, we can still
        // simplify the expression in some cases:
        // setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
        // setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, setne) -> setcc (x, y, inv(cc))
        // setcc (zext (setcc x, y, cc)), 1, seteq) -> setcc (x, y, cc)
        // setcc (sext (setcc x, y, cc)), -1, setne) -> setcc (x, y, inv(cc))
        // setcc (sext (setcc x, y, cc)), -1, seteq) -> setcc (x, y, cc)
        SDValue TopSetCC = N0->getOperand(0);
        unsigned N0Opc = N0->getOpcode();
        bool SExt = (N0Opc == ISD::SIGN_EXTEND);
        if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
            TopSetCC.getOpcode() == ISD::SETCC &&
            (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
            (isConstFalseVal(N1C) ||
             isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {

          bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
                         (!N1C->isNullValue() && Cond == ISD::SETNE);
          if (!Inverse)
            return TopSetCC;

          ISD::CondCode InvCond = ISD::getSetCCInverse(
              cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
              TopSetCC.getOperand(0).getValueType());
          return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
                              TopSetCC.getOperand(1),
                              InvCond);
        }
      }
    }
    // If the LHS is '(and load, const)', the RHS is 0, the test is for
    // equality or unsigned, and all 1 bits of the const are in the same
    // partial word, see if we can shorten the load.
    if (DCI.isBeforeLegalize() &&
        !ISD::isSignedIntSetCC(Cond) &&
        N0.getOpcode() == ISD::AND && C1 == 0 &&
        N0.getNode()->hasOneUse() &&
        isa<LoadSDNode>(N0.getOperand(0)) &&
        N0.getOperand(0).getNode()->hasOneUse() &&
        isa<ConstantSDNode>(N0.getOperand(1))) {
      LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
      APInt bestMask;
      unsigned bestWidth = 0, bestOffset = 0;
      if (Lod->isSimple() && Lod->isUnindexed()) {
        unsigned origWidth = N0.getValueSizeInBits();
        unsigned maskWidth = origWidth;
        // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
        // 8 bits, but have to be careful...
        if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
          origWidth = Lod->getMemoryVT().getSizeInBits();
        const APInt &Mask = N0.getConstantOperandAPInt(1);
        for (unsigned width = origWidth / 2; width >= 8; width /= 2) {
          APInt newMask = APInt::getLowBitsSet(maskWidth, width);
          for (unsigned offset = 0; offset < origWidth / width; offset++) {
            if (Mask.isSubsetOf(newMask)) {
              if (Layout.isLittleEndian())
                bestOffset = (uint64_t)offset * (width / 8);
              else
                bestOffset = (origWidth / width - offset - 1) * (width / 8);
              bestMask = Mask.lshr(offset * (width / 8) * 8);
              bestWidth = width;
              break;
            }
            newMask <<= width;
          }
        }
      }
      if (bestWidth) {
        EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
        if (newVT.isRound() &&
            shouldReduceLoadWidth(Lod, ISD::NON_EXTLOAD, newVT)) {
          SDValue Ptr = Lod->getBasePtr();
          if (bestOffset != 0)
            Ptr =
                DAG.getMemBasePlusOffset(Ptr, TypeSize::Fixed(bestOffset), dl);
          SDValue NewLoad =
              DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
                          Lod->getPointerInfo().getWithOffset(bestOffset),
                          Lod->getOriginalAlign());
          return DAG.getSetCC(dl, VT,
                              DAG.getNode(ISD::AND, dl, newVT, NewLoad,
                                          DAG.getConstant(bestMask.trunc(bestWidth),
                                                          dl, newVT)),
                              DAG.getConstant(0LL, dl, newVT), Cond);
        }
      }
    }
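
    // Worked example for the load-narrowing fold above: on a little-endian
    // target, "(and (load i32 %p), 0xFF00) != 0" only inspects byte 1 of the
    // loaded word, so it can become an 8-bit load from %p+1 masked with 0xFF,
    // provided shouldReduceLoadWidth agrees that the narrower load is a win.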
    // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
    if (N0.getOpcode() == ISD::ZERO_EXTEND) {
      unsigned InSize = N0.getOperand(0).getValueSizeInBits();

      // If the comparison constant has bits in the upper part, the
      // zero-extended value could never match.
      if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                              C1.getBitWidth() - InSize))) {
        switch (Cond) {
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETEQ:
          return DAG.getConstant(0, dl, VT);
        case ISD::SETULT:
        case ISD::SETULE:
        case ISD::SETNE:
          return DAG.getConstant(1, dl, VT);
        case ISD::SETGT:
        case ISD::SETGE:
          // True if the sign bit of C1 is set.
          return DAG.getConstant(C1.isNegative(), dl, VT);
        case ISD::SETLT:
        case ISD::SETLE:
          // True if the sign bit of C1 isn't set.
          return DAG.getConstant(C1.isNonNegative(), dl, VT);
        default:
          break;
        }
      }

      // Otherwise, we can perform the comparison with the low bits.
      switch (Cond) {
      case ISD::SETEQ:
      case ISD::SETNE:
      case ISD::SETUGT:
      case ISD::SETUGE:
      case ISD::SETULT:
      case ISD::SETULE: {
        EVT newVT = N0.getOperand(0).getValueType();
        if (DCI.isBeforeLegalizeOps() ||
            (isOperationLegal(ISD::SETCC, newVT) &&
             isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
          EVT NewSetCCVT = getSetCCResultType(Layout, *DAG.getContext(), newVT);
          SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);

          SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
                                          NewConst, Cond);
          return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
        }
        break;
      }
      default:
        break; // todo, be more careful with signed comparisons
      }
    } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
               !isSExtCheaperThanZExt(cast<VTSDNode>(N0.getOperand(1))->getVT(),
                                      OpVT)) {
      EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
      unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
      EVT ExtDstTy = N0.getValueType();
      unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

      // If the constant doesn't fit into the number of bits for the source of
      // the sign extension, it is impossible for both sides to be equal.
      if (C1.getMinSignedBits() > ExtSrcTyBits)
        return DAG.getBoolConstant(Cond == ISD::SETNE, dl, VT, OpVT);

      assert(ExtDstTy == N0.getOperand(0).getValueType() &&
             ExtDstTy != ExtSrcTy && "Unexpected types!");
      APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
      SDValue ZextOp = DAG.getNode(ISD::AND, dl, ExtDstTy, N0.getOperand(0),
                                   DAG.getConstant(Imm, dl, ExtDstTy));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(ZextOp.getNode());
      // Otherwise, make this a use of a zext.
      return DAG.getSetCC(dl, VT, ZextOp,
                          DAG.getConstant(C1 & Imm, dl, ExtDstTy), Cond);
    } else if ((N1C->isNullValue() || N1C->isOne()) &&
               (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
      if (N0.getOpcode() == ISD::SETCC &&
          isTypeLegal(VT) && VT.bitsLE(N0.getValueType()) &&
          (N0.getValueType() == MVT::i1 ||
           getBooleanContents(N0.getOperand(0).getValueType()) ==
               ZeroOrOneBooleanContent)) {
        bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
        if (TrueWhenTrue)
          return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
        // Invert the condition.
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        CC = ISD::getSetCCInverse(CC, N0.getOperand(0).getValueType());
        if (DCI.isBeforeLegalizeOps() ||
            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
      }

      if ((N0.getOpcode() == ISD::XOR ||
           (N0.getOpcode() == ISD::AND &&
            N0.getOperand(0).getOpcode() == ISD::XOR &&
            N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
          isOneConstant(N0.getOperand(1))) {
        // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
        // can only do this if the top bits are known zero.
        unsigned BitWidth = N0.getValueSizeInBits();
        if (DAG.MaskedValueIsZero(N0,
                                  APInt::getHighBitsSet(BitWidth,
                                                        BitWidth - 1))) {
          // Okay, get the un-inverted input value.
          SDValue Val;
          if (N0.getOpcode() == ISD::XOR) {
            Val = N0.getOperand(0);
          } else {
            assert(N0.getOpcode() == ISD::AND &&
                   N0.getOperand(0).getOpcode() == ISD::XOR);
            // ((X^1)&1)^1 -> X & 1
            Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
                              N0.getOperand(0).getOperand(0),
                              N0.getOperand(1));
          }

          return DAG.getSetCC(dl, VT, Val, N1,
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
      } else if (N1C->isOne()) {
        SDValue Op0 = N0;
        if (Op0.getOpcode() == ISD::TRUNCATE)
          Op0 = Op0.getOperand(0);

        if ((Op0.getOpcode() == ISD::XOR) &&
            Op0.getOperand(0).getOpcode() == ISD::SETCC &&
            Op0.getOperand(1).getOpcode() == ISD::SETCC) {
          SDValue XorLHS = Op0.getOperand(0);
          SDValue XorRHS = Op0.getOperand(1);
          // Ensure that the input setccs return an i1 type or 0/1 value.
          if (Op0.getValueType() == MVT::i1 ||
              (getBooleanContents(XorLHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent &&
               getBooleanContents(XorRHS.getOperand(0).getValueType()) ==
                   ZeroOrOneBooleanContent)) {
            // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
            Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
            return DAG.getSetCC(dl, VT, XorLHS, XorRHS, Cond);
          }
        }
        if (Op0.getOpcode() == ISD::AND && isOneConstant(Op0.getOperand(1))) {
          // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
          if (Op0.getValueType().bitsGT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::TRUNCATE, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));
          else if (Op0.getValueType().bitsLT(VT))
            Op0 = DAG.getNode(ISD::AND, dl, VT,
                              DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                          Op0.getOperand(0)),
                              DAG.getConstant(1, dl, VT));

          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
        }
        if (Op0.getOpcode() == ISD::AssertZext &&
            cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
          return DAG.getSetCC(dl, VT, Op0,
                              DAG.getConstant(0, dl, Op0.getValueType()),
                              Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
      }
    }
    // Given:
    //   icmp eq/ne (urem %x, %y), 0
    // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
    //   icmp eq/ne %x, 0
    if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
      KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
      KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
      if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
        return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
    }

    if (SDValue V =
            optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
      return V;
  }
  // These simplifications apply to splat vectors as well.
  // TODO: Handle more splat vector cases.
  if (auto *N1C = isConstOrConstSplat(N1)) {
    const APInt &C1 = N1C->getAPIntValue();

    APInt MinVal, MaxVal;
    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
    if (ISD::isSignedIntSetCC(Cond)) {
      MinVal = APInt::getSignedMinValue(OperandBitSize);
      MaxVal = APInt::getSignedMaxValue(OperandBitSize);
    } else {
      MinVal = APInt::getMinValue(OperandBitSize);
      MaxVal = APInt::getMaxValue(OperandBitSize);
    }

    // Canonicalize GE/LE comparisons to use GT/LT comparisons.
    if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
      // X >= MIN --> true
      if (C1 == MinVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      if (!VT.isVector()) { // TODO: Support this for vectors.
        // X >= C0 --> X > (C0 - 1)
        APInt C = C1 - 1;
        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
      // X <= MAX --> true
      if (C1 == MaxVal)
        return DAG.getBoolConstant(true, dl, VT, OpVT);

      // X <= C0 --> X < (C0 + 1)
      if (!VT.isVector()) { // TODO: Support this for vectors.
        APInt C = C1 + 1;
        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
        if ((DCI.isBeforeLegalizeOps() ||
             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
            (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
                                  isLegalICmpImmediate(C.getSExtValue())))) {
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(C, dl, N1.getValueType()),
                              NewCC);
        }
      }
    }

    if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
      if (C1 == MinVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setlt X, Max --> setne X, Max
        if (C1 == MaxVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setult X, 1, turn it into seteq X, 0
        if (C1 == MinVal + 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MinVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }

    if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
      if (C1 == MaxVal)
        return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false

      // TODO: Support this for vectors after legalize ops.
      if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
        // Canonicalize setgt X, Min --> setne X, Min
        if (C1 == MinVal)
          return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);

        // If we have setugt X, Max-1, turn it into seteq X, Max
        if (C1 == MaxVal - 1)
          return DAG.getSetCC(dl, VT, N0,
                              DAG.getConstant(MaxVal, dl, N0.getValueType()),
                              ISD::SETEQ);
      }
    }
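
    // Worked examples for the canonicalizations above: "X >=u 16" becomes
    // "X >u 15", "X <u 1" becomes "X == 0", and "X >u UINT_MAX-1" becomes
    // "X == UINT_MAX", trading range checks for the cheaper strict or
    // equality forms when the new immediate is legal for the target.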
    if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
      // (X & (C l>>/<< Y)) ==/!= 0  --> ((X <</l>> Y) & C) ==/!= 0
      if (C1.isNullValue())
        if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
                VT, N0, N1, Cond, DCI, dl))
          return CC;

      // For all/any comparisons, replace or(x,shl(y,bw/2)) with and/or(x,y).
      // For example, when high 32-bits of i64 X are known clear:
      // all bits clear: (X | (Y<<32)) == 0 --> (X | Y) == 0
      // all bits set:   (X | (Y<<32)) == -1 --> (X & Y) == -1
      bool CmpZero = N1C->getAPIntValue().isNullValue();
      bool CmpNegOne = N1C->getAPIntValue().isAllOnesValue();
      if ((CmpZero || CmpNegOne) && N0.hasOneUse()) {
        // Match or(lo,shl(hi,bw/2)) pattern.
        auto IsConcat = [&](SDValue V, SDValue &Lo, SDValue &Hi) {
          unsigned EltBits = V.getScalarValueSizeInBits();
          if (V.getOpcode() != ISD::OR || (EltBits % 2) != 0)
            return false;
          SDValue LHS = V.getOperand(0);
          SDValue RHS = V.getOperand(1);
          APInt HiBits = APInt::getHighBitsSet(EltBits, EltBits / 2);
          // Unshifted element must have zero upperbits.
          if (RHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(RHS.getOperand(1)) &&
              RHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(LHS, HiBits)) {
            Lo = LHS;
            Hi = RHS.getOperand(0);
            return true;
          }
          if (LHS.getOpcode() == ISD::SHL &&
              isa<ConstantSDNode>(LHS.getOperand(1)) &&
              LHS.getConstantOperandAPInt(1) == (EltBits / 2) &&
              DAG.MaskedValueIsZero(RHS, HiBits)) {
            Lo = RHS;
            Hi = LHS.getOperand(0);
            return true;
          }
          return false;
        };

        auto MergeConcat = [&](SDValue Lo, SDValue Hi) {
          unsigned EltBits = N0.getScalarValueSizeInBits();
          unsigned HalfBits = EltBits / 2;
          APInt HiBits = APInt::getHighBitsSet(EltBits, HalfBits);
          SDValue LoBits = DAG.getConstant(~HiBits, dl, OpVT);
          SDValue HiMask = DAG.getNode(ISD::AND, dl, OpVT, Hi, LoBits);
          SDValue NewN0 =
              DAG.getNode(CmpZero ? ISD::OR : ISD::AND, dl, OpVT, Lo, HiMask);
          SDValue NewN1 = CmpZero ? DAG.getConstant(0, dl, OpVT) : LoBits;
          return DAG.getSetCC(dl, VT, NewN0, NewN1, Cond);
        };

        SDValue Lo, Hi;
        if (IsConcat(N0, Lo, Hi))
          return MergeConcat(Lo, Hi);

        if (N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR) {
          SDValue Lo0, Lo1, Hi0, Hi1;
          if (IsConcat(N0.getOperand(0), Lo0, Hi0) &&
              IsConcat(N0.getOperand(1), Lo1, Hi1)) {
            return MergeConcat(DAG.getNode(N0.getOpcode(), dl, OpVT, Lo0, Lo1),
                               DAG.getNode(N0.getOpcode(), dl, OpVT, Hi0, Hi1));
          }
        }
      }
    }
    // If we have "setcc X, C0", check to see if we can shrink the immediate
    // by changing cc.
    // TODO: Support this for vectors after legalize ops.
    if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
      // SETUGT X, SINTMAX  -> SETLT X, 0
      // SETUGE X, SINTMIN  -> SETLT X, 0
      if ((Cond == ISD::SETUGT && C1.isMaxSignedValue()) ||
          (Cond == ISD::SETUGE && C1.isMinSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getConstant(0, dl, N1.getValueType()),
                            ISD::SETLT);

      // SETULT X, SINTMIN  -> SETGT X, -1
      // SETULE X, SINTMAX  -> SETGT X, -1
      if ((Cond == ISD::SETULT && C1.isMinSignedValue()) ||
          (Cond == ISD::SETULE && C1.isMaxSignedValue()))
        return DAG.getSetCC(dl, VT, N0,
                            DAG.getAllOnesConstant(dl, N1.getValueType()),
                            ISD::SETGT);
    }
  }
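
  // Note on the immediate-shrinking folds above: for i32, "X >u 0x7FFFFFFF"
  // holds exactly when the sign bit of X is set, i.e. "X <s 0"; the rewritten
  // compares use 0 or -1, which every target can materialize cheaply.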
  // Back to non-vector simplifications.
  // TODO: Can we do these for vector splats?
  if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    const APInt &C1 = N1C->getAPIntValue();
    EVT ShValTy = N0.getValueType();

    // Fold bit comparisons when we can. This will result in an
    // incorrect value when boolean false is negative one, unless
    // the bitsize is 1 in which case the false value is the same
    // in practice regardless of the representation.
    if ((VT.getSizeInBits() == 1 ||
         getBooleanContents(N0.getValueType()) == ZeroOrOneBooleanContent) &&
        (Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
        (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
        N0.getOpcode() == ISD::AND) {
      if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
        EVT ShiftTy =
            getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
        if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
          // Perform the xform if the AND RHS is a single bit.
          unsigned ShCt = AndRHS->getAPIntValue().logBase2();
          if (AndRHS->getAPIntValue().isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
          // (X & 8) == 8 --> (X & 8) >> 3
          // Perform the xform if C1 is a single bit.
          unsigned ShCt = C1.logBase2();
          if (C1.isPowerOf2() &&
              !TLI.shouldAvoidTransformToShift(ShValTy, ShCt)) {
            return DAG.getNode(ISD::TRUNCATE, dl, VT,
                               DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                           DAG.getConstant(ShCt, dl, ShiftTy)));
          }
        }
      }
    }
    if (C1.getMinSignedBits() <= 64 &&
        !isLegalICmpImmediate(C1.getSExtValue())) {
      EVT ShiftTy = getShiftAmountTy(ShValTy, Layout, !DCI.isBeforeLegalize());
      // (X & -256) == 256 -> (X >> 8) == 1
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
        if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          const APInt &AndRHSC = AndRHS->getAPIntValue();
          if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
            unsigned ShiftBits = AndRHSC.countTrailingZeros();
            if (!TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
              SDValue Shift =
                  DAG.getNode(ISD::SRL, dl, ShValTy, N0.getOperand(0),
                              DAG.getConstant(ShiftBits, dl, ShiftTy));
              SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, ShValTy);
              return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
            }
          }
        }
      } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
                 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
        bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
        // X <  0x100000000 -> (X >> 32) <  1
        // X >= 0x100000000 -> (X >> 32) >= 1
        // X <= 0x0ffffffff -> (X >> 32) <  1
        // X >  0x0ffffffff -> (X >> 32) >= 1
        unsigned ShiftBits;
        APInt NewC = C1;
        ISD::CondCode NewCond = Cond;
        if (AdjOne) {
          ShiftBits = C1.countTrailingOnes();
          NewC = NewC + 1;
          NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
        } else {
          ShiftBits = C1.countTrailingZeros();
        }
        NewC.lshrInPlace(ShiftBits);
        if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
            isLegalICmpImmediate(NewC.getSExtValue()) &&
            !TLI.shouldAvoidTransformToShift(ShValTy, ShiftBits)) {
          SDValue Shift = DAG.getNode(ISD::SRL, dl, ShValTy, N0,
                                      DAG.getConstant(ShiftBits, dl, ShiftTy));
          SDValue CmpRHS = DAG.getConstant(NewC, dl, ShValTy);
          return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
        }
      }
    }
  }
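
  // Worked example for the unsigned-range folds above: on a target where
  // 0x100000000 is not a legal compare immediate,
  //   X <u 0x100000000  -->  (X >> 32) <u 1
  // replaces the oversized constant with 1, which fits any immediate field.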
  if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
    auto *CFP = cast<ConstantFPSDNode>(N1);
    assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-nan is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(dl, VT, N0, N0, Cond);

    // setcc (fneg x), C -> setcc swap(pred) x, -C
    if (N0.getOpcode() == ISD::FNEG) {
      ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
        SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
        return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
      }
    }

    // If the condition is not legal, see if we can find an equivalent one
    // which is legal.
    if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
      // If the comparison was an awkward floating-point == or != and one of
      // the comparison operands is infinity or negative infinity, convert the
      // condition to a less-awkward <= or >=.
      if (CFP->getValueAPF().isInfinity()) {
        bool IsNegInf = CFP->getValueAPF().isNegative();
        ISD::CondCode NewCond = ISD::SETCC_INVALID;
        switch (Cond) {
        case ISD::SETOEQ: NewCond = IsNegInf ? ISD::SETOLE : ISD::SETOGE; break;
        case ISD::SETUEQ: NewCond = IsNegInf ? ISD::SETULE : ISD::SETUGE; break;
        case ISD::SETUNE: NewCond = IsNegInf ? ISD::SETUGT : ISD::SETULT; break;
        case ISD::SETONE: NewCond = IsNegInf ? ISD::SETOGT : ISD::SETOLT; break;
        default: break;
        }
        if (NewCond != ISD::SETCC_INVALID &&
            isCondCodeLegal(NewCond, N0.getSimpleValueType()))
          return DAG.getSetCC(dl, VT, N0, N1, NewCond);
      }
    }
  }
  if (N0 == N1) {
    // The sext(setcc()) => setcc() optimization relies on the appropriate
    // constant being emitted.
    assert(!N0.getValueType().isInteger() &&
           "Integer types should be handled by FoldSetCC");

    bool EqTrue = ISD::isTrueWhenEqual(Cond);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2) // FP operators that are undefined on NaNs.
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    if (UOF == unsigned(EqTrue))
      return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond &&
        (DCI.isBeforeLegalizeOps() ||
         isCondCodeLegal(NewCond, N0.getSimpleValueType())))
      return DAG.getSetCC(dl, VT, N0, N1, NewCond);
  }
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
                                Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
                                Cond);
        }
      }

      // If RHS is a legal immediate value for a compare instruction, we need
      // to be careful about increasing register pressure needlessly.
      bool LegalRHSImm = false;

      if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
            return DAG.getSetCC(dl, VT, N0.getOperand(0),
                                DAG.getConstant(RHSC->getAPIntValue() -
                                                    LHSR->getAPIntValue(),
                                                dl, N0.getValueType()),
                                Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return
                DAG.getSetCC(dl, VT, N0.getOperand(0),
                             DAG.getConstant(LHSR->getAPIntValue() ^
                                                 RHSC->getAPIntValue(),
                                             dl, N0.getValueType()),
                             Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
            return
              DAG.getSetCC(dl, VT, N0.getOperand(1),
                           DAG.getConstant(SUBC->getAPIntValue() -
                                               RHSC->getAPIntValue(),
                                           dl, N0.getValueType()),
                           Cond);
          }
        }

        // Could RHSC fold directly into a compare?
        if (RHSC->getValueType(0).getSizeInBits() <= 64)
          LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
      }

      // (X+Y) == X --> Y == 0 and similar folds.
      // Don't do this if X is an immediate that can fold into a cmp
      // instruction and X+Y has other uses. It could be an induction variable
      // chain, and the transform would increase register pressure.
      if (!LegalRHSImm || N0.hasOneUse())
        if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
          return V;
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR)
      if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
        return V;

    if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
      return V;
  }
  // Fold remainder of division by a constant.
  if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
      N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
    AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();

    // When division is cheap or optimizing for minimum size,
    // fall through to DIVREM creation by skipping this fold.
    if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttr(Attribute::MinSize)) {
      if (N0.getOpcode() == ISD::UREM) {
        if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      } else if (N0.getOpcode() == ISD::SREM) {
        if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
          return Folded;
      }
    }
  }
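
  // For example, "(x %u 3) == 0" can be rewritten by buildUREMEqFold without
  // any division, by multiplying x by the multiplicative inverse of 3 modulo
  // 2^bitwidth and range-checking the product; see those helpers' comments
  // for the exact derivation.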
  // Fold away ALL boolean setcc's.
  if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
    SDValue Temp;
    switch (Cond) {
    default: llvm_unreachable("Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -->  ~(X^Y)
      Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      N0 = DAG.getNOT(dl, Temp, OpVT);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETNE:  // X != Y  -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y  -->  X == 0 & Y == 1  -->  ~X & Y
    case ISD::SETULT: // X <u Y  -->  X == 0 & Y == 1  -->  ~X & Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETLT:  // X <s Y  -->  X == 1 & Y == 0  -->  ~Y & X
    case ISD::SETUGT: // X >u Y  -->  X == 1 & Y == 0  -->  ~Y & X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETULE: // X <=u Y  -->  X == 0 | Y == 1  -->  ~X | Y
    case ISD::SETGE:  // X >=s Y  -->  X == 0 | Y == 1  -->  ~X | Y
      Temp = DAG.getNOT(dl, N0, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.getNode());
      break;
    case ISD::SETUGE: // X >=u Y  -->  X == 1 | Y == 0  -->  ~Y | X
    case ISD::SETLE:  // X <=s Y  -->  X == 1 | Y == 0  -->  ~Y | X
      Temp = DAG.getNOT(dl, N1, OpVT);
      N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
      break;
    }
    if (VT.getScalarType() != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.getNode());
      // FIXME: If running after legalize, we probably can't do this.
      ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
      N0 = DAG.getNode(ExtendCode, dl, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDValue();
}
/// Returns true (and the GlobalValue and the offset) if the node is a
/// GlobalAddress + offset.
bool TargetLowering::isGAPlusOffset(SDNode *WN, const GlobalValue *&GA,
                                    int64_t &Offset) const {

  SDNode *N = unwrapAddress(SDValue(WN, 0)).getNode();

  if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDValue N1 = N->getOperand(0);
    SDValue N2 = N->getOperand(1);
    if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
        Offset += V->getSExtValue();
        return true;
      }
    } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
      if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
        Offset += V->getSExtValue();
        return true;
      }
    }
  }

  return false;
}

SDValue TargetLowering::PerformDAGCombine(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDValue();
}
//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//

TargetLowering::ConstraintType
TargetLowering::getConstraintType(StringRef Constraint) const {
  unsigned S = Constraint.size();

  if (S == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'r':
      return C_RegisterClass;
    case 'm': // memory
    case 'o': // offsetable
    case 'V': // not offsetable
      return C_Memory;
    case 'n': // Simple Integer
    case 'E': // Floating Point Constant
    case 'F': // Floating Point Constant
      return C_Immediate;
    case 'i': // Simple Integer or Relocatable Constant
    case 's': // Relocatable Constant
    case 'p': // Address.
    case 'X': // Allow ANY value.
    case 'I': // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
    case '<':
    case '>':
      return C_Other;
    }
  }

  if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
    if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
      return C_Memory;
    return C_Register;
  }
  return C_Unknown;
}
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f"; // works for many targets
  return nullptr;
}

SDValue TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  return SDValue();
}
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {

  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'X': // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock ||
        Op.getOpcode() == ISD::TargetBlockAddress) {
      Ops.push_back(Op);
      return;
    }
    LLVM_FALLTHROUGH;
  case 'i': // Simple Integer or Relocatable Constant
  case 'n': // Simple Integer
  case 's': { // Relocatable Constant

    GlobalAddressSDNode *GA;
    ConstantSDNode *C;
    BlockAddressSDNode *BA;
    uint64_t Offset = 0;

    // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
    // etc., since getelementpointer is variadic. We can't use
    // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
    // while in this case the GA may be furthest from the root node which is
    // likely an ISD::ADD.
    while (1) {
      if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
                                                 GA->getValueType(0),
                                                 Offset + GA->getOffset()));
        return;
      }
      if ((C = dyn_cast<ConstantSDNode>(Op)) && ConstraintLetter != 's') {
        // gcc prints these as sign extended.  Sign extend value to 64 bits
        // now; without this it would get ZExt'd later in
        // ScheduleDAGSDNodes::EmitNode, which is very generic.
        bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
        BooleanContent BCont = getBooleanContents(MVT::i64);
        ISD::NodeType ExtOpc =
            IsBool ? getExtendForContent(BCont) : ISD::SIGN_EXTEND;
        int64_t ExtVal =
            ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue() : C->getSExtValue();
        Ops.push_back(
            DAG.getTargetConstant(Offset + ExtVal, SDLoc(C), MVT::i64));
        return;
      }
      if ((BA = dyn_cast<BlockAddressSDNode>(Op)) && ConstraintLetter != 'n') {
        Ops.push_back(DAG.getTargetBlockAddress(
            BA->getBlockAddress(), BA->getValueType(0),
            Offset + BA->getOffset(), BA->getTargetFlags()));
        return;
      }
      const unsigned OpCode = Op.getOpcode();
      if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
        if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
          Op = Op.getOperand(1);
        // Subtraction is not commutative.
        else if (OpCode == ISD::ADD &&
                 (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
          Op = Op.getOperand(0);
        else
          return;
        Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
        continue;
      }
      return;
    }
    break;
  }
  }
}
std::pair<unsigned, const TargetRegisterClass *>
TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
                                             StringRef Constraint,
                                             MVT VT) const {
  if (Constraint.empty() || Constraint[0] != '{')
    return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
  assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);

  std::pair<unsigned, const TargetRegisterClass *> R =
      std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));

  // Figure out which register class contains this reg.
  for (const TargetRegisterClass *RC : RI->regclasses()) {
    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    if (!isLegalRC(*RI, *RC))
      continue;

    for (const MCPhysReg &PR : *RC) {
      if (RegName.equals_insensitive(RI->getRegAsmName(PR))) {
        std::pair<unsigned, const TargetRegisterClass *> S =
            std::make_pair(PR, RC);

        // If this register class has the requested value type, return it,
        // otherwise keep searching and return the first class found
        // if no other is found which explicitly has the requested type.
        if (RI->isTypeLegalForClass(*RC, VT))
          return S;
        if (!R.second)
          R = S;
      }
    }
  }

  return R;
}
//===----------------------------------------------------------------------===//
// Constraint Selection.
//===----------------------------------------------------------------------===//

/// Return true if this is an input operand that is a matching constraint
/// like "4".
bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
}

/// If this is an input matching constraint, this method returns the output
/// operand it matches.
unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
  assert(!ConstraintCode.empty() && "No known constraint!");
  return atoi(ConstraintCode.c_str());
}
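
// Example: for the GCC-style inline asm "asm('mov %1, %0' : '=r'(a) : '0'(b))",
// the input operand's constraint code is "0", so isMatchingInputConstraint()
// returns true and getMatchedOperand() returns 0, tying the input to output
// operand 0 (both must end up in the same register).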
/// Split up the constraint string from the inline assembly value into the
/// specific constraints and their prefixes, and also tie in the associated
/// operand values.
/// If this returns an empty vector, and if the constraint string itself
/// isn't empty, there was an error parsing.
TargetLowering::AsmOperandInfoVector
TargetLowering::ParseConstraints(const DataLayout &DL,
                                 const TargetRegisterInfo *TRI,
                                 const CallBase &Call) const {
  /// Information about all of the constraints.
  AsmOperandInfoVector ConstraintOperands;
  const InlineAsm *IA = cast<InlineAsm>(Call.getCalledOperand());
  unsigned maCount = 0; // Largest number of multiple alternative constraints.

  // Do a prepass over the constraints, canonicalizing them, and building up the
  // ConstraintOperands list.
  unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
  unsigned ResNo = 0; // ResNo - The result number of the next output.

  for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
    ConstraintOperands.emplace_back(std::move(CI));
    AsmOperandInfo &OpInfo = ConstraintOperands.back();

    // Update multiple alternative constraint count.
    if (OpInfo.multipleAlternatives.size() > maCount)
      maCount = OpInfo.multipleAlternatives.size();

    OpInfo.ConstraintVT = MVT::Other;

    // Compute the value type for each operand.
    switch (OpInfo.Type) {
    case InlineAsm::isOutput:
      // Indirect outputs just consume an argument.
      if (OpInfo.isIndirect) {
        OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
        break;
      }

      // The return value of the call is this value.  As such, there is no
      // corresponding argument.
      assert(!Call.getType()->isVoidTy() && "Bad inline asm!");
      if (StructType *STy = dyn_cast<StructType>(Call.getType())) {
        OpInfo.ConstraintVT =
            getSimpleValueType(DL, STy->getElementType(ResNo));
      } else {
        assert(ResNo == 0 && "Asm only has one result!");
        OpInfo.ConstraintVT =
            getAsmOperandValueType(DL, Call.getType()).getSimpleVT();
      }
      ++ResNo;
      break;
    case InlineAsm::isInput:
      OpInfo.CallOperandVal = Call.getArgOperand(ArgNo++);
      break;
    case InlineAsm::isClobber:
      // Nothing to do.
      break;
    }

    if (OpInfo.CallOperandVal) {
      llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
      if (OpInfo.isIndirect) {
        llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
        if (!PtrTy)
          report_fatal_error("Indirect operand for inline asm not a pointer!");
        OpTy = PtrTy->getElementType();
      }

      // Look for vector wrapped in a struct. e.g. { <16 x i8> }.
      if (StructType *STy = dyn_cast<StructType>(OpTy))
        if (STy->getNumElements() == 1)
          OpTy = STy->getElementType(0);

      // If OpTy is not a single value, it may be a struct/union that we
      // can tile with integers.
      if (!OpTy->isSingleValueType() && OpTy->isSized()) {
        unsigned BitSize = DL.getTypeSizeInBits(OpTy);
        switch (BitSize) {
        default: break;
        case 1:
        case 8:
        case 16:
        case 32:
        case 64:
        case 128:
          OpInfo.ConstraintVT =
              MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
          break;
        }
      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
        unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
      } else {
        OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
      }
    }
  }

  // If we have multiple alternative constraints, select the best alternative.
  if (!ConstraintOperands.empty()) {
    if (maCount) {
      unsigned bestMAIndex = 0;
      int bestWeight = -1;
      // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
      int weight = -1;
      unsigned maIndex;
      // Compute the sums of the weights for each alternative, keeping track
      // of the best (highest weight) one so far.
      for (maIndex = 0; maIndex < maCount; ++maIndex) {
        int weightSum = 0;
        for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
             cIndex != eIndex; ++cIndex) {
          AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
          if (OpInfo.Type == InlineAsm::isClobber)
            continue;

          // If this is an output operand with a matching input operand,
          // look up the matching input. If their types mismatch, e.g. one
          // is an integer, the other is floating point, or their sizes are
          // different, flag it as an maCantMatch.
          if (OpInfo.hasMatchingInput()) {
            AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
            if (OpInfo.ConstraintVT != Input.ConstraintVT) {
              if ((OpInfo.ConstraintVT.isInteger() !=
                   Input.ConstraintVT.isInteger()) ||
                  (OpInfo.ConstraintVT.getSizeInBits() !=
                   Input.ConstraintVT.getSizeInBits())) {
                weightSum = -1; // Can't match.
                break;
              }
            }
          }
          weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
          if (weight == -1) {
            weightSum = -1;
            break;
          }
          weightSum += weight;
        }
        // Update best.
        if (weightSum > bestWeight) {
          bestWeight = weightSum;
          bestMAIndex = maIndex;
        }
      }

      // Now select chosen alternative in each constraint.
      for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
           cIndex != eIndex; ++cIndex) {
        AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
        if (cInfo.Type == InlineAsm::isClobber)
          continue;
        cInfo.selectAlternative(bestMAIndex);
      }
    }
  }

  // Check and hook up tied operands, choose constraint code to use.
  for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
       cIndex != eIndex; ++cIndex) {
    AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];

    // If this is an output operand with a matching input operand, look up the
    // matching input. If their types mismatch, e.g. one is an integer, the
    // other is floating point, or their sizes are different, flag it as an
    // error.
    if (OpInfo.hasMatchingInput()) {
      AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];

      if (OpInfo.ConstraintVT != Input.ConstraintVT) {
        std::pair<unsigned, const TargetRegisterClass *> MatchRC =
            getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
                                         OpInfo.ConstraintVT);
        std::pair<unsigned, const TargetRegisterClass *> InputRC =
            getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
                                         Input.ConstraintVT);
        if ((OpInfo.ConstraintVT.isInteger() !=
             Input.ConstraintVT.isInteger()) ||
            (MatchRC.second != InputRC.second)) {
          report_fatal_error("Unsupported asm: input constraint"
                             " with a matching output constraint of"
                             " incompatible type!");
        }
      }
    }
  }

  return ConstraintOperands;
}
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  case TargetLowering::C_Immediate:
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
  llvm_unreachable("Invalid constraint type");
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getMultipleConstraintMatchWeight(
    AsmOperandInfo &info, int maIndex) const {
  InlineAsm::ConstraintCodeVector *rCodes;
  if (maIndex >= (int)info.multipleAlternatives.size())
    rCodes = &info.Codes;
  else
    rCodes = &info.multipleAlternatives[maIndex].Codes;
  ConstraintWeight BestWeight = CW_Invalid;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
    ConstraintWeight weight =
        getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
    if (weight > BestWeight)
      BestWeight = weight;
  }

  return BestWeight;
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  case 'i': // immediate integer.
  case 'n': // immediate integer with a known value.
    if (isa<ConstantInt>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 's': // non-explicit integral immediate.
    if (isa<GlobalValue>(CallOperandVal))
      weight = CW_Constant;
    break;
  case 'E': // immediate float if host format.
  case 'F': // immediate float.
    if (isa<ConstantFP>(CallOperandVal))
      weight = CW_Constant;
    break;
  case '<': // memory operand with autodecrement.
  case '>': // memory operand with autoincrement.
  case 'm': // memory operand.
  case 'o': // offsettable memory operand
  case 'V': // non-offsettable memory operand
    weight = CW_Memory;
    break;
  case 'r': // general register.
  case 'g': // general register, memory operand or immediate integer.
            // note: Clang converts "g" to "imr".
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_Register;
    break;
  case 'X': // any operand.
  default:
    weight = CW_Default;
    break;
  }
  return weight;
}
/// If there are multiple different constraints that we could pick for this
/// operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
///    Other         -> immediates and magic values
///    Register      -> one specific register
///    RegisterClass -> a group of regs
///    Memory        -> memory
/// Ideally, we would pick the most specific constraint possible: if we have
/// something that fits into a register, we would pick it. The problem here
/// is that if we have something that could either be in a register or in
/// memory that use of the register could cause selection of *other*
/// operands to fail: they might only succeed if we pick memory. Because of
/// this the heuristic we use is:
///
///  1) If there is an 'other' constraint, and if the operand is valid for
///     that constraint, use it. This makes us take advantage of 'i'
///     constraints when available.
///  2) Otherwise, pick the most general constraint present. This prefers
///     'm' over 'r', for example.
static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
                             const TargetLowering &TLI,
                             SDValue Op, SelectionDAG *DAG) {
  assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
  unsigned BestIdx = 0;
  TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
  int BestGenerality = -1;

  // Loop over the options, keeping track of the most general one.
  for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
    TargetLowering::ConstraintType CType =
        TLI.getConstraintType(OpInfo.Codes[i]);

    // Indirect 'other' or 'immediate' constraints are not allowed.
    if (OpInfo.isIndirect && !(CType == TargetLowering::C_Memory ||
                               CType == TargetLowering::C_Register ||
                               CType == TargetLowering::C_RegisterClass))
      continue;

    // If this is an 'other' or 'immediate' constraint, see if the operand is
    // valid for it. For example, on X86 we might have an 'rI' constraint. If
    // the operand is an integer in the range [0..31] we want to use I (saving a
    // load of a register), otherwise we must use 'r'.
    if ((CType == TargetLowering::C_Other ||
         CType == TargetLowering::C_Immediate) && Op.getNode()) {
      assert(OpInfo.Codes[i].size() == 1 &&
             "Unhandled multi-letter 'other' constraint");
      std::vector<SDValue> ResultOps;
      TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
                                       ResultOps, *DAG);
      if (!ResultOps.empty()) {
        BestType = CType;
        BestIdx = i;
        break;
      }
    }

    // Things with matching constraints can only be registers, per gcc
    // documentation. This mainly affects "g" constraints.
    if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
      continue;

    // This constraint letter is more general than the previous one, use it.
    int Generality = getConstraintGenerality(CType);
    if (Generality > BestGenerality) {
      BestType = CType;
      BestIdx = i;
      BestGenerality = Generality;
    }
  }

  OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
  OpInfo.ConstraintType = BestType;
}
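// For example (illustrative): given the constraint string "rI" on x86 and a
// constant operand 5, LowerAsmOperandForConstraint accepts the 'I' code (an
// immediate in [0, 31]), so 'I' is chosen immediately; for a non-constant
// operand 'I' yields no result and the more general 'r' wins by generality.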
/// Determines the constraint code and constraint type to use for the specific
/// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
                                            SDValue Op,
                                            SelectionDAG *DAG) const {
  assert(!OpInfo.Codes.empty() && "Must have at least one constraint");

  // Single-letter constraints ('r') are very common.
  if (OpInfo.Codes.size() == 1) {
    OpInfo.ConstraintCode = OpInfo.Codes[0];
    OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
  } else {
    ChooseConstraint(OpInfo, *this, Op, DAG);
  }

  // 'X' matches anything.
  if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
    // Labels and constants are handled elsewhere ('X' is the only thing
    // that matches labels). For Functions, the type here is the type of
    // the result, which is not what we want to look at; leave them alone.
    Value *v = OpInfo.CallOperandVal;
    if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
      OpInfo.CallOperandVal = v;
      return;
    }

    if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
      return;

    // Otherwise, try to resolve it to something we know about by looking at
    // the actual operand type.
    if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
      OpInfo.ConstraintCode = Repl;
      OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
    }
  }
}
/// Given an exact SDIV by a constant, create a multiplication
/// with the multiplicative inverse of the constant.
static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SmallVectorImpl<SDNode *> &Created) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();

  bool UseSRA = false;
  SmallVector<SDValue, 16> Shifts, Factors;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    APInt Divisor = C->getAPIntValue();
    unsigned Shift = Divisor.countTrailingZeros();
    if (Shift) {
      Divisor.ashrInPlace(Shift);
      UseSRA = true;
    }
    // Calculate the multiplicative inverse, using Newton's method.
    APInt t;
    APInt Factor = Divisor;
    while ((t = Divisor * Factor) != 1)
      Factor *= APInt(Divisor.getBitWidth(), 2) - t;
    Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
    Factors.push_back(DAG.getConstant(Factor, dl, SVT));
    return true;
  };

  // Collect all magic values from the build vector.
  if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
    return SDValue();

  SDValue Shift, Factor;
  if (Op1.getOpcode() == ISD::BUILD_VECTOR) {
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    Factor = DAG.getBuildVector(VT, dl, Factors);
  } else if (Op1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(Shifts.size() == 1 && Factors.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
  } else {
    assert(isa<ConstantSDNode>(Op1) && "Expected a constant");
    Shift = Shifts[0];
    Factor = Factors[0];
  }

  SDValue Res = Op0;

  // Shift the value upfront if it is even, so the LSB is one.
  if (UseSRA) {
    // TODO: For UDIV use SRL instead of SRA.
    SDNodeFlags Flags;
    Flags.setExact(true);
    Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
    Created.push_back(Res.getNode());
  }

  return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
}
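// Worked example for BuildExactSDIV (illustrative, i32): an exact
// 'sdiv X, 6' becomes '(mul (sra exact X, 1), 0xAAAAAAAB)', since
// 6 = 3 * 2^1 and 3 * 0xAAAAAAAB == 2^33 + 1 == 1 (mod 2^32), i.e. the
// multiply by the inverse undoes the division by the odd factor 3.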
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                      SelectionDAG &DAG,
                                      SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N, 0); // Lower SDIV as SDIV
  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  // If the sdiv has an 'exact' bit we can use a simpler lowering.
  if (N->getFlags().hasExact())
    return BuildExactSDIV(*this, N, dl, DAG, Created);

  SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;

  auto BuildSDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;

    const APInt &Divisor = C->getAPIntValue();
    APInt::ms magics = Divisor.magic();
    int NumeratorFactor = 0;
    int ShiftMask = -1;

    if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
      // If d is +1/-1, we just multiply the numerator by +1/-1.
      NumeratorFactor = Divisor.getSExtValue();
      magics.m = 0;
      magics.s = 0;
      ShiftMask = 0;
    } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
      // If d > 0 and m < 0, add the numerator.
      NumeratorFactor = 1;
    } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
      // If d < 0 and m > 0, subtract the numerator.
      NumeratorFactor = -1;
    }

    MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
    Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
    Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
    ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts / magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
    return SDValue();

  SDValue MagicFactor, Factor, Shift, ShiftMask;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    Factor = DAG.getBuildVector(VT, dl, Factors);
    Shift = DAG.getBuildVector(ShVT, dl, Shifts);
    ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(MagicFactors.size() == 1 && Factors.size() == 1 &&
           Shifts.size() == 1 && ShiftMasks.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    Factor = DAG.getSplatVector(VT, dl, Factors[0]);
    Shift = DAG.getSplatVector(ShVT, dl, Shifts[0]);
    ShiftMask = DAG.getSplatVector(VT, dl, ShiftMasks[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    MagicFactor = MagicFactors[0];
    Factor = Factors[0];
    Shift = Shifts[0];
    ShiftMask = ShiftMasks[0];
  }

  // Multiply the numerator (operand 0) by the magic value.
  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHS = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHS, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHS, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue();
  };

  SDValue Q = GetMULHS(N0, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  // (Optionally) Add/subtract the numerator using Factor.
  Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
  Created.push_back(Factor.getNode());
  Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
  Created.push_back(Q.getNode());

  // Shift right algebraic by shift value.
  Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
  Created.push_back(Q.getNode());

  // Extract the sign bit, mask it and add it to the quotient.
  SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
  SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
  Created.push_back(T.getNode());
  T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
  Created.push_back(T.getNode());
  return DAG.getNode(ISD::ADD, dl, VT, Q, T);
}
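// Worked example for BuildSDIV (illustrative, i32, per Hacker's Delight):
// for 'sdiv N, 7' the magic constant is 0x92492493 with shift 2; d > 0 and
// the magic is negative, so the numerator is added back:
//   Q = mulhs(N, 0x92492493);  Q = add(Q, N);  Q = sra(Q, 2);
//   Q = add(Q, srl(Q, 31));    // fix up the quotient for negative N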
/// Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                  bool IsAfterLegalization,
                                  SmallVectorImpl<SDNode *> &Created) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  EVT ShSVT = ShVT.getScalarType();
  unsigned EltBits = VT.getScalarSizeInBits();
  EVT MulVT;

  // Check to see if we can do this.
  // FIXME: We should be more aggressive here.
  if (!isTypeLegal(VT)) {
    // Limit this to simple scalars for now.
    if (VT.isVector() || !VT.isSimple())
      return SDValue();

    // If this type will be promoted to a large enough type with a legal
    // multiply operation, we can go ahead and do this transform.
    if (getTypeAction(VT.getSimpleVT()) != TypePromoteInteger)
      return SDValue();

    MulVT = getTypeToTransformTo(*DAG.getContext(), VT);
    if (MulVT.getSizeInBits() < (2 * EltBits) ||
        !isOperationLegal(ISD::MUL, MulVT))
      return SDValue();
  }

  bool UseNPQ = false;
  SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;

  auto BuildUDIVPattern = [&](ConstantSDNode *C) {
    if (C->isNullValue())
      return false;
    // FIXME: We should use a narrower constant when the upper
    // bits are known to be zero.
    const APInt &Divisor = C->getAPIntValue();
    APInt::mu magics = Divisor.magicu();
    unsigned PreShift = 0, PostShift = 0;

    // If the divisor is even, we can avoid using the expensive fixup by
    // shifting the divided value upfront.
    if (magics.a != 0 && !Divisor[0]) {
      PreShift = Divisor.countTrailingZeros();
      // Get magic number for the shifted divisor.
      magics = Divisor.lshr(PreShift).magicu(PreShift);
      assert(magics.a == 0 && "Should use cheap fixup now");
    }

    APInt Magic = magics.m;

    bool SelNPQ;
    if (magics.a == 0 || Divisor.isOneValue()) {
      assert(magics.s < Divisor.getBitWidth() &&
             "We shouldn't generate an undefined shift!");
      PostShift = magics.s;
      SelNPQ = false;
    } else {
      PostShift = magics.s - 1;
      SelNPQ = true;
    }

    PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
    MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
    NPQFactors.push_back(
        DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
                               : APInt::getNullValue(EltBits),
                        dl, SVT));
    PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
    UseNPQ |= SelNPQ;
    return true;
  };

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Collect the shifts/magic values from each element.
  if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
    return SDValue();

  SDValue PreShift, PostShift, MagicFactor, NPQFactor;
  if (N1.getOpcode() == ISD::BUILD_VECTOR) {
    PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
    MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
    NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
    PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
  } else if (N1.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PreShifts.size() == 1 && MagicFactors.size() == 1 &&
           NPQFactors.size() == 1 && PostShifts.size() == 1 &&
           "Expected matchUnaryPredicate to return one for scalable vectors");
    PreShift = DAG.getSplatVector(ShVT, dl, PreShifts[0]);
    MagicFactor = DAG.getSplatVector(VT, dl, MagicFactors[0]);
    NPQFactor = DAG.getSplatVector(VT, dl, NPQFactors[0]);
    PostShift = DAG.getSplatVector(ShVT, dl, PostShifts[0]);
  } else {
    assert(isa<ConstantSDNode>(N1) && "Expected a constant");
    PreShift = PreShifts[0];
    MagicFactor = MagicFactors[0];
    PostShift = PostShifts[0];
    NPQFactor = NPQFactors[0];
  }

  SDValue Q = N0;
  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
  Created.push_back(Q.getNode());

  // FIXME: We should support doing a MUL in a wider type.
  auto GetMULHU = [&](SDValue X, SDValue Y) {
    // If the type isn't legal, use a wider mul of the type calculated
    // earlier.
    if (!isTypeLegal(VT)) {
      X = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, X);
      Y = DAG.getNode(ISD::ZERO_EXTEND, dl, MulVT, Y);
      Y = DAG.getNode(ISD::MUL, dl, MulVT, X, Y);
      Y = DAG.getNode(ISD::SRL, dl, MulVT, Y,
                      DAG.getShiftAmountConstant(EltBits, MulVT, dl));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Y);
    }

    if (isOperationLegalOrCustom(ISD::MULHU, VT, IsAfterLegalization))
      return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
    if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT, IsAfterLegalization)) {
      SDValue LoHi =
          DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
      return SDValue(LoHi.getNode(), 1);
    }
    return SDValue(); // No mulhu or equivalent
  };

  // Multiply the numerator (operand 0) by the magic value.
  Q = GetMULHU(Q, MagicFactor);
  if (!Q)
    return SDValue();

  Created.push_back(Q.getNode());

  if (UseNPQ) {
    SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
    Created.push_back(NPQ.getNode());

    // For vectors we might have a mix of non-NPQ/NPQ paths, so use
    // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
    if (VT.isVector())
      NPQ = GetMULHU(NPQ, NPQFactor);
    else
      NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));

    Created.push_back(NPQ.getNode());

    Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
    Created.push_back(Q.getNode());
  }

  Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
  Created.push_back(Q.getNode());

  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  SDValue One = DAG.getConstant(1, dl, VT);
  SDValue IsOne = DAG.getSetCC(dl, SetCCVT, N1, One, ISD::SETEQ);
  return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
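// Worked example for BuildUDIV (illustrative, i32): 'udiv N, 7' needs the
// NPQ fixup, since the exact magic multiplier does not fit in 32 bits:
//   Q   = mulhu(N, 0x24924925);
//   NPQ = srl(sub(N, Q), 1);
//   Q   = srl(add(NPQ, Q), 2);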
/// If all values in Values that *don't* match the predicate are same 'splat'
/// value, then replace all values with that splat value.
/// Else, if AlternativeReplacement was provided, then replace all values that
/// do match predicate with AlternativeReplacement value.
static void
turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
                          std::function<bool(SDValue)> Predicate,
                          SDValue AlternativeReplacement = SDValue()) {
  SDValue Replacement;
  // Is there a value for which the Predicate does *NOT* match? What is it?
  auto SplatValue = llvm::find_if_not(Values, Predicate);
  if (SplatValue != Values.end()) {
    // Does Values consist only of SplatValue's and values matching Predicate?
    if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
          return Value == *SplatValue || Predicate(Value);
        })) // Then we shall replace values matching predicate with SplatValue.
      Replacement = *SplatValue;
  }
  if (!Replacement) {
    // Oops, we did not find the "baseline" splat value.
    if (!AlternativeReplacement)
      return; // Nothing to do.
    // Let's replace with provided value then.
    Replacement = AlternativeReplacement;
  }
  std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
}
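// For example (illustrative): with Predicate == isNullConstant, the values
// {X, 0, X, 0} become {X, X, X, X} (all non-matching values agree on X);
// for {X, 0, Y, 0} there is no common splat, so the zeros are replaced by
// AlternativeReplacement when one was provided, and left alone otherwise.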
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 5> Built;
  if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}

SDValue
TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - Q = floor(((2^W) - 1) / D)
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If MUL is unavailable, we cannot proceed in any case.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  bool ComparingWithAllZeros = true;
  bool AllComparisonsWithNonZerosAreTautological = true;
  bool HadTautologicalLanes = false;
  bool AllLanesAreTautological = true;
  bool HadEvenDivisor = false;
  bool AllDivisorsArePowerOfTwo = true;
  bool HadTautologicalInvertedLanes = false;
  SmallVector<SDValue, 16> PAmts, KAmts, QAmts, IAmts;

  auto BuildUREMPattern = [&](ConstantSDNode *CDiv, ConstantSDNode *CCmp) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (CDiv->isNullValue())
      return false;

    const APInt &D = CDiv->getAPIntValue();
    const APInt &Cmp = CCmp->getAPIntValue();

    ComparingWithAllZeros &= Cmp.isNullValue();

    // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
    // if C2 is not less than C1, the comparison is always false.
    // But we will only be able to produce the comparison that will give the
    // opposite tautological answer. So this lane would need to be fixed up.
    bool TautologicalInvertedLane = D.ule(Cmp);
    HadTautologicalInvertedLanes |= TautologicalInvertedLane;

    // If all lanes are tautological (either all divisors are ones, or divisor
    // is not greater than the constant we are comparing with),
    // we will prefer to avoid the fold.
    bool TautologicalLane = D.isOneValue() || TautologicalInvertedLane;
    HadTautologicalLanes |= TautologicalLane;
    AllLanesAreTautological &= TautologicalLane;

    // If we are comparing with non-zero, we'll need to subtract said
    // comparison value from the LHS. But there is no point in doing that if
    // every lane where we are comparing with non-zero is tautological.
    if (!Cmp.isNullValue())
      AllComparisonsWithNonZerosAreTautological &= TautologicalLane;

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    // D is even if it has trailing zeros.
    HadEvenDivisor |= (K != 0);
    // D is a power-of-two if D0 is one.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // Q = floor((2^W - 1) u/ D)
    // R = ((2^W - 1) u% D)
    APInt Q, R;
    APInt::udivrem(APInt::getAllOnesValue(W), D, Q, R);

    // If we are comparing with zero, then that comparison constant is okay,
    // else it may need to be one less than that.
    if (Cmp.ugt(R))
      Q -= 1;

    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the lane is tautological the result can be constant-folded.
    if (TautologicalLane) {
      // Set P and K amount to bogus values so we can try to splat them.
      P = 0;
      K = -1;
      // And ensure that comparison constant is tautological,
      // it will always compare true/false.
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchBinaryPredicate(D, CompTargetNode, BuildUREMPattern))
    return SDValue();

  // If all lanes are tautological, the result can be constant-folded.
  if (AllLanesAreTautological)
    return SDValue();

  // If this is a urem by a powers-of-two, avoid the fold since it can be
  // best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadTautologicalLanes) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && KAmts.size() == 1 && QAmts.size() == 1 &&
           "Expected matchBinaryPredicate to return one element for "
           "SPLAT_VECTOR");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    PVal = PAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  if (!ComparingWithAllZeros && !AllComparisonsWithNonZerosAreTautological) {
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::SUB, VT))
      return SDValue(); // FIXME: Could/should use `ISD::ADD`?
    assert(CompTargetNode.getValueType() == N.getValueType() &&
           "Expecting that the types on LHS and RHS of comparisons match.");
    N = DAG.getNode(ISD::SUB, DL, VT, N, CompTargetNode);
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // UREM: (rotr (mul N, P), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // UREM: (setule/setugt (rotr (mul N, P), K), Q)
  SDValue NewCC =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
  if (!HadTautologicalInvertedLanes)
    return NewCC;

  // If any lanes previously compared always-false, the NewCC will give
  // always-true result for them, so we need to fixup those lanes.
  // Or the other way around for inequality predicate.
  assert(VT.isVector() && "Can/should only get here for vectors.");
  Created.push_back(NewCC.getNode());

  // `x u% C1` is *always* less than C1. So given `x u% C1 == C2`,
  // if C2 is not less than C1, the comparison is always false.
  // But we have produced the comparison that will give the
  // opposite tautological answer. So these lanes would need to be fixed up.
  SDValue TautologicalInvertedChannels =
      DAG.getSetCC(DL, SETCCVT, D, CompTargetNode, ISD::SETULE);
  Created.push_back(TautologicalInvertedChannels.getNode());

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for this.
  if (isOperationLegalOrCustom(ISD::VSELECT, SETCCVT)) {
    // If we have a vector select, let's replace the comparison results in the
    // affected lanes with the correct tautological result.
    SDValue Replacement = DAG.getBoolConstant(Cond == ISD::SETEQ ? false : true,
                                              DL, SETCCVT, SETCCVT);
    return DAG.getNode(ISD::VSELECT, DL, SETCCVT, TautologicalInvertedChannels,
                       Replacement, NewCC);
  }

  // Else, we can just invert the comparison result in the appropriate lanes.
  //
  // NOTE: see the VSELECT note above.
  if (isOperationLegalOrCustom(ISD::XOR, SETCCVT))
    return DAG.getNode(ISD::XOR, DL, SETCCVT, NewCC,
                       TautologicalInvertedChannels);

  return SDValue(); // Don't know how to lower.
}
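// Worked example for prepareUREMEqFold (illustrative, i32): for
// '(seteq (urem X, 6), 0)' we get D0 = 3, K = 1, P = 0xAAAAAAAB (the inverse
// of 3 mod 2^32) and Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA, producing
//   (setule (rotr (mul X, 0xAAAAAAAB), 1), 0x2AAAAAAA)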
/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
/// using only multiplications, additions and shifts/rotations.
/// Ref: "Hacker's Delight" 10-17.
SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                        SDValue CompTargetNode,
                                        ISD::CondCode Cond,
                                        DAGCombinerInfo &DCI,
                                        const SDLoc &DL) const {
  SmallVector<SDNode *, 7> Built;
  if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
                                         DCI, DL, Built)) {
    assert(Built.size() <= 7 && "Max size prediction failed.");
    for (SDNode *N : Built)
      DCI.AddToWorklist(N);
    return Folded;
  }

  return SDValue();
}

SDValue
TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                                  SDValue CompTargetNode, ISD::CondCode Cond,
                                  DAGCombinerInfo &DCI, const SDLoc &DL,
                                  SmallVectorImpl<SDNode *> &Created) const {
  // Fold:
  //   (seteq/ne (srem N, D), 0)
  // To:
  //   (setule/ugt (rotr (add (mul N, P), A), K), Q)
  //
  // - D must be constant, with D = D0 * 2^K where D0 is odd
  // - P is the multiplicative inverse of D0 modulo 2^W
  // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
  // - Q = floor((2 * A) / (2^K))
  // where W is the width of the common type of N and D.
  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
         "Only applicable for (in)equality comparisons.");

  SelectionDAG &DAG = DCI.DAG;

  EVT VT = REMNode.getValueType();
  EVT SVT = VT.getScalarType();
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout(), !DCI.isBeforeLegalize());
  EVT ShSVT = ShVT.getScalarType();

  // If we are after ops legalization, and MUL is unavailable, we can not
  // proceed.
  if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::MUL, VT))
    return SDValue();

  // TODO: Could support comparing with non-zero too.
  ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
  if (!CompTarget || !CompTarget->isNullValue())
    return SDValue();

  bool HadIntMinDivisor = false;
  bool HadOneDivisor = false;
  bool AllDivisorsAreOnes = true;
  bool HadEvenDivisor = false;
  bool NeedToApplyOffset = false;
  bool AllDivisorsArePowerOfTwo = true;
  SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;

  auto BuildSREMPattern = [&](ConstantSDNode *C) {
    // Division by 0 is UB. Leave it to be constant-folded elsewhere.
    if (C->isNullValue())
      return false;

    // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.

    // WARNING: this fold is only valid for positive divisors!
    APInt D = C->getAPIntValue();
    if (D.isNegative())
      D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`

    HadIntMinDivisor |= D.isMinSignedValue();

    // If all divisors are ones, we will prefer to avoid the fold.
    HadOneDivisor |= D.isOneValue();
    AllDivisorsAreOnes &= D.isOneValue();

    // Decompose D into D0 * 2^K
    unsigned K = D.countTrailingZeros();
    assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
    APInt D0 = D.lshr(K);

    if (!D.isMinSignedValue()) {
      // D is even if it has trailing zeros; unless it's INT_MIN, in which case
      // we don't care about this lane in this fold, we'll special-handle it.
      HadEvenDivisor |= (K != 0);
    }

    // D is a power-of-two if D0 is one. This includes INT_MIN.
    // If all divisors are power-of-two, we will prefer to avoid the fold.
    AllDivisorsArePowerOfTwo &= D0.isOneValue();

    // P = inv(D0, 2^W)
    // 2^W requires W + 1 bits, so we have to extend and then truncate.
    unsigned W = D.getBitWidth();
    APInt P = D0.zext(W + 1)
                  .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
                  .trunc(W);
    assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
    assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");

    // A = floor((2^(W - 1) - 1) / D0) & -2^K
    APInt A = APInt::getSignedMaxValue(W).udiv(D0);
    A.clearLowBits(K);

    if (!D.isMinSignedValue()) {
      // If divisor INT_MIN, then we don't care about this lane in this fold,
      // we'll special-handle it.
      NeedToApplyOffset |= A != 0;
    }

    // Q = floor((2 * A) / (2^K))
    APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));

    assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
           "We are expecting that A is always less than all-ones for SVT");
    assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
           "We are expecting that K is always less than all-ones for ShSVT");

    // If the divisor is 1 the result can be constant-folded. Likewise, we
    // don't care about INT_MIN lanes, those can be set to undef if appropriate.
    if (D.isOneValue()) {
      // Set P, A and K to bogus values so we can try to splat them.
      P = 0;
      A = -1;
      K = -1;

      // x ?% 1 == 0  <-->  true  <-->  x u<= -1
      Q = -1;
    }

    PAmts.push_back(DAG.getConstant(P, DL, SVT));
    AAmts.push_back(DAG.getConstant(A, DL, SVT));
    KAmts.push_back(
        DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
    QAmts.push_back(DAG.getConstant(Q, DL, SVT));
    return true;
  };

  SDValue N = REMNode.getOperand(0);
  SDValue D = REMNode.getOperand(1);

  // Collect the values from each element.
  if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
    return SDValue();

  // If this is a srem by a one, avoid the fold since it can be constant-folded.
  if (AllDivisorsAreOnes)
    return SDValue();

  // If this is a srem by a powers-of-two (including INT_MIN), avoid the fold
  // since it can be best implemented as a bit test.
  if (AllDivisorsArePowerOfTwo)
    return SDValue();

  SDValue PVal, AVal, KVal, QVal;
  if (D.getOpcode() == ISD::BUILD_VECTOR) {
    if (HadOneDivisor) {
      // Try to turn PAmts into a splat, since we don't care about the values
      // that are currently '0'. If we can't, just keep '0'`s.
      turnVectorIntoSplatVector(PAmts, isNullConstant);
      // Try to turn AAmts into a splat, since we don't care about the
      // values that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, SVT));
      // Try to turn KAmts into a splat, since we don't care about the values
      // that are currently '-1'. If we can't, change them to '0'`s.
      turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
                                DAG.getConstant(0, DL, ShSVT));
    }

    PVal = DAG.getBuildVector(VT, DL, PAmts);
    AVal = DAG.getBuildVector(VT, DL, AAmts);
    KVal = DAG.getBuildVector(ShVT, DL, KAmts);
    QVal = DAG.getBuildVector(VT, DL, QAmts);
  } else if (D.getOpcode() == ISD::SPLAT_VECTOR) {
    assert(PAmts.size() == 1 && AAmts.size() == 1 && KAmts.size() == 1 &&
           QAmts.size() == 1 &&
           "Expected matchUnaryPredicate to return one element for scalable "
           "vectors");
    PVal = DAG.getSplatVector(VT, DL, PAmts[0]);
    AVal = DAG.getSplatVector(VT, DL, AAmts[0]);
    KVal = DAG.getSplatVector(ShVT, DL, KAmts[0]);
    QVal = DAG.getSplatVector(VT, DL, QAmts[0]);
  } else {
    assert(isa<ConstantSDNode>(D) && "Expected a constant");
    PVal = PAmts[0];
    AVal = AAmts[0];
    KVal = KAmts[0];
    QVal = QAmts[0];
  }

  // (mul N, P)
  SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
  Created.push_back(Op0.getNode());

  if (NeedToApplyOffset) {
    // We need ADD to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ADD, VT))
      return SDValue();

    // (add (mul N, P), A)
    Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
    Created.push_back(Op0.getNode());
  }

  // Rotate right only if any divisor was even. We avoid rotates for all-odd
  // divisors as a performance improvement, since rotating by 0 is a no-op.
  if (HadEvenDivisor) {
    // We need ROTR to do this.
    if (!DCI.isBeforeLegalizeOps() && !isOperationLegalOrCustom(ISD::ROTR, VT))
      return SDValue();
    // SREM: (rotr (add (mul N, P), A), K)
    Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal);
    Created.push_back(Op0.getNode());
  }

  // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
  SDValue Fold =
      DAG.getSetCC(DL, SETCCVT, Op0, QVal,
                   ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));

  // If we didn't have lanes with INT_MIN divisor, then we're done.
  if (!HadIntMinDivisor)
    return Fold;

  // That fold is only valid for positive divisors. Which effectively means,
  // it is invalid for INT_MIN divisors. So if we have such a lane,
  // we must fix-up results for said lanes.
  assert(VT.isVector() && "Can/should only get here for vectors.");

  // NOTE: we avoid letting illegal types through even if we're before legalize
  // ops - legalization has a hard time producing good code for the code that
  // follows.
  if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
      !isOperationLegalOrCustom(ISD::AND, VT) ||
      !isOperationLegalOrCustom(Cond, VT) ||
      !isOperationLegalOrCustom(ISD::VSELECT, SETCCVT))
    return SDValue();

  Created.push_back(Fold.getNode());

  SDValue IntMin = DAG.getConstant(
      APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue IntMax = DAG.getConstant(
      APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
  SDValue Zero =
      DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);

  // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
  SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
  Created.push_back(DivisorIsIntMin.getNode());

  // (N s% INT_MIN) ==/!= 0  <-->  (N & INT_MAX) ==/!= 0
  SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
  Created.push_back(Masked.getNode());
  SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
  Created.push_back(MaskedIsZero.getNode());

  // To produce final result we need to blend 2 vectors: 'SetCC' and
  // 'MaskedIsZero'. If the divisor for channel was *NOT* INT_MIN, we pick
  // from 'Fold', else pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
  // constant-folded, select can get lowered to a shuffle with constant mask.
  SDValue Blended = DAG.getNode(ISD::VSELECT, DL, SETCCVT, DivisorIsIntMin,
                                MaskedIsZero, Fold);

  return Blended;
}
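// Worked example for prepareSREMEqFold (illustrative, i32): for
// '(seteq (srem X, 3), 0)' we get D0 = 3, K = 0, P = 0xAAAAAAAB,
// A = floor((2^31 - 1) / 3) = 0x2AAAAAAA and Q = 2 * A = 0x55555554,
// producing (with no rotate, since K == 0):
//   (setule (add (mul X, 0xAAAAAAAB), 0x2AAAAAAA), 0x55555554)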
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
  if (!isa<ConstantSDNode>(Op.getOperand(0))) {
    DAG.getContext()->emitError("argument to '__builtin_return_address' must "
                                "be a constant integer");
    return true;
  }

  return false;
}
SDValue TargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
                                         const DenormalMode &Mode) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
  // Testing it with denormal inputs to avoid wrong estimate.
  if (Mode.Input == DenormalMode::IEEE) {
    // This is specifically a check for the handling of denormal inputs,
    // not the result.

    // Test = fabs(X) < SmallestNormal
    const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
    APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
    SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
    SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
    return DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
  }
  // Test = X == 0.0
  return DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
}
SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                             bool LegalOps, bool OptForSize,
                                             NegatibleCost &Cost,
                                             unsigned Depth) const {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) {
    Cost = NegatibleCost::Cheaper;
    return Op.getOperand(0);
  }

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Pre-increment recursion depth for use in recursive calls.
  ++Depth;
  const SDNodeFlags Flags = Op->getFlags();
  const TargetOptions &Options = DAG.getTarget().Options;
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();

  // Don't allow anything with multiple uses unless we know it is free.
  if (!Op.hasOneUse() && Opcode != ISD::ConstantFP) {
    bool IsFreeExtend = Opcode == ISD::FP_EXTEND &&
                        isFPExtFree(VT, Op.getOperand(0).getValueType());
    if (!IsFreeExtend)
      return SDValue();
  }

  auto RemoveDeadNode = [&](SDValue N) {
    if (N && N.getNode()->use_empty())
      DAG.RemoveDeadNode(N.getNode());
  };

  SDLoc DL(Op);

  // Because getNegatedExpression can delete nodes we need a handle to keep
  // temporary nodes alive in case the recursion manages to create an identical
  // node.
  std::list<HandleSDNode> Handles;

  switch (Opcode) {
  case ISD::ConstantFP: {
    // Don't invert constant FP values after legalization unless the target says
    // the negated constant is legal.
    bool IsOpLegal =
        isOperationLegal(ISD::ConstantFP, VT) ||
        isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                     OptForSize);

    if (LegalOps && !IsOpLegal)
      break;

    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    SDValue CFP = DAG.getConstantFP(V, DL, VT);

    // If we already have the use of the negated floating constant, it is free
    // to negate it even if it has multiple uses.
    if (!Op.hasOneUse() && CFP.use_empty())
      break;
    Cost = NegatibleCost::Neutral;
    return CFP;
  }
  case ISD::BUILD_VECTOR: {
    // Only permit BUILD_VECTOR of constants.
    if (llvm::any_of(Op->op_values(), [&](SDValue N) {
          return !N.isUndef() && !isa<ConstantFPSDNode>(N);
        }))
      break;

    bool IsOpLegal =
        (isOperationLegal(ISD::ConstantFP, VT) &&
         isOperationLegal(ISD::BUILD_VECTOR, VT)) ||
        llvm::all_of(Op->op_values(), [&](SDValue N) {
          return N.isUndef() ||
                 isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
                              OptForSize);
        });

    if (LegalOps && !IsOpLegal)
      break;

    SmallVector<SDValue, 4> Ops;
    for (SDValue C : Op->op_values()) {
      if (C.isUndef()) {
        Ops.push_back(C);
        continue;
      }
      APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
      V.changeSign();
      Ops.push_back(DAG.getConstantFP(V, DL, C.getValueType()));
    }
    Cost = NegatibleCost::Neutral;
    return DAG.getBuildVector(VT, DL, Ops);
  }
  case ISD::FADD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOps && !isOperationLegalOrCustom(ISD::FSUB, VT))
      break;
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fadd X, Y)) -> (fsub (fneg Y), X)
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(ISD::FSUB, DL, VT, NegY, X, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FSUB: {
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
    // fold (fneg (fsub 0, Y)) -> Y
    if (ConstantFPSDNode *C = isConstOrConstSplatFP(X, /*AllowUndefs*/ true))
      if (C->isZero()) {
        Cost = NegatibleCost::Cheaper;
        return Y;
      }

    // fold (fneg (fsub X, Y)) -> (fsub Y, X)
    Cost = NegatibleCost::Neutral;
    return DAG.getNode(ISD::FSUB, DL, VT, Y, X, Flags);
  }
  case ISD::FMUL:
  case ISD::FDIV: {
    SDValue X = Op.getOperand(0), Y = Op.getOperand(1);

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = CostX;
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
    if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
      if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
        break;

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = CostY;
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }
  case ISD::FMA:
  case ISD::FMAD: {
    if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
      break;

    SDValue X = Op.getOperand(0), Y = Op.getOperand(1), Z = Op.getOperand(2);
    NegatibleCost CostZ = NegatibleCost::Expensive;
    SDValue NegZ =
        getNegatedExpression(Z, DAG, LegalOps, OptForSize, CostZ, Depth);
    // Give up if we fail to negate Z.
    if (!NegZ)
      break;

    // Prevent this node from being deleted by the next two calls.
    Handles.emplace_back(NegZ);

    // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
    NegatibleCost CostX = NegatibleCost::Expensive;
    SDValue NegX =
        getNegatedExpression(X, DAG, LegalOps, OptForSize, CostX, Depth);
    // Prevent this node from being deleted by the next call.
    if (NegX)
      Handles.emplace_back(NegX);

    // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
    NegatibleCost CostY = NegatibleCost::Expensive;
    SDValue NegY =
        getNegatedExpression(Y, DAG, LegalOps, OptForSize, CostY, Depth);

    // We're done with the handles.
    Handles.clear();

    // Negate the X if its cost is less or equal than Y.
    if (NegX && (CostX <= CostY)) {
      Cost = std::min(CostX, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, NegX, Y, NegZ, Flags);
      if (NegY != N)
        RemoveDeadNode(NegY);
      return N;
    }

    // Negate the Y if it is not expensive.
    if (NegY) {
      Cost = std::min(CostY, CostZ);
      SDValue N = DAG.getNode(Opcode, DL, VT, X, NegY, NegZ, Flags);
      if (NegX != N)
        RemoveDeadNode(NegX);
      return N;
    }
    break;
  }

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(Opcode, DL, VT, NegV);
    break;
  case ISD::FP_ROUND:
    if (SDValue NegV = getNegatedExpression(Op.getOperand(0), DAG, LegalOps,
                                            OptForSize, Cost, Depth))
      return DAG.getNode(ISD::FP_ROUND, DL, VT, NegV, Op.getOperand(1));
    break;
  }

  return SDValue();
}
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//

bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    SmallVectorImpl<SDValue> &Result,
                                    EVT HiLoVT, SelectionDAG &DAG,
                                    MulExpansionKind Kind, SDValue LL,
                                    SDValue LH, SDValue RL, SDValue RH) const {
  assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
         Opcode == ISD::SMUL_LOHI);

  bool HasMULHS = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
  bool HasMULHU = (Kind == MulExpansionKind::Always) ||
                  isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
  bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
  bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
                      isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);

  if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
    return false;

  unsigned OuterBitSize = VT.getScalarSizeInBits();
  unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();

  // LL, LH, RL, and RH must be either all NULL or all set to a value.
  assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
         (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));

  SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
  auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
                          bool Signed) -> bool {
    if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
      Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
      Hi = SDValue(Lo.getNode(), 1);
      return true;
    }
    if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
      Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
      Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
      return true;
    }
    return false;
  };

  SDValue Lo, Hi;

  if (!LL.getNode() && !RL.getNode() &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
    RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
  }

  if (!LL.getNode())
    return false;

  APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
  if (DAG.MaskedValueIsZero(LHS, HighMask) &&
      DAG.MaskedValueIsZero(RHS, HighMask)) {
    // The inputs are both zero-extended.
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      if (Opcode != ISD::MUL) {
        SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
        Result.push_back(Zero);
        Result.push_back(Zero);
      }
      return true;
    }
  }

  if (!VT.isVector() && Opcode == ISD::MUL &&
      DAG.ComputeNumSignBits(LHS) > InnerBitSize &&
      DAG.ComputeNumSignBits(RHS) > InnerBitSize) {
    // The input values are both sign-extended.
    // TODO non-MUL case?
    if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
      Result.push_back(Lo);
      Result.push_back(Hi);
      return true;
    }
  }

  unsigned ShiftAmount = OuterBitSize - InnerBitSize;
  EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
    // FIXME getShiftAmountTy does not always return a sensible result when VT
    // is an illegal type, and so the type may be too small to fit the shift
    // amount. Override it with i32. The shift will have to be legalized.
    ShiftAmountTy = MVT::i32;
  }
  SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);

  if (!LH.getNode() && !RH.getNode() &&
      isOperationLegalOrCustom(ISD::SRL, VT) &&
      isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
    LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
    LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
    RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
    RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
  }

  if (!LH.getNode())
    return false;

  if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
    return false;

  Result.push_back(Lo);

  if (Opcode == ISD::MUL) {
    RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
    LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
    Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
    Result.push_back(Hi);
    return true;
  }

  // Compute the full width result.
  auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
    Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
    Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
    Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
    return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
  };

  SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
  if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
    return false;

  // This is effectively the add part of a multiply-add of half-sized operands,
  // so it cannot overflow.
  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
    return false;

  SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
  EVT BoolType = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  bool UseGlue = (isOperationLegalOrCustom(ISD::ADDC, VT) &&
                  isOperationLegalOrCustom(ISD::ADDE, VT));
  if (UseGlue)
    Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
                       Merge(Lo, Hi));
  else
    Next = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(VT, BoolType), Next,
                       Merge(Lo, Hi), DAG.getConstant(0, dl, BoolType));

  SDValue Carry = Next.getValue(1);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);

  if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
    return false;

  if (UseGlue)
    Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
                     Carry);
  else
    Hi = DAG.getNode(ISD::ADDCARRY, dl, DAG.getVTList(HiLoVT, BoolType), Hi,
                     Zero, Carry);

  Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));

  if (Opcode == ISD::SMUL_LOHI) {
    SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                                  DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
    Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);

    NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
                          DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
    Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
  }

  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
  Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
  return true;
}
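// The expansion above is schoolbook multiplication on half-width digits:
// with L == LH * 2^I + LL and R == RH * 2^I + RL (I == InnerBitSize),
//   L * R == LL*RL + (LL*RH + LH*RL) * 2^I + LH*RH * 2^(2*I)
// where the middle partial products are accumulated with explicit carries
// (ADDC/ADDE or ADDCARRY) whenever the full double-width result is needed.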
bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
                               SelectionDAG &DAG, MulExpansionKind Kind,
                               SDValue LL, SDValue LH, SDValue RL,
                               SDValue RH) const {
  SmallVector<SDValue, 2> Result;
  bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), SDLoc(N),
                           N->getOperand(0), N->getOperand(1), Result, HiLoVT,
                           DAG, Kind, LL, LH, RL, RH);
  if (Ok) {
    assert(Result.size() == 2);
    Lo = Result[0];
    Hi = Result[1];
  }
  return Ok;
}
6481 static bool isNonZeroModBitWidthOrUndef(SDValue Z
, unsigned BW
) {
6482 return ISD::matchUnaryPredicate(
6484 [=](ConstantSDNode
*C
) { return !C
|| C
->getAPIntValue().urem(BW
) != 0; },

bool TargetLowering::expandFunnelShift(SDNode *Node, SDValue &Result,
                                       SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);

  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;

  SDValue X = Node->getOperand(0);
  SDValue Y = Node->getOperand(1);
  SDValue Z = Node->getOperand(2);

  unsigned BW = VT.getScalarSizeInBits();
  bool IsFSHL = Node->getOpcode() == ISD::FSHL;
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Z.getValueType();

  // If a funnel shift in the other direction is more supported, use it.
  unsigned RevOpcode = IsFSHL ? ISD::FSHR : ISD::FSHL;
  if (!isOperationLegalOrCustom(Node->getOpcode(), VT) &&
      isOperationLegalOrCustom(RevOpcode, VT) && isPowerOf2_32(BW)) {
    if (isNonZeroModBitWidthOrUndef(Z, BW)) {
      // fshl X, Y, Z -> fshr X, Y, -Z
      // fshr X, Y, Z -> fshl X, Y, -Z
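      // Illustrative example (not from the original source): for i8 with a
      // shift amount known to be non-zero mod 8, fshl X, Y, 3 produces the
      // same bits as fshr X, Y, 5 (i.e. fshr by -3 mod 8), so the reversed
      // opcode can be used directly with a negated amount.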
      SDValue Zero = DAG.getConstant(0, DL, ShVT);
      Z = DAG.getNode(ISD::SUB, DL, VT, Zero, Z);
    } else {
      // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
      // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
      SDValue One = DAG.getConstant(1, DL, ShVT);
      if (IsFSHL) {
        Y = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        X = DAG.getNode(ISD::SRL, DL, VT, X, One);
      } else {
        X = DAG.getNode(RevOpcode, DL, VT, X, Y, One);
        Y = DAG.getNode(ISD::SHL, DL, VT, Y, One);
      }
      Z = DAG.getNOT(DL, Z, ShVT);
    }
    Result = DAG.getNode(RevOpcode, DL, VT, X, Y, Z);
    return true;
  }

  SDValue ShX, ShY;
  SDValue ShAmt, InvShAmt;
  if (isNonZeroModBitWidthOrUndef(Z, BW)) {
    // fshl: X << C | Y >> (BW - C)
    // fshr: X << (BW - C) | Y >> C
    // where C = Z % BW is not zero
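    // Worked example (illustrative): for i8, fshl(0x12, 0x34, 4) is
    // (0x12 << 4) | (0x34 >> (8 - 4)) = 0x20 | 0x03 = 0x23.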
    SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
    ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
    InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthC, ShAmt);
    ShX = DAG.getNode(ISD::SHL, DL, VT, X, IsFSHL ? ShAmt : InvShAmt);
    ShY = DAG.getNode(ISD::SRL, DL, VT, Y, IsFSHL ? InvShAmt : ShAmt);
  } else {
    // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
    // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
    SDValue Mask = DAG.getConstant(BW - 1, DL, ShVT);
    if (isPowerOf2_32(BW)) {
      // Z % BW -> Z & (BW - 1)
      ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Z, Mask);
      // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
      InvShAmt = DAG.getNode(ISD::AND, DL, ShVT, DAG.getNOT(DL, Z, ShVT), Mask);
    } else {
      SDValue BitWidthC = DAG.getConstant(BW, DL, ShVT);
      ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Z, BitWidthC);
      InvShAmt = DAG.getNode(ISD::SUB, DL, ShVT, Mask, ShAmt);
    }

    SDValue One = DAG.getConstant(1, DL, ShVT);
    if (IsFSHL) {
      ShX = DAG.getNode(ISD::SHL, DL, VT, X, ShAmt);
      SDValue ShY1 = DAG.getNode(ISD::SRL, DL, VT, Y, One);
      ShY = DAG.getNode(ISD::SRL, DL, VT, ShY1, InvShAmt);
    } else {
      SDValue ShX1 = DAG.getNode(ISD::SHL, DL, VT, X, One);
      ShX = DAG.getNode(ISD::SHL, DL, VT, ShX1, InvShAmt);
      ShY = DAG.getNode(ISD::SRL, DL, VT, Y, ShAmt);
    }
  }
  Result = DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
  return true;
}

// TODO: Merge with expandFunnelShift.
bool TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
                               SDValue &Result, SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  bool IsLeft = Node->getOpcode() == ISD::ROTL;
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDLoc DL(SDValue(Node, 0));

  EVT ShVT = Op1.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, ShVT);

  // If a rotate in the other direction is supported, use it.
  unsigned RevRot = IsLeft ? ISD::ROTR : ISD::ROTL;
  if (isOperationLegalOrCustom(RevRot, VT) && isPowerOf2_32(EltSizeInBits)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    Result = DAG.getNode(RevRot, DL, VT, Op0, Sub);
    return true;
  }

  if (!AllowVectorOps && VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SHL, VT) ||
       !isOperationLegalOrCustom(ISD::SRL, VT) ||
       !isOperationLegalOrCustom(ISD::SUB, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::OR, VT) ||
       !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  unsigned ShOpc = IsLeft ? ISD::SHL : ISD::SRL;
  unsigned HsOpc = IsLeft ? ISD::SRL : ISD::SHL;
  SDValue BitWidthMinusOneC = DAG.getConstant(EltSizeInBits - 1, DL, ShVT);
  SDValue ShVal;
  SDValue HsVal;
  if (isPowerOf2_32(EltSizeInBits)) {
    // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
    // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
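    // Illustrative example (not from the original source): for i8,
    // rotl(0x96, 3) = (0x96 << 3) | (0x96 >> 5) = 0xB0 | 0x04 = 0xB4.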
    SDValue NegOp1 = DAG.getNode(ISD::SUB, DL, ShVT, Zero, Op1);
    SDValue ShAmt = DAG.getNode(ISD::AND, DL, ShVT, Op1, BitWidthMinusOneC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::AND, DL, ShVT, NegOp1, BitWidthMinusOneC);
    HsVal = DAG.getNode(HsOpc, DL, VT, Op0, HsAmt);
  } else {
    // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
    // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
    SDValue BitWidthC = DAG.getConstant(EltSizeInBits, DL, ShVT);
    SDValue ShAmt = DAG.getNode(ISD::UREM, DL, ShVT, Op1, BitWidthC);
    ShVal = DAG.getNode(ShOpc, DL, VT, Op0, ShAmt);
    SDValue HsAmt = DAG.getNode(ISD::SUB, DL, ShVT, BitWidthMinusOneC, ShAmt);
    SDValue One = DAG.getConstant(1, DL, ShVT);
    HsVal =
        DAG.getNode(HsOpc, DL, VT, DAG.getNode(HsOpc, DL, VT, Op0, One), HsAmt);
  }
  Result = DAG.getNode(ISD::OR, DL, VT, ShVal, HsVal);
  return true;
}

void TargetLowering::expandShiftParts(SDNode *Node, SDValue &Lo, SDValue &Hi,
                                      SelectionDAG &DAG) const {
  assert(Node->getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Node->getValueType(0);
  unsigned VTBits = VT.getScalarSizeInBits();
  assert(isPowerOf2_32(VTBits) && "Power-of-two integer type expected");

  bool IsSHL = Node->getOpcode() == ISD::SHL_PARTS;
  bool IsSRA = Node->getOpcode() == ISD::SRA_PARTS;
  SDValue ShOpLo = Node->getOperand(0);
  SDValue ShOpHi = Node->getOperand(1);
  SDValue ShAmt = Node->getOperand(2);
  EVT ShAmtVT = ShAmt.getValueType();
  EVT ShAmtCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ShAmtVT);
  SDLoc dl(Node);

  // ISD::FSHL and ISD::FSHR have defined overflow behavior but ISD::SHL and
  // ISD::SRA/L nodes haven't. Insert an AND to be safe, it's usually optimized
  // away during isel.
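  // Illustrative example (not from the original source): expanding a 64-bit
  // SHL into two 32-bit parts, for Amt < 32 the result below is
  // Hi = fshl(Hi, Lo, Amt) and Lo = Lo << Amt, while for Amt >= 32 the
  // selects pick Hi = Lo << (Amt & 31) and Lo = 0.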
  SDValue SafeShAmt = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                  DAG.getConstant(VTBits - 1, dl, ShAmtVT));
  SDValue Tmp1 = IsSRA ? DAG.getNode(ISD::SRA, dl, VT, ShOpHi,
                                     DAG.getConstant(VTBits - 1, dl, ShAmtVT))
                       : DAG.getConstant(0, dl, VT);

  SDValue Tmp2, Tmp3;
  if (IsSHL) {
    Tmp2 = DAG.getNode(ISD::FSHL, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, SafeShAmt);
  } else {
    Tmp2 = DAG.getNode(ISD::FSHR, dl, VT, ShOpHi, ShOpLo, ShAmt);
    Tmp3 = DAG.getNode(IsSRA ? ISD::SRA : ISD::SRL, dl, VT, ShOpHi, SafeShAmt);
  }

  // If the shift amount is larger than or equal to the width of a part we
  // don't use the result from the FSHL/FSHR. Insert a test and select the
  // appropriate values for large shift amounts.
  SDValue AndNode = DAG.getNode(ISD::AND, dl, ShAmtVT, ShAmt,
                                DAG.getConstant(VTBits, dl, ShAmtVT));
  SDValue Cond = DAG.getSetCC(dl, ShAmtCCVT, AndNode,
                              DAG.getConstant(0, dl, ShAmtVT), ISD::SETNE);

  if (IsSHL) {
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  } else {
    Lo = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp3, Tmp2);
    Hi = DAG.getNode(ISD::SELECT, dl, VT, Cond, Tmp1, Tmp3);
  }
}

bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
                                      SelectionDAG &DAG) const {
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  SDLoc dl(SDValue(Node, 0));

  // FIXME: Only f32 to i64 conversions are supported.
  if (SrcVT != MVT::f32 || DstVT != MVT::i64)
    return false;

  if (Node->isStrictFPOpcode())
    // When a NaN is converted to an integer a trap is allowed. We can't
    // use this expansion here because it would eliminate that trap. Other
    // traps are also allowed and cannot be eliminated. See
    // IEEE 754-2008 sec 5.8.
    return false;

  // Expand f32 -> i64 conversion
  // This algorithm comes from compiler-rt's implementation of fixsfdi:
  // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
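  // Illustrative walk-through (not from the original source): for Src = 1.0f
  // (bits 0x3F800000), the biased exponent field is 127, so Exponent = 0;
  // the mantissa with the implicit bit is 0x800000, and shifting it right by
  // (23 - Exponent) bits yields the integer 1.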
  unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
  EVT IntVT = SrcVT.changeTypeToInteger();
  EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());

  SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
  SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
  SDValue Bias = DAG.getConstant(127, dl, IntVT);
  SDValue SignMask = DAG.getConstant(APInt::getSignMask(SrcEltBits), dl, IntVT);
  SDValue SignLowBit = DAG.getConstant(SrcEltBits - 1, dl, IntVT);
  SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);

  SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Src);

  SDValue ExponentBits = DAG.getNode(
      ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
      DAG.getZExtOrTrunc(ExponentLoBit, dl, IntShVT));
  SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);

  SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT,
                             DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
                             DAG.getZExtOrTrunc(SignLowBit, dl, IntShVT));
  Sign = DAG.getSExtOrTrunc(Sign, dl, DstVT);

  SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
                          DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
                          DAG.getConstant(0x00800000, dl, IntVT));

  R = DAG.getZExtOrTrunc(R, dl, DstVT);

  R = DAG.getSelectCC(
      dl, Exponent, ExponentLoBit,
      DAG.getNode(ISD::SHL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
                      dl, IntShVT)),
      DAG.getNode(ISD::SRL, dl, DstVT, R,
                  DAG.getZExtOrTrunc(
                      DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
                      dl, IntShVT)),
      ISD::SETGT);

  SDValue Ret = DAG.getNode(ISD::SUB, dl, DstVT,
                            DAG.getNode(ISD::XOR, dl, DstVT, R, Sign), Sign);

  Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
                           DAG.getConstant(0, dl, DstVT), Ret, ISD::SETLT);
  return true;
}

bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  SDLoc dl(SDValue(Node, 0));
  unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
  SDValue Src = Node->getOperand(OpNo);

  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT SetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
  EVT DstSetCCVT =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);

  // Only expand vector types if we have the appropriate vector bit operations.
  unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
                                                   ISD::FP_TO_SINT;
  if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
    return false;

  // If the maximum float value is smaller than the signed integer range,
  // the destination signmask can't be represented by the float, so we can
  // just use FP_TO_SINT directly.
  const fltSemantics &APFSem = DAG.EVTToAPFloatSemantics(SrcVT);
  APFloat APF(APFSem, APInt::getNullValue(SrcVT.getScalarSizeInBits()));
  APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
  if (APFloat::opOverflow &
      APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
    if (Node->isStrictFPOpcode()) {
      Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                           { Node->getOperand(0), Src });
      Chain = Result.getValue(1);
    } else
      Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    return true;
  }

  // Don't expand it if there isn't a cheap fsub instruction.
  if (!isOperationLegalOrCustom(
          Node->isStrictFPOpcode() ? ISD::STRICT_FSUB : ISD::FSUB, SrcVT))
    return false;

  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
  SDValue Sel;

  if (Node->isStrictFPOpcode()) {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
                       Node->getOperand(0), /*IsSignaling*/ true);
    Chain = Sel.getValue(1);
  } else {
    Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
  }

  bool Strict = Node->isStrictFPOpcode() ||
                shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);

  if (Strict) {
    // Expand based on maximum range of FP_TO_SINT, if the value exceeds the
    // signmask then offset (the result of which should be fully representable).
    // Sel = Src < 0x8000000000000000
    // FltOfs = select Sel, 0, 0x8000000000000000
    // IntOfs = select Sel, 0, 0x8000000000000000
    // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
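    // Illustrative example (not from the original source): converting the
    // f64 value 2^63 + 2048 to u64. Sel is false, so FltOfs = 2^63 and
    // IntOfs = 0x8000000000000000; fp_to_sint(Src - 2^63) = 2048, and the
    // final XOR with IntOfs restores the high bit: 0x8000000000000800.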
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue FltOfs = DAG.getSelect(dl, SrcVT, Sel,
                                   DAG.getConstantFP(0.0, dl, SrcVT), Cst);
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    SDValue IntOfs = DAG.getSelect(dl, DstVT, Sel,
                                   DAG.getConstant(0, dl, DstVT),
                                   DAG.getConstant(SignMask, dl, DstVT));
    SDValue SInt;
    if (Node->isStrictFPOpcode()) {
      SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
                                { Chain, Src, FltOfs });
      SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
                         { Val.getValue(1), Val });
      Chain = SInt.getValue(1);
    } else {
      SDValue Val = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FltOfs);
      SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
    }
    Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
  } else {
    // Expand based on maximum range of FP_TO_SINT:
    // True = fp_to_sint(Src)
    // False = 0x8000000000000000 + fp_to_sint(Src - 0x8000000000000000)
    // Result = select (Src < 0x8000000000000000), True, False
    SDValue True = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
    // TODO: Should any fast-math-flags be set for the FSUB?
    SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT,
                                DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
    False = DAG.getNode(ISD::XOR, dl, DstVT, False,
                        DAG.getConstant(SignMask, dl, DstVT));
    Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
    Result = DAG.getSelect(dl, DstVT, Sel, True, False);
  }
  return true;
}

bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
                                      SDValue &Chain,
                                      SelectionDAG &DAG) const {
  // This transform is not correct for converting 0 when the rounding mode is
  // set to round toward negative infinity, which will produce -0.0. So disable
  // under strictfp.
  if (Node->isStrictFPOpcode())
    return false;

  SDValue Src = Node->getOperand(0);
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  if (SrcVT.getScalarType() != MVT::i64 || DstVT.getScalarType() != MVT::f64)
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (SrcVT.isVector() && (!isOperationLegalOrCustom(ISD::SRL, SrcVT) ||
                           !isOperationLegalOrCustom(ISD::FADD, DstVT) ||
                           !isOperationLegalOrCustom(ISD::FSUB, DstVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::OR, SrcVT) ||
                           !isOperationLegalOrCustomOrPromote(ISD::AND, SrcVT)))
    return false;

  SDLoc dl(SDValue(Node, 0));
  EVT ShiftVT = getShiftAmountTy(SrcVT, DAG.getDataLayout());

  // Implementation of unsigned i64 to f64 following the algorithm in
  // __floatundidf in compiler_rt. This implementation performs rounding
  // correctly in all rounding modes with the exception of converting 0
  // when rounding toward negative infinity. In that case the fsub will produce
  // -0.0. This will be added to +0.0 and produce -0.0, which is incorrect.
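  // Illustrative sketch of why this works (not from the original source):
  // OR-ing the low 32 bits into the mantissa of the double 2^52 gives the
  // exact value 2^52 + Lo, and OR-ing the high 32 bits into the mantissa of
  // 2^84 gives 2^84 + 2^32 * Hi. Subtracting (2^84 + 2^52) and adding the
  // two halves leaves exactly 2^32 * Hi + Lo, rounded only once.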
  SDValue TwoP52 = DAG.getConstant(UINT64_C(0x4330000000000000), dl, SrcVT);
  SDValue TwoP84PlusTwoP52 = DAG.getConstantFP(
      BitsToDouble(UINT64_C(0x4530000000100000)), dl, DstVT);
  SDValue TwoP84 = DAG.getConstant(UINT64_C(0x4530000000000000), dl, SrcVT);
  SDValue LoMask = DAG.getConstant(UINT64_C(0x00000000FFFFFFFF), dl, SrcVT);
  SDValue HiShift = DAG.getConstant(32, dl, ShiftVT);

  SDValue Lo = DAG.getNode(ISD::AND, dl, SrcVT, Src, LoMask);
  SDValue Hi = DAG.getNode(ISD::SRL, dl, SrcVT, Src, HiShift);
  SDValue LoOr = DAG.getNode(ISD::OR, dl, SrcVT, Lo, TwoP52);
  SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
  SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
  SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
  SDValue HiSub =
      DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
  return true;
}

SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned NewOp = Node->getOpcode() == ISD::FMINNUM ?
    ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
  EVT VT = Node->getValueType(0);

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding fminnum/fmaxnum for scalable vectors is undefined.");

  if (isOperationLegalOrCustom(NewOp, VT)) {
    SDValue Quiet0 = Node->getOperand(0);
    SDValue Quiet1 = Node->getOperand(1);

    if (!Node->getFlags().hasNoNaNs()) {
      // Insert canonicalizes if it's possible we need to quiet to get correct
      // sNaN behavior.
      if (!DAG.isKnownNeverSNaN(Quiet0)) {
        Quiet0 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet0,
                             Node->getFlags());
      }
      if (!DAG.isKnownNeverSNaN(Quiet1)) {
        Quiet1 = DAG.getNode(ISD::FCANONICALIZE, dl, VT, Quiet1,
                             Node->getFlags());
      }
    }

    return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
  }

  // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
  // instead if there are no NaNs.
  if (Node->getFlags().hasNoNaNs()) {
    unsigned IEEE2018Op =
        Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
    if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
      return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
                         Node->getOperand(1), Node->getFlags());
    }
  }

  // If none of the above worked, but there are no NaNs, then expand to
  // a compare/select sequence. This is required for correctness since
  // InstCombine might have canonicalized a fcmp+select sequence to a
  // FMINNUM/FMAXNUM node. If we were to fall through to the default
  // expansion to libcall, we might introduce a link-time dependency
  // on libm into a file that originally did not have one.
  if (Node->getFlags().hasNoNaNs()) {
    ISD::CondCode Pred =
        Node->getOpcode() == ISD::FMINNUM ? ISD::SETLT : ISD::SETGT;
    SDValue Op1 = Node->getOperand(0);
    SDValue Op2 = Node->getOperand(1);
    SDValue SelCC = DAG.getSelectCC(dl, Op1, Op2, Op1, Op2, Pred);
    // Copy FMF flags, but always set the no-signed-zeros flag
    // as this is implied by the FMINNUM/FMAXNUM semantics.
    SDNodeFlags Flags = Node->getFlags();
    Flags.setNoSignedZeros(true);
    SelCC->setFlags(Flags);
    return SelCC;
  }

  return SDValue();
}

SDValue TargetLowering::expandISNAN(EVT ResultVT, SDValue Op, SDNodeFlags Flags,
                                    const SDLoc &DL, SelectionDAG &DAG) const {
  EVT OperandVT = Op.getValueType();
  assert(OperandVT.isFloatingPoint());

  // If floating point exceptions are ignored, expand to an unordered
  // comparison.
  if ((Flags.hasNoFPExcept() &&
       isOperationLegalOrCustom(ISD::SETCC, OperandVT.getScalarType())) ||
      OperandVT == MVT::ppcf128)
    return DAG.getSetCC(DL, ResultVT, Op, DAG.getConstantFP(0.0, DL, OperandVT),
                        ISD::SETUO);

  // In the general case, use integer operations to avoid traps if the
  // argument is a signaling NaN.
  //
  // A NaN has all exponent bits set and a non-zero significand. Therefore:
  // isnan(V) == exp mask < abs(V)
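  // Illustrative example (not from the original source): for f32 the
  // exponent mask is 0x7F800000 (the bit pattern of +infinity), and any NaN
  // has strictly greater abs(V) bits, e.g. the quiet NaN 0x7FC00000.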
  unsigned BitSize = OperandVT.getScalarSizeInBits();
  EVT IntVT = OperandVT.changeTypeToInteger();
  SDValue ArgV = DAG.getBitcast(IntVT, Op);
  APInt AndMask = APInt::getSignedMaxValue(BitSize);
  SDValue AndMaskV = DAG.getConstant(AndMask, DL, IntVT);
  SDValue AbsV = DAG.getNode(ISD::AND, DL, IntVT, ArgV, AndMaskV);
  EVT ScalarFloatVT = OperandVT.getScalarType();
  const Type *FloatTy = ScalarFloatVT.getTypeForEVT(*DAG.getContext());
  const llvm::fltSemantics &Semantics = FloatTy->getFltSemantics();
  APInt ExpMask = APFloat::getInf(Semantics).bitcastToAPInt();
  SDValue ExpMaskV = DAG.getConstant(ExpMask, DL, IntVT);
  return DAG.getSetCC(DL, ResultVT, ExpMaskV, AbsV, ISD::SETLT);
}

bool TargetLowering::expandCTPOP(SDNode *Node, SDValue &Result,
                                 SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned Len = VT.getScalarSizeInBits();
  assert(VT.isInteger() && "CTPOP not implemented for this type.");

  // TODO: Add support for irregular type lengths.
  if (!(Len <= 128 && Len % 8 == 0))
    return false;

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isOperationLegalOrCustom(ISD::ADD, VT) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        (Len != 8 && !isOperationLegalOrCustom(ISD::MUL, VT)) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT)))
    return false;

  // This is the "best" algorithm from
  // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
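  // Worked example (illustrative, i8): v = 0xDA (0b11011010, 5 bits set).
  //   v - ((v >> 1) & 0x55)            -> 0x95
  //   (v & 0x33) + ((v >> 2) & 0x33)   -> 0x32
  //   (v + (v >> 4)) & 0x0F            -> 0x05 == popcount(0xDA)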
  SDValue Mask55 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
  SDValue Mask33 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
  SDValue Mask0F =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
  SDValue Mask01 =
      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);

  // v = v - ((v >> 1) & 0x55555555...)
  Op = DAG.getNode(ISD::SUB, dl, VT, Op,
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(1, dl, ShVT)),
                               Mask55));
  // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
  Op = DAG.getNode(ISD::ADD, dl, VT, DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
                   DAG.getNode(ISD::AND, dl, VT,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(2, dl, ShVT)),
                               Mask33));
  // v = (v + (v >> 4)) & 0x0F0F0F0F...
  Op = DAG.getNode(ISD::AND, dl, VT,
                   DAG.getNode(ISD::ADD, dl, VT, Op,
                               DAG.getNode(ISD::SRL, dl, VT, Op,
                                           DAG.getConstant(4, dl, ShVT))),
                   Mask0F);
  // v = (v * 0x01010101...) >> (Len - 8)
  if (Len > 8)
    Op =
        DAG.getNode(ISD::SRL, dl, VT, DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                    DAG.getConstant(Len - 8, dl, ShVT));

  Result = Op;
  return true;
}

bool TargetLowering::expandCTLZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTLZ, VT)) {
    Result = DAG.getNode(ISD::CTLZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTLZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        !isOperationLegalOrCustom(ISD::CTPOP, VT) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::OR, VT)))
    return false;

  // for now, we do this:
  // x = x | (x >> 1);
  // x = x | (x >> 2);
  // ...
  // x = x | (x >>16);
  // x = x | (x >>32); // for 64-bit input
  // return popcount(~x);
  //
  // Ref: "Hacker's Delight" by Henry Warren
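  // Illustrative example (not from the original source): for i8 x = 0x10,
  // the shift/or ladder smears the top set bit down giving x = 0x1F, so
  // popcount(~x) = popcount(0xE0) = 3 = ctlz(0x10).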
  for (unsigned i = 0; (1U << i) <= (NumBitsPerElt / 2); ++i) {
    SDValue Tmp = DAG.getConstant(1ULL << i, dl, ShVT);
    Op = DAG.getNode(ISD::OR, dl, VT, Op,
                     DAG.getNode(ISD::SRL, dl, VT, Op, Tmp));
  }
  Op = DAG.getNOT(dl, Op, VT);
  Result = DAG.getNode(ISD::CTPOP, dl, VT, Op);
  return true;
}

bool TargetLowering::expandCTTZ(SDNode *Node, SDValue &Result,
                                SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  SDValue Op = Node->getOperand(0);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // If the non-ZERO_UNDEF version is supported we can use that instead.
  if (Node->getOpcode() == ISD::CTTZ_ZERO_UNDEF &&
      isOperationLegalOrCustom(ISD::CTTZ, VT)) {
    Result = DAG.getNode(ISD::CTTZ, dl, VT, Op);
    return true;
  }

  // If the ZERO_UNDEF version is supported use that and handle the zero case.
  if (isOperationLegalOrCustom(ISD::CTTZ_ZERO_UNDEF, VT)) {
    EVT SetCCVT =
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    SDValue CTTZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, VT, Op);
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
    Result = DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
                         DAG.getConstant(NumBitsPerElt, dl, VT), CTTZ);
    return true;
  }

  // Only expand vector types if we have the appropriate vector bit operations.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        (!isOperationLegalOrCustom(ISD::CTPOP, VT) &&
                         !isOperationLegalOrCustom(ISD::CTLZ, VT)) ||
                        !isOperationLegalOrCustom(ISD::SUB, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::AND, VT) ||
                        !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  // for now, we use: { return popcount(~x & (x - 1)); }
  // unless the target has ctlz but not ctpop, in which case we use:
  // { return 32 - nlz(~x & (x-1)); }
  // Ref: "Hacker's Delight" by Henry Warren
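  // Illustrative example (not from the original source): for i8 x = 0x28
  // (0b00101000), ~x & (x - 1) = 0xD7 & 0x27 = 0x07, and popcount(0x07) = 3,
  // the number of trailing zeros.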
  SDValue Tmp = DAG.getNode(
      ISD::AND, dl, VT, DAG.getNOT(dl, Op, VT),
      DAG.getNode(ISD::SUB, dl, VT, Op, DAG.getConstant(1, dl, VT)));

  // If ISD::CTLZ is legal and CTPOP isn't, then do that instead.
  if (isOperationLegal(ISD::CTLZ, VT) && !isOperationLegal(ISD::CTPOP, VT)) {
    Result =
        DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(NumBitsPerElt, dl, VT),
                    DAG.getNode(ISD::CTLZ, dl, VT, Tmp));
    return true;
  }

  Result = DAG.getNode(ISD::CTPOP, dl, VT, Tmp);
  return true;
}

bool TargetLowering::expandABS(SDNode *N, SDValue &Result,
                               SelectionDAG &DAG, bool IsNegative) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Op = N->getOperand(0);

  // abs(x) -> smax(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMAX, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Result = DAG.getNode(ISD::SMAX, dl, VT, Op,
                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
    return true;
  }

  // abs(x) -> umin(x,sub(0,x))
  if (!IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::UMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Result = DAG.getNode(ISD::UMIN, dl, VT, Op,
                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
    return true;
  }

  // 0 - abs(x) -> smin(x, sub(0,x))
  if (IsNegative && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::SMIN, VT)) {
    SDValue Zero = DAG.getConstant(0, dl, VT);
    Result = DAG.getNode(ISD::SMIN, dl, VT, Op,
                         DAG.getNode(ISD::SUB, dl, VT, Zero, Op));
    return true;
  }

  // Only expand vector types if we have the appropriate vector operations.
  if (VT.isVector() &&
      (!isOperationLegalOrCustom(ISD::SRA, VT) ||
       (!IsNegative && !isOperationLegalOrCustom(ISD::ADD, VT)) ||
       (IsNegative && !isOperationLegalOrCustom(ISD::SUB, VT)) ||
       !isOperationLegalOrCustomOrPromote(ISD::XOR, VT)))
    return false;

  SDValue Shift =
      DAG.getNode(ISD::SRA, dl, VT, Op,
                  DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
  if (!IsNegative) {
    SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
    Result = DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
  } else {
    // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
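    // Illustrative example (not from the original source): for i8 x = -5,
    // Shift = -5 >> 7 = 0xFF, xor(-5, 0xFF) = 4, and sub(0xFF, 4) as a
    // signed i8 is -1 - 4 = -5 = 0 - abs(-5).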
    SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
    Result = DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
  }
  return true;
}

SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);

  if (!VT.isSimple())
    return SDValue();

  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
  switch (VT.getSimpleVT().getScalarType().SimpleTy) {
  default:
    return SDValue();
  case MVT::i16:
    // Use a rotate by 8. This can be further expanded if necessary.
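    // Illustrative example (not from the original source):
    // bswap i16 0x1234 == 0x3412 == rotl(0x1234, 8).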
    return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
  case MVT::i32:
    Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(0xFF0000, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(0xFF00, dl, VT));
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    return DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
  case MVT::i64:
    Tmp8 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp6 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp5 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp4 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(40, dl, SHVT));
    Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(56, dl, SHVT));
    Tmp7 = DAG.getNode(ISD::AND, dl, VT, Tmp7,
                       DAG.getConstant(255ULL << 48, dl, VT));
    Tmp6 = DAG.getNode(ISD::AND, dl, VT, Tmp6,
                       DAG.getConstant(255ULL << 40, dl, VT));
    Tmp5 = DAG.getNode(ISD::AND, dl, VT, Tmp5,
                       DAG.getConstant(255ULL << 32, dl, VT));
    Tmp4 = DAG.getNode(ISD::AND, dl, VT, Tmp4,
                       DAG.getConstant(255ULL << 24, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp3,
                       DAG.getConstant(255ULL << 16, dl, VT));
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2,
                       DAG.getConstant(255ULL << 8, dl, VT));
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp7);
    Tmp6 = DAG.getNode(ISD::OR, dl, VT, Tmp6, Tmp5);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp3);
    Tmp2 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp1);
    Tmp8 = DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp6);
    Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp2);
    return DAG.getNode(ISD::OR, dl, VT, Tmp8, Tmp4);
  }
}

SDValue TargetLowering::expandBITREVERSE(SDNode *N, SelectionDAG &DAG) const {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SHVT = getShiftAmountTy(VT, DAG.getDataLayout());
  unsigned Sz = VT.getScalarSizeInBits();

  SDValue Tmp, Tmp2, Tmp3;

  // If we can, perform BSWAP first and then mask+swap the i4, then the i2,
  // and finally the i1 pairs.
  // TODO: We can easily support i4/i2 legal types if any target ever does.
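  // Illustrative example (not from the original source): for i8,
  // bitreverse(0xB4 = 0b10110100) = 0x2D = 0b00101101; the three mask+shift
  // rounds below swap nibbles, then bit pairs, then single bits.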
  if (Sz >= 8 && isPowerOf2_32(Sz)) {
    // Create the masks - repeating the pattern every byte.
    APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
    APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
    APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
    APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
    APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
    APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));

    // BSWAP if the type is wider than a single byte.
    Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);

    // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);

    // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
    Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
    Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, SHVT));
    Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, SHVT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
    return Tmp;
  }

  Tmp = DAG.getConstant(0, dl, VT);
  for (unsigned I = 0, J = Sz - 1; I < Sz; ++I, --J) {
    if (I < J)
      Tmp2 =
          DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(J - I, dl, SHVT));
    else
      Tmp2 =
          DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(I - J, dl, SHVT));

    APInt Shift(Sz, 1);
    Shift <<= J;
    Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp2, DAG.getConstant(Shift, dl, VT));
    Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp, Tmp2);
  }

  return Tmp;
}

std::pair<SDValue, SDValue>
TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
                                    SelectionDAG &DAG) const {
  SDLoc SL(LD);
  SDValue Chain = LD->getChain();
  SDValue BasePTR = LD->getBasePtr();
  EVT SrcVT = LD->getMemoryVT();
  EVT DstVT = LD->getValueType(0);
  ISD::LoadExtType ExtType = LD->getExtensionType();

  if (SrcVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector loads");

  unsigned NumElem = SrcVT.getVectorNumElements();

  EVT SrcEltVT = SrcVT.getScalarType();
  EVT DstEltVT = DstVT.getScalarType();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
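  // Illustrative example (not from the original source): a <4 x i1> vector
  // occupies 4 bits, so it is loaded as one small integer; each element is
  // then recovered below with a per-element shift and mask.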
  if (!SrcEltVT.isByteSized()) {
    unsigned NumLoadBits = SrcVT.getStoreSizeInBits();
    EVT LoadVT = EVT::getIntegerVT(*DAG.getContext(), NumLoadBits);

    unsigned NumSrcBits = SrcVT.getSizeInBits();
    EVT SrcIntVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcBits);

    unsigned SrcEltBits = SrcEltVT.getSizeInBits();
    SDValue SrcEltBitMask = DAG.getConstant(
        APInt::getLowBitsSet(NumLoadBits, SrcEltBits), SL, LoadVT);

    // Load the whole vector and avoid masking off the top bits as it makes
    // the codegen worse.
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, SL, LoadVT, Chain, BasePTR,
                       LD->getPointerInfo(), SrcIntVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    SmallVector<SDValue, 8> Vals;
    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getShiftAmountConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(),
                                     LoadVT, SL, /*LegalTypes=*/false);
      SDValue ShiftedElt = DAG.getNode(ISD::SRL, SL, LoadVT, Load, ShiftAmount);
      SDValue Elt =
          DAG.getNode(ISD::AND, SL, LoadVT, ShiftedElt, SrcEltBitMask);
      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, Elt);

      if (ExtType != ISD::NON_EXTLOAD) {
        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
      }

      Vals.push_back(Scalar);
    }

    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
    return std::make_pair(Value, Load.getValue(1));
  }

  unsigned Stride = SrcEltVT.getSizeInBits() / 8;
  assert(SrcEltVT.isByteSized());

  SmallVector<SDValue, 8> Vals;
  SmallVector<SDValue, 8> LoadChains;

  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue ScalarLoad =
        DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
                       LD->getPointerInfo().getWithOffset(Idx * Stride),
                       SrcEltVT, LD->getOriginalAlign(),
                       LD->getMemOperand()->getFlags(), LD->getAAInfo());

    BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, TypeSize::Fixed(Stride));

    Vals.push_back(ScalarLoad.getValue(0));
    LoadChains.push_back(ScalarLoad.getValue(1));
  }

  SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);

  return std::make_pair(Value, NewChain);
}

SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  SDLoc SL(ST);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  EVT StVT = ST->getMemoryVT();

  if (StVT.isScalableVector())
    report_fatal_error("Cannot scalarize scalable vector stores");

  // The type of the data we want to save
  EVT RegVT = Value.getValueType();
  EVT RegSclVT = RegVT.getScalarType();

  // The type of data as saved in memory.
  EVT MemSclVT = StVT.getScalarType();

  unsigned NumElem = StVT.getVectorNumElements();

  // A vector must always be stored in memory as-is, i.e. without any padding
  // between the elements, since various code depends on it, e.g. in the
  // handling of a bitcast of a vector type to int, which may be done with a
  // vector store followed by an integer load. A vector that does not have
  // elements that are byte-sized must therefore be stored as an integer
  // built out of the extracted vector elements.
  if (!MemSclVT.isByteSized()) {
    unsigned NumBits = StVT.getSizeInBits();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);

    SDValue CurrVal = DAG.getConstant(0, SL, IntVT);

    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
      SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                                DAG.getVectorIdxConstant(Idx, SL));
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
      SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
      unsigned ShiftIntoIdx =
          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
      SDValue ShiftAmount =
          DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
      SDValue ShiftedElt =
          DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
      CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
    }

    return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
                        ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
                        ST->getAAInfo());
  }

  // Store Stride in bytes
  unsigned Stride = MemSclVT.getSizeInBits() / 8;
  assert(Stride && "Zero stride!");
  // Extract each of the elements from the original vector and save them into
  // memory individually.
  SmallVector<SDValue, 8> Stores;
  for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
                              DAG.getVectorIdxConstant(Idx, SL));

    SDValue Ptr =
        DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::Fixed(Idx * Stride));

    // This scalar TruncStore may be illegal, but we legalize it later.
    SDValue Store = DAG.getTruncStore(
        Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
        MemSclVT, ST->getOriginalAlign(), ST->getMemOperand()->getFlags(),
        ST->getAAInfo());

    Stores.push_back(Store);
  }

  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
}

std::pair<SDValue, SDValue>
TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
  assert(LD->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed loads not implemented!");
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();
  EVT VT = LD->getValueType(0);
  EVT LoadedVT = LD->getMemoryVT();
  SDLoc dl(LD);
  auto &MF = DAG.getMachineFunction();

  if (VT.isFloatingPoint() || VT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
    if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
      if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
          LoadedVT.isVector()) {
        // Scalarize the load and let the individual components be handled.
        return scalarizeVectorLoad(LD, DAG);
      }

      // Expand to a (misaligned) integer load of the same size,
      // then bitconvert to floating point or vector.
      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
                                    LD->getMemOperand());
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
      if (LoadedVT != VT)
        Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
                             ISD::ANY_EXTEND, dl, VT, Result);

      return std::make_pair(Result, newLoad.getValue(1));
    }

    // Copy the value to a (aligned) stack slot using (unaligned) integer
    // loads and stores, then do a (aligned) load from the stack slot.
    MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
    unsigned LoadedBytes = LoadedVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
    SmallVector<SDValue, 8> Stores;
    SDValue StackPtr = StackBase;
    unsigned Offset = 0;

    EVT PtrVT = Ptr.getValueType();
    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the original location.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
          LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
          LD->getAAInfo());
      // Follow the load with a store to the stack slot. Remember the store.
      Stores.push_back(DAG.getStore(
          Load.getValue(1), dl, Load, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
      // Increment the pointers.
      Offset += RegBytes;

      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
    }

    // The last copy may be partial. Do an extending load.
    EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
                                  8 * (LoadedBytes - Offset));
    SDValue Load =
        DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
                       LD->getPointerInfo().getWithOffset(Offset), MemVT,
                       LD->getOriginalAlign(), LD->getMemOperand()->getFlags(),
                       LD->getAAInfo());
    // Follow the load with a store to the stack slot. Remember the store.
    // On big-endian machines this requires a truncating store to ensure
    // that the bits end up in the right place.
    Stores.push_back(DAG.getTruncStore(
        Load.getValue(1), dl, Load, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));

    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    // Finally, perform the original load only redirected to the stack slot.
    Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
                          MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
                          LoadedVT);

    // Callers expect a MERGE_VALUES node.
    return std::make_pair(Load, TF);
  }

  assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
         "Unaligned load of unsupported type.");

  // Compute the new VT that is half the size of the old one. This is an
  // integer MVT.
  unsigned NumBits = LoadedVT.getSizeInBits();
  EVT NewLoadedVT;
  NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits / 2);
  NumBits >>= 1;

  Align Alignment = LD->getOriginalAlign();
  unsigned IncrementSize = NumBits / 8;
  ISD::LoadExtType HiExtType = LD->getExtensionType();

  // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
  if (HiExtType == ISD::NON_EXTLOAD)
    HiExtType = ISD::ZEXTLOAD;

  // Load the value in two parts
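  // Illustrative example (not from the original source): an unaligned i32
  // load is split into two half-width loads; on a little-endian target the
  // result is rebuilt below as (Hi << 16) | Lo.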
  SDValue Lo, Hi;
  if (DAG.getDataLayout().isLittleEndian()) {
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  } else {
    Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());

    Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
    Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                        LD->getPointerInfo().getWithOffset(IncrementSize),
                        NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
                        LD->getAAInfo());
  }

  // aggregate the two parts
  SDValue ShiftAmount =
      DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
                                                    DAG.getDataLayout()));
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
  Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
                           Hi.getValue(1));

  return std::make_pair(Result, TF);
}

SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
                                             SelectionDAG &DAG) const {
  assert(ST->getAddressingMode() == ISD::UNINDEXED &&
         "unaligned indexed stores not implemented!");
  SDValue Chain = ST->getChain();
  SDValue Ptr = ST->getBasePtr();
  SDValue Val = ST->getValue();
  EVT VT = Val.getValueType();
  Align Alignment = ST->getOriginalAlign();
  auto &MF = DAG.getMachineFunction();
  EVT StoreMemVT = ST->getMemoryVT();

  SDLoc dl(ST);
  if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
    EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (isTypeLegal(intVT)) {
      if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
          StoreMemVT.isVector()) {
        // Scalarize the store and let the individual components be handled.
        SDValue Result = scalarizeVectorStore(ST, DAG);
        return Result;
      }
      // Expand to a bitconvert of the value to the integer type of the
      // same size, then a (misaligned) int store.
      // FIXME: Does not handle truncating floating point stores!
      SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
      Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
                            Alignment, ST->getMemOperand()->getFlags());
      return Result;
    }
    // Do a (aligned) store to a stack slot, then copy from the stack slot
    // to the final destination using (unaligned) integer loads and stores.
    MVT RegVT = getRegisterType(
        *DAG.getContext(),
        EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
    EVT PtrVT = Ptr.getValueType();
    unsigned StoredBytes = StoreMemVT.getStoreSize();
    unsigned RegBytes = RegVT.getSizeInBits() / 8;
    unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;

    // Make sure the stack slot is also aligned for the register type.
    SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
    auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();

    // Perform the original store, only redirected to the stack slot.
    SDValue Store = DAG.getTruncStore(
        Chain, dl, Val, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);

    EVT StackPtrVT = StackPtr.getValueType();

    SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
    SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
    SmallVector<SDValue, 8> Stores;
    unsigned Offset = 0;

    // Do all but one copies using the full register width.
    for (unsigned i = 1; i < NumRegs; i++) {
      // Load one integer register's worth from the stack slot.
      SDValue Load = DAG.getLoad(
          RegVT, dl, Store, StackPtr,
          MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
      // Store it to the final location. Remember the store.
      Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
                                    ST->getPointerInfo().getWithOffset(Offset),
                                    ST->getOriginalAlign(),
                                    ST->getMemOperand()->getFlags()));
      // Increment the pointers.
      Offset += RegBytes;
      StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
      Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
    }

    // The last store may be partial. Do a truncating store. On big-endian
    // machines this requires an extending load from the stack slot to ensure
    // that the bits are in the right place.
    EVT LoadMemVT =
        EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));

    // Load from the stack slot.
    SDValue Load = DAG.getExtLoad(
        ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
        MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);

    Stores.push_back(
        DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
                          ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
                          ST->getOriginalAlign(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo()));
    // The order of the stores doesn't matter - say it with a TokenFactor.
    SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    return Result;
  }

  assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
         "Unaligned store of unknown type.");
  // Get the half-size VT
  EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
  unsigned NumBits = NewStoredVT.getFixedSizeInBits();
  unsigned IncrementSize = NumBits / 8;

  // Divide the stored value in two parts.
  SDValue ShiftAmount = DAG.getConstant(
      NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
  SDValue Lo = Val;
  SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);

  // Store the two parts
  SDValue Store1, Store2;
  Store1 = DAG.getTruncStore(Chain, dl,
                             DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
                             Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
                             ST->getMemOperand()->getFlags());

  Ptr = DAG.getObjectPtrOffset(dl, Ptr, TypeSize::Fixed(IncrementSize));
  Store2 = DAG.getTruncStore(
      Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
      ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
      ST->getMemOperand()->getFlags(), ST->getAAInfo());

  SDValue Result =
      DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
  return Result;
}

SDValue
TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
                                       const SDLoc &DL, EVT DataVT,
                                       SelectionDAG &DAG,
                                       bool IsCompressedMemory) const {
  SDValue Increment;
  EVT AddrVT = Addr.getValueType();
  EVT MaskVT = Mask.getValueType();
  assert(DataVT.getVectorElementCount() == MaskVT.getVectorElementCount() &&
         "Incompatible types of Data and Mask");
  if (IsCompressedMemory) {
    if (DataVT.isScalableVector())
      report_fatal_error(
          "Cannot currently handle compressed memory with scalable vectors");
    // Incrementing the pointer according to number of '1's in the mask.
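    // Illustrative example (not from the original source): for a v4i32
    // compressed store with mask 0b1011, three elements are written, so the
    // address is advanced by popcount(mask) * 4 bytes.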
    EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
    SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
    if (MaskIntVT.getSizeInBits() < 32) {
      MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
      MaskIntVT = MVT::i32;
    }

    // Count '1's with POPCNT.
    Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
    Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
    // Scale is an element size in bytes.
    SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
                                    AddrVT);
    Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
  } else if (DataVT.isScalableVector()) {
    Increment = DAG.getVScale(DL, AddrVT,
                              APInt(AddrVT.getFixedSizeInBits(),
                                    DataVT.getStoreSize().getKnownMinSize()));
  } else
    Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);

  return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
}

static SDValue clampDynamicVectorIndex(SelectionDAG &DAG, SDValue Idx,
                                       EVT VecVT, const SDLoc &dl,
                                       unsigned NumSubElts) {
  if (!VecVT.isScalableVector() && isa<ConstantSDNode>(Idx))
    return Idx;

  EVT IdxVT = Idx.getValueType();
  unsigned NElts = VecVT.getVectorMinNumElements();
  if (VecVT.isScalableVector()) {
    // If this is a constant index and we know the value plus the number of the
    // elements in the subvector minus one is less than the minimum number of
    // elements then it's safe to return Idx.
    if (auto *IdxCst = dyn_cast<ConstantSDNode>(Idx))
      if (IdxCst->getZExtValue() + (NumSubElts - 1) < NElts)
        return Idx;
    SDValue VS =
        DAG.getVScale(dl, IdxVT, APInt(IdxVT.getFixedSizeInBits(), NElts));
    unsigned SubOpcode = NumSubElts <= NElts ? ISD::SUB : ISD::USUBSAT;
    SDValue Sub = DAG.getNode(SubOpcode, dl, IdxVT, VS,
                              DAG.getConstant(NumSubElts, dl, IdxVT));
    return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx, Sub);
  }
  if (isPowerOf2_32(NElts) && NumSubElts == 1) {
    APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(), Log2_32(NElts));
    return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
                       DAG.getConstant(Imm, dl, IdxVT));
  }
  unsigned MaxIndex = NumSubElts < NElts ? NElts - NumSubElts : 0;
  return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
                     DAG.getConstant(MaxIndex, dl, IdxVT));
}
7866 SDValue
TargetLowering::getVectorElementPointer(SelectionDAG
&DAG
,
7867 SDValue VecPtr
, EVT VecVT
,
7868 SDValue Index
) const {
7869 return getVectorSubVecPointer(
7871 EVT::getVectorVT(*DAG
.getContext(), VecVT
.getVectorElementType(), 1),

SDValue TargetLowering::getVectorSubVecPointer(SelectionDAG &DAG,
                                               SDValue VecPtr, EVT VecVT,
                                               EVT SubVecVT,
                                               SDValue Index) const {
  SDLoc dl(Index);
  // Make sure the index type is big enough to compute in.
  Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());

  EVT EltVT = VecVT.getVectorElementType();

  // Calculate the element offset and add it to the pointer.
  unsigned EltSize = EltVT.getFixedSizeInBits() / 8; // FIXME: should be ABI size.
  assert(EltSize * 8 == EltVT.getFixedSizeInBits() &&
         "Converting bits to bytes lost precision");

  // Scalable vectors don't need clamping as these are checked at compile time.
  if (SubVecVT.isFixedLengthVector()) {
    assert(SubVecVT.getVectorElementType() == EltVT &&
           "Sub-vector must be a fixed vector with matching element type");
    Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl,
                                    SubVecVT.getVectorNumElements());
  }

  EVT IdxVT = Index.getValueType();

  Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
                      DAG.getConstant(EltSize, dl, IdxVT));
  return DAG.getMemBasePlusOffset(VecPtr, Index, dl);
}
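
// Illustrative note (not from the original source): with VecPtr addressing a
// v8i32 in memory and a clamped index of 5, EltSize is 4 bytes, so the code
// above forms VecPtr + (5 * 4) = VecPtr + 20 via the ISD::MUL feeding
// getMemBasePlusOffset.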

//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//

SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                                SelectionDAG &DAG) const {
  // Access to address of TLS variable xyz is lowered to a function call:
  //   __emutls_get_address( address of global variable named "__emutls_v.xyz" )
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
  SDLoc dl(GA);

  ArgListTy Args;
  ArgListEntry Entry;
  std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
  Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
  StringRef EmuTlsVarName(NameString);
  GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
  assert(EmuTlsVar && "Cannot find EmuTlsVar ");
  Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
  Entry.Ty = VoidPtrType;
  Args.push_back(Entry);

  SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
  CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);

  // TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  // At least for X86 targets, maybe good for other targets too?
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setAdjustsStack(true); // Is this only for X86 target?
  MFI.setHasCalls(true);

  assert((GA->getOffset() == 0) &&
         "Emulated TLS must have zero offset in GlobalAddressSDNode");
  return CallResult.first;
}
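
// Illustrative note (not from the original source): for a thread_local global
// @xyz, the lowering above looks up the runtime control variable
// "__emutls_v.xyz" and replaces the TLS address computation with an ordinary
// call, roughly: __emutls_get_address(&__emutls_v.xyz).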

SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
  if (!isCtlzFast())
    return SDValue();
  SDLoc dl(Op);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, dl, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
  }
  return SDValue();
}
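
// Illustrative note (not from the original source): for an i32 input, CTLZ
// returns 32 only when the input is zero, and Log2_32(32) = 5, so
// (CTLZ(x) >> 5) is 1 exactly when x == 0 and 0 otherwise, matching the
// semantics of the original SETEQ-with-zero.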

// Convert redundant addressing modes (e.g. scaling is redundant
// when accessing bytes).
ISD::MemIndexType
TargetLowering::getCanonicalIndexType(ISD::MemIndexType IndexType, EVT MemVT,
                                      SDValue Offsets) const {
  bool IsScaledIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::UNSIGNED_SCALED);
  bool IsSignedIndex =
      (IndexType == ISD::SIGNED_SCALED) || (IndexType == ISD::SIGNED_UNSCALED);

  // Scaling is unimportant for bytes, canonicalize to unscaled.
  if (IsScaledIndex && MemVT.getScalarType() == MVT::i8) {
    IsScaledIndex = false;
    IndexType = IsSignedIndex ? ISD::SIGNED_UNSCALED : ISD::UNSIGNED_UNSCALED;
  }

  return IndexType;
}

SDValue TargetLowering::expandIntMINMAX(SDNode *Node, SelectionDAG &DAG) const {
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  EVT VT = Op0.getValueType();
  unsigned Opcode = Node->getOpcode();
  SDLoc DL(Node);

  // umin(x,y) -> sub(x,usubsat(x,y))
  if (Opcode == ISD::UMIN && isOperationLegal(ISD::SUB, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::SUB, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op0, Op1));
  }

  // umax(x,y) -> add(x,usubsat(y,x))
  if (Opcode == ISD::UMAX && isOperationLegal(ISD::ADD, VT) &&
      isOperationLegal(ISD::USUBSAT, VT)) {
    return DAG.getNode(ISD::ADD, DL, VT, Op0,
                       DAG.getNode(ISD::USUBSAT, DL, VT, Op1, Op0));
  }

  // Expand Y = MAX(A, B) -> Y = (A > B) ? A : B
  ISD::CondCode CC;
  switch (Opcode) {
  default: llvm_unreachable("How did we get here?");
  case ISD::SMAX: CC = ISD::SETGT; break;
  case ISD::SMIN: CC = ISD::SETLT; break;
  case ISD::UMAX: CC = ISD::SETUGT; break;
  case ISD::UMIN: CC = ISD::SETULT; break;
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  SDValue Cond = DAG.getSetCC(DL, VT, Op0, Op1, CC);
  return DAG.getSelect(DL, VT, Cond, Op0, Op1);
}
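
// Illustrative note (not from the original source): usubsat(x, y) computes
// max(x - y, 0), so x - usubsat(x, y) is x - (x - y) = y when x >= y and
// x - 0 = x when x < y, which is exactly umin(x, y); the umax identity used
// above is the mirror image.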

SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // usub.sat(a, b) -> umax(a, b) - b
  if (Opcode == ISD::USUBSAT && isOperationLegal(ISD::UMAX, VT)) {
    SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
    return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
  }

  // uadd.sat(a, b) -> umin(a, ~b) + b
  if (Opcode == ISD::UADDSAT && isOperationLegal(ISD::UMIN, VT)) {
    SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
    SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
    return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
  }

  unsigned OverflowOp;
  switch (Opcode) {
  case ISD::SADDSAT:
    OverflowOp = ISD::SADDO;
    break;
  case ISD::UADDSAT:
    OverflowOp = ISD::UADDO;
    break;
  case ISD::SSUBSAT:
    OverflowOp = ISD::SSUBO;
    break;
  case ISD::USUBSAT:
    OverflowOp = ISD::USUBO;
    break;
  default:
    llvm_unreachable("Expected method to receive signed or unsigned saturation "
                     "addition or subtraction node.");
  }

  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
    return DAG.UnrollVectorOp(Node);

  unsigned BitWidth = LHS.getScalarValueSizeInBits();
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue Result =
      DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
  SDValue SumDiff = Result.getValue(0);
  SDValue Overflow = Result.getValue(1);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);

  if (Opcode == ISD::UADDSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS + RHS) | OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
    }
    // Overflow ? 0xffff.... : (LHS + RHS)
    return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
  }

  if (Opcode == ISD::USUBSAT) {
    if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
      // (LHS - RHS) & ~OverflowMask
      SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
      SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
      return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
    }
    // Overflow ? 0 : (LHS - RHS)
    return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
  }

  // Overflow ? (SumDiff >> BW) ^ MinVal : SumDiff
  APInt MinVal = APInt::getSignedMinValue(BitWidth);
  SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
  SDValue Shift = DAG.getNode(ISD::SRA, dl, VT, SumDiff,
                              DAG.getConstant(BitWidth - 1, dl, VT));
  Result = DAG.getNode(ISD::XOR, dl, VT, Shift, SatMin);
  return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
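
// Illustrative note (not from the original source): for i8 saddsat, an
// overflowed sum wraps to the opposite sign of the true result, so
// (SumDiff >> 7) is 0xFF after a positive overflow and 0x00 after a negative
// one; XOR with SatMin (0x80) then yields 0x7F (INT8_MAX) or 0x80 (INT8_MIN)
// respectively, the correct saturation value in both directions.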

SDValue TargetLowering::expandShlSat(SDNode *Node, SelectionDAG &DAG) const {
  unsigned Opcode = Node->getOpcode();
  bool IsSigned = Opcode == ISD::SSHLSAT;
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  SDLoc dl(Node);

  assert((Node->getOpcode() == ISD::SSHLSAT ||
          Node->getOpcode() == ISD::USHLSAT) &&
          "Expected a SHLSAT opcode");
  assert(VT == RHS.getValueType() && "Expected operands to be the same type");
  assert(VT.isInteger() && "Expected operands to be integers");

  // If LHS != (LHS << RHS) >> RHS, we have overflow and must saturate.

  unsigned BW = VT.getScalarSizeInBits();
  SDValue Result = DAG.getNode(ISD::SHL, dl, VT, LHS, RHS);
  SDValue Orig =
      DAG.getNode(IsSigned ? ISD::SRA : ISD::SRL, dl, VT, Result, RHS);

  SDValue SatVal;
  if (IsSigned) {
    SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(BW), dl, VT);
    SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(BW), dl, VT);
    SatVal = DAG.getSelectCC(dl, LHS, DAG.getConstant(0, dl, VT),
                             SatMin, SatMax, ISD::SETLT);
  } else {
    SatVal = DAG.getConstant(APInt::getMaxValue(BW), dl, VT);
  }
  Result = DAG.getSelectCC(dl, LHS, Orig, SatVal, Result, ISD::SETNE);

  return Result;
}
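
// Illustrative note (not from the original source): for i8 ushlsat,
// 0x60 << 2 truncates to 0x80 and shifts back to 0x80 >> 2 = 0x20 != 0x60,
// so overflow is detected and the result saturates to 0xFF, while
// 0x13 << 2 = 0x4C round-trips to 0x13 and is kept as-is.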

SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
  assert((Node->getOpcode() == ISD::SMULFIX ||
          Node->getOpcode() == ISD::UMULFIX ||
          Node->getOpcode() == ISD::SMULFIXSAT ||
          Node->getOpcode() == ISD::UMULFIXSAT) &&
         "Expected a fixed point multiplication opcode");

  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  EVT VT = LHS.getValueType();
  unsigned Scale = Node->getConstantOperandVal(2);
  bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
                     Node->getOpcode() == ISD::UMULFIXSAT);
  bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
                 Node->getOpcode() == ISD::SMULFIXSAT);
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  unsigned VTSize = VT.getScalarSizeInBits();

  if (!Scale) {
    // [us]mul.fix(a, b, 0) -> mul(a, b)
    if (!Saturating) {
      if (isOperationLegalOrCustom(ISD::MUL, VT))
        return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);
      SDValue Zero = DAG.getConstant(0, dl, VT);

      APInt MinVal = APInt::getSignedMinValue(VTSize);
      APInt MaxVal = APInt::getSignedMaxValue(VTSize);
      SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
      Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
      return DAG.getSelect(dl, VT, Overflow, Result, Product);
    } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
      SDValue Result =
          DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
      SDValue Product = Result.getValue(0);
      SDValue Overflow = Result.getValue(1);

      APInt MaxVal = APInt::getMaxValue(VTSize);
      SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
      return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
    }
  }

  assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
         "Expected scale to be less than the number of bits if signed or at "
         "most the number of bits if unsigned.");
  assert(LHS.getValueType() == RHS.getValueType() &&
         "Expected both operands to be the same type");

  // Get the upper and lower bits of the result.
  SDValue Lo, Hi;
  unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
  unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
  if (isOperationLegalOrCustom(LoHiOp, VT)) {
    SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
    Lo = Result.getValue(0);
    Hi = Result.getValue(1);
  } else if (isOperationLegalOrCustom(HiOp, VT)) {
    Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
  } else if (VT.isVector()) {
    return SDValue();
  } else {
    report_fatal_error("Unable to expand fixed point multiplication.");
  }

  if (Scale == VTSize)
    // Result is just the top half since we'd be shifting by the width of the
    // operand. Overflow impossible so this works for both UMULFIX and
    // UMULFIXSAT.
    return Hi;

  // The result will need to be shifted right by the scale since both operands
  // are scaled. The result is given to us in 2 halves, so we only want part of
  // both in the result.
  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
                               DAG.getConstant(Scale, dl, ShiftTy));
  if (!Saturating)
    return Result;

  if (!Signed) {
    // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
    // widened multiplication) aren't all zeroes.

    // Saturate to max if ((Hi >> Scale) != 0),
    // which is the same as if (Hi > ((1 << Scale) - 1))
    APInt MaxVal = APInt::getMaxValue(VTSize);
    SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
                                      dl, VT);
    Result = DAG.getSelectCC(dl, Hi, LowMask,
                             DAG.getConstant(MaxVal, dl, VT), Result,
                             ISD::SETUGT);

    return Result;
  }

  // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
  // widened multiplication) aren't all ones or all zeroes.

  SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
  SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);

  if (Scale == 0) {
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
                               DAG.getConstant(VTSize - 1, dl, ShiftTy));
    SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
    // Saturate to SatMin if wide product is negative, and SatMax if wide
    // product is positive ...
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
                                               ISD::SETLT);
    // ... but only if we overflowed.
    return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
  }

  // We handled Scale == 0 above so all the bits to examine are in Hi.

  // Saturate to max if ((Hi >> (Scale - 1)) > 0),
  // which is the same as if (Hi > ((1 << (Scale - 1)) - 1))
  SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
                                    dl, VT);
  Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
  // Saturate to min if ((Hi >> (Scale - 1)) < -1),
  // which is the same as if (Hi < (-1 << (Scale - 1))).
  SDValue HighMask =
      DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
                      dl, VT);
  Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);

  return Result;
}
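
// Illustrative note (not from the original source): a fixed point value with
// scale S encodes V / 2^S. For i8 umulfix with Scale = 4, 1.5 is encoded as
// 0x18 (24); 24 * 24 = 576 = 0x240 in the 16-bit double-wide product, and
// FSHR(Hi, Lo, 4) keeps bits [11:4], giving 0x24 (36), i.e. 2.25, the exact
// square of 1.5.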

SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                                    SDValue LHS, SDValue RHS,
                                    unsigned Scale, SelectionDAG &DAG) const {
  assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
          Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
         "Expected a fixed point division opcode");

  EVT VT = LHS.getValueType();
  bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
  bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
  EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // If there is enough room in the type to upscale the LHS or downscale the
  // RHS before the division, we can perform it in this type without having to
  // resize. For signed operations, the LHS headroom is the number of
  // redundant sign bits, and for unsigned ones it is the number of zeroes.
  // The headroom for the RHS is the number of trailing zeroes.
  unsigned LHSLead = Signed ? DAG.ComputeNumSignBits(LHS) - 1
                            : DAG.computeKnownBits(LHS).countMinLeadingZeros();
  unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();

  // For signed saturating operations, we need to be able to detect true integer
  // division overflow; that is, when you have MIN / -EPS. However, this
  // is undefined behavior and if we emit divisions that could take such
  // values it may cause undesired behavior (arithmetic exceptions on x86, for
  // example).
  // Avoid this by requiring an extra bit so that we never get this case.
  // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
  // signed saturating division, we need to emit a whopping 32-bit division.
  if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
    return SDValue();

  unsigned LHSShift = std::min(LHSLead, Scale);
  unsigned RHSShift = Scale - LHSShift;

  // At this point, we know that if we shift the LHS up by LHSShift and the
  // RHS down by RHSShift, we can emit a regular division with a final scaling
  // factor of 1.

  EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
  if (LHSShift)
    LHS = DAG.getNode(ISD::SHL, dl, VT, LHS,
                      DAG.getConstant(LHSShift, dl, ShiftTy));
  if (RHSShift)
    RHS = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, VT, RHS,
                      DAG.getConstant(RHSShift, dl, ShiftTy));

  SDValue Quot;
  if (Signed) {
    // For signed operations, if the resulting quotient is negative and the
    // remainder is nonzero, subtract 1 from the quotient to round towards
    // negative infinity.
    SDValue Rem;
    // FIXME: Ideally we would always produce an SDIVREM here, but if the
    // type isn't legal, SDIVREM cannot be expanded. There is no reason why
    // we couldn't just form a libcall, but the type legalizer doesn't do it.
    if (isTypeLegal(VT) &&
        isOperationLegalOrCustom(ISD::SDIVREM, VT)) {
      Quot = DAG.getNode(ISD::SDIVREM, dl,
                         DAG.getVTList(VT, VT),
                         LHS, RHS);
      Rem = Quot.getValue(1);
      Quot = Quot.getValue(0);
    } else {
      Quot = DAG.getNode(ISD::SDIV, dl, VT,
                         LHS, RHS);
      Rem = DAG.getNode(ISD::SREM, dl, VT,
                        LHS, RHS);
    }
    SDValue Zero = DAG.getConstant(0, dl, VT);
    SDValue RemNonZero = DAG.getSetCC(dl, BoolVT, Rem, Zero, ISD::SETNE);
    SDValue LHSNeg = DAG.getSetCC(dl, BoolVT, LHS, Zero, ISD::SETLT);
    SDValue RHSNeg = DAG.getSetCC(dl, BoolVT, RHS, Zero, ISD::SETLT);
    SDValue QuotNeg = DAG.getNode(ISD::XOR, dl, BoolVT, LHSNeg, RHSNeg);
    SDValue Sub1 = DAG.getNode(ISD::SUB, dl, VT, Quot,
                               DAG.getConstant(1, dl, VT));
    Quot = DAG.getSelect(dl, VT,
                         DAG.getNode(ISD::AND, dl, BoolVT, RemNonZero, QuotNeg),
                         Sub1, Quot);
  } else
    Quot = DAG.getNode(ISD::UDIV, dl, VT,
                       LHS, RHS);

  return Quot;
}

void TargetLowering::expandUADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::UADDO;

  // If ADD/SUBCARRY is legal, use that instead.
  unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
  if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
    SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
    SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
                                    { LHS, RHS, CarryIn });
    Result = SDValue(NodeCarry.getNode(), 0);
    Overflow = SDValue(NodeCarry.getNode(), 1);
    return;
  }

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT SetCCType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
  ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
  SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
  Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
}
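
// Illustrative note (not from the original source): for unsigned addition the
// wrapped sum is below either operand exactly when a carry-out occurred; in
// i8, 200 + 100 wraps to 44 and 44 < 200, so SETULT(Result, LHS) flags the
// overflow, while for subtraction 5 - 10 wraps to 251 > 5, so
// SETUGT(Result, LHS) does.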

void TargetLowering::expandSADDSUBO(
    SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool IsAdd = Node->getOpcode() == ISD::SADDO;

  Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
                       LHS.getValueType(), LHS, RHS);

  EVT ResultType = Node->getValueType(1);
  EVT OType = getSetCCResultType(
      DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));

  // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
  unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
  if (isOperationLegal(OpcSat, LHS.getValueType())) {
    SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
    SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
    Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
    return;
  }

  SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());

  // For an addition, the result should be less than one of the operands (LHS)
  // if and only if the other operand (RHS) is negative, otherwise there will
  // be overflow.
  // For a subtraction, the result should be less than one of the operands
  // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
  // otherwise there will be overflow.
  SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
  SDValue ConditionRHS =
      DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);

  Overflow = DAG.getBoolExtOrTrunc(
      DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
      ResultType, ResultType);
}
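
// Illustrative note (not from the original source): in i8, 100 + 50 wraps to
// -106, which is below LHS even though RHS is non-negative, so the two tests
// disagree, the XOR fires, and overflow is reported; 100 + (-50) = 50 is also
// below LHS, but there RHS really is negative, the tests agree, and no
// overflow is reported.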

bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
                                SDValue &Overflow, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  EVT VT = Node->getValueType(0);
  EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  SDValue LHS = Node->getOperand(0);
  SDValue RHS = Node->getOperand(1);
  bool isSigned = Node->getOpcode() == ISD::SMULO;

  // For power-of-two multiplications we can use a simpler shift expansion.
  if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
    const APInt &C = RHSC->getAPIntValue();
    // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
    if (C.isPowerOf2()) {
      // smulo(x, signed_min) is same as umulo(x, signed_min).
      bool UseArithShift = isSigned && !C.isMinSignedValue();
      EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
      SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
      Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
      Overflow = DAG.getSetCC(dl, SetCCVT,
          DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
                      dl, VT, Result, ShiftAmt),
          LHS, ISD::SETNE);
      return true;
    }
  }

  EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
  if (VT.isVector())
    WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
                              VT.getVectorNumElements());

  SDValue BottomHalf;
  SDValue TopHalf;
  static const unsigned Ops[2][3] =
      { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
        { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
  if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
    BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
    TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
  } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
    BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
                             RHS);
    TopHalf = BottomHalf.getValue(1);
  } else if (isTypeLegal(WideVT)) {
    LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
    RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
    BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
    SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
        getShiftAmountTy(WideVT, DAG.getDataLayout()));
    TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
  } else {
    if (VT.isVector())
      return false;

    // We can fall back to a libcall with an illegal type for the MUL if we
    // have a libcall big enough.
    // Also, we can fall back to a division in some cases, but that's a big
    // performance hit in the general case.
    RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
    if (WideVT == MVT::i16)
      LC = RTLIB::MUL_I16;
    else if (WideVT == MVT::i32)
      LC = RTLIB::MUL_I32;
    else if (WideVT == MVT::i64)
      LC = RTLIB::MUL_I64;
    else if (WideVT == MVT::i128)
      LC = RTLIB::MUL_I128;
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");

    SDValue HiLHS;
    SDValue HiRHS;
    if (isSigned) {
      // The high part is obtained by SRA'ing all but one of the bits of low
      // part.
      unsigned LoSize = VT.getFixedSizeInBits();
      HiLHS =
          DAG.getNode(ISD::SRA, dl, VT, LHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
      HiRHS =
          DAG.getNode(ISD::SRA, dl, VT, RHS,
                      DAG.getConstant(LoSize - 1, dl,
                                      getPointerTy(DAG.getDataLayout())));
    } else {
      HiLHS = DAG.getConstant(0, dl, VT);
      HiRHS = DAG.getConstant(0, dl, VT);
    }

    // Here we're passing the 2 arguments explicitly as 4 arguments that are
    // pre-lowered to the correct types. This all depends upon WideVT not
    // being a legal type for the architecture and thus has to be split to
    // two arguments.
    SDValue Ret;
    TargetLowering::MakeLibCallOptions CallOptions;
    CallOptions.setSExt(isSigned);
    CallOptions.setIsPostTypeLegalization(true);
    if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
      // Halves of WideVT are packed into registers in different order
      // depending on platform endianness. This is usually handled by
      // the C calling convention, but we can't defer to it in
      // the legalizer.
      SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    } else {
      SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
      Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
    }
    assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
           "Ret value is a collection of constituent nodes holding result.");
    if (DAG.getDataLayout().isLittleEndian()) {
      // Same as above.
      BottomHalf = Ret.getOperand(0);
      TopHalf = Ret.getOperand(1);
    } else {
      BottomHalf = Ret.getOperand(1);
      TopHalf = Ret.getOperand(0);
    }
  }

  Result = BottomHalf;
  if (isSigned) {
    SDValue ShiftAmt = DAG.getConstant(
        VT.getScalarSizeInBits() - 1, dl,
        getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
    SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
  } else {
    Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
                            DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  // Truncate the result if SetCC returns a larger type than needed.
  EVT RType = Node->getValueType(1);
  if (RType.bitsLT(Overflow.getValueType()))
    Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);

  assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
         "Unexpected result type for S/UMULO legalization");
  return true;
}

SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
  SDLoc dl(Node);
  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());
  SDValue Op = Node->getOperand(0);
  EVT VT = Op.getValueType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  // Try to use a shuffle reduction for power of two vectors.
  if (VT.isPow2VectorType()) {
    while (VT.getVectorNumElements() > 1) {
      EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
      if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
        break;

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
      Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
      VT = HalfVT;
    }
  }

  EVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(Op, Ops, 0, NumElts);

  SDValue Res = Ops[0];
  for (unsigned i = 1; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());

  // Result type may be wider than element type.
  if (EltVT != Node->getValueType(0))
    Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
  return Res;
}
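
// Illustrative note (not from the original source, assuming the halved types
// stay legal down to two lanes): vecreduce_add on v8i32 first adds the two
// v4i32 halves lane-wise, then the two v2i32 halves, and finally combines the
// remaining two lanes scalar-wise: two vector adds plus one scalar add in
// place of seven scalar adds.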

SDValue TargetLowering::expandVecReduceSeq(SDNode *Node,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Node);
  SDValue AccOp = Node->getOperand(0);
  SDValue VecOp = Node->getOperand(1);
  SDNodeFlags Flags = Node->getFlags();

  EVT VT = VecOp.getValueType();
  EVT EltVT = VT.getVectorElementType();

  if (VT.isScalableVector())
    report_fatal_error(
        "Expanding reductions for scalable vectors is undefined.");

  unsigned NumElts = VT.getVectorNumElements();

  SmallVector<SDValue, 8> Ops;
  DAG.ExtractVectorElements(VecOp, Ops, 0, NumElts);

  unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Node->getOpcode());

  SDValue Res = AccOp;
  for (unsigned i = 0; i < NumElts; i++)
    Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Flags);

  return Res;
}

bool TargetLowering::expandREM(SDNode *Node, SDValue &Result,
                               SelectionDAG &DAG) const {
  EVT VT = Node->getValueType(0);
  SDLoc dl(Node);
  bool isSigned = Node->getOpcode() == ISD::SREM;
  unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV;
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
  SDValue Dividend = Node->getOperand(0);
  SDValue Divisor = Node->getOperand(1);
  if (isOperationLegalOrCustom(DivRemOpc, VT)) {
    SDVTList VTs = DAG.getVTList(VT, VT);
    Result = DAG.getNode(DivRemOpc, dl, VTs, Dividend, Divisor).getValue(1);
    return true;
  }
  if (isOperationLegalOrCustom(DivOpc, VT)) {
    // X % Y -> X-X/Y*Y
    SDValue Divide = DAG.getNode(DivOpc, dl, VT, Dividend, Divisor);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Divide, Divisor);
    Result = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
    return true;
  }
  return false;
}

SDValue TargetLowering::expandFP_TO_INT_SAT(SDNode *Node,
                                            SelectionDAG &DAG) const {
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // DstVT is the result type, while SatVT is the size to which we saturate
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth &&
         "Expected saturation width smaller than result width");

  // Determine minimum and maximum integer values and their corresponding
  // floating-point values.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  }

  // We cannot risk emitting FP_TO_XINT nodes with a source VT of f16, as
  // libcall emission cannot handle this. Large result types will fail.
  if (SrcVT == MVT::f16) {
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, Src);
    SrcVT = Src.getValueType();
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus =
      MinFloat.convertFromAPInt(MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus =
      MaxFloat.convertFromAPInt(MaxInt, IsSigned, APFloat::rmTowardZero);
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) &&
                             !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the integer bounds are exactly representable as floats and min/max are
  // legal, emit a min+max+fptoi sequence. Otherwise we have to use a sequence
  // of comparisons and selects.
  bool MinMaxLegal = isOperationLegal(ISD::FMINNUM, SrcVT) &&
                     isOperationLegal(ISD::FMAXNUM, SrcVT);
  if (AreExactFloatBounds && MinMaxLegal) {
    SDValue Clamped = Src;

    // Clamp Src by MinFloat from below. If Src is NaN the result is MinFloat.
    Clamped = DAG.getNode(ISD::FMAXNUM, dl, SrcVT, Clamped, MinFloatNode);
    // Clamp by MaxFloat from above. NaN cannot occur.
    Clamped = DAG.getNode(ISD::FMINNUM, dl, SrcVT, Clamped, MaxFloatNode);
    // Convert clamped value to integer.
    SDValue FpToInt = DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT,
                                  dl, DstVT, Clamped);

    // In the unsigned case we're done, because we mapped NaN to MinFloat,
    // which will cast to zero.
    if (!IsSigned)
      return FpToInt;

    // Otherwise, select 0 if Src is NaN.
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(dl, Src, Src, ZeroInt, FpToInt,
                           ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Result of direct conversion. The assumption here is that the operation is
  // non-trapping and it's fine to apply it to an out-of-range value if we
  // select it away later.
  SDValue FpToInt =
      DAG.getNode(IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT, dl, DstVT, Src);

  SDValue Select = FpToInt;

  // If Src ULT MinFloat, select MinInt. In particular, this also selects
  // MinInt if Src is NaN.
  Select = DAG.getSelectCC(dl, Src, MinFloatNode, MinIntNode, Select,
                           ISD::CondCode::SETULT);
  // If Src OGT MaxFloat, select MaxInt.
  Select = DAG.getSelectCC(dl, Src, MaxFloatNode, MaxIntNode, Select,
                           ISD::CondCode::SETOGT);

  // In the unsigned case we are done, because we mapped NaN to MinInt, which
  // is already zero.
  if (!IsSigned)
    return Select;

  // Otherwise, select 0 if Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
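
// Illustrative note (not from the original source): for fptosi.sat from f32
// to i8, the bounds -128.0 and 127.0 are exactly representable, so the fast
// path clamps with fmaxnum/fminnum and converts: 300.5 clamps to 127.0 and
// yields 127, -1000.0 clamps to -128.0 and yields -128, and NaN is mapped to
// MinFloat by fmaxnum and then replaced with 0 by the final SETUO select.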

SDValue TargetLowering::expandVectorSplice(SDNode *Node,
                                           SelectionDAG &DAG) const {
  assert(Node->getOpcode() == ISD::VECTOR_SPLICE && "Unexpected opcode!");
  assert(Node->getValueType(0).isScalableVector() &&
         "Fixed length vector types expected to use SHUFFLE_VECTOR!");

  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  int64_t Imm = cast<ConstantSDNode>(Node->getOperand(2))->getSExtValue();
  SDLoc DL(Node);

  // Expand through memory thusly:
  //  Alloca CONCAT_VECTORS_TYPES(V1, V2) Ptr
  //  Store V1, Ptr
  //  Store V2, Ptr + sizeof(V1)
  //  If (Imm < 0)
  //    TrailingElts = -Imm
  //    Ptr = Ptr + sizeof(V1) - (TrailingElts * sizeof(VT.Elt))
  //  else
  //    Ptr = Ptr + (Imm * sizeof(VT.Elt))
  //  Res = Load Ptr

  Align Alignment = DAG.getReducedAlign(VT, /*UseABI=*/false);

  EVT MemVT = EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(),
                               VT.getVectorElementCount() * 2);
  SDValue StackPtr = DAG.CreateStackTemporary(MemVT.getStoreSize(), Alignment);
  EVT PtrVT = StackPtr.getValueType();
  auto &MF = DAG.getMachineFunction();
  auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIndex);

  // Store the lo part of CONCAT_VECTORS(V1, V2)
  SDValue StoreV1 = DAG.getStore(DAG.getEntryNode(), DL, V1, StackPtr, PtrInfo);
  // Store the hi part of CONCAT_VECTORS(V1, V2)
  SDValue OffsetToV2 = DAG.getVScale(
      DL, PtrVT,
      APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
  SDValue StackPtr2 = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, OffsetToV2);
  SDValue StoreV2 = DAG.getStore(StoreV1, DL, V2, StackPtr2, PtrInfo);

  if (Imm >= 0) {
    // Load back the required element. getVectorElementPointer takes care of
    // clamping the index if it's out-of-bounds.
    StackPtr = getVectorElementPointer(DAG, StackPtr, VT, Node->getOperand(2));
    // Load the spliced result
    return DAG.getLoad(VT, DL, StoreV2, StackPtr,
                       MachinePointerInfo::getUnknownStack(MF));
  }

  uint64_t TrailingElts = -Imm;

  // NOTE: TrailingElts must be clamped so as not to read outside of V1:V2.
  TypeSize EltByteSize = VT.getVectorElementType().getStoreSize();
  SDValue TrailingBytes =
      DAG.getConstant(TrailingElts * EltByteSize, DL, PtrVT);

  if (TrailingElts > VT.getVectorMinNumElements()) {
    SDValue VLBytes = DAG.getVScale(
        DL, PtrVT,
        APInt(PtrVT.getFixedSizeInBits(), VT.getStoreSize().getKnownMinSize()));
    TrailingBytes = DAG.getNode(ISD::UMIN, DL, PtrVT, TrailingBytes, VLBytes);
  }

  // Calculate the start address of the spliced result.
  StackPtr2 = DAG.getNode(ISD::SUB, DL, PtrVT, StackPtr2, TrailingBytes);

  // Load the spliced result
  return DAG.getLoad(VT, DL, StoreV2, StackPtr2,
                     MachinePointerInfo::getUnknownStack(MF));
}
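
// Illustrative note (not from the original source, shown with four lanes of a
// scalable vector): vector_splice(<A0,A1,A2,A3>, <B0,B1,B2,B3>, 1) stores the
// operands contiguously as A0 A1 A2 A3 B0 B1 B2 B3 and loads starting at
// element 1, giving <A1,A2,A3,B0>; an immediate of -1 instead starts one
// element before the end of V1, giving <A3,B0,B1,B2>.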

bool TargetLowering::LegalizeSetCCCondCode(SelectionDAG &DAG, EVT VT,
                                           SDValue &LHS, SDValue &RHS,
                                           SDValue &CC, bool &NeedInvert,
                                           const SDLoc &dl, SDValue &Chain,
                                           bool IsSignaling) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT OpVT = LHS.getSimpleValueType();
  ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
  NeedInvert = false;
  switch (TLI.getCondCodeAction(CCCode, OpVT)) {
  default:
    llvm_unreachable("Unknown condition code action!");
  case TargetLowering::Legal:
    // Nothing to do.
    break;
  case TargetLowering::Expand: {
    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(InvCC);
      return true;
    }
    // Swapping operands didn't work. Try inverting the condition.
    bool NeedSwap = false;
    InvCC = getSetCCInverse(CCCode, OpVT);
    if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      // If inverting the condition is not enough, try swapping operands
      // on top of it.
      InvCC = ISD::getSetCCSwappedOperands(InvCC);
      NeedSwap = true;
    }
    if (TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
      CC = DAG.getCondCode(InvCC);
      NeedInvert = true;
      if (NeedSwap)
        std::swap(LHS, RHS);
      return true;
    }

    ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
    unsigned Opc = 0;
    switch (CCCode) {
    default:
      llvm_unreachable("Don't know how to expand this condition!");
    case ISD::SETUO:
      if (TLI.isCondCodeLegal(ISD::SETUNE, OpVT)) {
        CC1 = ISD::SETUNE;
        CC2 = ISD::SETUNE;
        Opc = ISD::OR;
        break;
      }
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETUO is expanded, SETOEQ or SETUNE must be legal!");
      NeedInvert = true;
      LLVM_FALLTHROUGH;
    case ISD::SETO:
      assert(TLI.isCondCodeLegal(ISD::SETOEQ, OpVT) &&
             "If SETO is expanded, SETOEQ must be legal!");
      CC1 = ISD::SETOEQ;
      CC2 = ISD::SETOEQ;
      Opc = ISD::AND;
      break;
    case ISD::SETONE:
    case ISD::SETUEQ:
      // If the SETUO or SETO CC isn't legal, we might be able to use
      // SETOGT || SETOLT, inverting the result for SETUEQ. We only need one
      // of SETOGT/SETOLT to be legal, the other can be emulated by swapping
      // the operands.
      CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
      if (!TLI.isCondCodeLegal(CC2, OpVT) &&
          (TLI.isCondCodeLegal(ISD::SETOGT, OpVT) ||
           TLI.isCondCodeLegal(ISD::SETOLT, OpVT))) {
        CC1 = ISD::SETOGT;
        CC2 = ISD::SETOLT;
        Opc = ISD::OR;
        NeedInvert = ((unsigned)CCCode & 0x8U);
        break;
      }
      LLVM_FALLTHROUGH;
    case ISD::SETOEQ:
    case ISD::SETOGT:
    case ISD::SETOGE:
    case ISD::SETOLT:
    case ISD::SETOLE:
    case ISD::SETUNE:
    case ISD::SETUGT:
    case ISD::SETUGE:
    case ISD::SETULT:
    case ISD::SETULE:
      // If we are floating point, assign and break, otherwise fall through.
      if (!OpVT.isInteger()) {
        // We can use the 4th bit to tell if we are the unordered
        // or ordered version of the opcode.
        CC2 = ((unsigned)CCCode & 0x8U) ? ISD::SETUO : ISD::SETO;
        Opc = ((unsigned)CCCode & 0x8U) ? ISD::OR : ISD::AND;
        CC1 = (ISD::CondCode)(((int)CCCode & 0x7) | 0x10);
        break;
      }
      // Fallthrough if we are unsigned integer.
      LLVM_FALLTHROUGH;
    case ISD::SETLE:
    case ISD::SETGT:
    case ISD::SETGE:
    case ISD::SETLT:
    case ISD::SETNE:
    case ISD::SETEQ:
      // If all combinations of inverting the condition and swapping operands
      // didn't work then we have no means to expand the condition.
      llvm_unreachable("Don't know how to expand this condition!");
    }

    SDValue SetCC1, SetCC2;
    if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
      // If we aren't the ordered or unordered operation,
      // then the pattern is (LHS CC1 RHS) Opc (LHS CC2 RHS).
      SetCC1 = DAG.getSetCC(dl, VT, LHS, RHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, LHS, RHS, CC2, Chain, IsSignaling);
    } else {
      // Otherwise, the pattern is (LHS CC1 LHS) Opc (RHS CC2 RHS)
      SetCC1 = DAG.getSetCC(dl, VT, LHS, LHS, CC1, Chain, IsSignaling);
      SetCC2 = DAG.getSetCC(dl, VT, RHS, RHS, CC2, Chain, IsSignaling);
    }
    if (Chain)
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, SetCC1.getValue(1),
                          SetCC2.getValue(1));
    LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);