//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements the TargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include <cctype>

using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
    : TargetLoweringBase(tm) {}

const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
  return nullptr;
}

bool TargetLowering::isPositionIndependent() const {
  return getTargetMachine().isPositionIndependent();
}
/// Check whether a given call node is in tail position within its function. If
/// so, it sets Chain to the input chain of the tail call.
bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
                                          SDValue &Chain) const {
  const Function &F = DAG.getMachineFunction().getFunction();

  // Conservatively require the attributes of the call to match those of
  // the return. Ignore NoAlias and NonNull because they don't affect the
  // call sequence.
  AttributeList CallerAttrs = F.getAttributes();
  if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
          .removeAttribute(Attribute::NoAlias)
          .removeAttribute(Attribute::NonNull)
          .hasAttributes())
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    return false;

  // Check if the only use is a function return node.
  return isUsedByReturnOnly(Node, Chain);
}

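/// Check that each outgoing argument that is passed in a callee-saved register
/// is the same value the caller received in that register, i.e. a CopyFromReg
/// of the corresponding function live-in. Such arguments do not need to be
/// saved and restored around the call, which matters for tail-call
/// eligibility.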
bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
    const uint32_t *CallerPreservedMask,
    const SmallVectorImpl<CCValAssign> &ArgLocs,
    const SmallVectorImpl<SDValue> &OutVals) const {
  for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
    const CCValAssign &ArgLoc = ArgLocs[I];
    if (!ArgLoc.isRegLoc())
      continue;
    unsigned Reg = ArgLoc.getLocReg();
    // Only look at callee saved registers.
    if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
      continue;
    // Check that we pass the value used for the caller.
    // (We look for a CopyFromReg reading a virtual register that is used
    //  for the function live-in value of register Reg)
    SDValue Value = OutVals[I];
    if (Value->getOpcode() != ISD::CopyFromReg)
      return false;
    unsigned ArgReg = cast<RegisterSDNode>(Value->getOperand(1))->getReg();
    if (MRI.getLiveInPhysReg(ArgReg) != Reg)
      return false;
  }
  return true;
}

/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
                                                     unsigned ArgIdx) {
  IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
  IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
  IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
  IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
  IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
  IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
  IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
  IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
  IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
  IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
  Alignment = CS->getParamAlignment(ArgIdx);
}

/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
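///
/// For example (illustrative sketch, not code from this file), a target
/// expanding an unsupported f128 multiply could emit a call to the __multf3
/// libcall like so:
///   SDValue Ops[2] = {LHS, RHS};
///   return makeLibCall(DAG, RTLIB::MUL_F128, MVT::f128, Ops,
///                      /*isSigned=*/false, dl).first;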
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
                            ArrayRef<SDValue> Ops, bool isSigned,
                            const SDLoc &dl, bool doesNotReturn,
                            bool isReturnValueUsed) const {
  TargetLowering::ArgListTy Args;
  Args.reserve(Ops.size());

  TargetLowering::ArgListEntry Entry;
  for (SDValue Op : Ops) {
    Entry.Node = Op;
    Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
    Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
    Args.push_back(Entry);
  }

  if (LC == RTLIB::UNKNOWN_LIBCALL)
    report_fatal_error("Unsupported library call operation!");
  SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
                                         getPointerTy(DAG.getDataLayout()));

  Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
  TargetLowering::CallLoweringInfo CLI(DAG);
  bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
      .setNoReturn(doesNotReturn)
      .setDiscardResult(!isReturnValueUsed)
      .setSExtResult(signExtend)
      .setZExtResult(!signExtend);
  return LowerCallTo(CLI);
}

/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
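///
/// For example, an f32 SETONE needs two libcalls (with the default libcall
/// names): the results of __ltsf2 and __gtsf2 are each compared against zero
/// and the two setcc results are OR'd together.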
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                         SDValue &NewLHS, SDValue &NewRHS,
                                         ISD::CondCode &CCCode,
                                         const SDLoc &dl) const {
  assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
         && "Unsupported setcc type!");

  // Expand into one or more soft-fp libcall(s).
  RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL;
  bool ShouldInvertCC = false;
  switch (CCCode) {
  case ISD::SETEQ:
  case ISD::SETOEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  case ISD::SETNE:
  case ISD::SETUNE:
    LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 :
          (VT == MVT::f64) ? RTLIB::UNE_F64 :
          (VT == MVT::f128) ? RTLIB::UNE_F128 : RTLIB::UNE_PPCF128;
    break;
  case ISD::SETGE:
  case ISD::SETOGE:
    LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
          (VT == MVT::f64) ? RTLIB::OGE_F64 :
          (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
    break;
  case ISD::SETLT:
  case ISD::SETOLT:
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    break;
  case ISD::SETLE:
  case ISD::SETOLE:
    LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
          (VT == MVT::f64) ? RTLIB::OLE_F64 :
          (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
    break;
  case ISD::SETGT:
  case ISD::SETOGT:
    LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUO:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    break;
  case ISD::SETO:
    LC1 = (VT == MVT::f32) ? RTLIB::O_F32 :
          (VT == MVT::f64) ? RTLIB::O_F64 :
          (VT == MVT::f128) ? RTLIB::O_F128 : RTLIB::O_PPCF128;
    break;
  case ISD::SETONE:
    // SETONE = SETOLT | SETOGT
    LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
          (VT == MVT::f64) ? RTLIB::OLT_F64 :
          (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
          (VT == MVT::f64) ? RTLIB::OGT_F64 :
          (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
    break;
  case ISD::SETUEQ:
    LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
          (VT == MVT::f64) ? RTLIB::UO_F64 :
          (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
    LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
          (VT == MVT::f64) ? RTLIB::OEQ_F64 :
          (VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
    break;
  default:
    // Invert CC for unordered comparisons
    ShouldInvertCC = true;
    switch (CCCode) {
    case ISD::SETULT:
      LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 :
            (VT == MVT::f64) ? RTLIB::OGE_F64 :
            (VT == MVT::f128) ? RTLIB::OGE_F128 : RTLIB::OGE_PPCF128;
      break;
    case ISD::SETULE:
      LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 :
            (VT == MVT::f64) ? RTLIB::OGT_F64 :
            (VT == MVT::f128) ? RTLIB::OGT_F128 : RTLIB::OGT_PPCF128;
      break;
    case ISD::SETUGT:
      LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 :
            (VT == MVT::f64) ? RTLIB::OLE_F64 :
            (VT == MVT::f128) ? RTLIB::OLE_F128 : RTLIB::OLE_PPCF128;
      break;
    case ISD::SETUGE:
      LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 :
            (VT == MVT::f64) ? RTLIB::OLT_F64 :
            (VT == MVT::f128) ? RTLIB::OLT_F128 : RTLIB::OLT_PPCF128;
      break;
    default: llvm_unreachable("Do not know how to soften this setcc!");
    }
  }

  // Use the target specific return value for comparison lib calls.
  EVT RetVT = getCmpLibcallReturnType();
  SDValue Ops[2] = {NewLHS, NewRHS};
  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
                       dl).first;
  NewRHS = DAG.getConstant(0, dl, RetVT);

  CCCode = getCmpLibcallCC(LC1);
  if (ShouldInvertCC)
    CCCode = getSetCCInverse(CCCode, /*isInteger=*/true);

  if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
    SDValue Tmp = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(CCCode));
    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false /*sign irrelevant*/,
                         dl).first;
    NewLHS = DAG.getNode(
        ISD::SETCC, dl,
        getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
        NewLHS, NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
    NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
    NewRHS = SDValue();
  }
}

/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
unsigned TargetLowering::getJumpTableEncoding() const {
  // In non-pic modes, just use the address of a block.
  if (!isPositionIndependent())
    return MachineJumpTableInfo::EK_BlockAddress;

  // In PIC mode, if the target supports a GPRel32 directive, use it.
  if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != nullptr)
    return MachineJumpTableInfo::EK_GPRel32BlockAddress;

  // Otherwise, use a label difference.
  return MachineJumpTableInfo::EK_LabelDifference32;
}

SDValue TargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                 SelectionDAG &DAG) const {
  // If our PIC model is GP relative, use the global offset table as the base.
  unsigned JTEncoding = getJumpTableEncoding();

  if ((JTEncoding == MachineJumpTableInfo::EK_GPRel64BlockAddress) ||
      (JTEncoding == MachineJumpTableInfo::EK_GPRel32BlockAddress))
    return DAG.getGLOBAL_OFFSET_TABLE(getPointerTy(DAG.getDataLayout()));

  return Table;
}

/// This returns the relocation base for the given PIC jumptable, the same as
/// getPICJumpTableRelocBase, but as an MCExpr.
const MCExpr *
TargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                             unsigned JTI, MCContext &Ctx) const {
  // The normal PIC reloc base is the label at the start of the jump table.
  return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
}

bool
TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  const TargetMachine &TM = getTargetMachine();
  const GlobalValue *GV = GA->getGlobal();

  // If the address is not even local to this DSO we will have to load it from
  // a got and then add the offset.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return false;

  // If the code is position independent we will have to add a base register.
  if (isPositionIndependent())
    return false;

  // Otherwise we can do it.
  return true;
}

//===----------------------------------------------------------------------===//
//  Optimization Methods
//===----------------------------------------------------------------------===//

/// If the specified instruction has a constant integer operand and there are
/// bits set in that constant that are not demanded, then clear those bits and
/// return true.
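///
/// For example, if Op is (or X, 0xFF) but only the low four bits of the
/// result are ever demanded, the constant is shrunk and Op becomes
/// (or X, 0xF).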
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
                                            TargetLoweringOpt &TLO) const {
  SelectionDAG &DAG = TLO.DAG;
  SDLoc DL(Op);
  unsigned Opcode = Op.getOpcode();

  // Do target-specific constant optimization.
  if (targetShrinkDemandedConstant(Op, Demanded, TLO))
    return TLO.New.getNode();

  // FIXME: ISD::SELECT, ISD::SELECT_CC
  switch (Opcode) {
  default:
    break;
  case ISD::XOR:
  case ISD::AND:
  case ISD::OR: {
    auto *Op1C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    if (!Op1C)
      return false;

    // If this is a 'not' op, don't touch it because that's a canonical form.
    const APInt &C = Op1C->getAPIntValue();
    if (Opcode == ISD::XOR && Demanded.isSubsetOf(C))
      return false;

    if (!C.isSubsetOf(Demanded)) {
      EVT VT = Op.getValueType();
      SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
      SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }

    break;
  }
  }

  return false;
}

/// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free.
/// This uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
/// generalized for targets with other types of implicit widening casts.
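///
/// For example, if only the low 8 bits of an i64 add are demanded and i32 is
/// the smallest power-of-2 width with free casts in both directions, the add
/// is rewritten as (i64 any_extend (i32 add (trunc X), (trunc Y))).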
bool TargetLowering::ShrinkDemandedOp(SDValue Op, unsigned BitWidth,
                                      const APInt &Demanded,
                                      TargetLoweringOpt &TLO) const {
  assert(Op.getNumOperands() == 2 &&
         "ShrinkDemandedOp only supports binary operators!");
  assert(Op.getNode()->getNumValues() == 1 &&
         "ShrinkDemandedOp only supports nodes with one result!");

  SelectionDAG &DAG = TLO.DAG;
  SDLoc dl(Op);

  // Early return, as this function cannot handle vector types.
  if (Op.getValueType().isVector())
    return false;

  // Don't do this if the node has another user, which may require the
  // full value.
  if (!Op.getNode()->hasOneUse())
    return false;

  // Search for the smallest integer type with free casts to and from
  // Op's type. For expedience, just check power-of-2 integer types.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned DemandedSize = Demanded.getActiveBits();
  unsigned SmallVTBits = DemandedSize;
  if (!isPowerOf2_32(SmallVTBits))
    SmallVTBits = NextPowerOf2(SmallVTBits);
  for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) {
    EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), SmallVTBits);
    if (TLI.isTruncateFree(Op.getValueType(), SmallVT) &&
        TLI.isZExtFree(SmallVT, Op.getValueType())) {
      // We found a type with free casts.
      SDValue X = DAG.getNode(
          Op.getOpcode(), dl, SmallVT,
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(0)),
          DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getOperand(1)));
      assert(DemandedSize <= SmallVTBits && "Narrowed below demanded bits?");
      SDValue Z = DAG.getNode(ISD::ANY_EXTEND, dl, Op.getValueType(), X);
      return TLO.CombineTo(Op, Z);
    }
  }
  return false;
}

bool
TargetLowering::SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
                                     const APInt &Demanded,
                                     DAGCombinerInfo &DCI,
                                     TargetLoweringOpt &TLO) const {
  SDValue Op = User->getOperand(OpIdx);
  KnownBits Known;

  if (!SimplifyDemandedBits(Op, Demanded, Known, TLO, 0, true))
    return false;

  // Old will not always be the same as Op. For example:
  //
  // Demanded = 0xffffff
  // Op = i64 truncate (i32 and x, 0xffffff)
  // In this case simplify demand bits will want to replace the 'and' node
  // with the value 'x', which will give us:
  // Old = i32 and x, 0xffffff
  // New = x
  if (TLO.Old.hasOneUse()) {
    // For the one use case, we just commit the change.
    DCI.CommitTargetLoweringOpt(TLO);
    return true;
  }

  // If Old has more than one use then it must be Op, because the
  // AssumeSingleUse flag is not propagated to recursive calls of
  // SimplifyDemandedBits, so the only node with multiple use that
  // it will attempt to combine will be Op.
  assert(TLO.Old == Op);

  SmallVector<SDValue, 4> NewOps;
  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
    if (i == OpIdx) {
      NewOps.push_back(TLO.New);
      continue;
    }
    NewOps.push_back(User->getOperand(i));
  }
  User = TLO.DAG.UpdateNodeOperands(User, NewOps);
  // Op has fewer users now, so we may be able to perform additional combines
  // with it.
  DCI.AddToWorklist(Op.getNode());
  // User's operands have been updated, so we may be able to do new combines
  // with it.
  DCI.AddToWorklist(User);
  return true;
}

bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
                                          DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());
  KnownBits Known;

  bool Simplified = SimplifyDemandedBits(Op, DemandedMask, Known, TLO);
  if (Simplified)
    DCI.CommitTargetLoweringOpt(TLO);
  return Simplified;
}

/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning
/// the original and new nodes in Old and New. Otherwise, analyze the
/// expression and return a mask of Known bits for the expression (used to
/// simplify the caller). The Known bits may only be accurate for those bits
/// in the DemandedMask.
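///
/// For example, if DemandedMask is 1 and Op is (sra X, C), Op is replaced by
/// (srl X, C), since only the low bit of the result is used.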
bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                          const APInt &DemandedMask,
                                          KnownBits &Known,
                                          TargetLoweringOpt &TLO,
                                          unsigned Depth,
                                          bool AssumeSingleUse) const {
  unsigned BitWidth = DemandedMask.getBitWidth();
  assert(Op.getScalarValueSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");
  APInt NewMask = DemandedMask;
  SDLoc dl(Op);
  auto &DL = TLO.DAG.getDataLayout();

  // Don't know anything.
  Known = KnownBits(BitWidth);

  if (Op.getOpcode() == ISD::Constant) {
    // We know all of the bits for a constant!
    Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
    Known.Zero = ~Known.One;
    return false;
  }

  // Other users may use these bits.
  EVT VT = Op.getValueType();
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
    if (Depth != 0) {
      // If not at the root, just compute the Known bits to
      // simplify things downstream.
      TLO.DAG.computeKnownBits(Op, Known, Depth);
      return false;
    }
    // If this is the root being simplified, allow it to have multiple uses,
    // just set the NewMask to all bits.
    NewMask = APInt::getAllOnesValue(BitWidth);
  } else if (DemandedMask == 0) {
    // Not demanding any bits from Op.
    if (!Op.isUndef())
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    return false;
  } else if (Depth == 6) { // Limit search depth.
    return false;
  }

  KnownBits Known2, KnownOut;
  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    // Collect the known bits that are shared by every constant vector element.
    Known.Zero.setAllBits(); Known.One.setAllBits();
    for (SDValue SrcOp : Op->ops()) {
      if (!isa<ConstantSDNode>(SrcOp)) {
        // We can only handle all constant values - bail out with no known bits.
        Known = KnownBits(BitWidth);
        return false;
      }
      Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
      Known2.Zero = ~Known2.One;

      // BUILD_VECTOR can implicitly truncate sources, we must handle this.
      if (Known2.One.getBitWidth() != BitWidth) {
        assert(Known2.getBitWidth() > BitWidth &&
               "Expected BUILD_VECTOR implicit truncation");
        Known2 = Known2.trunc(BitWidth);
      }

      // Known bits are the values that are shared by every element.
      // TODO: support per-element known bits.
      Known.One &= Known2.One;
      Known.Zero &= Known2.Zero;
    }
    return false; // Don't fall through, will infinitely loop.
  case ISD::AND:
    // If the RHS is a constant, check to see if the LHS would be zero without
    // using the bits from the RHS. Below, we use knowledge about the RHS to
    // simplify the LHS, here we're using information from the LHS to simplify
    // the RHS.
    if (ConstantSDNode *RHSC = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue Op0 = Op.getOperand(0);
      KnownBits LHSKnown;
      // Do not increment Depth here; that can cause an infinite loop.
      TLO.DAG.computeKnownBits(Op0, LHSKnown, Depth);
      // If the LHS already has zeros where RHSC does, this 'and' is dead.
      if ((LHSKnown.Zero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
        return TLO.CombineTo(Op, Op0);

      // If any of the set bits in the RHS are known zero on the LHS, shrink
      // the constant.
      if (ShrinkDemandedConstant(Op, ~LHSKnown.Zero & NewMask, TLO))
        return true;

      // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
      // constant, but if this 'and' is only clearing bits that were just set by
      // the xor, then this 'and' can be eliminated by shrinking the mask of
      // the xor. For example, for a 32-bit X:
      // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
      if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
          LHSKnown.One == ~RHSC->getAPIntValue()) {
        SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, VT, Op0.getOperand(0),
                                      Op.getOperand(1));
        return TLO.CombineTo(Op, Xor);
      }
    }

    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.Zero & NewMask,
                             Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known one on one side, return the other.
    // These bits cannot contribute to the result of the 'and'.
    if (NewMask.isSubsetOf(Known2.Zero | Known.One))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known.Zero | Known2.One))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If all of the demanded bits in the inputs are known zeros, return zero.
    if (NewMask.isSubsetOf(Known.Zero | Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getConstant(0, dl, VT));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, ~Known2.Zero & NewMask, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // Output known-1 bits are only known if set in both the LHS & RHS.
    Known.One &= Known2.One;
    // Output known-0 are known to be clear if zero in either the LHS | RHS.
    Known.Zero |= Known2.Zero;
    break;
  case ISD::OR:
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), ~Known.One & NewMask,
                             Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'or'.
    if (NewMask.isSubsetOf(Known2.One | Known.Zero))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known.One | Known2.Zero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the RHS is a constant, see if we can simplify it.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // Output known-0 bits are only known if clear in both the LHS & RHS.
    Known.Zero &= Known2.Zero;
    // Output known-1 are known to be set if set in either the LHS | RHS.
    Known.One |= Known2.One;
    break;
  case ISD::XOR: {
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    if (SimplifyDemandedBits(Op.getOperand(0), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If all of the demanded bits are known zero on one side, return the other.
    // These bits cannot contribute to the result of the 'xor'.
    if (NewMask.isSubsetOf(Known.Zero))
      return TLO.CombineTo(Op, Op.getOperand(0));
    if (NewMask.isSubsetOf(Known2.Zero))
      return TLO.CombineTo(Op, Op.getOperand(1));
    // If the operation can be done in a smaller type, do so.
    if (ShrinkDemandedOp(Op, BitWidth, NewMask, TLO))
      return true;

    // If all of the unknown bits are known to be zero on one side or the other
    // (but not both) turn this into an *inclusive* or.
    // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0
    if (NewMask.isSubsetOf(Known.Zero|Known2.Zero))
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, VT,
                                               Op.getOperand(0),
                                               Op.getOperand(1)));

    // Output known-0 bits are known if clear or set in both the LHS & RHS.
    KnownOut.Zero = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
    // Output known-1 are known to be set if set in only one of the LHS, RHS.
    KnownOut.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);

    if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
      // If one side is a constant, and all of the known set bits on the other
      // side are also set in the constant, turn this into an AND, as we know
      // the bits will be cleared.
      // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
      // NB: it is okay if more bits are known than are requested
      if (C->getAPIntValue() == Known2.One) {
        SDValue ANDC = TLO.DAG.getConstant(~C->getAPIntValue() & NewMask,
                                           dl, VT);
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
                                                 Op.getOperand(0), ANDC));
      }

      // If the RHS is a constant, see if we can change it. Don't alter a -1
      // constant because that's a 'not' op, and that is better for combining
      // and codegen.
      if (!C->isAllOnesValue()) {
        if (NewMask.isSubsetOf(C->getAPIntValue())) {
          // We're flipping all demanded bits. Flip the undemanded bits too.
          SDValue New = TLO.DAG.getNOT(dl, Op.getOperand(0), VT);
          return TLO.CombineTo(Op, New);
        }
        // If we can't turn this into a 'not', try to shrink the constant.
        if (ShrinkDemandedConstant(Op, NewMask, TLO))
          return true;
      }
    }

    Known = std::move(KnownOut);
    break;
  }
  case ISD::SELECT:
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(1), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SELECT_CC:
    if (SimplifyDemandedBits(Op.getOperand(3), NewMask, Known, TLO, Depth+1))
      return true;
    if (SimplifyDemandedBits(Op.getOperand(2), NewMask, Known2, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    assert(!Known2.hasConflict() && "Bits known to be one AND zero?");

    // If the operands are constants, see if we can simplify them.
    if (ShrinkDemandedConstant(Op, NewMask, TLO))
      return true;

    // Only known if known in both the LHS and RHS.
    Known.One &= Known2.One;
    Known.Zero &= Known2.Zero;
    break;
  case ISD::SETCC: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
    // If (1) we only need the sign-bit, (2) the setcc operands are the same
    // width as the setcc result, and (3) the result of a setcc conforms to 0 or
    // -1, we may be able to bypass the setcc.
    if (NewMask.isSignMask() && Op0.getScalarValueSizeInBits() == BitWidth &&
        getBooleanContents(VT) ==
            BooleanContent::ZeroOrNegativeOneBooleanContent) {
      // If we're testing X < 0, then this compare isn't needed - just use X!
      // FIXME: We're limiting to integer types here, but this should also work
      // if we don't care about FP signed-zero. The use of SETLT with FP means
      // that we don't care about NaNs.
      if (CC == ISD::SETLT && Op1.getValueType().isInteger() &&
          (isNullConstant(Op1) || ISD::isBuildVectorAllZeros(Op1.getNode())))
        return TLO.CombineTo(Op, Op0);

      // TODO: Should we check for other forms of sign-bit comparisons?
      // Examples: X <= -1, X >= 0
    }
    if (getBooleanContents(Op0.getValueType()) ==
            TargetLowering::ZeroOrOneBooleanContent &&
        BitWidth > 1)
      Known.Zero.setBitsFrom(1);
    break;
  }
  case ISD::SHL:
    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();

      // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
      // single shift. We can do this if the bottom bits (which are shifted
      // out) are never demanded.
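      // e.g. with ShAmt = 3, (shl (srl X, 8), 3) --> (srl X, 5), provided the
      // low 3 bits of the result are not demanded.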
      if (InOp.getOpcode() == ISD::SRL) {
        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
          if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SHL;
              int Diff = ShAmt-C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SRL;
              }

              SDValue NewSA =
                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                       InOp.getOperand(0),
                                                       NewSA));
            }
          }
        }
      }

      if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt), Known, TLO, Depth+1))
        return true;

      // Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
      // are not demanded. This will likely allow the anyext to be folded away.
      if (InOp.getNode()->getOpcode() == ISD::ANY_EXTEND) {
        SDValue InnerOp = InOp.getOperand(0);
        EVT InnerVT = InnerOp.getValueType();
        unsigned InnerBits = InnerVT.getScalarSizeInBits();
        if (ShAmt < InnerBits && NewMask.getActiveBits() <= InnerBits &&
            isTypeDesirableForOp(ISD::SHL, InnerVT)) {
          EVT ShTy = getShiftAmountTy(InnerVT, DL);
          if (!APInt(BitWidth, ShAmt).isIntN(ShTy.getSizeInBits()))
            ShTy = InnerVT;
          SDValue NarrowShl =
            TLO.DAG.getNode(ISD::SHL, dl, InnerVT, InnerOp,
                            TLO.DAG.getConstant(ShAmt, dl, ShTy));
          return
            TLO.CombineTo(Op,
                          TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, NarrowShl));
        }
        // Repeat the SHL optimization above in cases where an extension
        // intervenes: (shl (anyext (shr x, c1)), c2) to
        // (shl (anyext x), c2-c1). This requires that the bottom c1 bits
        // aren't demanded (as above) and that the shifted upper c1 bits of
        // x aren't demanded.
        if (InOp.hasOneUse() && InnerOp.getOpcode() == ISD::SRL &&
            InnerOp.hasOneUse()) {
          if (ConstantSDNode *SA2 = isConstOrConstSplat(InnerOp.getOperand(1))) {
            unsigned InnerShAmt = SA2->getLimitedValue(InnerBits);
            if (InnerShAmt < ShAmt &&
                InnerShAmt < InnerBits &&
                NewMask.getActiveBits() <= (InnerBits - InnerShAmt + ShAmt) &&
                NewMask.countTrailingZeros() >= ShAmt) {
              SDValue NewSA =
                TLO.DAG.getConstant(ShAmt - InnerShAmt, dl,
                                    Op.getOperand(1).getValueType());
              SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               InnerOp.getOperand(0));
              return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
                                                       NewExt, NewSA));
            }
          }
        }
      }

      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // low bits known zero.
      Known.Zero.setLowBits(ShAmt);
    }
    break;
  case ISD::SRL:
    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      SDValue InOp = Op.getOperand(0);

      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (NewMask << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
      // single shift. We can do this if the top bits (which are shifted out)
      // are never demanded.
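      // e.g. with ShAmt = 3, (srl (shl X, 8), 3) --> (shl X, 5), provided the
      // high 3 bits of the result are not demanded.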
      if (InOp.getOpcode() == ISD::SHL) {
        if (ConstantSDNode *SA2 = isConstOrConstSplat(InOp.getOperand(1))) {
          if (ShAmt &&
              (NewMask & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
            if (SA2->getAPIntValue().ult(BitWidth)) {
              unsigned C1 = SA2->getZExtValue();
              unsigned Opc = ISD::SRL;
              int Diff = ShAmt-C1;
              if (Diff < 0) {
                Diff = -Diff;
                Opc = ISD::SHL;
              }

              SDValue NewSA =
                TLO.DAG.getConstant(Diff, dl, Op.getOperand(1).getValueType());
              return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
                                                       InOp.getOperand(0),
                                                       NewSA));
            }
          }
        }
      }

      // Compute the new bits that are at the top now.
      if (SimplifyDemandedBits(InOp, InDemandedMask, Known, TLO, Depth+1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      Known.Zero.setHighBits(ShAmt); // High bits known zero.
    }
    break;
  case ISD::SRA:
    // If this is an arithmetic shift right and only the low-bit is set, we can
    // always convert this into a logical shr, even if the shift amount is
    // variable. The low bit of the shift cannot be an input sign bit unless
    // the shift amount is >= the size of the datatype, which is undefined.
    if (NewMask.isOneValue())
      return TLO.CombineTo(Op,
                           TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
                                           Op.getOperand(1)));

    if (ConstantSDNode *SA = isConstOrConstSplat(Op.getOperand(1))) {
      // If the shift count is an invalid immediate, don't do anything.
      if (SA->getAPIntValue().uge(BitWidth))
        break;

      unsigned ShAmt = SA->getZExtValue();
      APInt InDemandedMask = (NewMask << ShAmt);

      // If the shift is exact, then it does demand the low bits (and knows that
      // they are zero).
      if (Op->getFlags().hasExact())
        InDemandedMask.setLowBits(ShAmt);

      // If any of the demanded bits are produced by the sign extension, we also
      // demand the input sign bit.
      if (NewMask.countLeadingZeros() < ShAmt)
        InDemandedMask.setSignBit();

      if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, Known, TLO,
                               Depth+1))
        return true;
      assert(!Known.hasConflict() && "Bits known to be one AND zero?");
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);

      // If the input sign bit is known to be zero, or if none of the top bits
      // are demanded, turn this into an unsigned shift right.
      if (Known.Zero[BitWidth - ShAmt - 1] ||
          NewMask.countLeadingZeros() >= ShAmt) {
        SDNodeFlags Flags;
        Flags.setExact(Op->getFlags().hasExact());
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SRL, dl, VT, Op.getOperand(0),
                                             Op.getOperand(1), Flags));
      }

      int Log2 = NewMask.exactLogBase2();
      if (Log2 >= 0) {
        // The bit must come from the sign.
        SDValue NewSA =
          TLO.DAG.getConstant(BitWidth - 1 - Log2, dl,
                              Op.getOperand(1).getValueType());
        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                 Op.getOperand(0), NewSA));
      }

      if (Known.One[BitWidth - ShAmt - 1])
        // New bits are known one.
        Known.One.setHighBits(ShAmt);
    }
    break;
  case ISD::SIGN_EXTEND_INREG: {
    EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
    unsigned ExVTBits = ExVT.getScalarSizeInBits();

    // If we only care about the highest bit, don't bother shifting right.
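    // e.g. for i32, if only bit 31 of (sext_inreg X, i8) is demanded, we can
    // emit (shl X, 24), which moves bit 7 of X into the sign-bit position.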
990 if (NewMask.isSignMask()) {
991 SDValue InOp = Op.getOperand(0);
992 bool AlreadySignExtended =
993 TLO.DAG.ComputeNumSignBits(InOp) >= BitWidth-ExVTBits+1;
994 // However if the input is already sign extended we expect the sign
995 // extension to be dropped altogether later and do not simplify.
996 if (!AlreadySignExtended) {
997 // Compute the correct shift amount type, which must be getShiftAmountTy
998 // for scalar types after legalization.
999 EVT ShiftAmtTy = VT;
1000 if (TLO.LegalTypes() && !ShiftAmtTy.isVector())
1001 ShiftAmtTy = getShiftAmountTy(ShiftAmtTy, DL);
1003 SDValue ShiftAmt = TLO.DAG.getConstant(BitWidth - ExVTBits, dl,
1004 ShiftAmtTy);
1005 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT, InOp,
1006 ShiftAmt));
1010 // If none of the extended bits are demanded, eliminate the sextinreg.
1011 if (NewMask.getActiveBits() <= ExVTBits)
1012 return TLO.CombineTo(Op, Op.getOperand(0));
1014 APInt InputDemandedBits = NewMask.getLoBits(ExVTBits);
1016 // Since the sign extended bits are demanded, we know that the sign
1017 // bit is demanded.
1018 InputDemandedBits.setBit(ExVTBits - 1);
1020 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
1021 Known, TLO, Depth+1))
1022 return true;
1023 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1025 // If the sign bit of the input is known set or clear, then we know the
1026 // top bits of the result.
1028 // If the input sign bit is known zero, convert this into a zero extension.
1029 if (Known.Zero[ExVTBits - 1])
1030 return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
1031 Op.getOperand(0), dl, ExVT.getScalarType()));
1033 APInt Mask = APInt::getLowBitsSet(BitWidth, ExVTBits);
1034 if (Known.One[ExVTBits - 1]) { // Input sign bit known set
1035 Known.One.setBitsFrom(ExVTBits);
1036 Known.Zero &= Mask;
1037 } else { // Input sign bit unknown
1038 Known.Zero &= Mask;
1039 Known.One &= Mask;
1041 break;
  case ISD::BUILD_PAIR: {
    EVT HalfVT = Op.getOperand(0).getValueType();
    unsigned HalfBitWidth = HalfVT.getScalarSizeInBits();

    APInt MaskLo = NewMask.getLoBits(HalfBitWidth).trunc(HalfBitWidth);
    APInt MaskHi = NewMask.getHiBits(HalfBitWidth).trunc(HalfBitWidth);

    KnownBits KnownLo, KnownHi;

    if (SimplifyDemandedBits(Op.getOperand(0), MaskLo, KnownLo, TLO, Depth + 1))
      return true;

    if (SimplifyDemandedBits(Op.getOperand(1), MaskHi, KnownHi, TLO, Depth + 1))
      return true;

    Known.Zero = KnownLo.Zero.zext(BitWidth) |
                 KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);

    Known.One = KnownLo.One.zext(BitWidth) |
                KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
    break;
  }
  case ISD::ZERO_EXTEND: {
    unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewMask.getActiveBits() <= OperandBitWidth)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               Op.getOperand(0)));

    APInt InMask = NewMask.trunc(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    Known = Known.zext(BitWidth);
    Known.Zero.setBitsFrom(OperandBitWidth);
    break;
  }
  case ISD::SIGN_EXTEND: {
    unsigned InBits = Op.getOperand(0).getValueType().getScalarSizeInBits();

    // If none of the top bits are demanded, convert this into an any_extend.
    if (NewMask.getActiveBits() <= InBits)
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
                                               Op.getOperand(0)));

    // Since some of the sign extended bits are demanded, we know that the sign
    // bit is demanded.
    APInt InDemandedBits = NewMask.trunc(InBits);
    InDemandedBits.setBit(InBits - 1);

    if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, Known, TLO,
                             Depth+1))
      return true;
    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    // If the sign bit is known one, the top bits match.
    Known = Known.sext(BitWidth);

    // If the sign bit is known zero, convert this to a zero extend.
    if (Known.isNonNegative())
      return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT,
                                               Op.getOperand(0)));
    break;
  }
1107 case ISD::ANY_EXTEND: {
1108 unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
1109 APInt InMask = NewMask.trunc(OperandBitWidth);
1110 if (SimplifyDemandedBits(Op.getOperand(0), InMask, Known, TLO, Depth+1))
1111 return true;
1112 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1113 Known = Known.zext(BitWidth);
1114 break;
  case ISD::TRUNCATE: {
    // Simplify the input, using demanded bit information, and compute the known
    // zero/one bits live out.
    unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
    APInt TruncMask = NewMask.zext(OperandBitWidth);
    if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, Known, TLO, Depth+1))
      return true;
    Known = Known.trunc(BitWidth);

    // If the input is only used by this truncate, see if we can shrink it based
    // on the known demanded bits.
    if (Op.getOperand(0).getNode()->hasOneUse()) {
      SDValue In = Op.getOperand(0);
      switch (In.getOpcode()) {
      default: break;
      case ISD::SRL:
        // Shrink SRL by a constant if none of the high bits shifted in are
        // demanded.
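        // e.g. (i32 (truncate (srl (i64 X), 16))) can become
        // (srl (i32 (truncate X)), 16) when result bits 16-31 are not demanded.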
        if (TLO.LegalTypes() && !isTypeDesirableForOp(ISD::SRL, VT))
          // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
          // undesirable.
          break;
        ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
        if (!ShAmt)
          break;
        SDValue Shift = In.getOperand(1);
        if (TLO.LegalTypes()) {
          uint64_t ShVal = ShAmt->getZExtValue();
          Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
        }

        if (ShAmt->getZExtValue() < BitWidth) {
          APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
                                                 OperandBitWidth - BitWidth);
          HighBits.lshrInPlace(ShAmt->getZExtValue());
          HighBits = HighBits.trunc(BitWidth);

          if (!(HighBits & NewMask)) {
            // None of the shifted in bits are needed. Add a truncate of the
            // shift input, then shift it.
            SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, VT,
                                               In.getOperand(0));
            return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc,
                                                     Shift));
          }
        }
        break;
      }
    }

    assert(!Known.hasConflict() && "Bits known to be one AND zero?");
    break;
  }
1169 case ISD::AssertZext: {
1170 // AssertZext demands all of the high bits, plus any of the low bits
1171 // demanded by its users.
1172 EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
1173 APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
1174 if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | NewMask,
1175 Known, TLO, Depth+1))
1176 return true;
1177 assert(!Known.hasConflict() && "Bits known to be one AND zero?");
1179 Known.Zero |= ~InMask;
1180 break;
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();

    // If this is an FP->Int bitcast and if the sign bit is the only
    // thing demanded, turn this into a FGETSIGN.
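    // e.g. a sign-bit test of (i64 (bitcast f64 X)) can become
    // (shl (fgetsign X), 63); the SHL is expected to fold away.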
    if (!TLO.LegalOperations() && !VT.isVector() && !SrcVT.isVector() &&
        NewMask == APInt::getSignMask(Op.getValueSizeInBits()) &&
        SrcVT.isFloatingPoint()) {
      bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, VT);
      bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
      if ((OpVTLegal || i32Legal) && VT.isSimple() && SrcVT != MVT::f16 &&
          SrcVT != MVT::f128) {
        // Cannot eliminate/lower SHL for f128 yet.
        EVT Ty = OpVTLegal ? VT : MVT::i32;
        // Make a FGETSIGN + SHL to move the sign bit into the appropriate
        // place. We expect the SHL to be eliminated by other optimizations.
        SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Src);
        unsigned OpVTSizeInBits = Op.getValueSizeInBits();
        if (!OpVTLegal && OpVTSizeInBits > 32)
          Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Sign);
        unsigned ShVal = Op.getValueSizeInBits() - 1;
        SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, VT);
        return TLO.CombineTo(Op,
                             TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
      }
    }
    // If bitcast from a vector and the mask covers entire elements, see if we
    // can use SimplifyDemandedVectorElts.
    // TODO - bigendian once we have test coverage.
    // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
    if (SrcVT.isVector() && NumSrcEltBits > 1 &&
        (BitWidth % NumSrcEltBits) == 0 &&
        TLO.DAG.getDataLayout().isLittleEndian()) {
      unsigned Scale = BitWidth / NumSrcEltBits;
      auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
        DemandedSubElts = APInt::getNullValue(Scale);
        for (unsigned i = 0; i != Scale; ++i) {
          unsigned Offset = i * NumSrcEltBits;
          APInt Sub = NewMask.extractBits(NumSrcEltBits, Offset);
          if (Sub.isAllOnesValue())
            DemandedSubElts.setBit(i);
          else if (!Sub.isNullValue())
            return false;
        }
        return true;
      };

      APInt DemandedSubElts;
      if (GetDemandedSubMask(DemandedSubElts)) {
        unsigned NumSrcElts = SrcVT.getVectorNumElements();
        APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);

        APInt KnownUndef, KnownZero;
        if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                       TLO, Depth + 1))
          return true;
      }
    }
    // If this is a bitcast, let computeKnownBits handle it. Only do this on a
    // recursive call where Known may be useful to the caller.
    if (Depth > 0) {
      TLO.DAG.computeKnownBits(Op, Known, Depth);
      return false;
    }
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::SUB: {
    // Add, Sub, and Mul don't demand any bits in positions beyond that
    // of the highest bit demanded of them.
    SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
    unsigned NewMaskLZ = NewMask.countLeadingZeros();
    APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - NewMaskLZ);
    if (SimplifyDemandedBits(Op0, LoMask, Known2, TLO, Depth + 1) ||
        SimplifyDemandedBits(Op1, LoMask, Known2, TLO, Depth + 1) ||
        // See if the operation should be performed at a smaller bit width.
        ShrinkDemandedOp(Op, BitWidth, NewMask, TLO)) {
      SDNodeFlags Flags = Op.getNode()->getFlags();
      if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
        // Disable the nsw and nuw flags. We can no longer guarantee that we
        // won't wrap after simplification.
        Flags.setNoSignedWrap(false);
        Flags.setNoUnsignedWrap(false);
        SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
                                        Flags);
        return TLO.CombineTo(Op, NewOp);
      }
      return true;
    }

    // If we have a constant operand, we may be able to turn it into -1 if we
    // do not demand the high bits. This can make the constant smaller to
    // encode, allow more general folding, or match specialized instruction
    // patterns (eg, 'blsr' on x86). Don't bother changing 1 to -1 because that
    // is probably not useful (and could be detrimental).
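    // e.g. if only the low 8 bits are demanded, (add X, 255) becomes
    // (add X, -1), since the two constants agree in every demanded bit.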
    ConstantSDNode *C = isConstOrConstSplat(Op1);
    APInt HighMask = APInt::getHighBitsSet(NewMask.getBitWidth(), NewMaskLZ);
    if (C && !C->isAllOnesValue() && !C->isOne() &&
        (C->getAPIntValue() | HighMask).isAllOnesValue()) {
      SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
      // We can't guarantee that the new math op doesn't wrap, so explicitly
      // clear those flags to prevent folding with a potential existing node
      // that has those flags set.
      SDNodeFlags Flags;
      Flags.setNoSignedWrap(false);
      Flags.setNoUnsignedWrap(false);
      SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
      return TLO.CombineTo(Op, NewOp);
    }

    LLVM_FALLTHROUGH;
  }
  default:
    // Just use computeKnownBits to compute output bits.
    TLO.DAG.computeKnownBits(Op, Known, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if (NewMask.isSubsetOf(Known.Zero|Known.One)) {
    // Avoid folding to a constant if any OpaqueConstant is involved.
    const SDNode *N = Op.getNode();
    for (SDNodeIterator I = SDNodeIterator::begin(N),
                        E = SDNodeIterator::end(N); I != E; ++I) {
      SDNode *Op = *I;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op))
        if (C->isOpaque())
          return false;
    }
    return TLO.CombineTo(Op, TLO.DAG.getConstant(Known.One, dl, VT));
  }

  return false;
}

bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
                                                const APInt &DemandedElts,
                                                APInt &KnownUndef,
                                                APInt &KnownZero,
                                                DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                        !DCI.isBeforeLegalizeOps());

  bool Simplified =
      SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, TLO);
  if (Simplified)
    DCI.CommitTargetLoweringOpt(TLO);
  return Simplified;
}

bool TargetLowering::SimplifyDemandedVectorElts(
    SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
    APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
    bool AssumeSingleUse) const {
  EVT VT = Op.getValueType();
  APInt DemandedElts = DemandedEltMask;
  unsigned NumElts = DemandedElts.getBitWidth();
  assert(VT.isVector() && "Expected vector op");
  assert(VT.getVectorNumElements() == NumElts &&
         "Mask size mismatches value type element count!");

  KnownUndef = KnownZero = APInt::getNullValue(NumElts);

  // Undef operand.
  if (Op.isUndef()) {
    KnownUndef.setAllBits();
    return false;
  }

  // If Op has other users, assume that all elements are needed.
  if (!Op.getNode()->hasOneUse() && !AssumeSingleUse)
    DemandedElts.setAllBits();

  // Not demanding any elements from Op.
  if (DemandedElts == 0) {
    KnownUndef.setAllBits();
    return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
  }

  // Limit search depth.
  if (Depth >= 6)
    return false;

  SDLoc DL(Op);
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  switch (Op.getOpcode()) {
  case ISD::SCALAR_TO_VECTOR: {
    if (!DemandedElts[0]) {
      KnownUndef.setAllBits();
      return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
    }
    KnownUndef.setHighBits(NumElts - 1);
    break;
  }
  case ISD::BITCAST: {
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // We only handle vectors here.
    // TODO - investigate calling SimplifyDemandedBits/ComputeKnownBits?
    if (!SrcVT.isVector())
      break;

    // Fast handling of 'identity' bitcasts.
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    if (NumSrcElts == NumElts)
      return SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef,
                                        KnownZero, TLO, Depth + 1);

    APInt SrcZero, SrcUndef;
    APInt SrcDemandedElts = APInt::getNullValue(NumSrcElts);

    // Bitcast from 'large element' src vector to 'small element' vector, we
    // must demand a source element if any DemandedElt maps to it.
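    // e.g. for v2i64 -> v4i32, demanding elt 3 of the v4i32 result demands
    // elt 1 of the v2i64 source.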
    if ((NumElts % NumSrcElts) == 0) {
      unsigned Scale = NumElts / NumSrcElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBit(i / Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If the src element is zero/undef then all the output elements will be -
      // only demanded elements are guaranteed to be correct.
      for (unsigned i = 0; i != NumSrcElts; ++i) {
        if (SrcDemandedElts[i]) {
          if (SrcZero[i])
            KnownZero.setBits(i * Scale, (i + 1) * Scale);
          if (SrcUndef[i])
            KnownUndef.setBits(i * Scale, (i + 1) * Scale);
        }
      }
    }

    // Bitcast from 'small element' src vector to 'large element' vector, we
    // demand all smaller source elements covered by the larger demanded element
    // of this vector.
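    // e.g. for v4i32 -> v2i64, demanding elt 1 of the v2i64 result demands
    // elts 2 and 3 of the v4i32 source.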
    if ((NumSrcElts % NumElts) == 0) {
      unsigned Scale = NumSrcElts / NumElts;
      for (unsigned i = 0; i != NumElts; ++i)
        if (DemandedElts[i])
          SrcDemandedElts.setBits(i * Scale, (i + 1) * Scale);

      if (SimplifyDemandedVectorElts(Src, SrcDemandedElts, SrcUndef, SrcZero,
                                     TLO, Depth + 1))
        return true;

      // If all the src elements covering an output element are zero/undef, then
      // the output element will be as well, assuming it was demanded.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (DemandedElts[i]) {
          if (SrcZero.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownZero.setBit(i);
          if (SrcUndef.extractBits(Scale, i * Scale).isAllOnesValue())
            KnownUndef.setBit(i);
        }
      }
    }
    break;
  }
  case ISD::BUILD_VECTOR: {
    // Check all elements and simplify any unused elements with UNDEF.
    if (!DemandedElts.isAllOnesValue()) {
      // Don't simplify BROADCASTS.
      if (llvm::any_of(Op->op_values(),
                       [&](SDValue Elt) { return Op.getOperand(0) != Elt; })) {
        SmallVector<SDValue, 32> Ops(Op->op_begin(), Op->op_end());
        bool Updated = false;
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i] && !Ops[i].isUndef()) {
            Ops[i] = TLO.DAG.getUNDEF(Ops[0].getValueType());
            KnownUndef.setBit(i);
            Updated = true;
          }
        }
        if (Updated)
          return TLO.CombineTo(Op, TLO.DAG.getBuildVector(VT, DL, Ops));
      }
    }
    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue SrcOp = Op.getOperand(i);
      if (SrcOp.isUndef()) {
        KnownUndef.setBit(i);
      } else if (EltSizeInBits == SrcOp.getScalarValueSizeInBits() &&
                 (isNullConstant(SrcOp) || isNullFPConstant(SrcOp))) {
        KnownZero.setBit(i);
      }
    }
    break;
  }
  case ISD::CONCAT_VECTORS: {
    EVT SubVT = Op.getOperand(0).getValueType();
    unsigned NumSubVecs = Op.getNumOperands();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    for (unsigned i = 0; i != NumSubVecs; ++i) {
      SDValue SubOp = Op.getOperand(i);
      APInt SubElts = DemandedElts.extractBits(NumSubElts, i * NumSubElts);
      APInt SubUndef, SubZero;
      if (SimplifyDemandedVectorElts(SubOp, SubElts, SubUndef, SubZero, TLO,
                                     Depth + 1))
        return true;
      KnownUndef.insertBits(SubUndef, i * NumSubElts);
      KnownZero.insertBits(SubZero, i * NumSubElts);
    }
    break;
  }
  case ISD::INSERT_SUBVECTOR: {
    if (!isa<ConstantSDNode>(Op.getOperand(2)))
      break;
    SDValue Base = Op.getOperand(0);
    SDValue Sub = Op.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    const APInt &Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
    if (Idx.uge(NumElts - NumSubElts))
      break;
    unsigned SubIdx = Idx.getZExtValue();
    APInt SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
    APInt SubUndef, SubZero;
    if (SimplifyDemandedVectorElts(Sub, SubElts, SubUndef, SubZero, TLO,
                                   Depth + 1))
      return true;
    APInt BaseElts = DemandedElts;
    BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
    if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
                                   Depth + 1))
      return true;
    KnownUndef.insertBits(SubUndef, SubIdx);
    KnownZero.insertBits(SubZero, SubIdx);
    break;
  }
1521 case ISD::EXTRACT_SUBVECTOR: {
1522 SDValue Src = Op.getOperand(0);
1523 ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
1524 unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
1525 if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
1526 // Offset the demanded elts by the subvector index.
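// E.g. extracting a v2i32 from a v8i32 at index 4 with DemandedElts = 0b01
// shifts to SrcElts = 0b00010000, i.e. only source element 4 is demanded.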
1527 uint64_t Idx = SubIdx->getZExtValue();
1528 APInt SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
1529 APInt SrcUndef, SrcZero;
1530 if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO,
1531 Depth + 1))
1532 return true;
1533 KnownUndef = SrcUndef.extractBits(NumElts, Idx);
1534 KnownZero = SrcZero.extractBits(NumElts, Idx);
1536 break;
1538 case ISD::INSERT_VECTOR_ELT: {
1539 SDValue Vec = Op.getOperand(0);
1540 SDValue Scl = Op.getOperand(1);
1541 auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
1543 // For a legal, constant insertion index, if we don't need this insertion
1544 // then strip it, else remove it from the demanded elts.
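// E.g. (insert_vector_elt V, S, 2) with DemandedElts = 0b0001 doesn't demand
// lane 2, so the insertion folds away to V; with DemandedElts = 0b0100 we
// instead clear bit 2 before recursing, since V's lane 2 is overwritten by S.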
1545 if (CIdx && CIdx->getAPIntValue().ult(NumElts)) {
1546 unsigned Idx = CIdx->getZExtValue();
1547 if (!DemandedElts[Idx])
1548 return TLO.CombineTo(Op, Vec);
1549 DemandedElts.clearBit(Idx);
1551 if (SimplifyDemandedVectorElts(Vec, DemandedElts, KnownUndef,
1552 KnownZero, TLO, Depth + 1))
1553 return true;
1555 KnownUndef.clearBit(Idx);
1556 if (Scl.isUndef())
1557 KnownUndef.setBit(Idx);
1559 KnownZero.clearBit(Idx);
1560 if (isNullConstant(Scl) || isNullFPConstant(Scl))
1561 KnownZero.setBit(Idx);
1562 break;
1565 APInt VecUndef, VecZero;
1566 if (SimplifyDemandedVectorElts(Vec, DemandedElts, VecUndef, VecZero, TLO,
1567 Depth + 1))
1568 return true;
1569 // Without knowing the insertion index we can't set KnownUndef/KnownZero.
1570 break;
1572 case ISD::VSELECT: {
1573 // Try to transform the select condition based on the current demanded
1574 // elements.
1575 // TODO: If a condition element is undef, we can choose from one arm of the
1576 // select (and if one arm is undef, then we can propagate that to the
1577 // result).
1578 // TODO - add support for constant vselect masks (see IR version of this).
1579 APInt UnusedUndef, UnusedZero;
1580 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UnusedUndef,
1581 UnusedZero, TLO, Depth + 1))
1582 return true;
1584 // See if we can simplify either vselect operand.
1585 APInt DemandedLHS(DemandedElts);
1586 APInt DemandedRHS(DemandedElts);
1587 APInt UndefLHS, ZeroLHS;
1588 APInt UndefRHS, ZeroRHS;
1589 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedLHS, UndefLHS,
1590 ZeroLHS, TLO, Depth + 1))
1591 return true;
1592 if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedRHS, UndefRHS,
1593 ZeroRHS, TLO, Depth + 1))
1594 return true;
1596 KnownUndef = UndefLHS & UndefRHS;
1597 KnownZero = ZeroLHS & ZeroRHS;
1598 break;
1600 case ISD::VECTOR_SHUFFLE: {
1601 ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
1603 // Collect demanded elements from shuffle operands.
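// E.g. for a v4i32 shuffle with mask <0, 5, 1, 4> and DemandedElts = 0b0011,
// lane 0 reads LHS element 0 and lane 1 reads RHS element 1, giving
// DemandedLHS = 0b0001 and DemandedRHS = 0b0010.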
1604 APInt DemandedLHS(NumElts, 0);
1605 APInt DemandedRHS(NumElts, 0);
1606 for (unsigned i = 0; i != NumElts; ++i) {
1607 int M = ShuffleMask[i];
1608 if (M < 0 || !DemandedElts[i])
1609 continue;
1610 assert(0 <= M && M < (int)(2 * NumElts) && "Shuffle index out of range");
1611 if (M < (int)NumElts)
1612 DemandedLHS.setBit(M);
1613 else
1614 DemandedRHS.setBit(M - NumElts);
1617 // See if we can simplify either shuffle operand.
1618 APInt UndefLHS, ZeroLHS;
1619 APInt UndefRHS, ZeroRHS;
1620 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedLHS, UndefLHS,
1621 ZeroLHS, TLO, Depth + 1))
1622 return true;
1623 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedRHS, UndefRHS,
1624 ZeroRHS, TLO, Depth + 1))
1625 return true;
1627 // Simplify mask using undef elements from LHS/RHS.
1628 bool Updated = false;
1629 bool IdentityLHS = true, IdentityRHS = true;
1630 SmallVector<int, 32> NewMask(ShuffleMask.begin(), ShuffleMask.end());
1631 for (unsigned i = 0; i != NumElts; ++i) {
1632 int &M = NewMask[i];
1633 if (M < 0)
1634 continue;
1635 if (!DemandedElts[i] || (M < (int)NumElts && UndefLHS[M]) ||
1636 (M >= (int)NumElts && UndefRHS[M - NumElts])) {
1637 Updated = true;
1638 M = -1;
1640 IdentityLHS &= (M < 0) || (M == (int)i);
1641 IdentityRHS &= (M < 0) || ((M - NumElts) == i);
1644 // Update legal shuffle masks based on demanded elements if it won't reduce
1645 // to an identity mask, which could cause premature removal of the shuffle.
1646 if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
1647 isShuffleMaskLegal(NewMask, VT))
1648 return TLO.CombineTo(Op,
1649 TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
1650 Op.getOperand(1), NewMask));
1652 // Propagate undef/zero elements from LHS/RHS.
1653 for (unsigned i = 0; i != NumElts; ++i) {
1654 int M = ShuffleMask[i];
1655 if (M < 0) {
1656 KnownUndef.setBit(i);
1657 } else if (M < (int)NumElts) {
1658 if (UndefLHS[M])
1659 KnownUndef.setBit(i);
1660 if (ZeroLHS[M])
1661 KnownZero.setBit(i);
1662 } else {
1663 if (UndefRHS[M - NumElts])
1664 KnownUndef.setBit(i);
1665 if (ZeroRHS[M - NumElts])
1666 KnownZero.setBit(i);
1669 break;
1671 case ISD::ADD:
1672 case ISD::SUB: {
1673 APInt SrcUndef, SrcZero;
1674 if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
1675 SrcZero, TLO, Depth + 1))
1676 return true;
1677 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1678 KnownZero, TLO, Depth + 1))
1679 return true;
1680 KnownZero &= SrcZero;
1681 KnownUndef &= SrcUndef;
1682 break;
1684 case ISD::TRUNCATE:
1685 if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
1686 KnownZero, TLO, Depth + 1))
1687 return true;
1688 break;
1689 default: {
1690 if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
1691 if (SimplifyDemandedVectorEltsForTargetNode(Op, DemandedElts, KnownUndef,
1692 KnownZero, TLO, Depth))
1693 return true;
1694 break;
1698 assert((KnownUndef & KnownZero) == 0 && "Elements flagged as undef AND zero");
1699 return false;
1702 /// Determine which of the bits specified in Mask are known to be either zero or
1703 /// one and return them in Known.
1704 void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
1705 KnownBits &Known,
1706 const APInt &DemandedElts,
1707 const SelectionDAG &DAG,
1708 unsigned Depth) const {
1709 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1710 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1711 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1712 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1713 "Should use MaskedValueIsZero if you don't know whether Op"
1714 " is a target node!");
1715 Known.resetAll();
1718 void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
1719 KnownBits &Known,
1720 const APInt &DemandedElts,
1721 const SelectionDAG &DAG,
1722 unsigned Depth) const {
1723 assert(isa<FrameIndexSDNode>(Op) && "expected FrameIndex");
1725 if (unsigned Align = DAG.InferPtrAlignment(Op)) {
1726 // The low bits are known zero if the pointer is aligned.
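// E.g. a frame index with 16-byte alignment has Log2_32(16) = 4 known-zero
// low bits, so Known.Zero covers bits 0-3 of the pointer.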
1727 Known.Zero.setLowBits(Log2_32(Align));
1731 /// This method can be implemented by targets that want to expose additional
1732 /// information about sign bits to the DAG Combiner.
1733 unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
1734 const APInt &,
1735 const SelectionDAG &,
1736 unsigned Depth) const {
1737 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1738 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1739 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1740 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1741 "Should use ComputeNumSignBits if you don't know whether Op"
1742 " is a target node!");
1743 return 1;
1746 bool TargetLowering::SimplifyDemandedVectorEltsForTargetNode(
1747 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero,
1748 TargetLoweringOpt &TLO, unsigned Depth) const {
1749 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1750 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1751 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1752 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1753 "Should use SimplifyDemandedVectorElts if you don't know whether Op"
1754 " is a target node!");
1755 return false;
1758 bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
1759 const SelectionDAG &DAG,
1760 bool SNaN,
1761 unsigned Depth) const {
1762 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
1763 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
1764 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
1765 Op.getOpcode() == ISD::INTRINSIC_VOID) &&
1766 "Should use isKnownNeverNaN if you don't know whether Op"
1767 " is a target node!");
1768 return false;
1771 // FIXME: Ideally, this would use ISD::isConstantSplatVector(), but that must
1772 // work with truncating build vectors and vectors with elements of less than
1773 // 8 bits.
1774 bool TargetLowering::isConstTrueVal(const SDNode *N) const {
1775 if (!N)
1776 return false;
1778 APInt CVal;
1779 if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
1780 CVal = CN->getAPIntValue();
1781 } else if (auto *BV = dyn_cast<BuildVectorSDNode>(N)) {
1782 auto *CN = BV->getConstantSplatNode();
1783 if (!CN)
1784 return false;
1786 // If this is a truncating build vector, truncate the splat value.
1787 // Otherwise, we may fail to match the expected values below.
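// E.g. a v4i8 build_vector may carry i32 operands; a splat of -1 is then the
// 32-bit value 0xFFFFFFFF, and truncating it to the 8-bit element width
// yields 0xFF so the checks below can still match it as all-ones.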
1788 unsigned BVEltWidth = BV->getValueType(0).getScalarSizeInBits();
1789 CVal = CN->getAPIntValue();
1790 if (BVEltWidth < CVal.getBitWidth())
1791 CVal = CVal.trunc(BVEltWidth);
1792 } else {
1793 return false;
1796 switch (getBooleanContents(N->getValueType(0))) {
1797 case UndefinedBooleanContent:
1798 return CVal[0];
1799 case ZeroOrOneBooleanContent:
1800 return CVal.isOneValue();
1801 case ZeroOrNegativeOneBooleanContent:
1802 return CVal.isAllOnesValue();
1805 llvm_unreachable("Invalid boolean contents");
1808 bool TargetLowering::isConstFalseVal(const SDNode *N) const {
1809 if (!N)
1810 return false;
1812 const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
1813 if (!CN) {
1814 const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N);
1815 if (!BV)
1816 return false;
1818 // Only interested in constant splats; we don't care about undef
1819 // elements when identifying boolean constants, and getConstantSplatNode
1820 // returns null if all ops are undef.
1821 CN = BV->getConstantSplatNode();
1822 if (!CN)
1823 return false;
1826 if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent)
1827 return !CN->getAPIntValue()[0];
1829 return CN->isNullValue();
1832 bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
1833 bool SExt) const {
1834 if (VT == MVT::i1)
1835 return N->isOne();
1837 TargetLowering::BooleanContent Cnt = getBooleanContents(VT);
1838 switch (Cnt) {
1839 case TargetLowering::ZeroOrOneBooleanContent:
1840 // An extended value of 1 is always true, unless its original type is i1,
1841 // in which case it will be sign extended to -1.
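// E.g. with ZeroOrOneBooleanContent, (zext i1 true) is 1 (true), while
// (sext i1 true) is -1, which is why the i1 source type is excluded here.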
1842 return (N->isOne() && !SExt) || (SExt && (N->getValueType(0) != MVT::i1));
1843 case TargetLowering::UndefinedBooleanContent:
1844 case TargetLowering::ZeroOrNegativeOneBooleanContent:
1845 return N->isAllOnesValue() && SExt;
1847 llvm_unreachable("Unexpected enumeration.");
1850 /// This helper function of SimplifySetCC tries to optimize the comparison when
1851 /// either operand of the SetCC node is a bitwise-and instruction.
1852 SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
1853 ISD::CondCode Cond,
1854 DAGCombinerInfo &DCI,
1855 const SDLoc &DL) const {
1856 // Match these patterns in any of their permutations:
1857 // (X & Y) == Y
1858 // (X & Y) != Y
1859 if (N1.getOpcode() == ISD::AND && N0.getOpcode() != ISD::AND)
1860 std::swap(N0, N1);
1862 EVT OpVT = N0.getValueType();
1863 if (N0.getOpcode() != ISD::AND || !OpVT.isInteger() ||
1864 (Cond != ISD::SETEQ && Cond != ISD::SETNE))
1865 return SDValue();
1867 SDValue X, Y;
1868 if (N0.getOperand(0) == N1) {
1869 X = N0.getOperand(1);
1870 Y = N0.getOperand(0);
1871 } else if (N0.getOperand(1) == N1) {
1872 X = N0.getOperand(0);
1873 Y = N0.getOperand(1);
1874 } else {
1875 return SDValue();
1878 SelectionDAG &DAG = DCI.DAG;
1879 SDValue Zero = DAG.getConstant(0, DL, OpVT);
1880 if (DAG.isKnownToBeAPowerOfTwo(Y)) {
1881 // Simplify X & Y == Y to X & Y != 0 if Y has exactly one bit set.
1882 // Note that where Y is variable and is known to have at most one bit set
1883 // (for example, if it is Z & 1) we cannot do this; the expressions are not
1884 // equivalent when Y == 0.
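// E.g. with Y == 4: (X & 4) == 4 and (X & 4) != 0 agree for all X. But if Y
// were only known to have at most one set bit (say Y == Z & 1), then for
// Y == 0 we'd have (X & 0) == 0 true while (X & 0) != 0 is false.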
1885 Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
1886 if (DCI.isBeforeLegalizeOps() ||
1887 isCondCodeLegal(Cond, N0.getSimpleValueType()))
1888 return DAG.getSetCC(DL, VT, N0, Zero, Cond);
1889 } else if (N0.hasOneUse() && hasAndNotCompare(Y)) {
1890 // If the target supports an 'and-not' or 'and-complement' logic operation,
1891 // try to use that to make a comparison operation more efficient.
1892 // But don't do this transform if the mask is a single bit because there are
1893 // more efficient ways to deal with that case (for example, 'bt' on x86 or
1894 // 'rlwinm' on PPC).
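// E.g. on a target with a BMI-style ANDN instruction, (X & Y) == Y becomes
// (~X & Y) == 0, a single and-not feeding the compare (a sketch; the exact
// selection is target-specific).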
1896 // Bail out if the compare operand that we want to turn into a zero is
1897 // already a zero (otherwise, infinite loop).
1898 auto *YConst = dyn_cast<ConstantSDNode>(Y);
1899 if (YConst && YConst->isNullValue())
1900 return SDValue();
1902 // Transform this into: ~X & Y == 0.
1903 SDValue NotX = DAG.getNOT(SDLoc(X), X, OpVT);
1904 SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, NotX, Y);
1905 return DAG.getSetCC(DL, VT, NewAnd, Zero, Cond);
1908 return SDValue();
1911 /// There are multiple IR patterns that could be checking whether a certain
1912 /// truncation of a signed number would be lossy or not. The pattern that is
1913 /// best at the IR level may not lower optimally. Thus, we want to unfold it.
1914 /// We are looking for the following pattern: (KeptBits is a constant)
1915 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
1916 /// KeptBits won't be bitwidth(x); that case is constant-folded to true/false.
1917 /// KeptBits also can't be 1; that would have been folded to %x dstcond 0.
1918 /// We will unfold it into the natural trunc+sext pattern:
1919 /// ((%x << C) a>> C) dstcond %x
1920 /// Where C = bitwidth(x) - KeptBits and C u< bitwidth(x)
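/// For example (i16, KeptBits = 8): (add %x, 128) u< 256 holds exactly when
/// %x is in [-128, 127], which is ((%x << 8) a>> 8) == %x with C = 8.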
1921 SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
1922 EVT SCCVT, SDValue N0, SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI,
1923 const SDLoc &DL) const {
1924 // We must be comparing with a constant.
1925 ConstantSDNode *C1;
1926 if (!(C1 = dyn_cast<ConstantSDNode>(N1)))
1927 return SDValue();
1929 // N0 should be: add %x, (1 << (KeptBits-1))
1930 if (N0->getOpcode() != ISD::ADD)
1931 return SDValue();
1933 // And we must be 'add'ing a constant.
1934 ConstantSDNode *C01;
1935 if (!(C01 = dyn_cast<ConstantSDNode>(N0->getOperand(1))))
1936 return SDValue();
1938 SDValue X = N0->getOperand(0);
1939 EVT XVT = X.getValueType();
1941 // Validate constants ...
1943 APInt I1 = C1->getAPIntValue();
1945 ISD::CondCode NewCond;
1946 if (Cond == ISD::CondCode::SETULT) {
1947 NewCond = ISD::CondCode::SETEQ;
1948 } else if (Cond == ISD::CondCode::SETULE) {
1949 NewCond = ISD::CondCode::SETEQ;
1950 // But need to 'canonicalize' the constant.
1951 I1 += 1;
1952 } else if (Cond == ISD::CondCode::SETUGT) {
1953 NewCond = ISD::CondCode::SETNE;
1954 // But need to 'canonicalize' the constant.
1955 I1 += 1;
1956 } else if (Cond == ISD::CondCode::SETUGE) {
1957 NewCond = ISD::CondCode::SETNE;
1958 } else
1959 return SDValue();
1961 APInt I01 = C01->getAPIntValue();
1963 auto checkConstants = [&I1, &I01]() -> bool {
1964 // Both of them must be power-of-two, and the constant from setcc is bigger.
1965 return I1.ugt(I01) && I1.isPowerOf2() && I01.isPowerOf2();
1968 if (checkConstants()) {
1969 // Great, e.g. got icmp ult i16 (add i16 %x, 128), 256
1970 } else {
1971 // What if we invert constants? (and the target predicate)
1972 I1.negate();
1973 I01.negate();
1974 NewCond = getSetCCInverse(NewCond, /*isInteger=*/true);
1975 if (!checkConstants())
1976 return SDValue();
1977 // Great, e.g. got icmp uge i16 (add i16 %x, -128), -256
1980 // They are power-of-two, so which bit is set?
1981 const unsigned KeptBits = I1.logBase2();
1982 const unsigned KeptBitsMinusOne = I01.logBase2();
1984 // Magic!
1985 if (KeptBits != (KeptBitsMinusOne + 1))
1986 return SDValue();
1987 assert(KeptBits > 0 && KeptBits < XVT.getSizeInBits() && "unreachable");
1989 // We don't want to do this in every single case.
1990 SelectionDAG &DAG = DCI.DAG;
1991 if (!DAG.getTargetLoweringInfo().shouldTransformSignedTruncationCheck(
1992 XVT, KeptBits))
1993 return SDValue();
1995 const unsigned MaskedBits = XVT.getSizeInBits() - KeptBits;
1996 assert(MaskedBits > 0 && MaskedBits < XVT.getSizeInBits() && "unreachable");
1998 // Unfold into: ((%x << C) a>> C) cond %x
1999 // Where 'cond' will be either 'eq' or 'ne'.
2000 SDValue ShiftAmt = DAG.getConstant(MaskedBits, DL, XVT);
2001 SDValue T0 = DAG.getNode(ISD::SHL, DL, XVT, X, ShiftAmt);
2002 SDValue T1 = DAG.getNode(ISD::SRA, DL, XVT, T0, ShiftAmt);
2003 SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, X, NewCond);
2005 return T2;
2008 /// Try to simplify a setcc built with the specified operands and cc. If it is
2009 /// unable to simplify it, return a null SDValue.
2010 SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
2011 ISD::CondCode Cond, bool foldBooleans,
2012 DAGCombinerInfo &DCI,
2013 const SDLoc &dl) const {
2014 SelectionDAG &DAG = DCI.DAG;
2015 EVT OpVT = N0.getValueType();
2017 // These setcc operations always fold.
2018 switch (Cond) {
2019 default: break;
2020 case ISD::SETFALSE:
2021 case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
2022 case ISD::SETTRUE:
2023 case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
2026 // Ensure that the constant occurs on the RHS and fold constant comparisons.
2027 // TODO: Handle non-splat vector constants. All undef causes trouble.
2028 ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
2029 if (isConstOrConstSplat(N0) &&
2030 (DCI.isBeforeLegalizeOps() ||
2031 isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
2032 return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
2034 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2035 const APInt &C1 = N1C->getAPIntValue();
2037 // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
2038 // equality comparison, then we're just comparing whether X itself is
2039 // zero.
2040 if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
2041 N0.getOperand(0).getOpcode() == ISD::CTLZ &&
2042 N0.getOperand(1).getOpcode() == ISD::Constant) {
2043 const APInt &ShAmt
2044 = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2045 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2046 ShAmt == Log2_32(N0.getValueSizeInBits())) {
2047 if ((C1 == 0) == (Cond == ISD::SETEQ)) {
2048 // (srl (ctlz x), 5) == 0 -> X != 0
2049 // (srl (ctlz x), 5) != 1 -> X != 0
2050 Cond = ISD::SETNE;
2051 } else {
2052 // (srl (ctlz x), 5) != 0 -> X == 0
2053 // (srl (ctlz x), 5) == 1 -> X == 0
2054 Cond = ISD::SETEQ;
2056 SDValue Zero = DAG.getConstant(0, dl, N0.getValueType());
2057 return DAG.getSetCC(dl, VT, N0.getOperand(0).getOperand(0),
2058 Zero, Cond);
2062 SDValue CTPOP = N0;
2063 // Look through truncs that don't change the value of a ctpop.
2064 if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
2065 CTPOP = N0.getOperand(0);
2067 if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
2068 (N0 == CTPOP ||
2069 N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
2070 EVT CTVT = CTPOP.getValueType();
2071 SDValue CTOp = CTPOP.getOperand(0);
2073 // (ctpop x) u< 2 -> (x & x-1) == 0
2074 // (ctpop x) u> 1 -> (x & x-1) != 0
2075 if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)) {
2076 SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
2077 DAG.getConstant(1, dl, CTVT));
2078 SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
2079 ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
2080 return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
2083 // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
2086 // (zext x) == C --> x == (trunc C)
2087 // (sext x) == C --> x == (trunc C)
2088 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2089 DCI.isBeforeLegalize() && N0->hasOneUse()) {
2090 unsigned MinBits = N0.getValueSizeInBits();
2091 SDValue PreExt;
2092 bool Signed = false;
2093 if (N0->getOpcode() == ISD::ZERO_EXTEND) {
2094 // ZExt
2095 MinBits = N0->getOperand(0).getValueSizeInBits();
2096 PreExt = N0->getOperand(0);
2097 } else if (N0->getOpcode() == ISD::AND) {
2098 // DAGCombine turns costly ZExts into ANDs
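// E.g. (and X, 255) behaves like (zext i8 (trunc X)); the mask 255 = 2^8 - 1
// gives MinBits = 8 via countTrailingOnes().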
2099 if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1)))
2100 if ((C->getAPIntValue()+1).isPowerOf2()) {
2101 MinBits = C->getAPIntValue().countTrailingOnes();
2102 PreExt = N0->getOperand(0);
2104 } else if (N0->getOpcode() == ISD::SIGN_EXTEND) {
2105 // SExt
2106 MinBits = N0->getOperand(0).getValueSizeInBits();
2107 PreExt = N0->getOperand(0);
2108 Signed = true;
2109 } else if (auto *LN0 = dyn_cast<LoadSDNode>(N0)) {
2110 // ZEXTLOAD / SEXTLOAD
2111 if (LN0->getExtensionType() == ISD::ZEXTLOAD) {
2112 MinBits = LN0->getMemoryVT().getSizeInBits();
2113 PreExt = N0;
2114 } else if (LN0->getExtensionType() == ISD::SEXTLOAD) {
2115 Signed = true;
2116 MinBits = LN0->getMemoryVT().getSizeInBits();
2117 PreExt = N0;
2121 // Figure out how many bits we need to preserve this constant.
2122 unsigned ReqdBits = Signed ?
2123 C1.getBitWidth() - C1.getNumSignBits() + 1 :
2124 C1.getActiveBits();
2126 // Make sure we're not losing bits from the constant.
2127 if (MinBits > 0 &&
2128 MinBits < C1.getBitWidth() &&
2129 MinBits >= ReqdBits) {
2130 EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
2131 if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
2132 // Will get folded away.
2133 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
2134 if (MinBits == 1 && C1 == 1)
2135 // Invert the condition.
2136 return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
2137 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2138 SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
2139 return DAG.getSetCC(dl, VT, Trunc, C, Cond);
2142 // If truncating the setcc operands is not desirable, we can still
2143 // simplify the expression in some cases:
2144 // (setcc ([sz]ext (setcc x, y, cc)), 0, setne) -> setcc (x, y, cc)
2145 // (setcc ([sz]ext (setcc x, y, cc)), 0, seteq) -> setcc (x, y, inv(cc))
2146 // (setcc (zext (setcc x, y, cc)), 1, setne)    -> setcc (x, y, inv(cc))
2147 // (setcc (zext (setcc x, y, cc)), 1, seteq)    -> setcc (x, y, cc)
2148 // (setcc (sext (setcc x, y, cc)), -1, setne)   -> setcc (x, y, inv(cc))
2149 // (setcc (sext (setcc x, y, cc)), -1, seteq)   -> setcc (x, y, cc)
2150 SDValue TopSetCC = N0->getOperand(0);
2151 unsigned N0Opc = N0->getOpcode();
2152 bool SExt = (N0Opc == ISD::SIGN_EXTEND);
2153 if (TopSetCC.getValueType() == MVT::i1 && VT == MVT::i1 &&
2154 TopSetCC.getOpcode() == ISD::SETCC &&
2155 (N0Opc == ISD::ZERO_EXTEND || N0Opc == ISD::SIGN_EXTEND) &&
2156 (isConstFalseVal(N1C) ||
2157 isExtendedTrueVal(N1C, N0->getValueType(0), SExt))) {
2159 bool Inverse = (N1C->isNullValue() && Cond == ISD::SETEQ) ||
2160 (!N1C->isNullValue() && Cond == ISD::SETNE);
2162 if (!Inverse)
2163 return TopSetCC;
2165 ISD::CondCode InvCond = ISD::getSetCCInverse(
2166 cast<CondCodeSDNode>(TopSetCC.getOperand(2))->get(),
2167 TopSetCC.getOperand(0).getValueType().isInteger());
2168 return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
2169 TopSetCC.getOperand(1),
2170 InvCond);
2175 // If the LHS is '(and load, const)', the RHS is 0, the test is for
2176 // equality or unsigned, and all 1 bits of the const are in the same
2177 // partial word, see if we can shorten the load.
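// E.g. for ((load i32 %p) & 0xFF00) == 0 on a little-endian target, the set
// bits live entirely in byte 1, so the search below narrows this to an i8
// load at offset 1, masked with 0xFF and compared against zero (a sketch;
// the loop prefers the narrowest width that still covers the mask).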
2178 if (DCI.isBeforeLegalize() &&
2179 !ISD::isSignedIntSetCC(Cond) &&
2180 N0.getOpcode() == ISD::AND && C1 == 0 &&
2181 N0.getNode()->hasOneUse() &&
2182 isa<LoadSDNode>(N0.getOperand(0)) &&
2183 N0.getOperand(0).getNode()->hasOneUse() &&
2184 isa<ConstantSDNode>(N0.getOperand(1))) {
2185 LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
2186 APInt bestMask;
2187 unsigned bestWidth = 0, bestOffset = 0;
2188 if (!Lod->isVolatile() && Lod->isUnindexed()) {
2189 unsigned origWidth = N0.getValueSizeInBits();
2190 unsigned maskWidth = origWidth;
2191 // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
2192 // 8 bits, but have to be careful...
2193 if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
2194 origWidth = Lod->getMemoryVT().getSizeInBits();
2195 const APInt &Mask =
2196 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2197 for (unsigned width = origWidth / 2; width>=8; width /= 2) {
2198 APInt newMask = APInt::getLowBitsSet(maskWidth, width);
2199 for (unsigned offset=0; offset<origWidth/width; offset++) {
2200 if (Mask.isSubsetOf(newMask)) {
2201 if (DAG.getDataLayout().isLittleEndian())
2202 bestOffset = (uint64_t)offset * (width/8);
2203 else
2204 bestOffset = (origWidth/width - offset - 1) * (width/8);
2205 bestMask = Mask.lshr(offset * (width/8) * 8);
2206 bestWidth = width;
2207 break;
2209 newMask <<= width;
2213 if (bestWidth) {
2214 EVT newVT = EVT::getIntegerVT(*DAG.getContext(), bestWidth);
2215 if (newVT.isRound()) {
2216 EVT PtrType = Lod->getOperand(1).getValueType();
2217 SDValue Ptr = Lod->getBasePtr();
2218 if (bestOffset != 0)
2219 Ptr = DAG.getNode(ISD::ADD, dl, PtrType, Lod->getBasePtr(),
2220 DAG.getConstant(bestOffset, dl, PtrType));
2221 unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
2222 SDValue NewLoad = DAG.getLoad(
2223 newVT, dl, Lod->getChain(), Ptr,
2224 Lod->getPointerInfo().getWithOffset(bestOffset), NewAlign);
2225 return DAG.getSetCC(dl, VT,
2226 DAG.getNode(ISD::AND, dl, newVT, NewLoad,
2227 DAG.getConstant(bestMask.trunc(bestWidth),
2228 dl, newVT)),
2229 DAG.getConstant(0LL, dl, newVT), Cond);
2234 // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
2235 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
2236 unsigned InSize = N0.getOperand(0).getValueSizeInBits();
2238 // If the comparison constant has bits in the upper part, the
2239 // zero-extended value could never match.
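// E.g. (zext i8 X) can only produce values 0-255, so comparing it for
// equality against 256 folds to false, and setne folds to true.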
2240 if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
2241 C1.getBitWidth() - InSize))) {
2242 switch (Cond) {
2243 case ISD::SETUGT:
2244 case ISD::SETUGE:
2245 case ISD::SETEQ:
2246 return DAG.getConstant(0, dl, VT);
2247 case ISD::SETULT:
2248 case ISD::SETULE:
2249 case ISD::SETNE:
2250 return DAG.getConstant(1, dl, VT);
2251 case ISD::SETGT:
2252 case ISD::SETGE:
2253 // True if the sign bit of C1 is set.
2254 return DAG.getConstant(C1.isNegative(), dl, VT);
2255 case ISD::SETLT:
2256 case ISD::SETLE:
2257 // True if the sign bit of C1 isn't set.
2258 return DAG.getConstant(C1.isNonNegative(), dl, VT);
2259 default:
2260 break;
2264 // Otherwise, we can perform the comparison with the low bits.
2265 switch (Cond) {
2266 case ISD::SETEQ:
2267 case ISD::SETNE:
2268 case ISD::SETUGT:
2269 case ISD::SETUGE:
2270 case ISD::SETULT:
2271 case ISD::SETULE: {
2272 EVT newVT = N0.getOperand(0).getValueType();
2273 if (DCI.isBeforeLegalizeOps() ||
2274 (isOperationLegal(ISD::SETCC, newVT) &&
2275 isCondCodeLegal(Cond, newVT.getSimpleVT()))) {
2276 EVT NewSetCCVT =
2277 getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), newVT);
2278 SDValue NewConst = DAG.getConstant(C1.trunc(InSize), dl, newVT);
2280 SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0),
2281 NewConst, Cond);
2282 return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType());
2284 break;
2286 default:
2287 break; // TODO: be more careful with signed comparisons
2289 } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
2290 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2291 EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
2292 unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
2293 EVT ExtDstTy = N0.getValueType();
2294 unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();
2296 // If the constant doesn't fit into the number of bits for the source of
2297 // the sign extension, it is impossible for both sides to be equal.
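// E.g. (sext_inreg X, i8) always yields a value in [-128, 127], so an
// equality compare against 300 is known false (setne: known true).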
2298 if (C1.getMinSignedBits() > ExtSrcTyBits)
2299 return DAG.getConstant(Cond == ISD::SETNE, dl, VT);
2301 SDValue ZextOp;
2302 EVT Op0Ty = N0.getOperand(0).getValueType();
2303 if (Op0Ty == ExtSrcTy) {
2304 ZextOp = N0.getOperand(0);
2305 } else {
2306 APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
2307 ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
2308 DAG.getConstant(Imm, dl, Op0Ty));
2310 if (!DCI.isCalledByLegalizer())
2311 DCI.AddToWorklist(ZextOp.getNode());
2312 // Otherwise, make this a use of a zext.
2313 return DAG.getSetCC(dl, VT, ZextOp,
2314 DAG.getConstant(C1 & APInt::getLowBitsSet(
2315 ExtDstTyBits,
2316 ExtSrcTyBits),
2317 dl, ExtDstTy),
2318 Cond);
2319 } else if ((N1C->isNullValue() || N1C->isOne()) &&
2320 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
2321 // SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
2322 if (N0.getOpcode() == ISD::SETCC &&
2323 isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
2324 bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (!N1C->isOne());
2325 if (TrueWhenTrue)
2326 return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
2327 // Invert the condition.
2328 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
2329 CC = ISD::getSetCCInverse(CC,
2330 N0.getOperand(0).getValueType().isInteger());
2331 if (DCI.isBeforeLegalizeOps() ||
2332 isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
2333 return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
2336 if ((N0.getOpcode() == ISD::XOR ||
2337 (N0.getOpcode() == ISD::AND &&
2338 N0.getOperand(0).getOpcode() == ISD::XOR &&
2339 N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
2340 isa<ConstantSDNode>(N0.getOperand(1)) &&
2341 cast<ConstantSDNode>(N0.getOperand(1))->isOne()) {
2342 // If this is (X^1) == 0/1, swap the RHS and eliminate the xor. We
2343 // can only do this if the top bits are known zero.
2344 unsigned BitWidth = N0.getValueSizeInBits();
2345 if (DAG.MaskedValueIsZero(N0,
2346 APInt::getHighBitsSet(BitWidth,
2347 BitWidth-1))) {
2348 // Okay, get the un-inverted input value.
2349 SDValue Val;
2350 if (N0.getOpcode() == ISD::XOR) {
2351 Val = N0.getOperand(0);
2352 } else {
2353 assert(N0.getOpcode() == ISD::AND &&
2354 N0.getOperand(0).getOpcode() == ISD::XOR);
2355 // ((X^1)&1)^1 -> X & 1
2356 Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
2357 N0.getOperand(0).getOperand(0),
2358 N0.getOperand(1));
2361 return DAG.getSetCC(dl, VT, Val, N1,
2362 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2364 } else if (N1C->isOne() &&
2365 (VT == MVT::i1 ||
2366 getBooleanContents(N0->getValueType(0)) ==
2367 ZeroOrOneBooleanContent)) {
2368 SDValue Op0 = N0;
2369 if (Op0.getOpcode() == ISD::TRUNCATE)
2370 Op0 = Op0.getOperand(0);
2372 if ((Op0.getOpcode() == ISD::XOR) &&
2373 Op0.getOperand(0).getOpcode() == ISD::SETCC &&
2374 Op0.getOperand(1).getOpcode() == ISD::SETCC) {
2375 // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
2376 Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
2377 return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
2378 Cond);
2380 if (Op0.getOpcode() == ISD::AND &&
2381 isa<ConstantSDNode>(Op0.getOperand(1)) &&
2382 cast<ConstantSDNode>(Op0.getOperand(1))->isOne()) {
2383 // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
2384 if (Op0.getValueType().bitsGT(VT))
2385 Op0 = DAG.getNode(ISD::AND, dl, VT,
2386 DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
2387 DAG.getConstant(1, dl, VT));
2388 else if (Op0.getValueType().bitsLT(VT))
2389 Op0 = DAG.getNode(ISD::AND, dl, VT,
2390 DAG.getNode(ISD::ANY_EXTEND, dl, VT, Op0.getOperand(0)),
2391 DAG.getConstant(1, dl, VT));
2393 return DAG.getSetCC(dl, VT, Op0,
2394 DAG.getConstant(0, dl, Op0.getValueType()),
2395 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2397 if (Op0.getOpcode() == ISD::AssertZext &&
2398 cast<VTSDNode>(Op0.getOperand(1))->getVT() == MVT::i1)
2399 return DAG.getSetCC(dl, VT, Op0,
2400 DAG.getConstant(0, dl, Op0.getValueType()),
2401 Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
2405 if (SDValue V =
2406 optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
2407 return V;
2410 // These simplifications apply to splat vectors as well.
2411 // TODO: Handle more splat vector cases.
2412 if (auto *N1C = isConstOrConstSplat(N1)) {
2413 const APInt &C1 = N1C->getAPIntValue();
2415 APInt MinVal, MaxVal;
2416 unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
2417 if (ISD::isSignedIntSetCC(Cond)) {
2418 MinVal = APInt::getSignedMinValue(OperandBitSize);
2419 MaxVal = APInt::getSignedMaxValue(OperandBitSize);
2420 } else {
2421 MinVal = APInt::getMinValue(OperandBitSize);
2422 MaxVal = APInt::getMaxValue(OperandBitSize);
2425 // Canonicalize GE/LE comparisons to use GT/LT comparisons.
2426 if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
2427 // X >= MIN --> true
2428 if (C1 == MinVal)
2429 return DAG.getBoolConstant(true, dl, VT, OpVT);
2431 if (!VT.isVector()) { // TODO: Support this for vectors.
2432 // X >= C0 --> X > (C0 - 1)
2433 APInt C = C1 - 1;
2434 ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
2435 if ((DCI.isBeforeLegalizeOps() ||
2436 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2437 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2438 isLegalICmpImmediate(C.getSExtValue())))) {
2439 return DAG.getSetCC(dl, VT, N0,
2440 DAG.getConstant(C, dl, N1.getValueType()),
2441 NewCC);
2446 if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
2447 // X <= MAX --> true
2448 if (C1 == MaxVal)
2449 return DAG.getBoolConstant(true, dl, VT, OpVT);
2451 // X <= C0 --> X < (C0 + 1)
2452 if (!VT.isVector()) { // TODO: Support this for vectors.
2453 APInt C = C1 + 1;
2454 ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
2455 if ((DCI.isBeforeLegalizeOps() ||
2456 isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
2457 (!N1C->isOpaque() || (C.getBitWidth() <= 64 &&
2458 isLegalICmpImmediate(C.getSExtValue())))) {
2459 return DAG.getSetCC(dl, VT, N0,
2460 DAG.getConstant(C, dl, N1.getValueType()),
2461 NewCC);
2466 if (Cond == ISD::SETLT || Cond == ISD::SETULT) {
2467 if (C1 == MinVal)
2468 return DAG.getBoolConstant(false, dl, VT, OpVT); // X < MIN --> false
2470 // TODO: Support this for vectors after legalize ops.
2471 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2472 // Canonicalize setlt X, Max --> setne X, Max
2473 if (C1 == MaxVal)
2474 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2476 // If we have setult X, 1, turn it into seteq X, 0
2477 if (C1 == MinVal+1)
2478 return DAG.getSetCC(dl, VT, N0,
2479 DAG.getConstant(MinVal, dl, N0.getValueType()),
2480 ISD::SETEQ);
2484 if (Cond == ISD::SETGT || Cond == ISD::SETUGT) {
2485 if (C1 == MaxVal)
2486 return DAG.getBoolConstant(false, dl, VT, OpVT); // X > MAX --> false
2488 // TODO: Support this for vectors after legalize ops.
2489 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2490 // Canonicalize setgt X, Min --> setne X, Min
2491 if (C1 == MinVal)
2492 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETNE);
2494 // If we have setugt X, Max-1, turn it into seteq X, Max
2495 if (C1 == MaxVal-1)
2496 return DAG.getSetCC(dl, VT, N0,
2497 DAG.getConstant(MaxVal, dl, N0.getValueType()),
2498 ISD::SETEQ);
2502 // If we have "setcc X, C0", check to see if we can shrink the immediate
2503 // by changing cc.
2504 // TODO: Support this for vectors after legalize ops.
2505 if (!VT.isVector() || DCI.isBeforeLegalizeOps()) {
2506 // SETUGT X, SINTMAX -> SETLT X, 0
2507 if (Cond == ISD::SETUGT &&
2508 C1 == APInt::getSignedMaxValue(OperandBitSize))
2509 return DAG.getSetCC(dl, VT, N0,
2510 DAG.getConstant(0, dl, N1.getValueType()),
2511 ISD::SETLT);
2513 // SETULT X, SINTMIN -> SETGT X, -1
2514 if (Cond == ISD::SETULT &&
2515 C1 == APInt::getSignedMinValue(OperandBitSize)) {
2516 SDValue ConstMinusOne =
2517 DAG.getConstant(APInt::getAllOnesValue(OperandBitSize), dl,
2518 N1.getValueType());
2519 return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
2524 // Back to non-vector simplifications.
2525 // TODO: Can we do these for vector splats?
2526 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
2527 const APInt &C1 = N1C->getAPIntValue();
2529 // Fold bit comparisons when we can.
2530 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2531 (VT == N0.getValueType() ||
2532 (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
2533 N0.getOpcode() == ISD::AND) {
2534 auto &DL = DAG.getDataLayout();
2535 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2536 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2537 !DCI.isBeforeLegalize());
2538 if (Cond == ISD::SETNE && C1 == 0) { // (X & 8) != 0 --> (X & 8) >> 3
2539 // Perform the xform if the AND RHS is a single bit.
2540 if (AndRHS->getAPIntValue().isPowerOf2()) {
2541 return DAG.getNode(ISD::TRUNCATE, dl, VT,
2542 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2543 DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
2544 ShiftTy)));
2546 } else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
2547 // (X & 8) == 8 --> (X & 8) >> 3
2548 // Perform the xform if C1 is a single bit.
2549 if (C1.isPowerOf2()) {
2550 return DAG.getNode(ISD::TRUNCATE, dl, VT,
2551 DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
2552 DAG.getConstant(C1.logBase2(), dl,
2553 ShiftTy)));
2559 if (C1.getMinSignedBits() <= 64 &&
2560 !isLegalICmpImmediate(C1.getSExtValue())) {
2561 // (X & -256) == 256 -> (X >> 8) == 1
2562 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2563 N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
2564 if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2565 const APInt &AndRHSC = AndRHS->getAPIntValue();
2566 if ((-AndRHSC).isPowerOf2() && (AndRHSC & C1) == C1) {
2567 unsigned ShiftBits = AndRHSC.countTrailingZeros();
2568 auto &DL = DAG.getDataLayout();
2569 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2570 !DCI.isBeforeLegalize());
2571 EVT CmpTy = N0.getValueType();
2572 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0.getOperand(0),
2573 DAG.getConstant(ShiftBits, dl,
2574 ShiftTy));
2575 SDValue CmpRHS = DAG.getConstant(C1.lshr(ShiftBits), dl, CmpTy);
2576 return DAG.getSetCC(dl, VT, Shift, CmpRHS, Cond);
2579 } else if (Cond == ISD::SETULT || Cond == ISD::SETUGE ||
2580 Cond == ISD::SETULE || Cond == ISD::SETUGT) {
2581 bool AdjOne = (Cond == ISD::SETULE || Cond == ISD::SETUGT);
2582 // X < 0x100000000 -> (X >> 32) < 1
2583 // X >= 0x100000000 -> (X >> 32) >= 1
2584 // X <= 0x0ffffffff -> (X >> 32) < 1
2585 // X > 0x0ffffffff -> (X >> 32) >= 1
2586 unsigned ShiftBits;
2587 APInt NewC = C1;
2588 ISD::CondCode NewCond = Cond;
2589 if (AdjOne) {
2590 ShiftBits = C1.countTrailingOnes();
2591 NewC = NewC + 1;
2592 NewCond = (Cond == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
2593 } else {
2594 ShiftBits = C1.countTrailingZeros();
2596 NewC.lshrInPlace(ShiftBits);
2597 if (ShiftBits && NewC.getMinSignedBits() <= 64 &&
2598 isLegalICmpImmediate(NewC.getSExtValue())) {
2599 auto &DL = DAG.getDataLayout();
2600 EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
2601 !DCI.isBeforeLegalize());
2602 EVT CmpTy = N0.getValueType();
2603 SDValue Shift = DAG.getNode(ISD::SRL, dl, CmpTy, N0,
2604 DAG.getConstant(ShiftBits, dl, ShiftTy));
2605 SDValue CmpRHS = DAG.getConstant(NewC, dl, CmpTy);
2606 return DAG.getSetCC(dl, VT, Shift, CmpRHS, NewCond);
2612 if (isa<ConstantFPSDNode>(N0.getNode())) {
2613 // Constant fold or commute setcc.
2614 SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
2615 if (O.getNode()) return O;
2616 } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
2617 // If the RHS of an FP comparison is a constant, simplify it away in
2618 // some cases.
2619 if (CFP->getValueAPF().isNaN()) {
2620 // If an operand is known to be a nan, we can fold it.
2621 switch (ISD::getUnorderedFlavor(Cond)) {
2622 default: llvm_unreachable("Unknown flavor!");
2623 case 0: // Known false.
2624 return DAG.getBoolConstant(false, dl, VT, OpVT);
2625 case 1: // Known true.
2626 return DAG.getBoolConstant(true, dl, VT, OpVT);
2627 case 2: // Undefined.
2628 return DAG.getUNDEF(VT);
2632 // Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
2633 // constant if knowing that the operand is non-nan is enough. We prefer to
2634 // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
2635 // materialize 0.0.
2636 if (Cond == ISD::SETO || Cond == ISD::SETUO)
2637 return DAG.getSetCC(dl, VT, N0, N0, Cond);
2639 // setcc (fneg x), C -> setcc swap(pred) x, -C
2640 if (N0.getOpcode() == ISD::FNEG) {
2641 ISD::CondCode SwapCond = ISD::getSetCCSwappedOperands(Cond);
2642 if (DCI.isBeforeLegalizeOps() ||
2643 isCondCodeLegal(SwapCond, N0.getSimpleValueType())) {
2644 SDValue NegN1 = DAG.getNode(ISD::FNEG, dl, N0.getValueType(), N1);
2645 return DAG.getSetCC(dl, VT, N0.getOperand(0), NegN1, SwapCond);
2649 // If the condition is not legal, see if we can find an equivalent one
2650 // which is legal.
2651 if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) {
2652 // If the comparison was an awkward floating-point == or != and one of
2653 // the comparison operands is infinity or negative infinity, convert the
2654 // condition to a less-awkward <= or >=.
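// E.g. x == -inf (SETOEQ) holds exactly when x <= -inf does, since nothing
// ordered compares below -inf, so SETOLE can stand in when it is the legal
// predicate.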
2655 if (CFP->getValueAPF().isInfinity()) {
2656 if (CFP->getValueAPF().isNegative()) {
2657 if (Cond == ISD::SETOEQ &&
2658 isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType()))
2659 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE);
2660 if (Cond == ISD::SETUEQ &&
2661 isCondCodeLegal(ISD::SETULE, N0.getSimpleValueType()))
2662 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE);
2663 if (Cond == ISD::SETUNE &&
2664 isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType()))
2665 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT);
2666 if (Cond == ISD::SETONE &&
2667 isCondCodeLegal(ISD::SETOGT, N0.getSimpleValueType()))
2668 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT);
2669 } else {
2670 if (Cond == ISD::SETOEQ &&
2671 isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType()))
2672 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE);
2673 if (Cond == ISD::SETUEQ &&
2674 isCondCodeLegal(ISD::SETUGE, N0.getSimpleValueType()))
2675 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE);
2676 if (Cond == ISD::SETUNE &&
2677 isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType()))
2678 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT);
2679 if (Cond == ISD::SETONE &&
2680 isCondCodeLegal(ISD::SETOLT, N0.getSimpleValueType()))
2681 return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT);
2687 if (N0 == N1) {
2688 // The sext(setcc()) => setcc() optimization relies on the appropriate
2689 // constant being emitted.
2691 bool EqTrue = ISD::isTrueWhenEqual(Cond);
2693 // We can always fold X == X for integer setcc's.
2694 if (N0.getValueType().isInteger())
2695 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2697 unsigned UOF = ISD::getUnorderedFlavor(Cond);
2698 if (UOF == 2) // FP operators that are undefined on NaNs.
2699 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2700 if (UOF == unsigned(EqTrue))
2701 return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
2702 // Otherwise, we can't fold it. However, we can simplify it to SETUO/SETO
2703 // if it is not already.
2704 ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
2705 if (NewCond != Cond &&
2706 (DCI.isBeforeLegalizeOps() ||
2707 isCondCodeLegal(NewCond, N0.getSimpleValueType())))
2708 return DAG.getSetCC(dl, VT, N0, N1, NewCond);
2711 if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
2712 N0.getValueType().isInteger()) {
2713 if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
2714 N0.getOpcode() == ISD::XOR) {
2715 // Simplify (X+Y) == (X+Z) --> Y == Z
2716 if (N0.getOpcode() == N1.getOpcode()) {
2717 if (N0.getOperand(0) == N1.getOperand(0))
2718 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(1), Cond);
2719 if (N0.getOperand(1) == N1.getOperand(1))
2720 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(0), Cond);
2721 if (isCommutativeBinOp(N0.getOpcode())) {
2722 // If X op Y == Y op X, try other combinations.
2723 if (N0.getOperand(0) == N1.getOperand(1))
2724 return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
2725 Cond);
2726 if (N0.getOperand(1) == N1.getOperand(0))
2727 return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
2728 Cond);
2732 // If RHS is a legal immediate value for a compare instruction, we need
2733 // to be careful about increasing register pressure needlessly.
2734 bool LegalRHSImm = false;
2736 if (auto *RHSC = dyn_cast<ConstantSDNode>(N1)) {
2737 if (auto *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2738 // Turn (X+C1) == C2 --> X == C2-C1
2739 if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse()) {
2740 return DAG.getSetCC(dl, VT, N0.getOperand(0),
2741 DAG.getConstant(RHSC->getAPIntValue()-
2742 LHSR->getAPIntValue(),
2743 dl, N0.getValueType()), Cond);
2746 // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
2747 if (N0.getOpcode() == ISD::XOR)
2748 // If we know that all of the inverted bits are zero, don't bother
2749 // performing the inversion.
2750 if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
2751 return
2752 DAG.getSetCC(dl, VT, N0.getOperand(0),
2753 DAG.getConstant(LHSR->getAPIntValue() ^
2754 RHSC->getAPIntValue(),
2755 dl, N0.getValueType()),
2756 Cond);
2759 // Turn (C1-X) == C2 --> X == C1-C2
2760 if (auto *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
2761 if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
2762 return
2763 DAG.getSetCC(dl, VT, N0.getOperand(1),
2764 DAG.getConstant(SUBC->getAPIntValue() -
2765 RHSC->getAPIntValue(),
2766 dl, N0.getValueType()),
2767 Cond);
2771 // Could RHSC fold directly into a compare?
2772 if (RHSC->getValueType(0).getSizeInBits() <= 64)
2773 LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
2776 // Simplify (X+Z) == X --> Z == 0
2777 // Don't do this if X is an immediate that can fold into a cmp
2778 // instruction and X+Z has other uses. It could be an induction variable
2779 // chain, and the transform would increase register pressure.
2780 if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
2781 if (N0.getOperand(0) == N1)
2782 return DAG.getSetCC(dl, VT, N0.getOperand(1),
2783 DAG.getConstant(0, dl, N0.getValueType()), Cond);
2784 if (N0.getOperand(1) == N1) {
2785 if (isCommutativeBinOp(N0.getOpcode()))
2786 return DAG.getSetCC(dl, VT, N0.getOperand(0),
2787 DAG.getConstant(0, dl, N0.getValueType()),
2788 Cond);
2789 if (N0.getNode()->hasOneUse()) {
2790 assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
2791 auto &DL = DAG.getDataLayout();
2792 // (Z-X) == X --> Z == X<<1
2793 SDValue SH = DAG.getNode(
2794 ISD::SHL, dl, N1.getValueType(), N1,
2795 DAG.getConstant(1, dl,
2796 getShiftAmountTy(N1.getValueType(), DL,
2797 !DCI.isBeforeLegalize())));
2798 if (!DCI.isCalledByLegalizer())
2799 DCI.AddToWorklist(SH.getNode());
2800 return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
2806 if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
2807 N1.getOpcode() == ISD::XOR) {
2808 // Simplify X == (X+Z) --> Z == 0
2809 if (N1.getOperand(0) == N0)
2810 return DAG.getSetCC(dl, VT, N1.getOperand(1),
2811 DAG.getConstant(0, dl, N1.getValueType()), Cond);
2812 if (N1.getOperand(1) == N0) {
2813 if (isCommutativeBinOp(N1.getOpcode()))
2814 return DAG.getSetCC(dl, VT, N1.getOperand(0),
2815 DAG.getConstant(0, dl, N1.getValueType()), Cond);
2816 if (N1.getNode()->hasOneUse()) {
2817 assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
2818 auto &DL = DAG.getDataLayout();
2819 // X == (Z-X) --> X<<1 == Z
2820 SDValue SH = DAG.getNode(
2821 ISD::SHL, dl, N1.getValueType(), N0,
2822 DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
2823 !DCI.isBeforeLegalize())));
2824 if (!DCI.isCalledByLegalizer())
2825 DCI.AddToWorklist(SH.getNode());
2826 return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
2831 if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
2832 return V;
2835 // Fold away ALL boolean setcc's.
2836 SDValue Temp;
2837 if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
2838 EVT OpVT = N0.getValueType();
2839 switch (Cond) {
2840 default: llvm_unreachable("Unknown integer setcc!");
2841 case ISD::SETEQ: // X == Y -> ~(X^Y)
2842 Temp = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2843 N0 = DAG.getNOT(dl, Temp, OpVT);
2844 if (!DCI.isCalledByLegalizer())
2845 DCI.AddToWorklist(Temp.getNode());
2846 break;
2847 case ISD::SETNE: // X != Y --> (X^Y)
2848 N0 = DAG.getNode(ISD::XOR, dl, OpVT, N0, N1);
2849 break;
2850 case ISD::SETGT: // X >s Y --> X == 0 & Y == 1 --> ~X & Y
2851 case ISD::SETULT: // X <u Y --> X == 0 & Y == 1 --> ~X & Y
2852 Temp = DAG.getNOT(dl, N0, OpVT);
2853 N0 = DAG.getNode(ISD::AND, dl, OpVT, N1, Temp);
2854 if (!DCI.isCalledByLegalizer())
2855 DCI.AddToWorklist(Temp.getNode());
2856 break;
2857 case ISD::SETLT: // X <s Y --> X == 1 & Y == 0 --> ~Y & X
2858 case ISD::SETUGT: // X >u Y --> X == 1 & Y == 0 --> ~Y & X
2859 Temp = DAG.getNOT(dl, N1, OpVT);
2860 N0 = DAG.getNode(ISD::AND, dl, OpVT, N0, Temp);
2861 if (!DCI.isCalledByLegalizer())
2862 DCI.AddToWorklist(Temp.getNode());
2863 break;
2864 case ISD::SETULE: // X <=u Y --> X == 0 | Y == 1 --> ~X | Y
2865 case ISD::SETGE: // X >=s Y --> X == 0 | Y == 1 --> ~X | Y
2866 Temp = DAG.getNOT(dl, N0, OpVT);
2867 N0 = DAG.getNode(ISD::OR, dl, OpVT, N1, Temp);
2868 if (!DCI.isCalledByLegalizer())
2869 DCI.AddToWorklist(Temp.getNode());
2870 break;
2871 case ISD::SETUGE: // X >=u Y --> X == 1 | Y == 0 --> ~Y | X
2872 case ISD::SETLE: // X <=s Y --> X == 1 | Y == 0 --> ~Y | X
2873 Temp = DAG.getNOT(dl, N1, OpVT);
2874 N0 = DAG.getNode(ISD::OR, dl, OpVT, N0, Temp);
2875 break;
2877 if (VT.getScalarType() != MVT::i1) {
2878 if (!DCI.isCalledByLegalizer())
2879 DCI.AddToWorklist(N0.getNode());
2880 // FIXME: If running after legalize, we probably can't do this.
2881 ISD::NodeType ExtendCode = getExtendForContent(getBooleanContents(OpVT));
2882 N0 = DAG.getNode(ExtendCode, dl, VT, N0);
2884 return N0;
2887 // Could not fold it.
2888 return SDValue();
2891 /// Returns true (and the GlobalValue and the offset) if the node is a
2892 /// GlobalAddress + offset.
2893 bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
2894 int64_t &Offset) const {
2895 if (auto *GASD = dyn_cast<GlobalAddressSDNode>(N)) {
2896 GA = GASD->getGlobal();
2897 Offset += GASD->getOffset();
2898 return true;
2901 if (N->getOpcode() == ISD::ADD) {
2902 SDValue N1 = N->getOperand(0);
2903 SDValue N2 = N->getOperand(1);
2904 if (isGAPlusOffset(N1.getNode(), GA, Offset)) {
2905 if (auto *V = dyn_cast<ConstantSDNode>(N2)) {
2906 Offset += V->getSExtValue();
2907 return true;
2909 } else if (isGAPlusOffset(N2.getNode(), GA, Offset)) {
2910 if (auto *V = dyn_cast<ConstantSDNode>(N1)) {
2911 Offset += V->getSExtValue();
2912 return true;
2917 return false;
2920 SDValue TargetLowering::PerformDAGCombine(SDNode *N,
2921 DAGCombinerInfo &DCI) const {
2922 // Default implementation: no optimization.
2923 return SDValue();
2926 //===----------------------------------------------------------------------===//
2927 // Inline Assembler Implementation Methods
2928 //===----------------------------------------------------------------------===//
2930 TargetLowering::ConstraintType
2931 TargetLowering::getConstraintType(StringRef Constraint) const {
2932 unsigned S = Constraint.size();
2934 if (S == 1) {
2935 switch (Constraint[0]) {
2936 default: break;
2937 case 'r': return C_RegisterClass;
2938 case 'm': // memory
2939 case 'o': // offsetable
2940 case 'V': // not offsetable
2941 return C_Memory;
2942 case 'i': // Simple Integer or Relocatable Constant
2943 case 'n': // Simple Integer
2944 case 'E': // Floating Point Constant
2945 case 'F': // Floating Point Constant
2946 case 's': // Relocatable Constant
2947 case 'p': // Address.
2948 case 'X': // Allow ANY value.
2949 case 'I': // Target registers.
2950 case 'J':
2951 case 'K':
2952 case 'L':
2953 case 'M':
2954 case 'N':
2955 case 'O':
2956 case 'P':
2957 case '<':
2958 case '>':
2959 return C_Other;
2963 if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
2964 if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
2965 return C_Memory;
2966 return C_Register;
2968 return C_Unknown;
2971 /// Try to replace an X constraint, which matches anything, with another that
2972 /// has more specific requirements based on the type of the corresponding
2973 /// operand.
2974 const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
2975 if (ConstraintVT.isInteger())
2976 return "r";
2977 if (ConstraintVT.isFloatingPoint())
2978 return "f"; // works for many targets
2979 return nullptr;
2982 /// Lower the specified operand into the Ops vector.
2983 /// If it is invalid, don't add anything to Ops.
2984 void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2985 std::string &Constraint,
2986 std::vector<SDValue> &Ops,
2987 SelectionDAG &DAG) const {
2989 if (Constraint.length() > 1) return;
2991 char ConstraintLetter = Constraint[0];
2992 switch (ConstraintLetter) {
2993 default: break;
2994 case 'X': // Allows any operand; labels (basic block) use this.
2995 if (Op.getOpcode() == ISD::BasicBlock) {
2996 Ops.push_back(Op);
2997 return;
2999 LLVM_FALLTHROUGH;
3000 case 'i': // Simple Integer or Relocatable Constant
3001 case 'n': // Simple Integer
3002 case 's': { // Relocatable Constant
3003 // These operands are interested in values of the form (GV+C), where C may
3004 // be folded in as an offset of GV, or it may be explicitly added. Also, it
3005 // is possible and fine if either GV or C is missing.
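// E.g. "s"(&GV) supplies just GV, "i"(42) supplies just C, and an address
// like GV+4 reaches here as (add GV, C) and is folded into one
// TargetGlobalAddress with offset (illustrative operands, not tied to any
// particular caller).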
3006 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
3007 GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
3009 // If we have "(add GV, C)", pull out GV/C
3010 if (Op.getOpcode() == ISD::ADD) {
3011 C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3012 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
3013 if (!C || !GA) {
3014 C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
3015 GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
3017 if (!C || !GA) {
3018 C = nullptr;
3019 GA = nullptr;
3023 // If we find a valid operand, map to the TargetXXX version so that the
3024 // value itself doesn't get selected.
3025 if (GA) { // Either &GV or &GV+C
3026 if (ConstraintLetter != 'n') {
3027 int64_t Offs = GA->getOffset();
3028 if (C) Offs += C->getZExtValue();
3029 Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
3030 C ? SDLoc(C) : SDLoc(),
3031 Op.getValueType(), Offs));
3033 return;
3035 if (C) { // just C, no GV.
3036 // Simple constants are not allowed for 's'.
3037 if (ConstraintLetter != 's') {
3038 // gcc prints these as sign extended. Sign extend the value to 64 bits
3039 // now; without this it would get ZExt'd later in
3040 // ScheduleDAGSDNodes::EmitNode, which is very generic.
3041 Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
3042 SDLoc(C), MVT::i64));
3044 return;
3046 break;
3051 std::pair<unsigned, const TargetRegisterClass *>
3052 TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
3053 StringRef Constraint,
3054 MVT VT) const {
3055 if (Constraint.empty() || Constraint[0] != '{')
3056 return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
3057 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
3059 // Remove the braces from around the name.
3060 StringRef RegName(Constraint.data()+1, Constraint.size()-2);
3062 std::pair<unsigned, const TargetRegisterClass*> R =
3063 std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
3065 // Figure out which register class contains this reg.
3066 for (const TargetRegisterClass *RC : RI->regclasses()) {
3067 // If none of the value types for this register class are valid, we
3068 // can't use it. For example, 64-bit reg classes on 32-bit targets.
3069 if (!isLegalRC(*RI, *RC))
3070 continue;
3072 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
3073 I != E; ++I) {
3074 if (RegName.equals_lower(RI->getRegAsmName(*I))) {
3075 std::pair<unsigned, const TargetRegisterClass*> S =
3076 std::make_pair(*I, RC);
3078 // If this register class has the requested value type, return it,
3079 // otherwise keep searching and return the first class found
3080 // if no other is found which explicitly has the requested type.
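// E.g. a constraint like "{ax}" may name a register that belongs to several
// register classes; if VT is MVT::i16, a class that supports i16 wins, and
// otherwise the first legal class found serves as the fallback result
// (illustrative; register and class names are target-specific).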
3081 if (RI->isTypeLegalForClass(*RC, VT))
3082 return S;
3083 if (!R.second)
3084 R = S;
3085 }
3086 }
3087 }
3089 return R;
3090 }
3092 //===----------------------------------------------------------------------===//
3093 // Constraint Selection.
3094 //===----------------------------------------------------------------------===//
3095 /// Return true if this is an input operand that is a matching constraint like
3096 /// "4".
3097 bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const {
3098 assert(!ConstraintCode.empty() && "No known constraint!");
3099 return isdigit(static_cast<unsigned char>(ConstraintCode[0]));
3100 }
3102 /// If this is an input matching constraint, this method returns the output
3103 /// operand it matches.
3104 unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
3105 assert(!ConstraintCode.empty() && "No known constraint!");
3106 return atoi(ConstraintCode.c_str());
3107 }
3109 /// Split up the constraint string from the inline assembly value into the
3110 /// specific constraints and their prefixes, and also tie in the associated
3111 /// operand values.
3112 /// If this returns an empty vector, and if the constraint string itself
3113 /// isn't empty, there was an error parsing.
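// For illustration (not part of the upstream source), a call such as
//   call i32 asm "add $0, $1, $2", "=r,r,0"(i32 %a, i32 %b)
// produces three AsmOperandInfo entries: an isOutput operand with code "r",
// an isInput operand with code "r", and an isInput operand with the matching
// constraint "0", which ties that input to output operand 0.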
3114 TargetLowering::AsmOperandInfoVector
3115 TargetLowering::ParseConstraints(const DataLayout &DL,
3116 const TargetRegisterInfo *TRI,
3117 ImmutableCallSite CS) const {
3118 /// Information about all of the constraints.
3119 AsmOperandInfoVector ConstraintOperands;
3120 const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
3121 unsigned maCount = 0; // Largest number of multiple alternative constraints.
3123 // Do a prepass over the constraints, canonicalizing them, and building up the
3124 // ConstraintOperands list.
3125 unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
3126 unsigned ResNo = 0; // ResNo - The result number of the next output.
3128 for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
3129 ConstraintOperands.emplace_back(std::move(CI));
3130 AsmOperandInfo &OpInfo = ConstraintOperands.back();
3132 // Update multiple alternative constraint count.
3133 if (OpInfo.multipleAlternatives.size() > maCount)
3134 maCount = OpInfo.multipleAlternatives.size();
3136 OpInfo.ConstraintVT = MVT::Other;
3138 // Compute the value type for each operand.
3139 switch (OpInfo.Type) {
3140 case InlineAsm::isOutput:
3141 // Indirect outputs just consume an argument.
3142 if (OpInfo.isIndirect) {
3143 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3144 break;
3145 }
3147 // The return value of the call is this value. As such, there is no
3148 // corresponding argument.
3149 assert(!CS.getType()->isVoidTy() &&
3150 "Bad inline asm!");
3151 if (StructType *STy = dyn_cast<StructType>(CS.getType())) {
3152 OpInfo.ConstraintVT =
3153 getSimpleValueType(DL, STy->getElementType(ResNo));
3154 } else {
3155 assert(ResNo == 0 && "Asm only has one result!");
3156 OpInfo.ConstraintVT = getSimpleValueType(DL, CS.getType());
3157 }
3158 ++ResNo;
3159 break;
3160 case InlineAsm::isInput:
3161 OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
3162 break;
3163 case InlineAsm::isClobber:
3164 // Nothing to do.
3165 break;
3166 }
3168 if (OpInfo.CallOperandVal) {
3169 llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
3170 if (OpInfo.isIndirect) {
3171 llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
3172 if (!PtrTy)
3173 report_fatal_error("Indirect operand for inline asm not a pointer!");
3174 OpTy = PtrTy->getElementType();
3175 }
3177 // Look for a vector wrapped in a struct, e.g. { <16 x i8> }.
3178 if (StructType *STy = dyn_cast<StructType>(OpTy))
3179 if (STy->getNumElements() == 1)
3180 OpTy = STy->getElementType(0);
3182 // If OpTy is not a single value, it may be a struct/union that we
3183 // can tile with integers.
3184 if (!OpTy->isSingleValueType() && OpTy->isSized()) {
3185 unsigned BitSize = DL.getTypeSizeInBits(OpTy);
3186 switch (BitSize) {
3187 default: break;
3188 case 1:
3189 case 8:
3190 case 16:
3191 case 32:
3192 case 64:
3193 case 128:
3194 OpInfo.ConstraintVT =
3195 MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
3196 break;
3197 }
3198 } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
3199 unsigned PtrSize = DL.getPointerSizeInBits(PT->getAddressSpace());
3200 OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
3201 } else {
3202 OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
3203 }
3204 }
3205 }
3207 // If we have multiple alternative constraints, select the best alternative.
3208 if (!ConstraintOperands.empty()) {
3209 if (maCount) {
3210 unsigned bestMAIndex = 0;
3211 int bestWeight = -1;
3212 // weight: -1 = invalid match; 0 = so-so match, up to 5 = good match.
3213 int weight = -1;
3214 unsigned maIndex;
3215 // Compute the sums of the weights for each alternative, keeping track
3216 // of the best (highest weight) one so far.
3217 for (maIndex = 0; maIndex < maCount; ++maIndex) {
3218 int weightSum = 0;
3219 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3220 cIndex != eIndex; ++cIndex) {
3221 AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
3222 if (OpInfo.Type == InlineAsm::isClobber)
3223 continue;
3225 // If this is an output operand with a matching input operand,
3226 // look up the matching input. If their types mismatch, e.g. one
3227 // is an integer, the other is floating point, or their sizes are
3228 // different, flag the alternative as unmatchable (maCantMatch).
3229 if (OpInfo.hasMatchingInput()) {
3230 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
3231 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
3232 if ((OpInfo.ConstraintVT.isInteger() !=
3233 Input.ConstraintVT.isInteger()) ||
3234 (OpInfo.ConstraintVT.getSizeInBits() !=
3235 Input.ConstraintVT.getSizeInBits())) {
3236 weightSum = -1; // Can't match.
3237 break;
3238 }
3239 }
3240 }
3241 weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
3242 if (weight == -1) {
3243 weightSum = -1;
3244 break;
3245 }
3246 weightSum += weight;
3247 }
3248 // Update best.
3249 if (weightSum > bestWeight) {
3250 bestWeight = weightSum;
3251 bestMAIndex = maIndex;
3252 }
3253 }
3255 // Now select chosen alternative in each constraint.
3256 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3257 cIndex != eIndex; ++cIndex) {
3258 AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
3259 if (cInfo.Type == InlineAsm::isClobber)
3260 continue;
3261 cInfo.selectAlternative(bestMAIndex);
3262 }
3263 }
3264 }
3266 // Check and hook up tied operands, choose constraint code to use.
3267 for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
3268 cIndex != eIndex; ++cIndex) {
3269 AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
3271 // If this is an output operand with a matching input operand, look up the
3272 // matching input. If their types mismatch, e.g. one is an integer, the
3273 // other is floating point, or their sizes are different, flag it as an
3274 // error.
3275 if (OpInfo.hasMatchingInput()) {
3276 AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
3278 if (OpInfo.ConstraintVT != Input.ConstraintVT) {
3279 std::pair<unsigned, const TargetRegisterClass *> MatchRC =
3280 getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
3281 OpInfo.ConstraintVT);
3282 std::pair<unsigned, const TargetRegisterClass *> InputRC =
3283 getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
3284 Input.ConstraintVT);
3285 if ((OpInfo.ConstraintVT.isInteger() !=
3286 Input.ConstraintVT.isInteger()) ||
3287 (MatchRC.second != InputRC.second)) {
3288 report_fatal_error("Unsupported asm: input constraint"
3289 " with a matching output constraint of"
3290 " incompatible type!");
3296 return ConstraintOperands;
3297 }
3299 /// Return an integer indicating how general CT is.
3300 static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
3301 switch (CT) {
3302 case TargetLowering::C_Other:
3303 case TargetLowering::C_Unknown:
3304 return 0;
3305 case TargetLowering::C_Register:
3306 return 1;
3307 case TargetLowering::C_RegisterClass:
3308 return 2;
3309 case TargetLowering::C_Memory:
3310 return 3;
3311 }
3312 llvm_unreachable("Invalid constraint type");
3313 }
3315 /// Examine constraint type and operand type and determine a weight value.
3316 /// This object must already have been set up with the operand type
3317 /// and the current alternative constraint selected.
3318 TargetLowering::ConstraintWeight
3319 TargetLowering::getMultipleConstraintMatchWeight(
3320 AsmOperandInfo &info, int maIndex) const {
3321 InlineAsm::ConstraintCodeVector *rCodes;
3322 if (maIndex >= (int)info.multipleAlternatives.size())
3323 rCodes = &info.Codes;
3324 else
3325 rCodes = &info.multipleAlternatives[maIndex].Codes;
3326 ConstraintWeight BestWeight = CW_Invalid;
3328 // Loop over the options, keeping track of the most general one.
3329 for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
3330 ConstraintWeight weight =
3331 getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
3332 if (weight > BestWeight)
3333 BestWeight = weight;
3334 }
3336 return BestWeight;
3337 }
3339 /// Examine constraint type and operand type and determine a weight value.
3340 /// This object must already have been set up with the operand type
3341 /// and the current alternative constraint selected.
3342 TargetLowering::ConstraintWeight
3343 TargetLowering::getSingleConstraintMatchWeight(
3344 AsmOperandInfo &info, const char *constraint) const {
3345 ConstraintWeight weight = CW_Invalid;
3346 Value *CallOperandVal = info.CallOperandVal;
3347 // If we don't have a value, we can't do a match,
3348 // but allow it at the lowest weight.
3349 if (!CallOperandVal)
3350 return CW_Default;
3351 // Look at the constraint type.
3352 switch (*constraint) {
3353 case 'i': // immediate integer.
3354 case 'n': // immediate integer with a known value.
3355 if (isa<ConstantInt>(CallOperandVal))
3356 weight = CW_Constant;
3357 break;
3358 case 's': // non-explicit integral immediate.
3359 if (isa<GlobalValue>(CallOperandVal))
3360 weight = CW_Constant;
3361 break;
3362 case 'E': // immediate float if host format.
3363 case 'F': // immediate float.
3364 if (isa<ConstantFP>(CallOperandVal))
3365 weight = CW_Constant;
3366 break;
3367 case '<': // memory operand with autodecrement.
3368 case '>': // memory operand with autoincrement.
3369 case 'm': // memory operand.
3370 case 'o': // offsettable memory operand
3371 case 'V': // non-offsettable memory operand
3372 weight = CW_Memory;
3373 break;
3374 case 'r': // general register.
3375 case 'g': // general register, memory operand or immediate integer.
3376 // note: Clang converts "g" to "imr".
3377 if (CallOperandVal->getType()->isIntegerTy())
3378 weight = CW_Register;
3379 break;
3380 case 'X': // any operand.
3381 default:
3382 weight = CW_Default;
3383 break;
3384 }
3385 return weight;
3386 }
3388 /// If there are multiple different constraints that we could pick for this
3389 /// operand (e.g. "imr") try to pick the 'best' one.
3390 /// This is somewhat tricky: constraints fall into four classes:
3391 /// Other -> immediates and magic values
3392 /// Register -> one specific register
3393 /// RegisterClass -> a group of regs
3394 /// Memory -> memory
3395 /// Ideally, we would pick the most specific constraint possible: if we have
3396 /// something that fits into a register, we would pick it. The problem here
3397 /// is that if we have something that could either be in a register or in
3398 /// memory, then use of the register could cause selection of *other*
3399 /// operands to fail: they might only succeed if we pick memory. Because of
3400 /// this the heuristic we use is:
3402 /// 1) If there is an 'other' constraint, and if the operand is valid for
3403 /// that constraint, use it. This makes us take advantage of 'i'
3404 /// constraints when available.
3405 /// 2) Otherwise, pick the most general constraint present. This prefers
3406 /// 'm' over 'r', for example.
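// For example (illustrative): given the multi-letter constraint "imr" and a
// constant operand such as (i32 42), rule 1 fires and 'i' is chosen, because
// LowerAsmOperandForConstraint can fold the constant directly. For a
// non-constant operand, rule 2 applies and 'm' (C_Memory, generality 3) is
// preferred over 'r' (C_RegisterClass, generality 2).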
3408 static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
3409 const TargetLowering &TLI,
3410 SDValue Op, SelectionDAG *DAG) {
3411 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options");
3412 unsigned BestIdx = 0;
3413 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown;
3414 int BestGenerality = -1;
3416 // Loop over the options, keeping track of the most general one.
3417 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) {
3418 TargetLowering::ConstraintType CType =
3419 TLI.getConstraintType(OpInfo.Codes[i]);
3421 // If this is an 'other' constraint, see if the operand is valid for it.
3422 // For example, on X86 we might have an 'rI' constraint. If the operand
3423 // is an integer in the range [0..31] we want to use I (saving a load
3424 // of a register), otherwise we must use 'r'.
3425 if (CType == TargetLowering::C_Other && Op.getNode()) {
3426 assert(OpInfo.Codes[i].size() == 1 &&
3427 "Unhandled multi-letter 'other' constraint");
3428 std::vector<SDValue> ResultOps;
3429 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i],
3430 ResultOps, *DAG);
3431 if (!ResultOps.empty()) {
3432 BestType = CType;
3433 BestIdx = i;
3434 break;
3435 }
3436 }
3438 // Things with matching constraints can only be registers, per gcc
3439 // documentation. This mainly affects "g" constraints.
3440 if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
3441 continue;
3443 // This constraint letter is more general than the previous one, use it.
3444 int Generality = getConstraintGenerality(CType);
3445 if (Generality > BestGenerality) {
3446 BestType = CType;
3447 BestIdx = i;
3448 BestGenerality = Generality;
3449 }
3450 }
3452 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
3453 OpInfo.ConstraintType = BestType;
3454 }
3456 /// Determines the constraint code and constraint type to use for the specific
3457 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
3458 void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
3459 SDValue Op,
3460 SelectionDAG *DAG) const {
3461 assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
3463 // Single-letter constraints ('r') are very common.
3464 if (OpInfo.Codes.size() == 1) {
3465 OpInfo.ConstraintCode = OpInfo.Codes[0];
3466 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3467 } else {
3468 ChooseConstraint(OpInfo, *this, Op, DAG);
3469 }
3471 // 'X' matches anything.
3472 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
3473 // Labels and constants are handled elsewhere ('X' is the only thing
3474 // that matches labels). For Functions, the type here is the type of
3475 // the result, which is not what we want to look at; leave them alone.
3476 Value *v = OpInfo.CallOperandVal;
3477 if (isa<BasicBlock>(v) || isa<ConstantInt>(v) || isa<Function>(v)) {
3478 OpInfo.CallOperandVal = v;
3479 return;
3480 }
3482 // Otherwise, try to resolve it to something we know about by looking at
3483 // the actual operand type.
3484 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
3485 OpInfo.ConstraintCode = Repl;
3486 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode);
3487 }
3488 }
3489 }
3491 /// Given an exact SDIV by a constant, create a multiplication
3492 /// with the multiplicative inverse of the constant.
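// A minimal sketch of the transform (illustrative): for an exact
// "sdiv i32 %x, 6", the divisor factors as 2 * 3, so this lowers to
//   %t = ashr exact i32 %x, 1     ; strip the power-of-two factor
//   %q = mul i32 %t, -1431655765  ; 0xAAAAAAAB == 3^-1 (mod 2^32)
// which is correct because every odd number is invertible mod 2^N.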
3493 static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
3494 const SDLoc &dl, SelectionDAG &DAG,
3495 SmallVectorImpl<SDNode *> &Created) {
3496 SDValue Op0 = N->getOperand(0);
3497 SDValue Op1 = N->getOperand(1);
3498 EVT VT = N->getValueType(0);
3499 EVT SVT = VT.getScalarType();
3500 EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
3501 EVT ShSVT = ShVT.getScalarType();
3503 bool UseSRA = false;
3504 SmallVector<SDValue, 16> Shifts, Factors;
3506 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
3507 if (C->isNullValue())
3508 return false;
3509 APInt Divisor = C->getAPIntValue();
3510 unsigned Shift = Divisor.countTrailingZeros();
3511 if (Shift) {
3512 Divisor.ashrInPlace(Shift);
3513 UseSRA = true;
3514 }
3515 // Calculate the multiplicative inverse, using Newton's method.
3516 APInt t;
3517 APInt Factor = Divisor;
3518 while ((t = Divisor * Factor) != 1)
3519 Factor *= APInt(Divisor.getBitWidth(), 2) - t;
3520 Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT));
3521 Factors.push_back(DAG.getConstant(Factor, dl, SVT));
3522 return true;
3523 };
3525 // Collect all magic values from the build vector.
3526 if (!ISD::matchUnaryPredicate(Op1, BuildSDIVPattern))
3527 return SDValue();
3529 SDValue Shift, Factor;
3530 if (VT.isVector()) {
3531 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
3532 Factor = DAG.getBuildVector(VT, dl, Factors);
3533 } else {
3534 Shift = Shifts[0];
3535 Factor = Factors[0];
3536 }
3538 SDValue Res = Op0;
3540 // Shift the value upfront if it is even, so the LSB is one.
3541 if (UseSRA) {
3542 // TODO: For UDIV use SRL instead of SRA.
3543 SDNodeFlags Flags;
3544 Flags.setExact(true);
3545 Res = DAG.getNode(ISD::SRA, dl, VT, Res, Shift, Flags);
3546 Created.push_back(Res.getNode());
3547 }
3549 return DAG.getNode(ISD::MUL, dl, VT, Res, Factor);
3550 }
3552 SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
3553 SelectionDAG &DAG,
3554 SmallVectorImpl<SDNode *> &Created) const {
3555 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3556 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3557 if (TLI.isIntDivCheap(N->getValueType(0), Attr))
3558 return SDValue(N,0); // Lower SDIV as SDIV
3559 return SDValue();
3560 }
3562 /// Given an ISD::SDIV node expressing a divide by constant,
3563 /// return a DAG expression to select that will generate the same value by
3564 /// multiplying by a magic number.
3565 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
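// Worked example (illustrative): for "sdiv i32 %x, 7", Divisor.magic() gives
// m = 0x92492493 (negative) and s = 2, so d > 0 and m < 0 selects
// NumeratorFactor = 1, and the emitted sequence is roughly:
//   %q = mulhs i32 %x, 0x92492493
//   %q = add i32 %q, %x           ; add the numerator back
//   %q = sra i32 %q, 2            ; shift right by magics.s
//   %t = srl i32 %q, 31           ; extract the sign bit
//   %q = add i32 %q, %t           ; fix up rounding toward zero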
3566 SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
3567 bool IsAfterLegalization,
3568 SmallVectorImpl<SDNode *> &Created) const {
3569 SDLoc dl(N);
3570 EVT VT = N->getValueType(0);
3571 EVT SVT = VT.getScalarType();
3572 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
3573 EVT ShSVT = ShVT.getScalarType();
3574 unsigned EltBits = VT.getScalarSizeInBits();
3576 // Check to see if we can do this.
3577 // FIXME: We should be more aggressive here.
3578 if (!isTypeLegal(VT))
3579 return SDValue();
3581 // If the sdiv has an 'exact' bit we can use a simpler lowering.
3582 if (N->getFlags().hasExact())
3583 return BuildExactSDIV(*this, N, dl, DAG, Created);
3585 SmallVector<SDValue, 16> MagicFactors, Factors, Shifts, ShiftMasks;
3587 auto BuildSDIVPattern = [&](ConstantSDNode *C) {
3588 if (C->isNullValue())
3589 return false;
3591 const APInt &Divisor = C->getAPIntValue();
3592 APInt::ms magics = Divisor.magic();
3593 int NumeratorFactor = 0;
3594 int ShiftMask = -1;
3596 if (Divisor.isOneValue() || Divisor.isAllOnesValue()) {
3597 // If d is +1/-1, we just multiply the numerator by +1/-1.
3598 NumeratorFactor = Divisor.getSExtValue();
3599 magics.m = 0;
3600 magics.s = 0;
3601 ShiftMask = 0;
3602 } else if (Divisor.isStrictlyPositive() && magics.m.isNegative()) {
3603 // If d > 0 and m < 0, add the numerator.
3604 NumeratorFactor = 1;
3605 } else if (Divisor.isNegative() && magics.m.isStrictlyPositive()) {
3606 // If d < 0 and m > 0, subtract the numerator.
3607 NumeratorFactor = -1;
3608 }
3610 MagicFactors.push_back(DAG.getConstant(magics.m, dl, SVT));
3611 Factors.push_back(DAG.getConstant(NumeratorFactor, dl, SVT));
3612 Shifts.push_back(DAG.getConstant(magics.s, dl, ShSVT));
3613 ShiftMasks.push_back(DAG.getConstant(ShiftMask, dl, SVT));
3614 return true;
3615 };
3617 SDValue N0 = N->getOperand(0);
3618 SDValue N1 = N->getOperand(1);
3620 // Collect the shifts / magic values from each element.
3621 if (!ISD::matchUnaryPredicate(N1, BuildSDIVPattern))
3622 return SDValue();
3624 SDValue MagicFactor, Factor, Shift, ShiftMask;
3625 if (VT.isVector()) {
3626 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
3627 Factor = DAG.getBuildVector(VT, dl, Factors);
3628 Shift = DAG.getBuildVector(ShVT, dl, Shifts);
3629 ShiftMask = DAG.getBuildVector(VT, dl, ShiftMasks);
3630 } else {
3631 MagicFactor = MagicFactors[0];
3632 Factor = Factors[0];
3633 Shift = Shifts[0];
3634 ShiftMask = ShiftMasks[0];
3635 }
3637 // Multiply the numerator (operand 0) by the magic value.
3638 // FIXME: We should support doing a MUL in a wider type.
3639 SDValue Q;
3640 if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT)
3641 : isOperationLegalOrCustom(ISD::MULHS, VT))
3642 Q = DAG.getNode(ISD::MULHS, dl, VT, N0, MagicFactor);
3643 else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT)
3644 : isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
3645 SDValue LoHi =
3646 DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N0, MagicFactor);
3647 Q = SDValue(LoHi.getNode(), 1);
3648 } else
3649 return SDValue(); // No mulhs or equivalent.
3650 Created.push_back(Q.getNode());
3652 // (Optionally) Add/subtract the numerator using Factor.
3653 Factor = DAG.getNode(ISD::MUL, dl, VT, N0, Factor);
3654 Created.push_back(Factor.getNode());
3655 Q = DAG.getNode(ISD::ADD, dl, VT, Q, Factor);
3656 Created.push_back(Q.getNode());
3658 // Shift right algebraic by shift value.
3659 Q = DAG.getNode(ISD::SRA, dl, VT, Q, Shift);
3660 Created.push_back(Q.getNode());
3662 // Extract the sign bit, mask it and add it to the quotient.
3663 SDValue SignShift = DAG.getConstant(EltBits - 1, dl, ShVT);
3664 SDValue T = DAG.getNode(ISD::SRL, dl, VT, Q, SignShift);
3665 Created.push_back(T.getNode());
3666 T = DAG.getNode(ISD::AND, dl, VT, T, ShiftMask);
3667 Created.push_back(T.getNode());
3668 return DAG.getNode(ISD::ADD, dl, VT, Q, T);
3669 }
3671 /// Given an ISD::UDIV node expressing a divide by constant,
3672 /// return a DAG expression to select that will generate the same value by
3673 /// multiplying by a magic number.
3674 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
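// Worked example (illustrative): for "udiv i32 %x, 7", magicu() gives
// m = 0x24924925 with magics.a != 0, so the NPQ fixup path below is used:
//   %q = mulhu i32 %x, 0x24924925
//   %n = sub i32 %x, %q
//   %n = srl i32 %n, 1
//   %q = add i32 %n, %q
//   %q = srl i32 %q, 2            ; PostShift == magics.s - 1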
3675 SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
3676 bool IsAfterLegalization,
3677 SmallVectorImpl<SDNode *> &Created) const {
3678 SDLoc dl(N);
3679 EVT VT = N->getValueType(0);
3680 EVT SVT = VT.getScalarType();
3681 EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
3682 EVT ShSVT = ShVT.getScalarType();
3683 unsigned EltBits = VT.getScalarSizeInBits();
3685 // Check to see if we can do this.
3686 // FIXME: We should be more aggressive here.
3687 if (!isTypeLegal(VT))
3688 return SDValue();
3690 bool UseNPQ = false;
3691 SmallVector<SDValue, 16> PreShifts, PostShifts, MagicFactors, NPQFactors;
3693 auto BuildUDIVPattern = [&](ConstantSDNode *C) {
3694 if (C->isNullValue())
3695 return false;
3696 // FIXME: We should use a narrower constant when the upper
3697 // bits are known to be zero.
3698 APInt Divisor = C->getAPIntValue();
3699 APInt::mu magics = Divisor.magicu();
3700 unsigned PreShift = 0, PostShift = 0;
3702 // If the divisor is even, we can avoid using the expensive fixup by
3703 // shifting the divided value upfront.
3704 if (magics.a != 0 && !Divisor[0]) {
3705 PreShift = Divisor.countTrailingZeros();
3706 // Get magic number for the shifted divisor.
3707 magics = Divisor.lshr(PreShift).magicu(PreShift);
3708 assert(magics.a == 0 && "Should use cheap fixup now");
3709 }
3711 APInt Magic = magics.m;
3713 bool SelNPQ;
3714 if (magics.a == 0 || Divisor.isOneValue()) {
3715 assert(magics.s < Divisor.getBitWidth() &&
3716 "We shouldn't generate an undefined shift!");
3717 PostShift = magics.s;
3718 SelNPQ = false;
3719 } else {
3720 PostShift = magics.s - 1;
3721 SelNPQ = true;
3722 }
3724 PreShifts.push_back(DAG.getConstant(PreShift, dl, ShSVT));
3725 MagicFactors.push_back(DAG.getConstant(Magic, dl, SVT));
3726 NPQFactors.push_back(
3727 DAG.getConstant(SelNPQ ? APInt::getOneBitSet(EltBits, EltBits - 1)
3728 : APInt::getNullValue(EltBits),
3729 dl, SVT));
3730 PostShifts.push_back(DAG.getConstant(PostShift, dl, ShSVT));
3731 UseNPQ |= SelNPQ;
3732 return true;
3733 };
3735 SDValue N0 = N->getOperand(0);
3736 SDValue N1 = N->getOperand(1);
3738 // Collect the shifts/magic values from each element.
3739 if (!ISD::matchUnaryPredicate(N1, BuildUDIVPattern))
3740 return SDValue();
3742 SDValue PreShift, PostShift, MagicFactor, NPQFactor;
3743 if (VT.isVector()) {
3744 PreShift = DAG.getBuildVector(ShVT, dl, PreShifts);
3745 MagicFactor = DAG.getBuildVector(VT, dl, MagicFactors);
3746 NPQFactor = DAG.getBuildVector(VT, dl, NPQFactors);
3747 PostShift = DAG.getBuildVector(ShVT, dl, PostShifts);
3748 } else {
3749 PreShift = PreShifts[0];
3750 MagicFactor = MagicFactors[0];
3751 PostShift = PostShifts[0];
3752 }
3754 SDValue Q = N0;
3755 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PreShift);
3756 Created.push_back(Q.getNode());
3758 // FIXME: We should support doing a MUL in a wider type.
3759 auto GetMULHU = [&](SDValue X, SDValue Y) {
3760 if (IsAfterLegalization ? isOperationLegal(ISD::MULHU, VT)
3761 : isOperationLegalOrCustom(ISD::MULHU, VT))
3762 return DAG.getNode(ISD::MULHU, dl, VT, X, Y);
3763 if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT)
3764 : isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) {
3765 SDValue LoHi =
3766 DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), X, Y);
3767 return SDValue(LoHi.getNode(), 1);
3768 }
3769 return SDValue(); // No mulhu or equivalent
3770 };
3772 // Multiply the numerator (operand 0) by the magic value.
3773 Q = GetMULHU(Q, MagicFactor);
3774 if (!Q)
3775 return SDValue();
3777 Created.push_back(Q.getNode());
3779 if (UseNPQ) {
3780 SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N0, Q);
3781 Created.push_back(NPQ.getNode());
3783 // For vectors we might have a mix of non-NPQ/NPQ paths, so use
3784 // MULHU to act as a SRL-by-1 for NPQ, else multiply by zero.
3785 if (VT.isVector())
3786 NPQ = GetMULHU(NPQ, NPQFactor);
3787 else
3788 NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ, DAG.getConstant(1, dl, ShVT));
3790 Created.push_back(NPQ.getNode());
3792 Q = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
3793 Created.push_back(Q.getNode());
3794 }
3796 Q = DAG.getNode(ISD::SRL, dl, VT, Q, PostShift);
3797 Created.push_back(Q.getNode());
3799 SDValue One = DAG.getConstant(1, dl, VT);
3800 SDValue IsOne = DAG.getSetCC(dl, VT, N1, One, ISD::SETEQ);
3801 return DAG.getSelect(dl, VT, IsOne, N0, Q);
3802 }
3804 bool TargetLowering::
3805 verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
3806 if (!isa<ConstantSDNode>(Op.getOperand(0))) {
3807 DAG.getContext()->emitError("argument to '__builtin_return_address' must "
3808 "be a constant integer");
3809 return true;
3810 }
3812 return false;
3813 }
3815 //===----------------------------------------------------------------------===//
3816 // Legalization Utilities
3817 //===----------------------------------------------------------------------===//
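// A sketch of the decomposition used by expandMUL_LOHI below (illustrative):
// splitting i64 operands into i32 halves with K = 32,
//   LHS = LH * 2^K + LL,  RHS = RH * 2^K + RL
//   LHS * RHS = LL*RL + (LL*RH + LH*RL) * 2^K + LH*RH * 2^(2K)
// The low product is formed with MULHU/UMUL_LOHI, and the cross and high
// terms are added into the upper half with explicit carry handling.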
3819 bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
3820 SDValue LHS, SDValue RHS,
3821 SmallVectorImpl<SDValue> &Result,
3822 EVT HiLoVT, SelectionDAG &DAG,
3823 MulExpansionKind Kind, SDValue LL,
3824 SDValue LH, SDValue RL, SDValue RH) const {
3825 assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
3826 Opcode == ISD::SMUL_LOHI);
3828 bool HasMULHS = (Kind == MulExpansionKind::Always) ||
3829 isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
3830 bool HasMULHU = (Kind == MulExpansionKind::Always) ||
3831 isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
3832 bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
3833 isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
3834 bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
3835 isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
3837 if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
3838 return false;
3840 unsigned OuterBitSize = VT.getScalarSizeInBits();
3841 unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
3842 unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
3843 unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
3845 // LL, LH, RL, and RH must be either all NULL or all set to a value.
3846 assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
3847 (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
3849 SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
3850 auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
3851 bool Signed) -> bool {
3852 if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
3853 Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
3854 Hi = SDValue(Lo.getNode(), 1);
3855 return true;
3856 }
3857 if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
3858 Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
3859 Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
3860 return true;
3861 }
3862 return false;
3863 };
3865 SDValue Lo, Hi;
3867 if (!LL.getNode() && !RL.getNode() &&
3868 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
3869 LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
3870 RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
3871 }
3873 if (!LL.getNode())
3874 return false;
3876 APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
3877 if (DAG.MaskedValueIsZero(LHS, HighMask) &&
3878 DAG.MaskedValueIsZero(RHS, HighMask)) {
3879 // The inputs are both zero-extended.
3880 if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
3881 Result.push_back(Lo);
3882 Result.push_back(Hi);
3883 if (Opcode != ISD::MUL) {
3884 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
3885 Result.push_back(Zero);
3886 Result.push_back(Zero);
3887 }
3888 return true;
3889 }
3890 }
3892 if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
3893 RHSSB > InnerBitSize) {
3894 // The input values are both sign-extended.
3895 // TODO non-MUL case?
3896 if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
3897 Result.push_back(Lo);
3898 Result.push_back(Hi);
3899 return true;
3900 }
3901 }
3903 unsigned ShiftAmount = OuterBitSize - InnerBitSize;
3904 EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
3905 if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
3906 // FIXME getShiftAmountTy does not always return a sensible result when VT
3907 // is an illegal type, and so the type may be too small to fit the shift
3908 // amount. Override it with i32. The shift will have to be legalized.
3909 ShiftAmountTy = MVT::i32;
3910 }
3911 SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
3913 if (!LH.getNode() && !RH.getNode() &&
3914 isOperationLegalOrCustom(ISD::SRL, VT) &&
3915 isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
3916 LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
3917 LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
3918 RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
3919 RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
3920 }
3922 if (!LH.getNode())
3923 return false;
3925 if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
3926 return false;
3928 Result.push_back(Lo);
3930 if (Opcode == ISD::MUL) {
3931 RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
3932 LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
3933 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
3934 Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
3935 Result.push_back(Hi);
3936 return true;
3937 }
3939 // Compute the full width result.
3940 auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
3941 Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
3942 Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
3943 Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
3944 return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
3945 };
3947 SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
3948 if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
3949 return false;
3951 // This is effectively the add part of a multiply-add of half-sized operands,
3952 // so it cannot overflow.
3953 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
3955 if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
3956 return false;
3958 Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
3959 Merge(Lo, Hi));
3961 SDValue Carry = Next.getValue(1);
3962 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3963 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
3965 if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
3966 return false;
3968 SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
3969 Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
3970 Carry);
3971 Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
3973 if (Opcode == ISD::SMUL_LOHI) {
3974 SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
3975 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
3976 Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
3978 NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
3979 DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
3980 Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
3981 }
3983 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3984 Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
3985 Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
3986 return true;
3987 }
3989 bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
3990 SelectionDAG &DAG, MulExpansionKind Kind,
3991 SDValue LL, SDValue LH, SDValue RL,
3992 SDValue RH) const {
3993 SmallVector<SDValue, 2> Result;
3994 bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
3995 N->getOperand(0), N->getOperand(1), Result, HiLoVT,
3996 DAG, Kind, LL, LH, RL, RH);
3997 if (Ok) {
3998 assert(Result.size() == 2);
3999 Lo = Result[0];
4000 Hi = Result[1];
4001 }
4002 return Ok;
4003 }
4005 bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
4006 SelectionDAG &DAG) const {
4007 EVT VT = Node->getOperand(0).getValueType();
4008 EVT NVT = Node->getValueType(0);
4009 SDLoc dl(SDValue(Node, 0));
4011 // FIXME: Only f32 to i64 conversions are supported.
4012 if (VT != MVT::f32 || NVT != MVT::i64)
4013 return false;
4015 // Expand f32 -> i64 conversion
4016 // This algorithm comes from compiler-rt's implementation of fixsfdi:
4017 // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
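// Worked example (illustrative): 5.0f has bit pattern 0x40A00000, so
// ExponentBits == 0x81 and Exponent == 129 - 127 == 2. The implicit-bit
// mantissa R == 0x00A00000 == 5 * 2^21, and since Exponent < 23 the
// select below computes R >> (23 - 2) == 5; the sign fixup is a no-op.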
4018 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(),
4019 VT.getSizeInBits());
4020 SDValue ExponentMask = DAG.getConstant(0x7F800000, dl, IntVT);
4021 SDValue ExponentLoBit = DAG.getConstant(23, dl, IntVT);
4022 SDValue Bias = DAG.getConstant(127, dl, IntVT);
4023 SDValue SignMask = DAG.getConstant(APInt::getSignMask(VT.getSizeInBits()), dl,
4024 IntVT);
4025 SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, dl, IntVT);
4026 SDValue MantissaMask = DAG.getConstant(0x007FFFFF, dl, IntVT);
4028 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0));
4030 auto &DL = DAG.getDataLayout();
4031 SDValue ExponentBits = DAG.getNode(
4032 ISD::SRL, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask),
4033 DAG.getZExtOrTrunc(ExponentLoBit, dl, getShiftAmountTy(IntVT, DL)));
4034 SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias);
4036 SDValue Sign = DAG.getNode(
4037 ISD::SRA, dl, IntVT, DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask),
4038 DAG.getZExtOrTrunc(SignLowBit, dl, getShiftAmountTy(IntVT, DL)));
4039 Sign = DAG.getSExtOrTrunc(Sign, dl, NVT);
4041 SDValue R = DAG.getNode(ISD::OR, dl, IntVT,
4042 DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask),
4043 DAG.getConstant(0x00800000, dl, IntVT));
4045 R = DAG.getZExtOrTrunc(R, dl, NVT);
4047 R = DAG.getSelectCC(
4048 dl, Exponent, ExponentLoBit,
4049 DAG.getNode(ISD::SHL, dl, NVT, R,
4050 DAG.getZExtOrTrunc(
4051 DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit),
4052 dl, getShiftAmountTy(IntVT, DL))),
4053 DAG.getNode(ISD::SRL, dl, NVT, R,
4054 DAG.getZExtOrTrunc(
4055 DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent),
4056 dl, getShiftAmountTy(IntVT, DL))),
4057 ISD::SETGT);
4059 SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT,
4060 DAG.getNode(ISD::XOR, dl, NVT, R, Sign),
4061 Sign);
4063 Result = DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, dl, IntVT),
4064 DAG.getConstant(0, dl, NVT), Ret, ISD::SETLT);
4065 return true;
4066 }
4068 SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
4069 SelectionDAG &DAG) const {
4070 SDLoc SL(LD);
4071 SDValue Chain = LD->getChain();
4072 SDValue BasePTR = LD->getBasePtr();
4073 EVT SrcVT = LD->getMemoryVT();
4074 ISD::LoadExtType ExtType = LD->getExtensionType();
4076 unsigned NumElem = SrcVT.getVectorNumElements();
4078 EVT SrcEltVT = SrcVT.getScalarType();
4079 EVT DstEltVT = LD->getValueType(0).getScalarType();
4081 unsigned Stride = SrcEltVT.getSizeInBits() / 8;
4082 assert(SrcEltVT.isByteSized());
4084 SmallVector<SDValue, 8> Vals;
4085 SmallVector<SDValue, 8> LoadChains;
4087 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4088 SDValue ScalarLoad =
4089 DAG.getExtLoad(ExtType, SL, DstEltVT, Chain, BasePTR,
4090 LD->getPointerInfo().getWithOffset(Idx * Stride),
4091 SrcEltVT, MinAlign(LD->getAlignment(), Idx * Stride),
4092 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4094 BasePTR = DAG.getObjectPtrOffset(SL, BasePTR, Stride);
4096 Vals.push_back(ScalarLoad.getValue(0));
4097 LoadChains.push_back(ScalarLoad.getValue(1));
4098 }
4100 SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
4101 SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
4103 return DAG.getMergeValues({ Value, NewChain }, SL);
4104 }
4106 SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
4107 SelectionDAG &DAG) const {
4108 SDLoc SL(ST);
4110 SDValue Chain = ST->getChain();
4111 SDValue BasePtr = ST->getBasePtr();
4112 SDValue Value = ST->getValue();
4113 EVT StVT = ST->getMemoryVT();
4115 // The type of the data we want to save
4116 EVT RegVT = Value.getValueType();
4117 EVT RegSclVT = RegVT.getScalarType();
4119 // The type of data as saved in memory.
4120 EVT MemSclVT = StVT.getScalarType();
4122 EVT IdxVT = getVectorIdxTy(DAG.getDataLayout());
4123 unsigned NumElem = StVT.getVectorNumElements();
4125 // A vector must always be stored in memory as-is, i.e. without any padding
4127 // between the elements, since various code depends on it, e.g. in the
4127 // handling of a bitcast of a vector type to int, which may be done with a
4128 // vector store followed by an integer load. A vector that does not have
4129 // elements that are byte-sized must therefore be stored as an integer
4130 // built out of the extracted vector elements.
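// For example (illustrative): storing <4 x i1> <1, 0, 1, 1> on a
// little-endian target packs the elements into one integer,
//   val = 1 | (0 << 1) | (1 << 2) | (1 << 3) == 0b1101,
// and emits a single i4 store of that value.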
4131 if (!MemSclVT.isByteSized()) {
4132 unsigned NumBits = StVT.getSizeInBits();
4133 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
4135 SDValue CurrVal = DAG.getConstant(0, SL, IntVT);
4137 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4138 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
4139 DAG.getConstant(Idx, SL, IdxVT));
4140 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MemSclVT, Elt);
4141 SDValue ExtElt = DAG.getNode(ISD::ZERO_EXTEND, SL, IntVT, Trunc);
4142 unsigned ShiftIntoIdx =
4143 (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
4144 SDValue ShiftAmount =
4145 DAG.getConstant(ShiftIntoIdx * MemSclVT.getSizeInBits(), SL, IntVT);
4146 SDValue ShiftedElt =
4147 DAG.getNode(ISD::SHL, SL, IntVT, ExtElt, ShiftAmount);
4148 CurrVal = DAG.getNode(ISD::OR, SL, IntVT, CurrVal, ShiftedElt);
4149 }
4151 return DAG.getStore(Chain, SL, CurrVal, BasePtr, ST->getPointerInfo(),
4152 ST->getAlignment(), ST->getMemOperand()->getFlags(),
4153 ST->getAAInfo());
4154 }
4156 // Store Stride in bytes
4157 unsigned Stride = MemSclVT.getSizeInBits() / 8;
4158 assert(Stride && "Zero stride!");
4159 // Extract each of the elements from the original vector and save them into
4160 // memory individually.
4161 SmallVector<SDValue, 8> Stores;
4162 for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
4163 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, RegSclVT, Value,
4164 DAG.getConstant(Idx, SL, IdxVT));
4166 SDValue Ptr = DAG.getObjectPtrOffset(SL, BasePtr, Idx * Stride);
4168 // This scalar TruncStore may be illegal, but we legalize it later.
4169 SDValue Store = DAG.getTruncStore(
4170 Chain, SL, Elt, Ptr, ST->getPointerInfo().getWithOffset(Idx * Stride),
4171 MemSclVT, MinAlign(ST->getAlignment(), Idx * Stride),
4172 ST->getMemOperand()->getFlags(), ST->getAAInfo());
4174 Stores.push_back(Store);
4175 }
4177 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Stores);
4178 }
4180 std::pair<SDValue, SDValue>
4181 TargetLowering::expandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG) const {
4182 assert(LD->getAddressingMode() == ISD::UNINDEXED &&
4183 "unaligned indexed loads not implemented!");
4184 SDValue Chain = LD->getChain();
4185 SDValue Ptr = LD->getBasePtr();
4186 EVT VT = LD->getValueType(0);
4187 EVT LoadedVT = LD->getMemoryVT();
4188 SDLoc dl(LD);
4189 auto &MF = DAG.getMachineFunction();
4191 if (VT.isFloatingPoint() || VT.isVector()) {
4192 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
4193 if (isTypeLegal(intVT) && isTypeLegal(LoadedVT)) {
4194 if (!isOperationLegalOrCustom(ISD::LOAD, intVT) &&
4195 LoadedVT.isVector()) {
4196 // Scalarize the load and let the individual components be handled.
4197 SDValue Scalarized = scalarizeVectorLoad(LD, DAG);
4198 if (Scalarized->getOpcode() == ISD::MERGE_VALUES)
4199 return std::make_pair(Scalarized.getOperand(0), Scalarized.getOperand(1));
4200 return std::make_pair(Scalarized.getValue(0), Scalarized.getValue(1));
4201 }
4203 // Expand to a (misaligned) integer load of the same size,
4204 // then bitconvert to floating point or vector.
4205 SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
4206 LD->getMemOperand());
4207 SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
4208 if (LoadedVT != VT)
4209 Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
4210 ISD::ANY_EXTEND, dl, VT, Result);
4212 return std::make_pair(Result, newLoad.getValue(1));
4213 }
4215 // Copy the value to an (aligned) stack slot using (unaligned) integer
4216 // loads and stores, then do a (aligned) load from the stack slot.
4217 MVT RegVT = getRegisterType(*DAG.getContext(), intVT);
4218 unsigned LoadedBytes = LoadedVT.getStoreSize();
4219 unsigned RegBytes = RegVT.getSizeInBits() / 8;
4220 unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
4222 // Make sure the stack slot is also aligned for the register type.
4223 SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
4224 auto FrameIndex = cast<FrameIndexSDNode>(StackBase.getNode())->getIndex();
4225 SmallVector<SDValue, 8> Stores;
4226 SDValue StackPtr = StackBase;
4227 unsigned Offset = 0;
4229 EVT PtrVT = Ptr.getValueType();
4230 EVT StackPtrVT = StackPtr.getValueType();
4232 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
4233 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
4235 // Do all but one copies using the full register width.
4236 for (unsigned i = 1; i < NumRegs; i++) {
4237 // Load one integer register's worth from the original location.
4238 SDValue Load = DAG.getLoad(
4239 RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset),
4240 MinAlign(LD->getAlignment(), Offset), LD->getMemOperand()->getFlags(),
4241 LD->getAAInfo());
4242 // Follow the load with a store to the stack slot. Remember the store.
4243 Stores.push_back(DAG.getStore(
4244 Load.getValue(1), dl, Load, StackPtr,
4245 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset)));
4246 // Increment the pointers.
4247 Offset += RegBytes;
4249 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
4250 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
4251 }
4253 // The last copy may be partial. Do an extending load.
4254 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
4255 8 * (LoadedBytes - Offset));
4256 SDValue Load =
4257 DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
4258 LD->getPointerInfo().getWithOffset(Offset), MemVT,
4259 MinAlign(LD->getAlignment(), Offset),
4260 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4261 // Follow the load with a store to the stack slot. Remember the store.
4262 // On big-endian machines this requires a truncating store to ensure
4263 // that the bits end up in the right place.
4264 Stores.push_back(DAG.getTruncStore(
4265 Load.getValue(1), dl, Load, StackPtr,
4266 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT));
4268 // The order of the stores doesn't matter - say it with a TokenFactor.
4269 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
4271 // Finally, perform the original load only redirected to the stack slot.
4272 Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
4273 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0),
4274 LoadedVT);
4276 // Callers expect a MERGE_VALUES node.
4277 return std::make_pair(Load, TF);
4278 }
4280 assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
4281 "Unaligned load of unsupported type.");
4283 // Compute the new VT that is half the size of the old one. This is an
4284 // integer MVT.
4285 unsigned NumBits = LoadedVT.getSizeInBits();
4286 EVT NewLoadedVT;
4287 NewLoadedVT = EVT::getIntegerVT(*DAG.getContext(), NumBits/2);
4288 NumBits >>= 1;
4290 unsigned Alignment = LD->getAlignment();
4291 unsigned IncrementSize = NumBits / 8;
4292 ISD::LoadExtType HiExtType = LD->getExtensionType();
4294 // If the original load is NON_EXTLOAD, the hi part load must be ZEXTLOAD.
4295 if (HiExtType == ISD::NON_EXTLOAD)
4296 HiExtType = ISD::ZEXTLOAD;
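// A sketch of the split (illustrative): an unaligned "load i32, align 2"
// on a little-endian target becomes
//   Lo = zextload i16 @ Ptr
//   Hi = HiExtType load i16 @ Ptr + 2
//   Result = or (shl Hi, 16), Lo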
4298 // Load the value in two parts
4299 SDValue Lo, Hi;
4300 if (DAG.getDataLayout().isLittleEndian()) {
4301 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
4302 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
4303 LD->getAAInfo());
4305 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4306 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
4307 LD->getPointerInfo().getWithOffset(IncrementSize),
4308 NewLoadedVT, MinAlign(Alignment, IncrementSize),
4309 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4310 } else {
4311 Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
4312 NewLoadedVT, Alignment, LD->getMemOperand()->getFlags(),
4313 LD->getAAInfo());
4315 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4316 Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
4317 LD->getPointerInfo().getWithOffset(IncrementSize),
4318 NewLoadedVT, MinAlign(Alignment, IncrementSize),
4319 LD->getMemOperand()->getFlags(), LD->getAAInfo());
4320 }
4322 // Aggregate the two parts.
4323 SDValue ShiftAmount =
4324 DAG.getConstant(NumBits, dl, getShiftAmountTy(Hi.getValueType(),
4325 DAG.getDataLayout()));
4326 SDValue Result = DAG.getNode(ISD::SHL, dl, VT, Hi, ShiftAmount);
4327 Result = DAG.getNode(ISD::OR, dl, VT, Result, Lo);
4329 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
4330 Hi.getValue(1));
4332 return std::make_pair(Result, TF);
4333 }
4335 SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
4336 SelectionDAG &DAG) const {
4337 assert(ST->getAddressingMode() == ISD::UNINDEXED &&
4338 "unaligned indexed stores not implemented!");
4339 SDValue Chain = ST->getChain();
4340 SDValue Ptr = ST->getBasePtr();
4341 SDValue Val = ST->getValue();
4342 EVT VT = Val.getValueType();
4343 int Alignment = ST->getAlignment();
4344 auto &MF = DAG.getMachineFunction();
4345 EVT MemVT = ST->getMemoryVT();
4347 SDLoc dl(ST);
4348 if (MemVT.isFloatingPoint() || MemVT.isVector()) {
4349 EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
4350 if (isTypeLegal(intVT)) {
4351 if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
4352 MemVT.isVector()) {
4353 // Scalarize the store and let the individual components be handled.
4354 SDValue Result = scalarizeVectorStore(ST, DAG);
4356 return Result;
4357 }
4358 // Expand to a bitconvert of the value to the integer type of the
4359 // same size, then a (misaligned) int store.
4360 // FIXME: Does not handle truncating floating point stores!
4361 SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
4362 Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
4363 Alignment, ST->getMemOperand()->getFlags());
4364 return Result;
4365 }
4366 // Do an (aligned) store to a stack slot, then copy from the stack slot
4367 // to the final destination using (unaligned) integer loads and stores.
4368 EVT StoredVT = ST->getMemoryVT();
4369 MVT RegVT =
4370 getRegisterType(*DAG.getContext(),
4371 EVT::getIntegerVT(*DAG.getContext(),
4372 StoredVT.getSizeInBits()));
4373 EVT PtrVT = Ptr.getValueType();
4374 unsigned StoredBytes = StoredVT.getStoreSize();
4375 unsigned RegBytes = RegVT.getSizeInBits() / 8;
4376 unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
4378 // Make sure the stack slot is also aligned for the register type.
4379 SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
4380 auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
4382 // Perform the original store, only redirected to the stack slot.
4383 SDValue Store = DAG.getTruncStore(
4384 Chain, dl, Val, StackPtr,
4385 MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
4387 EVT StackPtrVT = StackPtr.getValueType();
4389 SDValue PtrIncrement = DAG.getConstant(RegBytes, dl, PtrVT);
4390 SDValue StackPtrIncrement = DAG.getConstant(RegBytes, dl, StackPtrVT);
4391 SmallVector<SDValue, 8> Stores;
4392 unsigned Offset = 0;
4394 // Do all but one copies using the full register width.
4395 for (unsigned i = 1; i < NumRegs; i++) {
4396 // Load one integer register's worth from the stack slot.
4397 SDValue Load = DAG.getLoad(
4398 RegVT, dl, Store, StackPtr,
4399 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset));
4400 // Store it to the final location. Remember the store.
4401 Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
4402 ST->getPointerInfo().getWithOffset(Offset),
4403 MinAlign(ST->getAlignment(), Offset),
4404 ST->getMemOperand()->getFlags()));
4405 // Increment the pointers.
4406 Offset += RegBytes;
4407 StackPtr = DAG.getObjectPtrOffset(dl, StackPtr, StackPtrIncrement);
4408 Ptr = DAG.getObjectPtrOffset(dl, Ptr, PtrIncrement);
4409 }
4411 // The last store may be partial. Do a truncating store. On big-endian
4412 // machines this requires an extending load from the stack slot to ensure
4413 // that the bits are in the right place.
4414 EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
4415 8 * (StoredBytes - Offset));
4417 // Load from the stack slot.
4418 SDValue Load = DAG.getExtLoad(
4419 ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
4420 MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
4422 Stores.push_back(
4423 DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
4424 ST->getPointerInfo().getWithOffset(Offset), MemVT,
4425 MinAlign(ST->getAlignment(), Offset),
4426 ST->getMemOperand()->getFlags(), ST->getAAInfo()));
4427 // The order of the stores doesn't matter - say it with a TokenFactor.
4428 SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
4429 return Result;
4430 }
4432 assert(ST->getMemoryVT().isInteger() &&
4433 !ST->getMemoryVT().isVector() &&
4434 "Unaligned store of unknown type.");
4435 // Get the half-size VT
4436 EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
4437 int NumBits = NewStoredVT.getSizeInBits();
4438 int IncrementSize = NumBits / 8;
4440 // Divide the stored value in two parts.
4441 SDValue ShiftAmount =
4442 DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
4443 DAG.getDataLayout()));
4444 SDValue Lo = Val;
4445 SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
4447 // Store the two parts
4448 SDValue Store1, Store2;
4449 Store1 = DAG.getTruncStore(Chain, dl,
4450 DAG.getDataLayout().isLittleEndian() ? Lo : Hi,
4451 Ptr, ST->getPointerInfo(), NewStoredVT, Alignment,
4452 ST->getMemOperand()->getFlags());
4454 Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
4455 Alignment = MinAlign(Alignment, IncrementSize);
4456 Store2 = DAG.getTruncStore(
4457 Chain, dl, DAG.getDataLayout().isLittleEndian() ? Hi : Lo, Ptr,
4458 ST->getPointerInfo().getWithOffset(IncrementSize), NewStoredVT, Alignment,
4459 ST->getMemOperand()->getFlags(), ST->getAAInfo());
4461 SDValue Result =
4462 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
4463 return Result;
4464 }
4466 SDValue
4467 TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
4468 const SDLoc &DL, EVT DataVT,
4469 SelectionDAG &DAG,
4470 bool IsCompressedMemory) const {
4471 SDValue Increment;
4472 EVT AddrVT = Addr.getValueType();
4473 EVT MaskVT = Mask.getValueType();
4474 assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
4475 "Incompatible types of Data and Mask");
4476 if (IsCompressedMemory) {
4477 // Incrementing the pointer according to number of '1's in the mask.
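// For example (illustrative): compress-storing v4i32 under the mask
// <1,1,0,1> advances the pointer by popcount(mask) * 4 == 12 bytes.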
4478 EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
4479 SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
4480 if (MaskIntVT.getSizeInBits() < 32) {
4481 MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
4482 MaskIntVT = MVT::i32;
4483 }
4485 // Count '1's with POPCNT.
4486 Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
4487 Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
4488 // Scale is an element size in bytes.
4489 SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
4490 AddrVT);
4491 Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
4492 } else
4493 Increment = DAG.getConstant(DataVT.getStoreSize(), DL, AddrVT);
4495 return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
4496 }
4498 static SDValue clampDynamicVectorIndex(SelectionDAG &DAG,
4499 SDValue Idx,
4500 EVT VecVT,
4501 const SDLoc &dl) {
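// For example (illustrative): with an 8-element vector, a variable index is
// clamped as "Idx & 7" (power-of-two case below); with a 7-element vector it
// becomes "umin(Idx, 6)".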
4502 if (isa<ConstantSDNode>(Idx))
4503 return Idx;
4505 EVT IdxVT = Idx.getValueType();
4506 unsigned NElts = VecVT.getVectorNumElements();
4507 if (isPowerOf2_32(NElts)) {
4508 APInt Imm = APInt::getLowBitsSet(IdxVT.getSizeInBits(),
4509 Log2_32(NElts));
4510 return DAG.getNode(ISD::AND, dl, IdxVT, Idx,
4511 DAG.getConstant(Imm, dl, IdxVT));
4512 }
4514 return DAG.getNode(ISD::UMIN, dl, IdxVT, Idx,
4515 DAG.getConstant(NElts - 1, dl, IdxVT));
4516 }
4518 SDValue TargetLowering::getVectorElementPointer(SelectionDAG &DAG,
4519 SDValue VecPtr, EVT VecVT,
4520 SDValue Index) const {
4521 SDLoc dl(Index);
4522 // Make sure the index type is big enough to compute in.
4523 Index = DAG.getZExtOrTrunc(Index, dl, VecPtr.getValueType());
4525 EVT EltVT = VecVT.getVectorElementType();
4527 // Calculate the element offset and add it to the pointer.
4528 unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size.
4529 assert(EltSize * 8 == EltVT.getSizeInBits() &&
4530 "Converting bits to bytes lost precision");
4532 Index = clampDynamicVectorIndex(DAG, Index, VecVT, dl);
4534 EVT IdxVT = Index.getValueType();
4536 Index = DAG.getNode(ISD::MUL, dl, IdxVT, Index,
4537 DAG.getConstant(EltSize, dl, IdxVT));
4538 return DAG.getNode(ISD::ADD, dl, IdxVT, VecPtr, Index);
4539 }
4541 //===----------------------------------------------------------------------===//
4542 // Implementation of Emulated TLS Model
4543 //===----------------------------------------------------------------------===//
4545 SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
4546 SelectionDAG &DAG) const {
4547 // Access to the address of TLS variable xyz is lowered to a function call:
4548 // __emutls_get_address( address of global variable named "__emutls_v.xyz" )
4549 EVT PtrVT = getPointerTy(DAG.getDataLayout());
4550 PointerType *VoidPtrType = Type::getInt8PtrTy(*DAG.getContext());
4551 SDLoc dl(GA);
4553 ArgListTy Args;
4554 ArgListEntry Entry;
4555 std::string NameString = ("__emutls_v." + GA->getGlobal()->getName()).str();
4556 Module *VariableModule = const_cast<Module*>(GA->getGlobal()->getParent());
4557 StringRef EmuTlsVarName(NameString);
4558 GlobalVariable *EmuTlsVar = VariableModule->getNamedGlobal(EmuTlsVarName);
4559 assert(EmuTlsVar && "Cannot find EmuTlsVar ");
4560 Entry.Node = DAG.getGlobalAddress(EmuTlsVar, dl, PtrVT);
4561 Entry.Ty = VoidPtrType;
4562 Args.push_back(Entry);
4564 SDValue EmuTlsGetAddr = DAG.getExternalSymbol("__emutls_get_address", PtrVT);
4566 TargetLowering::CallLoweringInfo CLI(DAG);
4567 CLI.setDebugLoc(dl).setChain(DAG.getEntryNode());
4568 CLI.setLibCallee(CallingConv::C, VoidPtrType, EmuTlsGetAddr, std::move(Args));
4569 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
4571 // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
4572 // At least for X86 targets; maybe good for other targets too?
4573 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
4574 MFI.setAdjustsStack(true); // Is this only for X86 target?
4575 MFI.setHasCalls(true);
4577 assert((GA->getOffset() == 0) &&
4578 "Emulated TLS must have zero offset in GlobalAddressSDNode");
4579 return CallResult.first;
4580 }
4582 SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
4583 SelectionDAG &DAG) const {
4584 assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
4585 if (!isCtlzFast())
4586 return SDValue();
4587 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
4588 SDLoc dl(Op);
4589 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
4590 if (C->isNullValue() && CC == ISD::SETEQ) {
4591 EVT VT = Op.getOperand(0).getValueType();
4592 SDValue Zext = Op.getOperand(0);
4593 if (VT.bitsLT(MVT::i32)) {
4594 VT = MVT::i32;
4595 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
4596 }
4597 unsigned Log2b = Log2_32(VT.getSizeInBits());
4598 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
4599 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
4600 DAG.getConstant(Log2b, dl, MVT::i32));
4601 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
4602 }
4603 }
4604 return SDValue();
4605 }
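// Worked example (illustrative): for i32, the rewrite above turns
// (seteq %x, 0) into (srl (ctlz %x), 5): ctlz(0) == 32 == 0b100000, so the
// shift produces 1, while any nonzero %x has ctlz(%x) <= 31, producing 0.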